Merge branch 'mh/notes-duplicate-entries'
[git/git.git] / git-archimport.perl
CommitLineData
3328aced 1#!/usr/bin/perl
d3968363
ML
2#
3# This tool is copyright (c) 2005, Martin Langhoff.
4# It is released under the Gnu Public License, version 2.
5#
a6080a0a
JH
6# The basic idea is to walk the output of tla abrowse,
7# fetch the changesets and apply them.
d3968363 8#
241b5967 9
d3968363
ML
10=head1 Invocation
11
1b1dd23f 12 git archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ]
a6080a0a 13 [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
d3968363 14
241b5967
ML
15Imports a project from one or more Arch repositories. It will follow branches
16and repositories within the namespaces defined by the <archive/branch>
82e5a82f 17parameters supplied. If it cannot find the remote branch a merge comes from
a6080a0a 18it will just import it as a regular commit. If it can find it, it will mark it
241b5967 19as a merge whenever possible.
d3968363 20
241b5967 21See man (1) git-archimport for more details.
d3968363 22
241b5967 23=head1 TODO
d3968363 24
241b5967 25 - create tag objects instead of ref tags
d3968363 26 - audit shell-escaping of filenames
241b5967 27 - hide our private tags somewhere smarter
a6080a0a 28 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
3e525e67
EW
29 - sort and apply patches by graphing ancestry relations instead of just
30 relying in dates supplied in the changeset itself.
31 tla ancestry-graph -m could be helpful here...
d3968363
ML
32
33=head1 Devel tricks
34
a6080a0a 35Add print in front of the shell commands invoked via backticks.
d3968363 36
22ff00fc
EW
37=head1 Devel Notes
38
39There are several places where Arch and git terminology are intermixed
40and potentially confused.
41
42The notion of a "branch" in git is approximately equivalent to
43a "archive/category--branch--version" in Arch. Also, it should be noted
44that the "--branch" portion of "archive/category--branch--version" is really
45optional in Arch although not many people (nor tools!) seem to know this.
46This means that "archive/category--version" is also a valid "branch"
47in git terms.
48
49We always refer to Arch names by their fully qualified variant (which
50means the "archive" name is prefixed.
51
52For people unfamiliar with Arch, an "archive" is the term for "repository",
53and can contain multiple, unrelated branches.
54
d3968363
ML
55=cut
56
d48b2841 57use 5.008;
d3968363
ML
58use strict;
59use warnings;
60use Getopt::Std;
42f44b08 61use File::Temp qw(tempdir);
f88961a8 62use File::Path qw(mkpath rmtree);
d3968363 63use File::Basename qw(basename dirname);
d3968363
ML
64use Data::Dumper qw/ Dumper /;
65use IPC::Open2;
66
67$SIG{'PIPE'}="IGNORE";
68$ENV{'TZ'}="UTC";
69
1d4710d0
ML
70my $git_dir = $ENV{"GIT_DIR"} || ".git";
71$ENV{"GIT_DIR"} = $git_dir;
a7fb51d3 72my $ptag_dir = "$git_dir/archimport/tags";
1d4710d0 73
3e525e67 74our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
d3968363
ML
75
76sub usage() {
77 print STDERR <<END;
165c4b13 78usage: git archimport # fetch/update GIT from Arch
42f4570c 79 [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ]
d3968363
ML
80 repository/arch-branch [ repository/arch-branch] ...
81END
82 exit(1);
83}
84
3e525e67 85getopts("fThvat:D:") or usage();
d3968363
ML
86usage if $opt_h;
87
88@ARGV >= 1 or usage();
42f44b08
EW
89# $arch_branches:
90# values associated with keys:
91# =1 - Arch version / git 'branch' detected via abrowse on a limit
82e5a82f 92# >1 - Arch version / git 'branch' of an auxiliary branch we've merged
d9cb5399
PB
93my %arch_branches = map { my $branch = $_; $branch =~ s/:[^:]*$//; $branch => 1 } @ARGV;
94
95# $branch_name_map:
96# maps arch branches to git branch names
97my %branch_name_map = map { m/^(.*):([^:]*)$/; $1 => $2 } grep { m/:/ } @ARGV;
d3968363 98
5744f277
EW
99$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
100my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
127bf00f 101$opt_v && print "+ Using $tmp as temporary directory\n";
d3968363 102
11dbe9e8
GP
103unless (-d $git_dir) { # initial import needs empty directory
104 opendir DIR, '.' or die "Unable to open current directory: $!\n";
105 while (my $entry = readdir DIR) {
106 $entry =~ /^\.\.?$/ or
107 die "Initial import needs an empty current working directory.\n"
108 }
109 closedir DIR
110}
111
d9cb5399 112my $default_archive; # default Arch archive
42f44b08
EW
113my %reachable = (); # Arch repositories we can access
114my %unreachable = (); # Arch repositories we can't access :<
d3968363 115my @psets = (); # the collection
b779d5f0 116my %psets = (); # the collection, by name
3e525e67
EW
117my %stats = ( # Track which strategy we used to import:
118 get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
119 simple_changeset => 0, import_or_tag => 0
120);
b779d5f0
ML
121
122my %rptags = (); # my reverse private tags
123 # to map a SHA1 to a commitid
2777ef76 124my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
d3968363 125
42f44b08
EW
126sub do_abrowse {
127 my $stage = shift;
128 while (my ($limit, $level) = each %arch_branches) {
129 next unless $level == $stage;
a6080a0a
JH
130
131 open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
42f44b08 132 or die "Problems with tla abrowse: $!";
a6080a0a 133
42f44b08
EW
134 my %ps = (); # the current one
135 my $lastseen = '';
a6080a0a 136
42f44b08
EW
137 while (<ABROWSE>) {
138 chomp;
a6080a0a 139
42f44b08
EW
140 # first record padded w 8 spaces
141 if (s/^\s{8}\b//) {
142 my ($id, $type) = split(m/\s+/, $_, 2);
143
144 my %last_ps;
145 # store the record we just captured
146 if (%ps && !exists $psets{ $ps{id} }) {
147 %last_ps = %ps; # break references
148 push (@psets, \%last_ps);
149 $psets{ $last_ps{id} } = \%last_ps;
150 }
a6080a0a 151
42f44b08
EW
152 my $branch = extract_versionname($id);
153 %ps = ( id => $id, branch => $branch );
154 if (%last_ps && ($last_ps{branch} eq $branch)) {
155 $ps{parent_id} = $last_ps{id};
156 }
a6080a0a 157
42f44b08
EW
158 $arch_branches{$branch} = 1;
159 $lastseen = 'id';
160
161 # deal with types (should work with baz or tla):
162 if ($type =~ m/\(.*changeset\)/) {
163 $ps{type} = 's';
164 } elsif ($type =~ /\(.*import\)/) {
165 $ps{type} = 'i';
6df896b5 166 } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
42f44b08
EW
167 $ps{type} = 't';
168 # read which revision we've tagged when we parse the log
6df896b5 169 $ps{tag} = $1;
a6080a0a 170 } else {
42f44b08
EW
171 warn "Unknown type $type";
172 }
173
174 $arch_branches{$branch} = 1;
175 $lastseen = 'id';
a6080a0a
JH
176 } elsif (s/^\s{10}//) {
177 # 10 leading spaces or more
42f44b08 178 # indicate commit metadata
a6080a0a 179
42f44b08
EW
180 # date
181 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
182 $ps{date} = $1;
183 $lastseen = 'date';
184 } elsif ($_ eq 'merges in:') {
185 $ps{merges} = [];
186 $lastseen = 'merges';
187 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
188 my $id = $_;
189 push (@{$ps{merges}}, $id);
a6080a0a 190
42f44b08
EW
191 # aggressive branch finding:
192 if ($opt_D) {
193 my $branch = extract_versionname($id);
194 my $repo = extract_reponame($branch);
a6080a0a 195
42f44b08
EW
196 if (archive_reachable($repo) &&
197 !defined $arch_branches{$branch}) {
198 $arch_branches{$branch} = $stage + 1;
199 }
200 }
201 } else {
202 warn "more metadata after merges!?: $_\n" unless /^\s*$/;
d3968363 203 }
d3968363 204 }
d3968363 205 }
d3968363 206
42f44b08
EW
207 if (%ps && !exists $psets{ $ps{id} }) {
208 my %temp = %ps; # break references
209 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
210 $temp{parent_id} = $psets[$#psets]{id};
211 }
a6080a0a 212 push (@psets, \%temp);
42f44b08 213 $psets{ $temp{id} } = \%temp;
a6080a0a
JH
214 }
215
42f44b08
EW
216 close ABROWSE or die "$TLA abrowse failed on $limit\n";
217 }
d3968363
ML
218} # end foreach $root
219
42f44b08
EW
220do_abrowse(1);
221my $depth = 2;
222$opt_D ||= 0;
223while ($depth <= $opt_D) {
224 do_abrowse($depth);
225 $depth++;
226}
227
d3968363 228## Order patches by time
42f44b08
EW
229# FIXME see if we can find a more optimal way to do this by graphing
230# the ancestry data and walking it, that way we won't have to rely on
231# client-supplied dates
d3968363
ML
232@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
233
234#print Dumper \@psets;
235
236##
237## TODO cleanup irrelevant patches
238## and put an initial import
239## or a full tag
3292ae47 240my $import = 0;
1d4710d0 241unless (-d $git_dir) { # initial import
d3968363
ML
242 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
243 print "Starting import from $psets[0]{id}\n";
5c94f87e 244 `git-init`;
3292ae47
ML
245 die $! if $?;
246 $import = 1;
d3968363
ML
247 } else {
248 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
249 }
b779d5f0
ML
250} else { # progressing an import
251 # load the rptags
42f44b08 252 opendir(DIR, $ptag_dir)
b779d5f0
ML
253 || die "can't opendir: $!";
254 while (my $file = readdir(DIR)) {
a7fb51d3
EW
255 # skip non-interesting-files
256 next unless -f "$ptag_dir/$file";
a6080a0a 257
a7fb51d3
EW
258 # convert first '--' to '/' from old git-archimport to use
259 # as an archivename/c--b--v private tag
260 if ($file !~ m!,!) {
261 my $oldfile = $file;
262 $file =~ s!--!,!;
263 print STDERR "converting old tag $oldfile to $file\n";
264 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
265 }
b779d5f0
ML
266 my $sha = ptag($file);
267 chomp $sha;
b779d5f0
ML
268 $rptags{$sha} = $file;
269 }
270 closedir DIR;
d3968363
ML
271}
272
3292ae47 273# process patchsets
22ff00fc
EW
274# extract the Arch repository name (Arch "archive" in Arch-speak)
275sub extract_reponame {
276 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
277 return (split(/\//, $fq_cvbr))[0];
278}
a6080a0a 279
22ff00fc
EW
280sub extract_versionname {
281 my $name = shift;
282 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
283 return $name;
284}
d3968363 285
22ff00fc 286# convert a fully-qualified revision or version to a unique dirname:
a6080a0a 287# normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
22ff00fc
EW
288# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
289#
290# the git notion of a branch is closer to
291# archive/category--branch--version than archive/category--branch, so we
292# use this to convert to git branch names.
293# Also, keep archive names but replace '/' with ',' since it won't require
294# subdirectories, and is safer than swapping '--' which could confuse
295# reverse-mapping when dealing with bastard branches that
296# are just archive/category--version (no --branch)
297sub tree_dirname {
298 my $revision = shift;
299 my $name = extract_versionname($revision);
300 $name =~ s#/#,#;
301 return $name;
302}
303
fee3365f
ML
304# old versions of git-archimport just use the <category--branch> part:
305sub old_style_branchname {
306 my $id = shift;
307 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
308 chomp $ret;
309 return $ret;
310}
311
d9cb5399
PB
312*git_default_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
313
314# retrieve default archive, since $branch_name_map keys might not include it
315sub get_default_archive {
316 if (!defined $default_archive) {
317 $default_archive = safe_pipe_capture($TLA,'my-default-archive');
318 chomp $default_archive;
319 }
320 return $default_archive;
321}
322
323sub git_branchname {
324 my $revision = shift;
325 my $name = extract_versionname($revision);
326
327 if (exists $branch_name_map{$name}) {
328 return $branch_name_map{$name};
329
330 } elsif ($name =~ m#^([^/]*)/(.*)$#
331 && $1 eq get_default_archive()
332 && exists $branch_name_map{$2}) {
333 # the names given in the command-line lacked the archive.
334 return $branch_name_map{$2};
335
336 } else {
337 return git_default_branchname($revision);
338 }
339}
22ff00fc 340
3e525e67
EW
341sub process_patchset_accurate {
342 my $ps = shift;
a6080a0a 343
3e525e67
EW
344 # switch to that branch if we're not already in that branch:
345 if (-e "$git_dir/refs/heads/$ps->{branch}") {
346 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
d3968363 347
3e525e67
EW
348 # remove any old stuff that got leftover:
349 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
350 rmtree(split(/\0/,$rm)) if $rm;
3292ae47 351 }
a6080a0a 352
3e525e67
EW
353 # Apply the import/changeset/merge into the working tree
354 my $dir = sync_to_ps($ps);
355 # read the new log entry:
356 my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
357 die "Error in cat-log: $!" if $?;
358 chomp @commitlog;
359
360 # grab variables we want from the log, new fields get added to $ps:
361 # (author, date, email, summary, message body ...)
362 parselog($ps, \@commitlog);
363
364 if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
a6080a0a 365 # this should work when importing continuations
3e525e67 366 if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
a6080a0a 367
3e525e67 368 # find where we are supposed to branch from
d9cb5399
PB
369 if (! -e "$git_dir/refs/heads/$ps->{branch}") {
370 system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";
371
372 # We trust Arch with the fact that this is just a tag,
373 # and it does not affect the state of the tree, so
374 # we just tag and move on. If the user really wants us
375 # to consolidate more branches into one, don't tag because
376 # the tag name would be already taken.
377 tag($ps->{id}, $branchpoint);
378 ptag($ps->{id}, $branchpoint);
379 print " * Tagged $ps->{id} at $branchpoint\n";
380 }
381 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
382
3e525e67
EW
383 # remove any old stuff that got leftover:
384 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
385 rmtree(split(/\0/,$rm)) if $rm;
3e525e67
EW
386 return 0;
387 } else {
388 warn "Tagging from unknown id unsupported\n" if $ps->{tag};
389 }
390 # allow multiple bases/imports here since Arch supports cherry-picks
391 # from unrelated trees
a6080a0a
JH
392 }
393
3e525e67 394 # update the index with all the changes we got
3ff903bf
EW
395 system('git-diff-files --name-only -z | '.
396 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
3e525e67
EW
397 system('git-ls-files --others -z | '.
398 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
3e525e67
EW
399 return 1;
400}
37f15d50 401
3e525e67
EW
402# the native changeset processing strategy. This is very fast, but
403# does not handle permissions or any renames involving directories
404sub process_patchset_fast {
405 my $ps = shift;
a6080a0a 406 #
d3968363
ML
407 # create the branch if needed
408 #
3292ae47
ML
409 if ($ps->{type} eq 'i' && !$import) {
410 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
d3968363
ML
411 }
412
3292ae47 413 unless ($import) { # skip for import
1d4710d0 414 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
d3968363 415 # we know about this branch
f88961a8 416 system('git-checkout',$ps->{branch});
d3968363
ML
417 } else {
418 # new branch! we need to verify a few things
419 die "Branch on a non-tag!" unless $ps->{type} eq 't';
420 my $branchpoint = ptag($ps->{tag});
a6080a0a 421 die "Tagging from unknown id unsupported: $ps->{tag}"
d3968363 422 unless $branchpoint;
a6080a0a 423
d3968363 424 # find where we are supposed to branch from
d9cb5399
PB
425 if (! -e "$git_dir/refs/heads/$ps->{branch}") {
426 system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";
427
428 # We trust Arch with the fact that this is just a tag,
429 # and it does not affect the state of the tree, so
430 # we just tag and move on. If the user really wants us
431 # to consolidate more branches into one, don't tag because
432 # the tag name would be already taken.
433 tag($ps->{id}, $branchpoint);
434 ptag($ps->{id}, $branchpoint);
435 print " * Tagged $ps->{id} at $branchpoint\n";
436 }
437 system('git-checkout',$ps->{branch}) == 0 or die "$! $?\n";
3e525e67 438 return 0;
a6080a0a 439 }
d3968363 440 die $! if $?;
a6080a0a 441 }
d3968363 442
d3968363
ML
443 #
444 # Apply the import/changeset/merge into the working tree
a6080a0a 445 #
d3968363 446 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
d3968363 447 apply_import($ps) or die $!;
3e525e67 448 $stats{import_or_tag}++;
3292ae47 449 $import=0;
d3968363
ML
450 } elsif ($ps->{type} eq 's') {
451 apply_cset($ps);
3e525e67 452 $stats{simple_changeset}++;
d3968363
ML
453 }
454
455 #
456 # prepare update git's index, based on what arch knows
457 # about the pset, resolve parents, etc
458 #
a6080a0a
JH
459
460 my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
d3968363 461 die "Error in cat-archive-log: $!" if $?;
a6080a0a 462
6df896b5 463 parselog($ps,\@commitlog);
d3968363
ML
464
465 # imports don't give us good info
466 # on added files. Shame on them
6df896b5 467 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
6df896b5
EW
468 system('git-ls-files --deleted -z | '.
469 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
3ff903bf
EW
470 system('git-ls-files --others -z | '.
471 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
d3968363
ML
472 }
473
6df896b5 474 # TODO: handle removed_directories and renamed_directories:
3ff903bf 475
6df896b5
EW
476 if (my $del = $ps->{removed_files}) {
477 unlink @$del;
d3968363
ML
478 while (@$del) {
479 my @slice = splice(@$del, 0, 100);
6df896b5
EW
480 system('git-update-index','--remove','--',@slice) == 0 or
481 die "Error in git-update-index --remove: $! $?\n";
d3968363
ML
482 }
483 }
6df896b5
EW
484
485 if (my $ren = $ps->{renamed_files}) { # renamed
d3968363
ML
486 if (@$ren % 2) {
487 die "Odd number of entries in rename!?";
488 }
a6080a0a 489
d3968363 490 while (@$ren) {
6df896b5 491 my $from = shift @$ren;
a6080a0a 492 my $to = shift @$ren;
d3968363
ML
493
494 unless (-d dirname($to)) {
495 mkpath(dirname($to)); # will die on err
496 }
3e525e67 497 # print "moving $from $to";
6df896b5
EW
498 rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
499 system('git-update-index','--remove','--',$from) == 0 or
500 die "Error in git-update-index --remove: $! $?\n";
501 system('git-update-index','--add','--',$to) == 0 or
502 die "Error in git-update-index --add: $! $?\n";
d3968363 503 }
d3968363 504 }
6df896b5 505
3ff903bf
EW
506 if (my $add = $ps->{new_files}) {
507 while (@$add) {
508 my @slice = splice(@$add, 0, 100);
509 system('git-update-index','--add','--',@slice) == 0 or
510 die "Error in git-update-index --add: $! $?\n";
511 }
512 }
513
6df896b5 514 if (my $mod = $ps->{modified_files}) {
d3968363
ML
515 while (@$mod) {
516 my @slice = splice(@$mod, 0, 100);
6df896b5
EW
517 system('git-update-index','--',@slice) == 0 or
518 die "Error in git-update-index: $! $?\n";
d3968363
ML
519 }
520 }
3e525e67
EW
521 return 1; # we successfully applied the changeset
522}
523
524if ($opt_f) {
525 print "Will import patchsets using the fast strategy\n",
526 "Renamed directories and permission changes will be missed\n";
527 *process_patchset = *process_patchset_fast;
528} else {
529 print "Using the default (accurate) import strategy.\n",
530 "Things may be a bit slow\n";
531 *process_patchset = *process_patchset_accurate;
532}
a6080a0a 533
3e525e67
EW
534foreach my $ps (@psets) {
535 # process patchsets
536 $ps->{branch} = git_branchname($ps->{id});
537
538 #
a6080a0a
JH
539 # ensure we have a clean state
540 #
3e525e67
EW
541 if (my $dirty = `git-diff-files`) {
542 die "Unclean tree when about to process $ps->{id} " .
543 " - did we fail to commit cleanly before?\n$dirty";
544 }
545 die $! if $?;
a6080a0a 546
3e525e67
EW
547 #
548 # skip commits already in repo
549 #
550 if (ptag($ps->{id})) {
551 $opt_v && print " * Skipping already imported: $ps->{id}\n";
10945e00 552 next;
3e525e67
EW
553 }
554
555 print " * Starting to work on $ps->{id}\n";
556
557 process_patchset($ps) or next;
558
215a7ad1 559 # warn "errors when running git-update-index! $!";
3e525e67 560 my $tree = `git-write-tree`;
d3968363
ML
561 die "cannot write tree $!" if $?;
562 chomp $tree;
a6080a0a 563
d3968363
ML
564 #
565 # Who's your daddy?
566 #
567 my @par;
1d4710d0 568 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
f88961a8 569 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
d3968363
ML
570 my $p = <HEAD>;
571 close HEAD;
572 chomp $p;
573 push @par, '-p', $p;
a6080a0a 574 } else {
d3968363
ML
575 if ($ps->{type} eq 's') {
576 warn "Could not find the right head for the branch $ps->{branch}";
577 }
578 }
579 }
a6080a0a 580
b779d5f0
ML
581 if ($ps->{merges}) {
582 push @par, find_parents($ps);
583 }
d3968363 584
a6080a0a 585 #
d3968363
ML
586 # Commit, tag and clean state
587 #
588 $ENV{TZ} = 'GMT';
589 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
590 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
591 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
592 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
593 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
594 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
595
a6080a0a 596 my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
d3968363 597 or die $!;
a94f457e 598 print WRITER $ps->{summary},"\n\n";
608403d7
MB
599
600 # only print message if it's not empty, to avoid a spurious blank line;
601 # also append an extra newline, so there's a blank line before the
602 # following "git-archimport-id:" line.
603 print WRITER $ps->{message},"\n\n" if ($ps->{message} ne "");
a6080a0a 604
6df896b5
EW
605 # make it easy to backtrack and figure out which Arch revision this was:
606 print WRITER 'git-archimport-id: ',$ps->{id},"\n";
a6080a0a 607
d3968363
ML
608 close WRITER;
609 my $commitid = <READER>; # read
610 chomp $commitid;
611 close READER;
612 waitpid $pid,0; # close;
613
614 if (length $commitid != 40) {
615 die "Something went wrong with the commit! $! $commitid";
616 }
617 #
618 # Update the branch
a6080a0a 619 #
f88961a8 620 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
d3968363
ML
621 print HEAD $commitid;
622 close HEAD;
8366a10a 623 system('git-update-ref', 'HEAD', "$ps->{branch}");
d3968363
ML
624
625 # tag accordingly
626 ptag($ps->{id}, $commitid); # private tag
627 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
628 tag($ps->{id}, $commitid);
629 }
630 print " * Committed $ps->{id}\n";
631 print " + tree $tree\n";
632 print " + commit $commitid\n";
b779d5f0 633 $opt_v && print " + commit date is $ps->{date} \n";
f88961a8 634 $opt_v && print " + parents: ",join(' ',@par),"\n";
3e525e67
EW
635}
636
637if ($opt_v) {
638 foreach (sort keys %stats) {
639 print" $_: $stats{$_}\n";
640 }
641}
642exit 0;
643
644# used by the accurate strategy:
645sub sync_to_ps {
646 my $ps = shift;
647 my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
a6080a0a 648
3e525e67
EW
649 $opt_v && print "sync_to_ps($ps->{id}) method: ";
650
651 if (-d $tree_dir) {
652 if ($ps->{type} eq 't') {
653 $opt_v && print "get (tag)\n";
654 # looks like a tag-only or (worse,) a mixed tags/changeset branch,
655 # can't rely on replay to work correctly on these
656 rmtree($tree_dir);
657 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
658 $stats{get_tag}++;
659 } else {
660 my $tree_id = arch_tree_id($tree_dir);
661 if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
662 # the common case (hopefully)
663 $opt_v && print "replay\n";
664 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
665 $stats{replay}++;
666 } else {
667 # getting one tree is usually faster than getting two trees
668 # and applying the delta ...
669 rmtree($tree_dir);
670 $opt_v && print "apply-delta\n";
671 safe_pipe_capture($TLA,'get','--no-pristine',
672 $ps->{id},$tree_dir);
673 $stats{get_delta}++;
674 }
675 }
676 } else {
677 # new branch work
678 $opt_v && print "get (new tree)\n";
679 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
680 $stats{get_new}++;
681 }
a6080a0a 682
3e525e67
EW
683 # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
684 system('rsync','-aI','--delete','--exclude',$git_dir,
685# '--exclude','.arch-inventory',
686 '--exclude','.arch-ids','--exclude','{arch}',
687 '--exclude','+*','--exclude',',*',
688 "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
689 return $tree_dir;
d3968363
ML
690}
691
d3968363
ML
692sub apply_import {
693 my $ps = shift;
22ff00fc 694 my $bname = git_branchname($ps->{id});
d3968363 695
f88961a8 696 mkpath($tmp);
d3968363 697
f88961a8 698 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
a6080a0a 699 die "Cannot get import: $!" if $?;
f88961a8
EW
700 system('rsync','-aI','--delete', '--exclude',$git_dir,
701 '--exclude','.arch-ids','--exclude','{arch}',
702 "$tmp/import/", './');
d3968363 703 die "Cannot rsync import:$!" if $?;
a6080a0a 704
f88961a8 705 rmtree("$tmp/import");
d3968363 706 die "Cannot remove tempdir: $!" if $?;
a6080a0a 707
d3968363
ML
708
709 return 1;
710}
711
712sub apply_cset {
713 my $ps = shift;
714
f88961a8 715 mkpath($tmp);
d3968363
ML
716
717 # get the changeset
f88961a8 718 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
d3968363 719 die "Cannot get changeset: $!" if $?;
a6080a0a 720
d3968363
ML
721 # apply patches
722 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
723 # this can be sped up considerably by doing
724 # (find | xargs cat) | patch
82e5a82f 725 # but that can get mucked up by patches
a6080a0a 726 # with missing trailing newlines or the standard
d3968363
ML
727 # 'missing newline' flag in the patch - possibly
728 # produced with an old/buggy diff.
729 # slow and safe, we invoke patch once per patchfile
730 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
731 die "Problem applying patches! $!" if $?;
732 }
733
734 # apply changed binary files
735 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
736 foreach my $mod (@modified) {
737 chomp $mod;
738 my $orig = $mod;
739 $orig =~ s/\.modified$//; # lazy
740 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
741 #print "rsync -p '$mod' '$orig'";
f88961a8 742 system('rsync','-p',$mod,"./$orig");
d3968363
ML
743 die "Problem applying binary changes! $!" if $?;
744 }
745 }
746
747 # bring in new files
f88961a8 748 system('rsync','-aI','--exclude',$git_dir,
a6080a0a 749 '--exclude','.arch-ids',
f88961a8
EW
750 '--exclude', '{arch}',
751 "$tmp/changeset/new-files-archive/",'./');
d3968363
ML
752
753 # deleted files are hinted from the commitlog processing
754
f88961a8 755 rmtree("$tmp/changeset");
d3968363
ML
756}
757
758
759# =for reference
6df896b5
EW
760# notes: *-files/-directories keys cannot have spaces, they're always
761# pika-escaped. Everything after the first newline
762# A log entry looks like:
d3968363
ML
763# Revision: moodle-org--moodle--1.3.3--patch-15
764# Archive: arch-eduforge@catalyst.net.nz--2004
765# Creator: Penny Leach <penny@catalyst.net.nz>
766# Date: Wed May 25 14:15:34 NZST 2005
767# Standard-date: 2005-05-25 02:15:34 GMT
768# New-files: lang/de/.arch-ids/block_glossary_random.php.id
769# lang/de/.arch-ids/block_html.php.id
770# New-directories: lang/de/help/questionnaire
771# lang/de/help/questionnaire/.arch-ids
772# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
773# db_sears.sql db/db_sears.sql
774# Removed-files: lang/be/docs/.arch-ids/release.html.id
775# lang/be/docs/.arch-ids/releaseold.html.id
776# Modified-files: admin/cron.php admin/delete.php
777# admin/editor.html backup/lib.php backup/restore.php
778# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
779# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
6df896b5 780# summary can be multiline with a leading space just like the above fields
d3968363
ML
781# Keywords:
782#
783# Updating yadda tadda tadda madda
784sub parselog {
6df896b5
EW
785 my ($ps, $log) = @_;
786 my $key = undef;
787
788 # headers we want that contain filenames:
789 my %want_headers = (
790 new_files => 1,
791 modified_files => 1,
792 renamed_files => 1,
793 renamed_directories => 1,
794 removed_files => 1,
795 removed_directories => 1,
796 );
a6080a0a 797
6df896b5
EW
798 chomp (@$log);
799 while ($_ = shift @$log) {
800 if (/^Continuation-of:\s*(.*)/) {
801 $ps->{tag} = $1;
802 $key = undef;
803 } elsif (/^Summary:\s*(.*)$/ ) {
a94f457e
PB
804 # summary can be multiline as long as it has a leading space.
805 # we squeeze it onto a single line, though.
6df896b5
EW
806 $ps->{summary} = [ $1 ];
807 $key = 'summary';
808 } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
809 $ps->{author} = $1;
810 $ps->{email} = $2;
811 $key = undef;
812 # any *-files or *-directories can be read here:
813 } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
814 my $val = $2;
815 $key = lc $1;
816 $key =~ tr/-/_/; # too lazy to quote :P
817 if ($want_headers{$key}) {
818 push @{$ps->{$key}}, split(/\s+/, $val);
819 } else {
820 $key = undef;
821 }
822 } elsif (/^$/) {
823 last; # remainder of @$log that didn't get shifted off is message
824 } elsif ($key) {
825 if (/^\s+(.*)$/) {
826 if ($key eq 'summary') {
827 push @{$ps->{$key}}, $1;
828 } else { # files/directories:
829 push @{$ps->{$key}}, split(/\s+/, $1);
830 }
831 } else {
832 $key = undef;
833 }
834 }
d3968363 835 }
a6080a0a 836
a94f457e
PB
837 # drop leading empty lines from the log message
838 while (@$log && $log->[0] eq '') {
839 shift @$log;
840 }
841 if (exists $ps->{summary} && @{$ps->{summary}}) {
842 $ps->{summary} = join(' ', @{$ps->{summary}});
843 }
844 elsif (@$log == 0) {
845 $ps->{summary} = 'empty commit message';
846 } else {
847 $ps->{summary} = $log->[0] . '...';
848 }
6df896b5 849 $ps->{message} = join("\n",@$log);
a6080a0a 850
6df896b5
EW
851 # skip Arch control files, unescape pika-escaped files
852 foreach my $k (keys %want_headers) {
853 next unless (defined $ps->{$k});
6e33101a 854 my @tmp = ();
6df896b5
EW
855 foreach my $t (@{$ps->{$k}}) {
856 next unless length ($t);
857 next if $t =~ m!\{arch\}/!;
858 next if $t =~ m!\.arch-ids/!;
859 # should we skip this?
860 next if $t =~ m!\.arch-inventory$!;
f84f9d38
ML
861 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
862 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
6df896b5 863 if ($t =~ /\\/ ){
f88961a8 864 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
f84f9d38 865 }
6df896b5 866 push @tmp, $t;
d3968363 867 }
6e33101a 868 $ps->{$k} = \@tmp;
d3968363 869 }
d3968363
ML
870}
871
872# write/read a tag
873sub tag {
874 my ($tag, $commit) = @_;
a6080a0a 875
fee3365f
ML
876 if ($opt_o) {
877 $tag =~ s|/|--|g;
878 } else {
d9cb5399
PB
879 my $patchname = $tag;
880 $patchname =~ s/.*--//;
881 $tag = git_branchname ($tag) . '--' . $patchname;
fee3365f 882 }
a6080a0a 883
d3968363 884 if ($commit) {
a7fb51d3 885 open(C,">","$git_dir/refs/tags/$tag")
d3968363
ML
886 or die "Cannot create tag $tag: $!\n";
887 print C "$commit\n"
888 or die "Cannot write tag $tag: $!\n";
889 close(C)
890 or die "Cannot write tag $tag: $!\n";
a7fb51d3 891 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
d3968363 892 } else { # read
a7fb51d3 893 open(C,"<","$git_dir/refs/tags/$tag")
d3968363
ML
894 or die "Cannot read tag $tag: $!\n";
895 $commit = <C>;
896 chomp $commit;
897 die "Error reading tag $tag: $!\n" unless length $commit == 40;
898 close(C)
899 or die "Cannot read tag $tag: $!\n";
900 return $commit;
901 }
902}
903
904# write/read a private tag
905# reads fail softly if the tag isn't there
906sub ptag {
907 my ($tag, $commit) = @_;
a7fb51d3
EW
908
909 # don't use subdirs for tags yet, it could screw up other porcelains
a6080a0a
JH
910 $tag =~ s|/|,|g;
911
a7fb51d3
EW
912 my $tag_file = "$ptag_dir/$tag";
913 my $tag_branch_dir = dirname($tag_file);
914 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
d3968363
ML
915
916 if ($commit) { # write
a7fb51d3 917 open(C,">",$tag_file)
d3968363
ML
918 or die "Cannot create tag $tag: $!\n";
919 print C "$commit\n"
920 or die "Cannot write tag $tag: $!\n";
921 close(C)
922 or die "Cannot write tag $tag: $!\n";
a6080a0a 923 $rptags{$commit} = $tag
b779d5f0 924 unless $tag =~ m/--base-0$/;
d3968363
ML
925 } else { # read
926 # if the tag isn't there, return 0
a7fb51d3 927 unless ( -s $tag_file) {
d3968363
ML
928 return 0;
929 }
a7fb51d3 930 open(C,"<",$tag_file)
d3968363
ML
931 or die "Cannot read tag $tag: $!\n";
932 $commit = <C>;
933 chomp $commit;
934 die "Error reading tag $tag: $!\n" unless length $commit == 40;
935 close(C)
936 or die "Cannot read tag $tag: $!\n";
b779d5f0
ML
937 unless (defined $rptags{$commit}) {
938 $rptags{$commit} = $tag;
939 }
d3968363
ML
940 return $commit;
941 }
942}
b779d5f0
ML
943
944sub find_parents {
945 #
946 # Identify what branches are merging into me
947 # and whether we are fully merged
948 # git-merge-base <headsha> <headsha> should tell
a6080a0a 949 # me what the base of the merge should be
b779d5f0
ML
950 #
951 my $ps = shift;
952
953 my %branches; # holds an arrayref per branch
954 # the arrayref contains a list of
955 # merged patches between the base
956 # of the merge and the current head
957
958 my @parents; # parents found for this commit
959
960 # simple loop to split the merges
961 # per branch
962 foreach my $merge (@{$ps->{merges}}) {
22ff00fc 963 my $branch = git_branchname($merge);
b779d5f0
ML
964 unless (defined $branches{$branch} ){
965 $branches{$branch} = [];
966 }
967 push @{$branches{$branch}}, $merge;
968 }
969
970 #
a6080a0a 971 # foreach branch find a merge base and walk it to the
b779d5f0
ML
972 # head where we are, collecting the merged patchsets that
973 # Arch has recorded. Keep that in @have
974 # Compare that with the commits on the other branch
975 # between merge-base and the tip of the branch (@need)
976 # and see if we have a series of consecutive patches
977 # starting from the merge base. The tip of the series
a6080a0a 978 # of consecutive patches merged is our new parent for
b779d5f0
ML
979 # that branch.
980 #
981 foreach my $branch (keys %branches) {
37f15d50
ML
982
983 # check that we actually know about the branch
984 next unless -e "$git_dir/refs/heads/$branch";
985
8d0fad0a 986 my $mergebase = safe_pipe_capture(qw(git-merge-base), $branch, $ps->{branch});
a6080a0a
JH
987 if ($?) {
988 # Don't die here, Arch supports one-way cherry-picking
989 # between branches with no common base (or any relationship
990 # at all beforehand)
991 warn "Cannot find merge base for $branch and $ps->{branch}";
992 next;
993 }
b779d5f0
ML
994 chomp $mergebase;
995
996 # now walk up to the mergepoint collecting what patches we have
997 my $branchtip = git_rev_parse($ps->{branch});
765ac8ec 998 my @ancestors = `git-rev-list --topo-order $branchtip ^$mergebase`;
b779d5f0
ML
999 my %have; # collected merges this branch has
1000 foreach my $merge (@{$ps->{merges}}) {
1001 $have{$merge} = 1;
1002 }
1003 my %ancestorshave;
1004 foreach my $par (@ancestors) {
1005 $par = commitid2pset($par);
1006 if (defined $par->{merges}) {
1007 foreach my $merge (@{$par->{merges}}) {
1008 $ancestorshave{$merge}=1;
1009 }
1010 }
1011 }
1012 # print "++++ Merges in $ps->{id} are....\n";
1013 # my @have = sort keys %have; print Dumper(\@have);
1014
1015 # merge what we have with what ancestors have
1016 %have = (%have, %ancestorshave);
1017
a6080a0a 1018 # see what the remote branch has - these are the merges we
b779d5f0
ML
1019 # will want to have in a consecutive series from the mergebase
1020 my $otherbranchtip = git_rev_parse($branch);
765ac8ec 1021 my @needraw = `git-rev-list --topo-order $otherbranchtip ^$mergebase`;
b779d5f0
ML
1022 my @need;
1023 foreach my $needps (@needraw) { # get the psets
1024 $needps = commitid2pset($needps);
1025 # git-rev-list will also
a6080a0a 1026 # list commits merged in via earlier
b779d5f0
ML
1027 # merges. we are only interested in commits
1028 # from the branch we're looking at
1029 if ($branch eq $needps->{branch}) {
1030 push @need, $needps->{id};
1031 }
1032 }
1033
1034 # print "++++ Merges from $branch we want are....\n";
1035 # print Dumper(\@need);
1036
1037 my $newparent;
1038 while (my $needed_commit = pop @need) {
1039 if ($have{$needed_commit}) {
1040 $newparent = $needed_commit;
1041 } else {
1042 last; # break out of the while
1043 }
1044 }
1045 if ($newparent) {
1046 push @parents, $newparent;
1047 }
1048
1049
1050 } # end foreach branch
1051
1052 # prune redundant parents
1053 my %parents;
1054 foreach my $p (@parents) {
1055 $parents{$p} = 1;
1056 }
1057 foreach my $p (@parents) {
1058 next unless exists $psets{$p}{merges};
1059 next unless ref $psets{$p}{merges};
1060 my @merges = @{$psets{$p}{merges}};
1061 foreach my $merge (@merges) {
a6080a0a 1062 if ($parents{$merge}) {
b779d5f0
ML
1063 delete $parents{$merge};
1064 }
1065 }
1066 }
42f44b08 1067
f88961a8
EW
1068 @parents = ();
1069 foreach (keys %parents) {
1070 push @parents, '-p', ptag($_);
1071 }
b779d5f0
ML
1072 return @parents;
1073}
1074
1075sub git_rev_parse {
1076 my $name = shift;
8d0fad0a 1077 my $val = safe_pipe_capture(qw(git-rev-parse), $name);
b779d5f0
ML
1078 die "Error: git-rev-parse $name" if $?;
1079 chomp $val;
1080 return $val;
1081}
1082
1083# resolve a SHA1 to a known patchset
1084sub commitid2pset {
1085 my $commitid = shift;
1086 chomp $commitid;
a6080a0a 1087 my $name = $rptags{$commitid}
b779d5f0 1088 || die "Cannot find reverse tag mapping for $commitid";
a7fb51d3 1089 $name =~ s|,|/|;
a6080a0a 1090 my $ps = $psets{$name}
b779d5f0
ML
1091 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1092 return $ps;
1093}
2777ef76 1094
42f44b08 1095
82e5a82f 1096# an alternative to `command` that allows input to be passed as an array
2777ef76
EW
1097# to work around shell problems with weird characters in arguments
1098sub safe_pipe_capture {
1099 my @output;
1100 if (my $pid = open my $child, '-|') {
1101 @output = (<$child>);
1102 close $child or die join(' ',@_).": $! $?";
1103 } else {
3e525e67 1104 exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
2777ef76
EW
1105 }
1106 return wantarray ? @output : join('',@output);
1107}
1108
42f44b08
EW
1109# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1110sub arch_tree_id {
1111 my $dir = shift;
1112 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1113 return $ret;
1114}
1115
1116sub archive_reachable {
1117 my $archive = shift;
1118 return 1 if $reachable{$archive};
1119 return 0 if $unreachable{$archive};
a6080a0a 1120
42f44b08
EW
1121 if (system "$TLA whereis-archive $archive >/dev/null") {
1122 if ($opt_a && (system($TLA,'register-archive',
1123 "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1124 $reachable{$archive} = 1;
1125 return 1;
1126 }
1127 print STDERR "Archive is unreachable: $archive\n";
1128 $unreachable{$archive} = 1;
1129 return 0;
1130 } else {
1131 $reachable{$archive} = 1;
1132 return 1;
1133 }
1134}