c3bed08086d69fe3cc79b64117193a9173faa91f
[git/git.git] / git-archimport.perl
1 #!/usr/bin/perl -w
2 #
3 # This tool is copyright (c) 2005, Martin Langhoff.
4 # It is released under the Gnu Public License, version 2.
5 #
6 # The basic idea is to walk the output of tla abrowse,
7 # fetch the changesets and apply them.
8 #
9
10 =head1 Invocation
11
12 git-archimport [ -h ] [ -v ] [ -T ] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
13
14 Imports a project from one or more Arch repositories. It will follow branches
15 and repositories within the namespaces defined by the <archive/branch>
16 parameters suppplied. If it cannot find the remote branch a merge comes from
17 it will just import it as a regular commit. If it can find it, it will mark it
18 as a merge whenever possible.
19
20 See man (1) git-archimport for more details.
21
22 =head1 TODO
23
24 - create tag objects instead of ref tags
25 - audit shell-escaping of filenames
26 - hide our private tags somewhere smarter
27 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
28
29 =head1 Devel tricks
30
31 Add print in front of the shell commands invoked via backticks.
32
33 =head1 Devel Notes
34
35 There are several places where Arch and git terminology are intermixed
36 and potentially confused.
37
38 The notion of a "branch" in git is approximately equivalent to
39 a "archive/category--branch--version" in Arch. Also, it should be noted
40 that the "--branch" portion of "archive/category--branch--version" is really
41 optional in Arch although not many people (nor tools!) seem to know this.
42 This means that "archive/category--version" is also a valid "branch"
43 in git terms.
44
45 We always refer to Arch names by their fully qualified variant (which
46 means the "archive" name is prefixed.
47
48 For people unfamiliar with Arch, an "archive" is the term for "repository",
49 and can contain multiple, unrelated branches.
50
51 =cut
52
53 use strict;
54 use warnings;
55 use Getopt::Std;
56 use File::Spec;
57 use File::Temp qw(tempfile tempdir);
58 use File::Path qw(mkpath);
59 use File::Basename qw(basename dirname);
60 use String::ShellQuote;
61 use Time::Local;
62 use IO::Socket;
63 use IO::Pipe;
64 use POSIX qw(strftime dup2);
65 use Data::Dumper qw/ Dumper /;
66 use IPC::Open2;
67
68 $SIG{'PIPE'}="IGNORE";
69 $ENV{'TZ'}="UTC";
70
71 my $git_dir = $ENV{"GIT_DIR"} || ".git";
72 $ENV{"GIT_DIR"} = $git_dir;
73 my $ptag_dir = "$git_dir/archimport/tags";
74
75 our($opt_h,$opt_v, $opt_T,$opt_t,$opt_o);
76
77 sub usage() {
78 print STDERR <<END;
79 Usage: ${\basename $0} # fetch/update GIT from Arch
80 [ -o ] [ -h ] [ -v ] [ -T ] [ -t tempdir ]
81 repository/arch-branch [ repository/arch-branch] ...
82 END
83 exit(1);
84 }
85
86 getopts("Thvt:") or usage();
87 usage if $opt_h;
88
89 @ARGV >= 1 or usage();
90 my @arch_roots = @ARGV;
91
92 my ($tmpdir, $tmpdirname) = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
93 my $tmp = $opt_t || 1;
94 $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
95 $opt_v && print "+ Using $tmp as temporary directory\n";
96
97 my @psets = (); # the collection
98 my %psets = (); # the collection, by name
99
100 my %rptags = (); # my reverse private tags
101 # to map a SHA1 to a commitid
102
103 foreach my $root (@arch_roots) {
104 my ($arepo, $abranch) = split(m!/!, $root);
105 open ABROWSE, "tla abrowse -f -A $arepo --desc --merges $abranch |"
106 or die "Problems with tla abrowse: $!";
107
108 my %ps = (); # the current one
109 my $mode = '';
110 my $lastseen = '';
111
112 while (<ABROWSE>) {
113 chomp;
114
115 # first record padded w 8 spaces
116 if (s/^\s{8}\b//) {
117
118 # store the record we just captured
119 if (%ps) {
120 my %temp = %ps; # break references
121 push (@psets, \%temp);
122 $psets{$temp{id}} = \%temp;
123 %ps = ();
124 }
125
126 my ($id, $type) = split(m/\s{3}/, $_);
127 $ps{id} = $id;
128 $ps{repo} = $arepo;
129
130 # deal with types
131 if ($type =~ m/^\(simple changeset\)/) {
132 $ps{type} = 's';
133 } elsif ($type eq '(initial import)') {
134 $ps{type} = 'i';
135 } elsif ($type =~ m/^\(tag revision of (.+)\)/) {
136 $ps{type} = 't';
137 $ps{tag} = $1;
138 } else {
139 warn "Unknown type $type";
140 }
141 $lastseen = 'id';
142 }
143
144 if (s/^\s{10}//) {
145 # 10 leading spaces or more
146 # indicate commit metadata
147
148 # date & author
149 if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {
150
151 my ($date, $authoremail) = split(m/\s{2,}/, $_);
152 $ps{date} = $date;
153 $ps{date} =~ s/\bGMT$//; # strip off trailign GMT
154 if ($ps{date} =~ m/\b\w+$/) {
155 warn 'Arch dates not in GMT?! - imported dates will be wrong';
156 }
157
158 $authoremail =~ m/^(.+)\s(\S+)$/;
159 $ps{author} = $1;
160 $ps{email} = $2;
161
162 $lastseen = 'date';
163
164 } elsif ($lastseen eq 'date') {
165 # the only hint is position
166 # subject is after date
167 $ps{subj} = $_;
168 $lastseen = 'subj';
169
170 } elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
171 $ps{merges} = [];
172 $lastseen = 'merges';
173
174 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
175 push (@{$ps{merges}}, $_);
176 } else {
177 warn 'more metadata after merges!?';
178 }
179
180 }
181 }
182
183 if (%ps) {
184 my %temp = %ps; # break references
185 push (@psets, \%temp);
186 $psets{ $temp{id} } = \%temp;
187 %ps = ();
188 }
189 close ABROWSE;
190 } # end foreach $root
191
192 ## Order patches by time
193 @psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
194
195 #print Dumper \@psets;
196
197 ##
198 ## TODO cleanup irrelevant patches
199 ## and put an initial import
200 ## or a full tag
201 my $import = 0;
202 unless (-d $git_dir) { # initial import
203 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
204 print "Starting import from $psets[0]{id}\n";
205 `git-init-db`;
206 die $! if $?;
207 $import = 1;
208 } else {
209 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
210 }
211 } else { # progressing an import
212 # load the rptags
213 opendir(DIR, "$git_dir/archimport/tags")
214 || die "can't opendir: $!";
215 while (my $file = readdir(DIR)) {
216 # skip non-interesting-files
217 next unless -f "$ptag_dir/$file";
218
219 # convert first '--' to '/' from old git-archimport to use
220 # as an archivename/c--b--v private tag
221 if ($file !~ m!,!) {
222 my $oldfile = $file;
223 $file =~ s!--!,!;
224 print STDERR "converting old tag $oldfile to $file\n";
225 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
226 }
227 my $sha = ptag($file);
228 chomp $sha;
229 $rptags{$sha} = $file;
230 }
231 closedir DIR;
232 }
233
234 # process patchsets
235 # extract the Arch repository name (Arch "archive" in Arch-speak)
236 sub extract_reponame {
237 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
238 return (split(/\//, $fq_cvbr))[0];
239 }
240
241 sub extract_versionname {
242 my $name = shift;
243 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
244 return $name;
245 }
246
247 # convert a fully-qualified revision or version to a unique dirname:
248 # normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
249 # becomes: normalperson@yhbt.net-05,mpd--uclinux--1
250 #
251 # the git notion of a branch is closer to
252 # archive/category--branch--version than archive/category--branch, so we
253 # use this to convert to git branch names.
254 # Also, keep archive names but replace '/' with ',' since it won't require
255 # subdirectories, and is safer than swapping '--' which could confuse
256 # reverse-mapping when dealing with bastard branches that
257 # are just archive/category--version (no --branch)
258 sub tree_dirname {
259 my $revision = shift;
260 my $name = extract_versionname($revision);
261 $name =~ s#/#,#;
262 return $name;
263 }
264
265 # old versions of git-archimport just use the <category--branch> part:
266 sub old_style_branchname {
267 my $id = shift;
268 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
269 chomp $ret;
270 return $ret;
271 }
272
273 *git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
274
275 # process patchsets
276 foreach my $ps (@psets) {
277 $ps->{branch} = git_branchname($ps->{id});
278
279 #
280 # ensure we have a clean state
281 #
282 if (`git diff-files`) {
283 die "Unclean tree when about to process $ps->{id} " .
284 " - did we fail to commit cleanly before?";
285 }
286 die $! if $?;
287
288 #
289 # skip commits already in repo
290 #
291 if (ptag($ps->{id})) {
292 $opt_v && print " * Skipping already imported: $ps->{id}\n";
293 next;
294 }
295
296 print " * Starting to work on $ps->{id}\n";
297
298 #
299 # create the branch if needed
300 #
301 if ($ps->{type} eq 'i' && !$import) {
302 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
303 }
304
305 unless ($import) { # skip for import
306 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
307 # we know about this branch
308 `git checkout $ps->{branch}`;
309 } else {
310 # new branch! we need to verify a few things
311 die "Branch on a non-tag!" unless $ps->{type} eq 't';
312 my $branchpoint = ptag($ps->{tag});
313 die "Tagging from unknown id unsupported: $ps->{tag}"
314 unless $branchpoint;
315
316 # find where we are supposed to branch from
317 `git checkout -b $ps->{branch} $branchpoint`;
318
319 # If we trust Arch with the fact that this is just
320 # a tag, and it does not affect the state of the tree
321 # then we just tag and move on
322 tag($ps->{id}, $branchpoint);
323 ptag($ps->{id}, $branchpoint);
324 print " * Tagged $ps->{id} at $branchpoint\n";
325 next;
326 }
327 die $! if $?;
328 }
329
330 #
331 # Apply the import/changeset/merge into the working tree
332 #
333 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
334 apply_import($ps) or die $!;
335 $import=0;
336 } elsif ($ps->{type} eq 's') {
337 apply_cset($ps);
338 }
339
340 #
341 # prepare update git's index, based on what arch knows
342 # about the pset, resolve parents, etc
343 #
344 my $tree;
345
346 my $commitlog = `tla cat-archive-log -A $ps->{repo} $ps->{id}`;
347 die "Error in cat-archive-log: $!" if $?;
348
349 # parselog will git-add/rm files
350 # and generally prepare things for the commit
351 # NOTE: parselog will shell-quote filenames!
352 my ($sum, $msg, $add, $del, $mod, $ren) = parselog($commitlog);
353 my $logmessage = "$sum\n$msg";
354
355
356 # imports don't give us good info
357 # on added files. Shame on them
358 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
359 `find . -type f -print0 | grep -zv '^./$git_dir' | xargs -0 -l100 git-update-index --add`;
360 `git-ls-files --deleted -z | xargs --no-run-if-empty -0 -l100 git-update-index --remove`;
361 }
362
363 if (@$add) {
364 while (@$add) {
365 my @slice = splice(@$add, 0, 100);
366 my $slice = join(' ', @slice);
367 `git-update-index --add $slice`;
368 die "Error in git-update-index --add: $!" if $?;
369 }
370 }
371 if (@$del) {
372 foreach my $file (@$del) {
373 unlink $file or die "Problems deleting $file : $!";
374 }
375 while (@$del) {
376 my @slice = splice(@$del, 0, 100);
377 my $slice = join(' ', @slice);
378 `git-update-index --remove $slice`;
379 die "Error in git-update-index --remove: $!" if $?;
380 }
381 }
382 if (@$ren) { # renamed
383 if (@$ren % 2) {
384 die "Odd number of entries in rename!?";
385 }
386 ;
387 while (@$ren) {
388 my $from = pop @$ren;
389 my $to = pop @$ren;
390
391 unless (-d dirname($to)) {
392 mkpath(dirname($to)); # will die on err
393 }
394 #print "moving $from $to";
395 `mv $from $to`;
396 die "Error renaming $from $to : $!" if $?;
397 `git-update-index --remove $from`;
398 die "Error in git-update-index --remove: $!" if $?;
399 `git-update-index --add $to`;
400 die "Error in git-update-index --add: $!" if $?;
401 }
402
403 }
404 if (@$mod) { # must be _after_ renames
405 while (@$mod) {
406 my @slice = splice(@$mod, 0, 100);
407 my $slice = join(' ', @slice);
408 `git-update-index $slice`;
409 die "Error in git-update-index: $!" if $?;
410 }
411 }
412
413 # warn "errors when running git-update-index! $!";
414 $tree = `git-write-tree`;
415 die "cannot write tree $!" if $?;
416 chomp $tree;
417
418
419 #
420 # Who's your daddy?
421 #
422 my @par;
423 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
424 if (open HEAD, "<$git_dir/refs/heads/$ps->{branch}") {
425 my $p = <HEAD>;
426 close HEAD;
427 chomp $p;
428 push @par, '-p', $p;
429 } else {
430 if ($ps->{type} eq 's') {
431 warn "Could not find the right head for the branch $ps->{branch}";
432 }
433 }
434 }
435
436 if ($ps->{merges}) {
437 push @par, find_parents($ps);
438 }
439 my $par = join (' ', @par);
440
441 #
442 # Commit, tag and clean state
443 #
444 $ENV{TZ} = 'GMT';
445 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
446 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
447 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
448 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
449 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
450 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
451
452 my ($pid, $commit_rh, $commit_wh);
453 $commit_rh = 'commit_rh';
454 $commit_wh = 'commit_wh';
455
456 $pid = open2(*READER, *WRITER, "git-commit-tree $tree $par")
457 or die $!;
458 print WRITER $logmessage; # write
459 close WRITER;
460 my $commitid = <READER>; # read
461 chomp $commitid;
462 close READER;
463 waitpid $pid,0; # close;
464
465 if (length $commitid != 40) {
466 die "Something went wrong with the commit! $! $commitid";
467 }
468 #
469 # Update the branch
470 #
471 open HEAD, ">$git_dir/refs/heads/$ps->{branch}";
472 print HEAD $commitid;
473 close HEAD;
474 system('git-update-ref', 'HEAD', "$ps->{branch}");
475
476 # tag accordingly
477 ptag($ps->{id}, $commitid); # private tag
478 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
479 tag($ps->{id}, $commitid);
480 }
481 print " * Committed $ps->{id}\n";
482 print " + tree $tree\n";
483 print " + commit $commitid\n";
484 $opt_v && print " + commit date is $ps->{date} \n";
485 $opt_v && print " + parents: $par \n";
486 }
487
488 sub apply_import {
489 my $ps = shift;
490 my $bname = git_branchname($ps->{id});
491
492 `mkdir -p $tmp`;
493
494 `tla get -s --no-pristine -A $ps->{repo} $ps->{id} $tmp/import`;
495 die "Cannot get import: $!" if $?;
496 `rsync -v --archive --delete --exclude '$git_dir' --exclude '.arch-ids' --exclude '{arch}' $tmp/import/* ./`;
497 die "Cannot rsync import:$!" if $?;
498
499 `rm -fr $tmp/import`;
500 die "Cannot remove tempdir: $!" if $?;
501
502
503 return 1;
504 }
505
506 sub apply_cset {
507 my $ps = shift;
508
509 `mkdir -p $tmp`;
510
511 # get the changeset
512 `tla get-changeset -A $ps->{repo} $ps->{id} $tmp/changeset`;
513 die "Cannot get changeset: $!" if $?;
514
515 # apply patches
516 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
517 # this can be sped up considerably by doing
518 # (find | xargs cat) | patch
519 # but that cna get mucked up by patches
520 # with missing trailing newlines or the standard
521 # 'missing newline' flag in the patch - possibly
522 # produced with an old/buggy diff.
523 # slow and safe, we invoke patch once per patchfile
524 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
525 die "Problem applying patches! $!" if $?;
526 }
527
528 # apply changed binary files
529 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
530 foreach my $mod (@modified) {
531 chomp $mod;
532 my $orig = $mod;
533 $orig =~ s/\.modified$//; # lazy
534 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
535 #print "rsync -p '$mod' '$orig'";
536 `rsync -p $mod ./$orig`;
537 die "Problem applying binary changes! $!" if $?;
538 }
539 }
540
541 # bring in new files
542 `rsync --archive --exclude '$git_dir' --exclude '.arch-ids' --exclude '{arch}' $tmp/changeset/new-files-archive/* ./`;
543
544 # deleted files are hinted from the commitlog processing
545
546 `rm -fr $tmp/changeset`;
547 }
548
549
550 # =for reference
551 # A log entry looks like
552 # Revision: moodle-org--moodle--1.3.3--patch-15
553 # Archive: arch-eduforge@catalyst.net.nz--2004
554 # Creator: Penny Leach <penny@catalyst.net.nz>
555 # Date: Wed May 25 14:15:34 NZST 2005
556 # Standard-date: 2005-05-25 02:15:34 GMT
557 # New-files: lang/de/.arch-ids/block_glossary_random.php.id
558 # lang/de/.arch-ids/block_html.php.id
559 # New-directories: lang/de/help/questionnaire
560 # lang/de/help/questionnaire/.arch-ids
561 # Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
562 # db_sears.sql db/db_sears.sql
563 # Removed-files: lang/be/docs/.arch-ids/release.html.id
564 # lang/be/docs/.arch-ids/releaseold.html.id
565 # Modified-files: admin/cron.php admin/delete.php
566 # admin/editor.html backup/lib.php backup/restore.php
567 # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
568 # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
569 # Keywords:
570 #
571 # Updating yadda tadda tadda madda
572 sub parselog {
573 my $log = shift;
574 #print $log;
575
576 my (@add, @del, @mod, @ren, @kw, $sum, $msg );
577
578 if ($log =~ m/(?:\n|^)New-files:(.*?)(?=\n\w)/s ) {
579 my $files = $1;
580 @add = split(m/\s+/s, $files);
581 }
582
583 if ($log =~ m/(?:\n|^)Removed-files:(.*?)(?=\n\w)/s ) {
584 my $files = $1;
585 @del = split(m/\s+/s, $files);
586 }
587
588 if ($log =~ m/(?:\n|^)Modified-files:(.*?)(?=\n\w)/s ) {
589 my $files = $1;
590 @mod = split(m/\s+/s, $files);
591 }
592
593 if ($log =~ m/(?:\n|^)Renamed-files:(.*?)(?=\n\w)/s ) {
594 my $files = $1;
595 @ren = split(m/\s+/s, $files);
596 }
597
598 $sum ='';
599 if ($log =~ m/^Summary:(.+?)$/m ) {
600 $sum = $1;
601 $sum =~ s/^\s+//;
602 $sum =~ s/\s+$//;
603 }
604
605 $msg = '';
606 if ($log =~ m/\n\n(.+)$/s) {
607 $msg = $1;
608 $msg =~ s/^\s+//;
609 $msg =~ s/\s+$//;
610 }
611
612
613 # cleanup the arrays
614 foreach my $ref ( (\@add, \@del, \@mod, \@ren) ) {
615 my @tmp = ();
616 while (my $t = pop @$ref) {
617 next unless length ($t);
618 next if $t =~ m!\{arch\}/!;
619 next if $t =~ m!\.arch-ids/!;
620 next if $t =~ m!\.arch-inventory$!;
621 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
622 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
623 if ($t =~ /\\/ ){
624 $t = `tla escape --unescaped '$t'`;
625 }
626 push (@tmp, shell_quote($t));
627 }
628 @$ref = @tmp;
629 }
630
631 #print Dumper [$sum, $msg, \@add, \@del, \@mod, \@ren];
632 return ($sum, $msg, \@add, \@del, \@mod, \@ren);
633 }
634
635 # write/read a tag
636 sub tag {
637 my ($tag, $commit) = @_;
638
639 if ($opt_o) {
640 $tag =~ s|/|--|g;
641 } else {
642 # don't use subdirs for tags yet, it could screw up other porcelains
643 $tag =~ s|/|,|g;
644 }
645
646 if ($commit) {
647 open(C,">","$git_dir/refs/tags/$tag")
648 or die "Cannot create tag $tag: $!\n";
649 print C "$commit\n"
650 or die "Cannot write tag $tag: $!\n";
651 close(C)
652 or die "Cannot write tag $tag: $!\n";
653 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
654 } else { # read
655 open(C,"<","$git_dir/refs/tags/$tag")
656 or die "Cannot read tag $tag: $!\n";
657 $commit = <C>;
658 chomp $commit;
659 die "Error reading tag $tag: $!\n" unless length $commit == 40;
660 close(C)
661 or die "Cannot read tag $tag: $!\n";
662 return $commit;
663 }
664 }
665
666 # write/read a private tag
667 # reads fail softly if the tag isn't there
668 sub ptag {
669 my ($tag, $commit) = @_;
670
671 # don't use subdirs for tags yet, it could screw up other porcelains
672 $tag =~ s|/|,|g;
673
674 my $tag_file = "$ptag_dir/$tag";
675 my $tag_branch_dir = dirname($tag_file);
676 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
677
678 if ($commit) { # write
679 open(C,">",$tag_file)
680 or die "Cannot create tag $tag: $!\n";
681 print C "$commit\n"
682 or die "Cannot write tag $tag: $!\n";
683 close(C)
684 or die "Cannot write tag $tag: $!\n";
685 $rptags{$commit} = $tag
686 unless $tag =~ m/--base-0$/;
687 } else { # read
688 # if the tag isn't there, return 0
689 unless ( -s $tag_file) {
690 return 0;
691 }
692 open(C,"<",$tag_file)
693 or die "Cannot read tag $tag: $!\n";
694 $commit = <C>;
695 chomp $commit;
696 die "Error reading tag $tag: $!\n" unless length $commit == 40;
697 close(C)
698 or die "Cannot read tag $tag: $!\n";
699 unless (defined $rptags{$commit}) {
700 $rptags{$commit} = $tag;
701 }
702 return $commit;
703 }
704 }
705
706 sub find_parents {
707 #
708 # Identify what branches are merging into me
709 # and whether we are fully merged
710 # git-merge-base <headsha> <headsha> should tell
711 # me what the base of the merge should be
712 #
713 my $ps = shift;
714
715 my %branches; # holds an arrayref per branch
716 # the arrayref contains a list of
717 # merged patches between the base
718 # of the merge and the current head
719
720 my @parents; # parents found for this commit
721
722 # simple loop to split the merges
723 # per branch
724 foreach my $merge (@{$ps->{merges}}) {
725 my $branch = git_branchname($merge);
726 unless (defined $branches{$branch} ){
727 $branches{$branch} = [];
728 }
729 push @{$branches{$branch}}, $merge;
730 }
731
732 #
733 # foreach branch find a merge base and walk it to the
734 # head where we are, collecting the merged patchsets that
735 # Arch has recorded. Keep that in @have
736 # Compare that with the commits on the other branch
737 # between merge-base and the tip of the branch (@need)
738 # and see if we have a series of consecutive patches
739 # starting from the merge base. The tip of the series
740 # of consecutive patches merged is our new parent for
741 # that branch.
742 #
743 foreach my $branch (keys %branches) {
744
745 # check that we actually know about the branch
746 next unless -e "$git_dir/refs/heads/$branch";
747
748 my $mergebase = `git-merge-base $branch $ps->{branch}`;
749 if ($?) {
750 # Don't die here, Arch supports one-way cherry-picking
751 # between branches with no common base (or any relationship
752 # at all beforehand)
753 warn "Cannot find merge base for $branch and $ps->{branch}";
754 next;
755 }
756 chomp $mergebase;
757
758 # now walk up to the mergepoint collecting what patches we have
759 my $branchtip = git_rev_parse($ps->{branch});
760 my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
761 my %have; # collected merges this branch has
762 foreach my $merge (@{$ps->{merges}}) {
763 $have{$merge} = 1;
764 }
765 my %ancestorshave;
766 foreach my $par (@ancestors) {
767 $par = commitid2pset($par);
768 if (defined $par->{merges}) {
769 foreach my $merge (@{$par->{merges}}) {
770 $ancestorshave{$merge}=1;
771 }
772 }
773 }
774 # print "++++ Merges in $ps->{id} are....\n";
775 # my @have = sort keys %have; print Dumper(\@have);
776
777 # merge what we have with what ancestors have
778 %have = (%have, %ancestorshave);
779
780 # see what the remote branch has - these are the merges we
781 # will want to have in a consecutive series from the mergebase
782 my $otherbranchtip = git_rev_parse($branch);
783 my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
784 my @need;
785 foreach my $needps (@needraw) { # get the psets
786 $needps = commitid2pset($needps);
787 # git-rev-list will also
788 # list commits merged in via earlier
789 # merges. we are only interested in commits
790 # from the branch we're looking at
791 if ($branch eq $needps->{branch}) {
792 push @need, $needps->{id};
793 }
794 }
795
796 # print "++++ Merges from $branch we want are....\n";
797 # print Dumper(\@need);
798
799 my $newparent;
800 while (my $needed_commit = pop @need) {
801 if ($have{$needed_commit}) {
802 $newparent = $needed_commit;
803 } else {
804 last; # break out of the while
805 }
806 }
807 if ($newparent) {
808 push @parents, $newparent;
809 }
810
811
812 } # end foreach branch
813
814 # prune redundant parents
815 my %parents;
816 foreach my $p (@parents) {
817 $parents{$p} = 1;
818 }
819 foreach my $p (@parents) {
820 next unless exists $psets{$p}{merges};
821 next unless ref $psets{$p}{merges};
822 my @merges = @{$psets{$p}{merges}};
823 foreach my $merge (@merges) {
824 if ($parents{$merge}) {
825 delete $parents{$merge};
826 }
827 }
828 }
829 @parents = keys %parents;
830 @parents = map { " -p " . ptag($_) } @parents;
831 return @parents;
832 }
833
834 sub git_rev_parse {
835 my $name = shift;
836 my $val = `git-rev-parse $name`;
837 die "Error: git-rev-parse $name" if $?;
838 chomp $val;
839 return $val;
840 }
841
842 # resolve a SHA1 to a known patchset
843 sub commitid2pset {
844 my $commitid = shift;
845 chomp $commitid;
846 my $name = $rptags{$commitid}
847 || die "Cannot find reverse tag mapping for $commitid";
848 $name =~ s|,|/|;
849 my $ps = $psets{$name}
850 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
851 return $ps;
852 }