archimport: add -D <depth> and -a switch
[git/git.git] / git-archimport.perl
CommitLineData
d3968363
ML
1#!/usr/bin/perl -w
2#
3# This tool is copyright (c) 2005, Martin Langhoff.
4# It is released under the Gnu Public License, version 2.
5#
6# The basic idea is to walk the output of tla abrowse,
7# fetch the changesets and apply them.
8#
241b5967 9
d3968363
ML
10=head1 Invocation
11
241b5967 12 git-archimport [ -h ] [ -v ] [ -T ] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
d3968363 13
241b5967
ML
14Imports a project from one or more Arch repositories. It will follow branches
15and repositories within the namespaces defined by the <archive/branch>
16parameters suppplied. If it cannot find the remote branch a merge comes from
17it will just import it as a regular commit. If it can find it, it will mark it
18as a merge whenever possible.
d3968363 19
241b5967 20See man (1) git-archimport for more details.
d3968363 21
241b5967 22=head1 TODO
d3968363 23
241b5967 24 - create tag objects instead of ref tags
d3968363 25 - audit shell-escaping of filenames
241b5967
ML
26 - hide our private tags somewhere smarter
27 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
d3968363
ML
28
29=head1 Devel tricks
30
31Add print in front of the shell commands invoked via backticks.
32
22ff00fc
EW
33=head1 Devel Notes
34
35There are several places where Arch and git terminology are intermixed
36and potentially confused.
37
38The notion of a "branch" in git is approximately equivalent to
39a "archive/category--branch--version" in Arch. Also, it should be noted
40that the "--branch" portion of "archive/category--branch--version" is really
41optional in Arch although not many people (nor tools!) seem to know this.
42This means that "archive/category--version" is also a valid "branch"
43in git terms.
44
45We always refer to Arch names by their fully qualified variant (which
46means the "archive" name is prefixed.
47
48For people unfamiliar with Arch, an "archive" is the term for "repository",
49and can contain multiple, unrelated branches.
50
d3968363
ML
51=cut
52
53use strict;
54use warnings;
55use Getopt::Std;
42f44b08 56use File::Temp qw(tempdir);
f88961a8 57use File::Path qw(mkpath rmtree);
d3968363 58use File::Basename qw(basename dirname);
d3968363
ML
59use Data::Dumper qw/ Dumper /;
60use IPC::Open2;
61
62$SIG{'PIPE'}="IGNORE";
63$ENV{'TZ'}="UTC";
64
1d4710d0
ML
65my $git_dir = $ENV{"GIT_DIR"} || ".git";
66$ENV{"GIT_DIR"} = $git_dir;
a7fb51d3 67my $ptag_dir = "$git_dir/archimport/tags";
1d4710d0 68
42f44b08 69our($opt_h,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
d3968363
ML
70
71sub usage() {
72 print STDERR <<END;
73Usage: ${\basename $0} # fetch/update GIT from Arch
42f44b08 74 [ -o ] [ -h ] [ -v ] [ -T ] [ -a ] [ -D depth ] [ -t tempdir ]
d3968363
ML
75 repository/arch-branch [ repository/arch-branch] ...
76END
77 exit(1);
78}
79
42f44b08 80getopts("Thvat:D:") or usage();
d3968363
ML
81usage if $opt_h;
82
83@ARGV >= 1 or usage();
42f44b08
EW
84# $arch_branches:
85# values associated with keys:
86# =1 - Arch version / git 'branch' detected via abrowse on a limit
87# >1 - Arch version / git 'branch' of an auxilliary branch we've merged
88my %arch_branches = map { $_ => 1 } @ARGV;
d3968363 89
5744f277
EW
90$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
91my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
127bf00f 92$opt_v && print "+ Using $tmp as temporary directory\n";
d3968363 93
42f44b08
EW
94my %reachable = (); # Arch repositories we can access
95my %unreachable = (); # Arch repositories we can't access :<
d3968363 96my @psets = (); # the collection
b779d5f0
ML
97my %psets = (); # the collection, by name
98
99my %rptags = (); # my reverse private tags
100 # to map a SHA1 to a commitid
2777ef76 101my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
d3968363 102
42f44b08
EW
103sub do_abrowse {
104 my $stage = shift;
105 while (my ($limit, $level) = each %arch_branches) {
106 next unless $level == $stage;
107
108 open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
109 or die "Problems with tla abrowse: $!";
d3968363 110
42f44b08
EW
111 my %ps = (); # the current one
112 my $lastseen = '';
d3968363 113
42f44b08
EW
114 while (<ABROWSE>) {
115 chomp;
d3968363 116
42f44b08
EW
117 # first record padded w 8 spaces
118 if (s/^\s{8}\b//) {
119 my ($id, $type) = split(m/\s+/, $_, 2);
120
121 my %last_ps;
122 # store the record we just captured
123 if (%ps && !exists $psets{ $ps{id} }) {
124 %last_ps = %ps; # break references
125 push (@psets, \%last_ps);
126 $psets{ $last_ps{id} } = \%last_ps;
127 }
d3968363 128
42f44b08
EW
129 my $branch = extract_versionname($id);
130 %ps = ( id => $id, branch => $branch );
131 if (%last_ps && ($last_ps{branch} eq $branch)) {
132 $ps{parent_id} = $last_ps{id};
133 }
134
135 $arch_branches{$branch} = 1;
136 $lastseen = 'id';
137
138 # deal with types (should work with baz or tla):
139 if ($type =~ m/\(.*changeset\)/) {
140 $ps{type} = 's';
141 } elsif ($type =~ /\(.*import\)/) {
142 $ps{type} = 'i';
143 } elsif ($type =~ m/\(tag.*\)/) {
144 $ps{type} = 't';
145 # read which revision we've tagged when we parse the log
146 #$ps{tag} = $1;
147 } else {
148 warn "Unknown type $type";
149 }
150
151 $arch_branches{$branch} = 1;
152 $lastseen = 'id';
153 } elsif (s/^\s{10}//) {
154 # 10 leading spaces or more
155 # indicate commit metadata
156
157 # date
158 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
159 $ps{date} = $1;
160 $lastseen = 'date';
161 } elsif ($_ eq 'merges in:') {
162 $ps{merges} = [];
163 $lastseen = 'merges';
164 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
165 my $id = $_;
166 push (@{$ps{merges}}, $id);
167
168 # aggressive branch finding:
169 if ($opt_D) {
170 my $branch = extract_versionname($id);
171 my $repo = extract_reponame($branch);
172
173 if (archive_reachable($repo) &&
174 !defined $arch_branches{$branch}) {
175 $arch_branches{$branch} = $stage + 1;
176 }
177 }
178 } else {
179 warn "more metadata after merges!?: $_\n" unless /^\s*$/;
d3968363 180 }
d3968363 181 }
d3968363 182 }
d3968363 183
42f44b08
EW
184 if (%ps && !exists $psets{ $ps{id} }) {
185 my %temp = %ps; # break references
186 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
187 $temp{parent_id} = $psets[$#psets]{id};
188 }
189 push (@psets, \%temp);
190 $psets{ $temp{id} } = \%temp;
191 }
192
193 close ABROWSE or die "$TLA abrowse failed on $limit\n";
194 }
d3968363
ML
195} # end foreach $root
196
42f44b08
EW
197do_abrowse(1);
198my $depth = 2;
199$opt_D ||= 0;
200while ($depth <= $opt_D) {
201 do_abrowse($depth);
202 $depth++;
203}
204
d3968363 205## Order patches by time
42f44b08
EW
206# FIXME see if we can find a more optimal way to do this by graphing
207# the ancestry data and walking it, that way we won't have to rely on
208# client-supplied dates
d3968363
ML
209@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
210
211#print Dumper \@psets;
212
213##
214## TODO cleanup irrelevant patches
215## and put an initial import
216## or a full tag
3292ae47 217my $import = 0;
1d4710d0 218unless (-d $git_dir) { # initial import
d3968363
ML
219 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
220 print "Starting import from $psets[0]{id}\n";
3292ae47
ML
221 `git-init-db`;
222 die $! if $?;
223 $import = 1;
d3968363
ML
224 } else {
225 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
226 }
b779d5f0
ML
227} else { # progressing an import
228 # load the rptags
42f44b08 229 opendir(DIR, $ptag_dir)
b779d5f0
ML
230 || die "can't opendir: $!";
231 while (my $file = readdir(DIR)) {
a7fb51d3
EW
232 # skip non-interesting-files
233 next unless -f "$ptag_dir/$file";
234
235 # convert first '--' to '/' from old git-archimport to use
236 # as an archivename/c--b--v private tag
237 if ($file !~ m!,!) {
238 my $oldfile = $file;
239 $file =~ s!--!,!;
240 print STDERR "converting old tag $oldfile to $file\n";
241 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
242 }
b779d5f0
ML
243 my $sha = ptag($file);
244 chomp $sha;
b779d5f0
ML
245 $rptags{$sha} = $file;
246 }
247 closedir DIR;
d3968363
ML
248}
249
3292ae47 250# process patchsets
22ff00fc
EW
251# extract the Arch repository name (Arch "archive" in Arch-speak)
252sub extract_reponame {
253 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
254 return (split(/\//, $fq_cvbr))[0];
255}
256
257sub extract_versionname {
258 my $name = shift;
259 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
260 return $name;
261}
d3968363 262
22ff00fc
EW
263# convert a fully-qualified revision or version to a unique dirname:
264# normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
265# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
266#
267# the git notion of a branch is closer to
268# archive/category--branch--version than archive/category--branch, so we
269# use this to convert to git branch names.
270# Also, keep archive names but replace '/' with ',' since it won't require
271# subdirectories, and is safer than swapping '--' which could confuse
272# reverse-mapping when dealing with bastard branches that
273# are just archive/category--version (no --branch)
274sub tree_dirname {
275 my $revision = shift;
276 my $name = extract_versionname($revision);
277 $name =~ s#/#,#;
278 return $name;
279}
280
fee3365f
ML
281# old versions of git-archimport just use the <category--branch> part:
282sub old_style_branchname {
283 my $id = shift;
284 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
285 chomp $ret;
286 return $ret;
287}
288
289*git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
22ff00fc
EW
290
291# process patchsets
292foreach my $ps (@psets) {
293 $ps->{branch} = git_branchname($ps->{id});
d3968363
ML
294
295 #
296 # ensure we have a clean state
297 #
1136fb52 298 if (`git-diff-files`) {
d3968363
ML
299 die "Unclean tree when about to process $ps->{id} " .
300 " - did we fail to commit cleanly before?";
301 }
302 die $! if $?;
303
3292ae47
ML
304 #
305 # skip commits already in repo
306 #
307 if (ptag($ps->{id})) {
37f15d50 308 $opt_v && print " * Skipping already imported: $ps->{id}\n";
3292ae47
ML
309 next;
310 }
311
37f15d50
ML
312 print " * Starting to work on $ps->{id}\n";
313
d3968363
ML
314 #
315 # create the branch if needed
316 #
3292ae47
ML
317 if ($ps->{type} eq 'i' && !$import) {
318 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
d3968363
ML
319 }
320
3292ae47 321 unless ($import) { # skip for import
1d4710d0 322 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
d3968363 323 # we know about this branch
f88961a8 324 system('git-checkout',$ps->{branch});
d3968363
ML
325 } else {
326 # new branch! we need to verify a few things
327 die "Branch on a non-tag!" unless $ps->{type} eq 't';
328 my $branchpoint = ptag($ps->{tag});
329 die "Tagging from unknown id unsupported: $ps->{tag}"
330 unless $branchpoint;
331
332 # find where we are supposed to branch from
f88961a8 333 system('git-checkout','-b',$ps->{branch},$branchpoint);
52586ecb
ML
334
335 # If we trust Arch with the fact that this is just
336 # a tag, and it does not affect the state of the tree
337 # then we just tag and move on
338 tag($ps->{id}, $branchpoint);
339 ptag($ps->{id}, $branchpoint);
340 print " * Tagged $ps->{id} at $branchpoint\n";
341 next;
d3968363
ML
342 }
343 die $! if $?;
344 }
345
d3968363
ML
346 #
347 # Apply the import/changeset/merge into the working tree
348 #
349 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
d3968363 350 apply_import($ps) or die $!;
3292ae47 351 $import=0;
d3968363
ML
352 } elsif ($ps->{type} eq 's') {
353 apply_cset($ps);
354 }
355
356 #
357 # prepare update git's index, based on what arch knows
358 # about the pset, resolve parents, etc
359 #
360 my $tree;
361
f88961a8 362 my $commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
d3968363
ML
363 die "Error in cat-archive-log: $!" if $?;
364
365 # parselog will git-add/rm files
366 # and generally prepare things for the commit
367 # NOTE: parselog will shell-quote filenames!
368 my ($sum, $msg, $add, $del, $mod, $ren) = parselog($commitlog);
369 my $logmessage = "$sum\n$msg";
370
371
372 # imports don't give us good info
373 # on added files. Shame on them
374 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
1d4710d0 375 `find . -type f -print0 | grep -zv '^./$git_dir' | xargs -0 -l100 git-update-index --add`;
215a7ad1 376 `git-ls-files --deleted -z | xargs --no-run-if-empty -0 -l100 git-update-index --remove`;
d3968363
ML
377 }
378
379 if (@$add) {
380 while (@$add) {
381 my @slice = splice(@$add, 0, 100);
382 my $slice = join(' ', @slice);
215a7ad1
JH
383 `git-update-index --add $slice`;
384 die "Error in git-update-index --add: $!" if $?;
d3968363
ML
385 }
386 }
387 if (@$del) {
388 foreach my $file (@$del) {
389 unlink $file or die "Problems deleting $file : $!";
390 }
391 while (@$del) {
392 my @slice = splice(@$del, 0, 100);
393 my $slice = join(' ', @slice);
215a7ad1
JH
394 `git-update-index --remove $slice`;
395 die "Error in git-update-index --remove: $!" if $?;
d3968363
ML
396 }
397 }
398 if (@$ren) { # renamed
399 if (@$ren % 2) {
400 die "Odd number of entries in rename!?";
401 }
402 ;
403 while (@$ren) {
404 my $from = pop @$ren;
405 my $to = pop @$ren;
406
407 unless (-d dirname($to)) {
408 mkpath(dirname($to)); # will die on err
409 }
410 #print "moving $from $to";
411 `mv $from $to`;
412 die "Error renaming $from $to : $!" if $?;
215a7ad1
JH
413 `git-update-index --remove $from`;
414 die "Error in git-update-index --remove: $!" if $?;
415 `git-update-index --add $to`;
416 die "Error in git-update-index --add: $!" if $?;
d3968363
ML
417 }
418
419 }
420 if (@$mod) { # must be _after_ renames
421 while (@$mod) {
422 my @slice = splice(@$mod, 0, 100);
423 my $slice = join(' ', @slice);
215a7ad1
JH
424 `git-update-index $slice`;
425 die "Error in git-update-index: $!" if $?;
d3968363
ML
426 }
427 }
428
215a7ad1 429 # warn "errors when running git-update-index! $!";
d3968363
ML
430 $tree = `git-write-tree`;
431 die "cannot write tree $!" if $?;
432 chomp $tree;
433
434
435 #
436 # Who's your daddy?
437 #
438 my @par;
1d4710d0 439 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
f88961a8 440 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
d3968363
ML
441 my $p = <HEAD>;
442 close HEAD;
443 chomp $p;
444 push @par, '-p', $p;
445 } else {
446 if ($ps->{type} eq 's') {
447 warn "Could not find the right head for the branch $ps->{branch}";
448 }
449 }
450 }
451
b779d5f0
ML
452 if ($ps->{merges}) {
453 push @par, find_parents($ps);
454 }
d3968363
ML
455
456 #
457 # Commit, tag and clean state
458 #
459 $ENV{TZ} = 'GMT';
460 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
461 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
462 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
463 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
464 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
465 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
466
467 my ($pid, $commit_rh, $commit_wh);
468 $commit_rh = 'commit_rh';
469 $commit_wh = 'commit_wh';
470
f88961a8 471 $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
d3968363
ML
472 or die $!;
473 print WRITER $logmessage; # write
474 close WRITER;
475 my $commitid = <READER>; # read
476 chomp $commitid;
477 close READER;
478 waitpid $pid,0; # close;
479
480 if (length $commitid != 40) {
481 die "Something went wrong with the commit! $! $commitid";
482 }
483 #
484 # Update the branch
485 #
f88961a8 486 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
d3968363
ML
487 print HEAD $commitid;
488 close HEAD;
8366a10a 489 system('git-update-ref', 'HEAD', "$ps->{branch}");
d3968363
ML
490
491 # tag accordingly
492 ptag($ps->{id}, $commitid); # private tag
493 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
494 tag($ps->{id}, $commitid);
495 }
496 print " * Committed $ps->{id}\n";
497 print " + tree $tree\n";
498 print " + commit $commitid\n";
b779d5f0 499 $opt_v && print " + commit date is $ps->{date} \n";
f88961a8 500 $opt_v && print " + parents: ",join(' ',@par),"\n";
d3968363
ML
501}
502
d3968363
ML
503sub apply_import {
504 my $ps = shift;
22ff00fc 505 my $bname = git_branchname($ps->{id});
d3968363 506
f88961a8 507 mkpath($tmp);
d3968363 508
f88961a8 509 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
d3968363 510 die "Cannot get import: $!" if $?;
f88961a8
EW
511 system('rsync','-aI','--delete', '--exclude',$git_dir,
512 '--exclude','.arch-ids','--exclude','{arch}',
513 "$tmp/import/", './');
d3968363
ML
514 die "Cannot rsync import:$!" if $?;
515
f88961a8 516 rmtree("$tmp/import");
d3968363
ML
517 die "Cannot remove tempdir: $!" if $?;
518
519
520 return 1;
521}
522
523sub apply_cset {
524 my $ps = shift;
525
f88961a8 526 mkpath($tmp);
d3968363
ML
527
528 # get the changeset
f88961a8 529 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
d3968363
ML
530 die "Cannot get changeset: $!" if $?;
531
532 # apply patches
533 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
534 # this can be sped up considerably by doing
535 # (find | xargs cat) | patch
536 # but that cna get mucked up by patches
537 # with missing trailing newlines or the standard
538 # 'missing newline' flag in the patch - possibly
539 # produced with an old/buggy diff.
540 # slow and safe, we invoke patch once per patchfile
541 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
542 die "Problem applying patches! $!" if $?;
543 }
544
545 # apply changed binary files
546 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
547 foreach my $mod (@modified) {
548 chomp $mod;
549 my $orig = $mod;
550 $orig =~ s/\.modified$//; # lazy
551 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
552 #print "rsync -p '$mod' '$orig'";
f88961a8 553 system('rsync','-p',$mod,"./$orig");
d3968363
ML
554 die "Problem applying binary changes! $!" if $?;
555 }
556 }
557
558 # bring in new files
f88961a8
EW
559 system('rsync','-aI','--exclude',$git_dir,
560 '--exclude','.arch-ids',
561 '--exclude', '{arch}',
562 "$tmp/changeset/new-files-archive/",'./');
d3968363
ML
563
564 # deleted files are hinted from the commitlog processing
565
f88961a8 566 rmtree("$tmp/changeset");
d3968363
ML
567}
568
569
570# =for reference
571# A log entry looks like
572# Revision: moodle-org--moodle--1.3.3--patch-15
573# Archive: arch-eduforge@catalyst.net.nz--2004
574# Creator: Penny Leach <penny@catalyst.net.nz>
575# Date: Wed May 25 14:15:34 NZST 2005
576# Standard-date: 2005-05-25 02:15:34 GMT
577# New-files: lang/de/.arch-ids/block_glossary_random.php.id
578# lang/de/.arch-ids/block_html.php.id
579# New-directories: lang/de/help/questionnaire
580# lang/de/help/questionnaire/.arch-ids
581# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
582# db_sears.sql db/db_sears.sql
583# Removed-files: lang/be/docs/.arch-ids/release.html.id
584# lang/be/docs/.arch-ids/releaseold.html.id
585# Modified-files: admin/cron.php admin/delete.php
586# admin/editor.html backup/lib.php backup/restore.php
587# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
588# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
589# Keywords:
590#
591# Updating yadda tadda tadda madda
592sub parselog {
593 my $log = shift;
594 #print $log;
595
596 my (@add, @del, @mod, @ren, @kw, $sum, $msg );
597
598 if ($log =~ m/(?:\n|^)New-files:(.*?)(?=\n\w)/s ) {
599 my $files = $1;
600 @add = split(m/\s+/s, $files);
601 }
602
603 if ($log =~ m/(?:\n|^)Removed-files:(.*?)(?=\n\w)/s ) {
604 my $files = $1;
605 @del = split(m/\s+/s, $files);
606 }
607
608 if ($log =~ m/(?:\n|^)Modified-files:(.*?)(?=\n\w)/s ) {
609 my $files = $1;
610 @mod = split(m/\s+/s, $files);
611 }
612
613 if ($log =~ m/(?:\n|^)Renamed-files:(.*?)(?=\n\w)/s ) {
614 my $files = $1;
615 @ren = split(m/\s+/s, $files);
616 }
617
618 $sum ='';
619 if ($log =~ m/^Summary:(.+?)$/m ) {
620 $sum = $1;
621 $sum =~ s/^\s+//;
622 $sum =~ s/\s+$//;
623 }
624
625 $msg = '';
626 if ($log =~ m/\n\n(.+)$/s) {
627 $msg = $1;
628 $msg =~ s/^\s+//;
629 $msg =~ s/\s+$//;
630 }
631
632
633 # cleanup the arrays
634 foreach my $ref ( (\@add, \@del, \@mod, \@ren) ) {
635 my @tmp = ();
636 while (my $t = pop @$ref) {
637 next unless length ($t);
638 next if $t =~ m!\{arch\}/!;
639 next if $t =~ m!\.arch-ids/!;
640 next if $t =~ m!\.arch-inventory$!;
f84f9d38
ML
641 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
642 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
643 if ($t =~ /\\/ ){
f88961a8 644 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
f84f9d38 645 }
f88961a8 646 push (@tmp, $t);
d3968363
ML
647 }
648 @$ref = @tmp;
649 }
650
651 #print Dumper [$sum, $msg, \@add, \@del, \@mod, \@ren];
652 return ($sum, $msg, \@add, \@del, \@mod, \@ren);
653}
654
655# write/read a tag
656sub tag {
657 my ($tag, $commit) = @_;
a7fb51d3 658
fee3365f
ML
659 if ($opt_o) {
660 $tag =~ s|/|--|g;
661 } else {
662 # don't use subdirs for tags yet, it could screw up other porcelains
663 $tag =~ s|/|,|g;
664 }
d3968363
ML
665
666 if ($commit) {
a7fb51d3 667 open(C,">","$git_dir/refs/tags/$tag")
d3968363
ML
668 or die "Cannot create tag $tag: $!\n";
669 print C "$commit\n"
670 or die "Cannot write tag $tag: $!\n";
671 close(C)
672 or die "Cannot write tag $tag: $!\n";
a7fb51d3 673 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
d3968363 674 } else { # read
a7fb51d3 675 open(C,"<","$git_dir/refs/tags/$tag")
d3968363
ML
676 or die "Cannot read tag $tag: $!\n";
677 $commit = <C>;
678 chomp $commit;
679 die "Error reading tag $tag: $!\n" unless length $commit == 40;
680 close(C)
681 or die "Cannot read tag $tag: $!\n";
682 return $commit;
683 }
684}
685
686# write/read a private tag
687# reads fail softly if the tag isn't there
688sub ptag {
689 my ($tag, $commit) = @_;
a7fb51d3
EW
690
691 # don't use subdirs for tags yet, it could screw up other porcelains
692 $tag =~ s|/|,|g;
d3968363 693
a7fb51d3
EW
694 my $tag_file = "$ptag_dir/$tag";
695 my $tag_branch_dir = dirname($tag_file);
696 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
d3968363
ML
697
698 if ($commit) { # write
a7fb51d3 699 open(C,">",$tag_file)
d3968363
ML
700 or die "Cannot create tag $tag: $!\n";
701 print C "$commit\n"
702 or die "Cannot write tag $tag: $!\n";
703 close(C)
704 or die "Cannot write tag $tag: $!\n";
b779d5f0
ML
705 $rptags{$commit} = $tag
706 unless $tag =~ m/--base-0$/;
d3968363
ML
707 } else { # read
708 # if the tag isn't there, return 0
a7fb51d3 709 unless ( -s $tag_file) {
d3968363
ML
710 return 0;
711 }
a7fb51d3 712 open(C,"<",$tag_file)
d3968363
ML
713 or die "Cannot read tag $tag: $!\n";
714 $commit = <C>;
715 chomp $commit;
716 die "Error reading tag $tag: $!\n" unless length $commit == 40;
717 close(C)
718 or die "Cannot read tag $tag: $!\n";
b779d5f0
ML
719 unless (defined $rptags{$commit}) {
720 $rptags{$commit} = $tag;
721 }
d3968363
ML
722 return $commit;
723 }
724}
b779d5f0
ML
725
726sub find_parents {
727 #
728 # Identify what branches are merging into me
729 # and whether we are fully merged
730 # git-merge-base <headsha> <headsha> should tell
731 # me what the base of the merge should be
732 #
733 my $ps = shift;
734
735 my %branches; # holds an arrayref per branch
736 # the arrayref contains a list of
737 # merged patches between the base
738 # of the merge and the current head
739
740 my @parents; # parents found for this commit
741
742 # simple loop to split the merges
743 # per branch
744 foreach my $merge (@{$ps->{merges}}) {
22ff00fc 745 my $branch = git_branchname($merge);
b779d5f0
ML
746 unless (defined $branches{$branch} ){
747 $branches{$branch} = [];
748 }
749 push @{$branches{$branch}}, $merge;
750 }
751
752 #
753 # foreach branch find a merge base and walk it to the
754 # head where we are, collecting the merged patchsets that
755 # Arch has recorded. Keep that in @have
756 # Compare that with the commits on the other branch
757 # between merge-base and the tip of the branch (@need)
758 # and see if we have a series of consecutive patches
759 # starting from the merge base. The tip of the series
760 # of consecutive patches merged is our new parent for
761 # that branch.
762 #
763 foreach my $branch (keys %branches) {
37f15d50
ML
764
765 # check that we actually know about the branch
766 next unless -e "$git_dir/refs/heads/$branch";
767
b779d5f0 768 my $mergebase = `git-merge-base $branch $ps->{branch}`;
9b626e75
EW
769 if ($?) {
770 # Don't die here, Arch supports one-way cherry-picking
771 # between branches with no common base (or any relationship
772 # at all beforehand)
773 warn "Cannot find merge base for $branch and $ps->{branch}";
774 next;
775 }
b779d5f0
ML
776 chomp $mergebase;
777
778 # now walk up to the mergepoint collecting what patches we have
779 my $branchtip = git_rev_parse($ps->{branch});
780 my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
781 my %have; # collected merges this branch has
782 foreach my $merge (@{$ps->{merges}}) {
783 $have{$merge} = 1;
784 }
785 my %ancestorshave;
786 foreach my $par (@ancestors) {
787 $par = commitid2pset($par);
788 if (defined $par->{merges}) {
789 foreach my $merge (@{$par->{merges}}) {
790 $ancestorshave{$merge}=1;
791 }
792 }
793 }
794 # print "++++ Merges in $ps->{id} are....\n";
795 # my @have = sort keys %have; print Dumper(\@have);
796
797 # merge what we have with what ancestors have
798 %have = (%have, %ancestorshave);
799
800 # see what the remote branch has - these are the merges we
801 # will want to have in a consecutive series from the mergebase
802 my $otherbranchtip = git_rev_parse($branch);
803 my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
804 my @need;
805 foreach my $needps (@needraw) { # get the psets
806 $needps = commitid2pset($needps);
807 # git-rev-list will also
808 # list commits merged in via earlier
809 # merges. we are only interested in commits
810 # from the branch we're looking at
811 if ($branch eq $needps->{branch}) {
812 push @need, $needps->{id};
813 }
814 }
815
816 # print "++++ Merges from $branch we want are....\n";
817 # print Dumper(\@need);
818
819 my $newparent;
820 while (my $needed_commit = pop @need) {
821 if ($have{$needed_commit}) {
822 $newparent = $needed_commit;
823 } else {
824 last; # break out of the while
825 }
826 }
827 if ($newparent) {
828 push @parents, $newparent;
829 }
830
831
832 } # end foreach branch
833
834 # prune redundant parents
835 my %parents;
836 foreach my $p (@parents) {
837 $parents{$p} = 1;
838 }
839 foreach my $p (@parents) {
840 next unless exists $psets{$p}{merges};
841 next unless ref $psets{$p}{merges};
842 my @merges = @{$psets{$p}{merges}};
843 foreach my $merge (@merges) {
844 if ($parents{$merge}) {
845 delete $parents{$merge};
846 }
847 }
848 }
42f44b08 849
f88961a8
EW
850 @parents = ();
851 foreach (keys %parents) {
852 push @parents, '-p', ptag($_);
853 }
b779d5f0
ML
854 return @parents;
855}
856
857sub git_rev_parse {
858 my $name = shift;
859 my $val = `git-rev-parse $name`;
860 die "Error: git-rev-parse $name" if $?;
861 chomp $val;
862 return $val;
863}
864
865# resolve a SHA1 to a known patchset
866sub commitid2pset {
867 my $commitid = shift;
868 chomp $commitid;
869 my $name = $rptags{$commitid}
870 || die "Cannot find reverse tag mapping for $commitid";
a7fb51d3 871 $name =~ s|,|/|;
b779d5f0
ML
872 my $ps = $psets{$name}
873 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
874 return $ps;
875}
2777ef76 876
42f44b08 877
2777ef76
EW
878# an alterative to `command` that allows input to be passed as an array
879# to work around shell problems with weird characters in arguments
880sub safe_pipe_capture {
881 my @output;
882 if (my $pid = open my $child, '-|') {
883 @output = (<$child>);
884 close $child or die join(' ',@_).": $! $?";
885 } else {
886 exec(@_) or die $?; # exec() can fail the executable can't be found
887 }
888 return wantarray ? @output : join('',@output);
889}
890
42f44b08
EW
891# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
892sub arch_tree_id {
893 my $dir = shift;
894 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
895 return $ret;
896}
897
898sub archive_reachable {
899 my $archive = shift;
900 return 1 if $reachable{$archive};
901 return 0 if $unreachable{$archive};
902
903 if (system "$TLA whereis-archive $archive >/dev/null") {
904 if ($opt_a && (system($TLA,'register-archive',
905 "http://mirrors.sourcecontrol.net/$archive") == 0)) {
906 $reachable{$archive} = 1;
907 return 1;
908 }
909 print STDERR "Archive is unreachable: $archive\n";
910 $unreachable{$archive} = 1;
911 return 0;
912 } else {
913 $reachable{$archive} = 1;
914 return 1;
915 }
916}
2777ef76 917