archimport: safer log file parsing
[git/git.git] / git-archimport.perl
CommitLineData
d3968363
ML
1#!/usr/bin/perl -w
2#
3# This tool is copyright (c) 2005, Martin Langhoff.
4# It is released under the Gnu Public License, version 2.
5#
6# The basic idea is to walk the output of tla abrowse,
7# fetch the changesets and apply them.
8#
241b5967 9
d3968363
ML
10=head1 Invocation
11
241b5967 12 git-archimport [ -h ] [ -v ] [ -T ] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
d3968363 13
241b5967
ML
14Imports a project from one or more Arch repositories. It will follow branches
15and repositories within the namespaces defined by the <archive/branch>
16parameters suppplied. If it cannot find the remote branch a merge comes from
17it will just import it as a regular commit. If it can find it, it will mark it
18as a merge whenever possible.
d3968363 19
241b5967 20See man (1) git-archimport for more details.
d3968363 21
241b5967 22=head1 TODO
d3968363 23
241b5967 24 - create tag objects instead of ref tags
d3968363 25 - audit shell-escaping of filenames
241b5967
ML
26 - hide our private tags somewhere smarter
27 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
d3968363
ML
28
29=head1 Devel tricks
30
31Add print in front of the shell commands invoked via backticks.
32
22ff00fc
EW
33=head1 Devel Notes
34
35There are several places where Arch and git terminology are intermixed
36and potentially confused.
37
38The notion of a "branch" in git is approximately equivalent to
39a "archive/category--branch--version" in Arch. Also, it should be noted
40that the "--branch" portion of "archive/category--branch--version" is really
41optional in Arch although not many people (nor tools!) seem to know this.
42This means that "archive/category--version" is also a valid "branch"
43in git terms.
44
45We always refer to Arch names by their fully qualified variant (which
46means the "archive" name is prefixed.
47
48For people unfamiliar with Arch, an "archive" is the term for "repository",
49and can contain multiple, unrelated branches.
50
d3968363
ML
51=cut
52
53use strict;
54use warnings;
55use Getopt::Std;
42f44b08 56use File::Temp qw(tempdir);
f88961a8 57use File::Path qw(mkpath rmtree);
d3968363 58use File::Basename qw(basename dirname);
d3968363
ML
59use Data::Dumper qw/ Dumper /;
60use IPC::Open2;
61
62$SIG{'PIPE'}="IGNORE";
63$ENV{'TZ'}="UTC";
64
1d4710d0
ML
65my $git_dir = $ENV{"GIT_DIR"} || ".git";
66$ENV{"GIT_DIR"} = $git_dir;
a7fb51d3 67my $ptag_dir = "$git_dir/archimport/tags";
1d4710d0 68
42f44b08 69our($opt_h,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
d3968363
ML
70
71sub usage() {
72 print STDERR <<END;
73Usage: ${\basename $0} # fetch/update GIT from Arch
42f44b08 74 [ -o ] [ -h ] [ -v ] [ -T ] [ -a ] [ -D depth ] [ -t tempdir ]
d3968363
ML
75 repository/arch-branch [ repository/arch-branch] ...
76END
77 exit(1);
78}
79
42f44b08 80getopts("Thvat:D:") or usage();
d3968363
ML
81usage if $opt_h;
82
83@ARGV >= 1 or usage();
42f44b08
EW
84# $arch_branches:
85# values associated with keys:
86# =1 - Arch version / git 'branch' detected via abrowse on a limit
87# >1 - Arch version / git 'branch' of an auxilliary branch we've merged
88my %arch_branches = map { $_ => 1 } @ARGV;
d3968363 89
5744f277
EW
90$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
91my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
127bf00f 92$opt_v && print "+ Using $tmp as temporary directory\n";
d3968363 93
42f44b08
EW
94my %reachable = (); # Arch repositories we can access
95my %unreachable = (); # Arch repositories we can't access :<
d3968363 96my @psets = (); # the collection
b779d5f0
ML
97my %psets = (); # the collection, by name
98
99my %rptags = (); # my reverse private tags
100 # to map a SHA1 to a commitid
2777ef76 101my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
d3968363 102
42f44b08
EW
103sub do_abrowse {
104 my $stage = shift;
105 while (my ($limit, $level) = each %arch_branches) {
106 next unless $level == $stage;
107
108 open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
109 or die "Problems with tla abrowse: $!";
d3968363 110
42f44b08
EW
111 my %ps = (); # the current one
112 my $lastseen = '';
d3968363 113
42f44b08
EW
114 while (<ABROWSE>) {
115 chomp;
d3968363 116
42f44b08
EW
117 # first record padded w 8 spaces
118 if (s/^\s{8}\b//) {
119 my ($id, $type) = split(m/\s+/, $_, 2);
120
121 my %last_ps;
122 # store the record we just captured
123 if (%ps && !exists $psets{ $ps{id} }) {
124 %last_ps = %ps; # break references
125 push (@psets, \%last_ps);
126 $psets{ $last_ps{id} } = \%last_ps;
127 }
d3968363 128
42f44b08
EW
129 my $branch = extract_versionname($id);
130 %ps = ( id => $id, branch => $branch );
131 if (%last_ps && ($last_ps{branch} eq $branch)) {
132 $ps{parent_id} = $last_ps{id};
133 }
134
135 $arch_branches{$branch} = 1;
136 $lastseen = 'id';
137
138 # deal with types (should work with baz or tla):
139 if ($type =~ m/\(.*changeset\)/) {
140 $ps{type} = 's';
141 } elsif ($type =~ /\(.*import\)/) {
142 $ps{type} = 'i';
6df896b5 143 } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
42f44b08
EW
144 $ps{type} = 't';
145 # read which revision we've tagged when we parse the log
6df896b5 146 $ps{tag} = $1;
42f44b08
EW
147 } else {
148 warn "Unknown type $type";
149 }
150
151 $arch_branches{$branch} = 1;
152 $lastseen = 'id';
153 } elsif (s/^\s{10}//) {
154 # 10 leading spaces or more
155 # indicate commit metadata
156
157 # date
158 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
159 $ps{date} = $1;
160 $lastseen = 'date';
161 } elsif ($_ eq 'merges in:') {
162 $ps{merges} = [];
163 $lastseen = 'merges';
164 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
165 my $id = $_;
166 push (@{$ps{merges}}, $id);
167
168 # aggressive branch finding:
169 if ($opt_D) {
170 my $branch = extract_versionname($id);
171 my $repo = extract_reponame($branch);
172
173 if (archive_reachable($repo) &&
174 !defined $arch_branches{$branch}) {
175 $arch_branches{$branch} = $stage + 1;
176 }
177 }
178 } else {
179 warn "more metadata after merges!?: $_\n" unless /^\s*$/;
d3968363 180 }
d3968363 181 }
d3968363 182 }
d3968363 183
42f44b08
EW
184 if (%ps && !exists $psets{ $ps{id} }) {
185 my %temp = %ps; # break references
186 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
187 $temp{parent_id} = $psets[$#psets]{id};
188 }
189 push (@psets, \%temp);
190 $psets{ $temp{id} } = \%temp;
191 }
192
193 close ABROWSE or die "$TLA abrowse failed on $limit\n";
194 }
d3968363
ML
195} # end foreach $root
196
42f44b08
EW
197do_abrowse(1);
198my $depth = 2;
199$opt_D ||= 0;
200while ($depth <= $opt_D) {
201 do_abrowse($depth);
202 $depth++;
203}
204
d3968363 205## Order patches by time
42f44b08
EW
206# FIXME see if we can find a more optimal way to do this by graphing
207# the ancestry data and walking it, that way we won't have to rely on
208# client-supplied dates
d3968363
ML
209@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
210
211#print Dumper \@psets;
212
213##
214## TODO cleanup irrelevant patches
215## and put an initial import
216## or a full tag
3292ae47 217my $import = 0;
1d4710d0 218unless (-d $git_dir) { # initial import
d3968363
ML
219 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
220 print "Starting import from $psets[0]{id}\n";
3292ae47
ML
221 `git-init-db`;
222 die $! if $?;
223 $import = 1;
d3968363
ML
224 } else {
225 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
226 }
b779d5f0
ML
227} else { # progressing an import
228 # load the rptags
42f44b08 229 opendir(DIR, $ptag_dir)
b779d5f0
ML
230 || die "can't opendir: $!";
231 while (my $file = readdir(DIR)) {
a7fb51d3
EW
232 # skip non-interesting-files
233 next unless -f "$ptag_dir/$file";
234
235 # convert first '--' to '/' from old git-archimport to use
236 # as an archivename/c--b--v private tag
237 if ($file !~ m!,!) {
238 my $oldfile = $file;
239 $file =~ s!--!,!;
240 print STDERR "converting old tag $oldfile to $file\n";
241 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
242 }
b779d5f0
ML
243 my $sha = ptag($file);
244 chomp $sha;
b779d5f0
ML
245 $rptags{$sha} = $file;
246 }
247 closedir DIR;
d3968363
ML
248}
249
3292ae47 250# process patchsets
22ff00fc
EW
251# extract the Arch repository name (Arch "archive" in Arch-speak)
252sub extract_reponame {
253 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
254 return (split(/\//, $fq_cvbr))[0];
255}
256
257sub extract_versionname {
258 my $name = shift;
259 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
260 return $name;
261}
d3968363 262
22ff00fc
EW
263# convert a fully-qualified revision or version to a unique dirname:
264# normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
265# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
266#
267# the git notion of a branch is closer to
268# archive/category--branch--version than archive/category--branch, so we
269# use this to convert to git branch names.
270# Also, keep archive names but replace '/' with ',' since it won't require
271# subdirectories, and is safer than swapping '--' which could confuse
272# reverse-mapping when dealing with bastard branches that
273# are just archive/category--version (no --branch)
274sub tree_dirname {
275 my $revision = shift;
276 my $name = extract_versionname($revision);
277 $name =~ s#/#,#;
278 return $name;
279}
280
fee3365f
ML
281# old versions of git-archimport just use the <category--branch> part:
282sub old_style_branchname {
283 my $id = shift;
284 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
285 chomp $ret;
286 return $ret;
287}
288
289*git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
22ff00fc
EW
290
291# process patchsets
292foreach my $ps (@psets) {
293 $ps->{branch} = git_branchname($ps->{id});
d3968363
ML
294
295 #
296 # ensure we have a clean state
297 #
1136fb52 298 if (`git-diff-files`) {
d3968363
ML
299 die "Unclean tree when about to process $ps->{id} " .
300 " - did we fail to commit cleanly before?";
301 }
302 die $! if $?;
303
3292ae47
ML
304 #
305 # skip commits already in repo
306 #
307 if (ptag($ps->{id})) {
37f15d50 308 $opt_v && print " * Skipping already imported: $ps->{id}\n";
3292ae47
ML
309 next;
310 }
311
37f15d50
ML
312 print " * Starting to work on $ps->{id}\n";
313
d3968363
ML
314 #
315 # create the branch if needed
316 #
3292ae47
ML
317 if ($ps->{type} eq 'i' && !$import) {
318 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
d3968363
ML
319 }
320
3292ae47 321 unless ($import) { # skip for import
1d4710d0 322 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
d3968363 323 # we know about this branch
f88961a8 324 system('git-checkout',$ps->{branch});
d3968363
ML
325 } else {
326 # new branch! we need to verify a few things
327 die "Branch on a non-tag!" unless $ps->{type} eq 't';
328 my $branchpoint = ptag($ps->{tag});
329 die "Tagging from unknown id unsupported: $ps->{tag}"
330 unless $branchpoint;
331
332 # find where we are supposed to branch from
f88961a8 333 system('git-checkout','-b',$ps->{branch},$branchpoint);
52586ecb
ML
334
335 # If we trust Arch with the fact that this is just
336 # a tag, and it does not affect the state of the tree
337 # then we just tag and move on
338 tag($ps->{id}, $branchpoint);
339 ptag($ps->{id}, $branchpoint);
340 print " * Tagged $ps->{id} at $branchpoint\n";
341 next;
d3968363
ML
342 }
343 die $! if $?;
344 }
345
d3968363
ML
346 #
347 # Apply the import/changeset/merge into the working tree
348 #
349 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
d3968363 350 apply_import($ps) or die $!;
3292ae47 351 $import=0;
d3968363
ML
352 } elsif ($ps->{type} eq 's') {
353 apply_cset($ps);
354 }
355
356 #
357 # prepare update git's index, based on what arch knows
358 # about the pset, resolve parents, etc
359 #
360 my $tree;
361
6df896b5 362 my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
d3968363
ML
363 die "Error in cat-archive-log: $!" if $?;
364
6df896b5 365 parselog($ps,\@commitlog);
d3968363
ML
366
367 # imports don't give us good info
368 # on added files. Shame on them
6df896b5
EW
369 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
370 system('git-ls-files --others -z | '.
371 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
372 system('git-ls-files --deleted -z | '.
373 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
d3968363
ML
374 }
375
6df896b5
EW
376 # TODO: handle removed_directories and renamed_directories:
377
378 if (my $add = $ps->{new_files}) {
d3968363
ML
379 while (@$add) {
380 my @slice = splice(@$add, 0, 100);
6df896b5
EW
381 system('git-update-index','--add','--',@slice) == 0 or
382 die "Error in git-update-index --add: $! $?\n";
d3968363
ML
383 }
384 }
6df896b5
EW
385
386 if (my $del = $ps->{removed_files}) {
387 unlink @$del;
d3968363
ML
388 while (@$del) {
389 my @slice = splice(@$del, 0, 100);
6df896b5
EW
390 system('git-update-index','--remove','--',@slice) == 0 or
391 die "Error in git-update-index --remove: $! $?\n";
d3968363
ML
392 }
393 }
6df896b5
EW
394
395 if (my $ren = $ps->{renamed_files}) { # renamed
d3968363
ML
396 if (@$ren % 2) {
397 die "Odd number of entries in rename!?";
398 }
6df896b5 399
d3968363 400 while (@$ren) {
6df896b5
EW
401 my $from = shift @$ren;
402 my $to = shift @$ren;
d3968363
ML
403
404 unless (-d dirname($to)) {
405 mkpath(dirname($to)); # will die on err
406 }
6df896b5
EW
407 print "moving $from $to";
408 rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
409 system('git-update-index','--remove','--',$from) == 0 or
410 die "Error in git-update-index --remove: $! $?\n";
411 system('git-update-index','--add','--',$to) == 0 or
412 die "Error in git-update-index --add: $! $?\n";
d3968363
ML
413 }
414
415 }
6df896b5
EW
416
417 if (my $mod = $ps->{modified_files}) {
d3968363
ML
418 while (@$mod) {
419 my @slice = splice(@$mod, 0, 100);
6df896b5
EW
420 system('git-update-index','--',@slice) == 0 or
421 die "Error in git-update-index: $! $?\n";
d3968363
ML
422 }
423 }
6df896b5 424
215a7ad1 425 # warn "errors when running git-update-index! $!";
d3968363
ML
426 $tree = `git-write-tree`;
427 die "cannot write tree $!" if $?;
428 chomp $tree;
d3968363
ML
429
430 #
431 # Who's your daddy?
432 #
433 my @par;
1d4710d0 434 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
f88961a8 435 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
d3968363
ML
436 my $p = <HEAD>;
437 close HEAD;
438 chomp $p;
439 push @par, '-p', $p;
440 } else {
441 if ($ps->{type} eq 's') {
442 warn "Could not find the right head for the branch $ps->{branch}";
443 }
444 }
445 }
446
b779d5f0
ML
447 if ($ps->{merges}) {
448 push @par, find_parents($ps);
449 }
d3968363
ML
450
451 #
452 # Commit, tag and clean state
453 #
454 $ENV{TZ} = 'GMT';
455 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
456 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
457 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
458 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
459 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
460 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
461
6df896b5 462 my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
d3968363 463 or die $!;
6df896b5
EW
464 print WRITER $ps->{summary},"\n";
465 print WRITER $ps->{message},"\n";
466
467 # make it easy to backtrack and figure out which Arch revision this was:
468 print WRITER 'git-archimport-id: ',$ps->{id},"\n";
469
d3968363
ML
470 close WRITER;
471 my $commitid = <READER>; # read
472 chomp $commitid;
473 close READER;
474 waitpid $pid,0; # close;
475
476 if (length $commitid != 40) {
477 die "Something went wrong with the commit! $! $commitid";
478 }
479 #
480 # Update the branch
481 #
f88961a8 482 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
d3968363
ML
483 print HEAD $commitid;
484 close HEAD;
8366a10a 485 system('git-update-ref', 'HEAD', "$ps->{branch}");
d3968363
ML
486
487 # tag accordingly
488 ptag($ps->{id}, $commitid); # private tag
489 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
490 tag($ps->{id}, $commitid);
491 }
492 print " * Committed $ps->{id}\n";
493 print " + tree $tree\n";
494 print " + commit $commitid\n";
b779d5f0 495 $opt_v && print " + commit date is $ps->{date} \n";
f88961a8 496 $opt_v && print " + parents: ",join(' ',@par),"\n";
d3968363
ML
497}
498
d3968363
ML
499sub apply_import {
500 my $ps = shift;
22ff00fc 501 my $bname = git_branchname($ps->{id});
d3968363 502
f88961a8 503 mkpath($tmp);
d3968363 504
f88961a8 505 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
d3968363 506 die "Cannot get import: $!" if $?;
f88961a8
EW
507 system('rsync','-aI','--delete', '--exclude',$git_dir,
508 '--exclude','.arch-ids','--exclude','{arch}',
509 "$tmp/import/", './');
d3968363
ML
510 die "Cannot rsync import:$!" if $?;
511
f88961a8 512 rmtree("$tmp/import");
d3968363
ML
513 die "Cannot remove tempdir: $!" if $?;
514
515
516 return 1;
517}
518
519sub apply_cset {
520 my $ps = shift;
521
f88961a8 522 mkpath($tmp);
d3968363
ML
523
524 # get the changeset
f88961a8 525 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
d3968363
ML
526 die "Cannot get changeset: $!" if $?;
527
528 # apply patches
529 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
530 # this can be sped up considerably by doing
531 # (find | xargs cat) | patch
532 # but that cna get mucked up by patches
533 # with missing trailing newlines or the standard
534 # 'missing newline' flag in the patch - possibly
535 # produced with an old/buggy diff.
536 # slow and safe, we invoke patch once per patchfile
537 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
538 die "Problem applying patches! $!" if $?;
539 }
540
541 # apply changed binary files
542 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
543 foreach my $mod (@modified) {
544 chomp $mod;
545 my $orig = $mod;
546 $orig =~ s/\.modified$//; # lazy
547 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
548 #print "rsync -p '$mod' '$orig'";
f88961a8 549 system('rsync','-p',$mod,"./$orig");
d3968363
ML
550 die "Problem applying binary changes! $!" if $?;
551 }
552 }
553
554 # bring in new files
f88961a8
EW
555 system('rsync','-aI','--exclude',$git_dir,
556 '--exclude','.arch-ids',
557 '--exclude', '{arch}',
558 "$tmp/changeset/new-files-archive/",'./');
d3968363
ML
559
560 # deleted files are hinted from the commitlog processing
561
f88961a8 562 rmtree("$tmp/changeset");
d3968363
ML
563}
564
565
566# =for reference
6df896b5
EW
567# notes: *-files/-directories keys cannot have spaces, they're always
568# pika-escaped. Everything after the first newline
569# A log entry looks like:
d3968363
ML
570# Revision: moodle-org--moodle--1.3.3--patch-15
571# Archive: arch-eduforge@catalyst.net.nz--2004
572# Creator: Penny Leach <penny@catalyst.net.nz>
573# Date: Wed May 25 14:15:34 NZST 2005
574# Standard-date: 2005-05-25 02:15:34 GMT
575# New-files: lang/de/.arch-ids/block_glossary_random.php.id
576# lang/de/.arch-ids/block_html.php.id
577# New-directories: lang/de/help/questionnaire
578# lang/de/help/questionnaire/.arch-ids
579# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
580# db_sears.sql db/db_sears.sql
581# Removed-files: lang/be/docs/.arch-ids/release.html.id
582# lang/be/docs/.arch-ids/releaseold.html.id
583# Modified-files: admin/cron.php admin/delete.php
584# admin/editor.html backup/lib.php backup/restore.php
585# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
586# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
6df896b5 587# summary can be multiline with a leading space just like the above fields
d3968363
ML
588# Keywords:
589#
590# Updating yadda tadda tadda madda
591sub parselog {
6df896b5
EW
592 my ($ps, $log) = @_;
593 my $key = undef;
594
595 # headers we want that contain filenames:
596 my %want_headers = (
597 new_files => 1,
598 modified_files => 1,
599 renamed_files => 1,
600 renamed_directories => 1,
601 removed_files => 1,
602 removed_directories => 1,
603 );
d3968363 604
6df896b5
EW
605 chomp (@$log);
606 while ($_ = shift @$log) {
607 if (/^Continuation-of:\s*(.*)/) {
608 $ps->{tag} = $1;
609 $key = undef;
610 } elsif (/^Summary:\s*(.*)$/ ) {
611 # summary can be multiline as long as it has a leading space
612 $ps->{summary} = [ $1 ];
613 $key = 'summary';
614 } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
615 $ps->{author} = $1;
616 $ps->{email} = $2;
617 $key = undef;
618 # any *-files or *-directories can be read here:
619 } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
620 my $val = $2;
621 $key = lc $1;
622 $key =~ tr/-/_/; # too lazy to quote :P
623 if ($want_headers{$key}) {
624 push @{$ps->{$key}}, split(/\s+/, $val);
625 } else {
626 $key = undef;
627 }
628 } elsif (/^$/) {
629 last; # remainder of @$log that didn't get shifted off is message
630 } elsif ($key) {
631 if (/^\s+(.*)$/) {
632 if ($key eq 'summary') {
633 push @{$ps->{$key}}, $1;
634 } else { # files/directories:
635 push @{$ps->{$key}}, split(/\s+/, $1);
636 }
637 } else {
638 $key = undef;
639 }
640 }
d3968363 641 }
6df896b5
EW
642
643 # post-processing:
644 $ps->{summary} = join("\n",@{$ps->{summary}})."\n";
645 $ps->{message} = join("\n",@$log);
d3968363 646
6df896b5
EW
647 # skip Arch control files, unescape pika-escaped files
648 foreach my $k (keys %want_headers) {
649 next unless (defined $ps->{$k});
650 my @tmp;
651 foreach my $t (@{$ps->{$k}}) {
652 next unless length ($t);
653 next if $t =~ m!\{arch\}/!;
654 next if $t =~ m!\.arch-ids/!;
655 # should we skip this?
656 next if $t =~ m!\.arch-inventory$!;
f84f9d38
ML
657 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
658 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
6df896b5 659 if ($t =~ /\\/ ){
f88961a8 660 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
f84f9d38 661 }
6df896b5 662 push @tmp, $t;
d3968363 663 }
6df896b5 664 $ps->{$k} = \@tmp if scalar @tmp;
d3968363 665 }
d3968363
ML
666}
667
668# write/read a tag
669sub tag {
670 my ($tag, $commit) = @_;
a7fb51d3 671
fee3365f
ML
672 if ($opt_o) {
673 $tag =~ s|/|--|g;
674 } else {
675 # don't use subdirs for tags yet, it could screw up other porcelains
676 $tag =~ s|/|,|g;
677 }
d3968363
ML
678
679 if ($commit) {
a7fb51d3 680 open(C,">","$git_dir/refs/tags/$tag")
d3968363
ML
681 or die "Cannot create tag $tag: $!\n";
682 print C "$commit\n"
683 or die "Cannot write tag $tag: $!\n";
684 close(C)
685 or die "Cannot write tag $tag: $!\n";
a7fb51d3 686 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
d3968363 687 } else { # read
a7fb51d3 688 open(C,"<","$git_dir/refs/tags/$tag")
d3968363
ML
689 or die "Cannot read tag $tag: $!\n";
690 $commit = <C>;
691 chomp $commit;
692 die "Error reading tag $tag: $!\n" unless length $commit == 40;
693 close(C)
694 or die "Cannot read tag $tag: $!\n";
695 return $commit;
696 }
697}
698
699# write/read a private tag
700# reads fail softly if the tag isn't there
701sub ptag {
702 my ($tag, $commit) = @_;
a7fb51d3
EW
703
704 # don't use subdirs for tags yet, it could screw up other porcelains
705 $tag =~ s|/|,|g;
d3968363 706
a7fb51d3
EW
707 my $tag_file = "$ptag_dir/$tag";
708 my $tag_branch_dir = dirname($tag_file);
709 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
d3968363
ML
710
711 if ($commit) { # write
a7fb51d3 712 open(C,">",$tag_file)
d3968363
ML
713 or die "Cannot create tag $tag: $!\n";
714 print C "$commit\n"
715 or die "Cannot write tag $tag: $!\n";
716 close(C)
717 or die "Cannot write tag $tag: $!\n";
b779d5f0
ML
718 $rptags{$commit} = $tag
719 unless $tag =~ m/--base-0$/;
d3968363
ML
720 } else { # read
721 # if the tag isn't there, return 0
a7fb51d3 722 unless ( -s $tag_file) {
d3968363
ML
723 return 0;
724 }
a7fb51d3 725 open(C,"<",$tag_file)
d3968363
ML
726 or die "Cannot read tag $tag: $!\n";
727 $commit = <C>;
728 chomp $commit;
729 die "Error reading tag $tag: $!\n" unless length $commit == 40;
730 close(C)
731 or die "Cannot read tag $tag: $!\n";
b779d5f0
ML
732 unless (defined $rptags{$commit}) {
733 $rptags{$commit} = $tag;
734 }
d3968363
ML
735 return $commit;
736 }
737}
b779d5f0
ML
738
739sub find_parents {
740 #
741 # Identify what branches are merging into me
742 # and whether we are fully merged
743 # git-merge-base <headsha> <headsha> should tell
744 # me what the base of the merge should be
745 #
746 my $ps = shift;
747
748 my %branches; # holds an arrayref per branch
749 # the arrayref contains a list of
750 # merged patches between the base
751 # of the merge and the current head
752
753 my @parents; # parents found for this commit
754
755 # simple loop to split the merges
756 # per branch
757 foreach my $merge (@{$ps->{merges}}) {
22ff00fc 758 my $branch = git_branchname($merge);
b779d5f0
ML
759 unless (defined $branches{$branch} ){
760 $branches{$branch} = [];
761 }
762 push @{$branches{$branch}}, $merge;
763 }
764
765 #
766 # foreach branch find a merge base and walk it to the
767 # head where we are, collecting the merged patchsets that
768 # Arch has recorded. Keep that in @have
769 # Compare that with the commits on the other branch
770 # between merge-base and the tip of the branch (@need)
771 # and see if we have a series of consecutive patches
772 # starting from the merge base. The tip of the series
773 # of consecutive patches merged is our new parent for
774 # that branch.
775 #
776 foreach my $branch (keys %branches) {
37f15d50
ML
777
778 # check that we actually know about the branch
779 next unless -e "$git_dir/refs/heads/$branch";
780
b779d5f0 781 my $mergebase = `git-merge-base $branch $ps->{branch}`;
9b626e75
EW
782 if ($?) {
783 # Don't die here, Arch supports one-way cherry-picking
784 # between branches with no common base (or any relationship
785 # at all beforehand)
786 warn "Cannot find merge base for $branch and $ps->{branch}";
787 next;
788 }
b779d5f0
ML
789 chomp $mergebase;
790
791 # now walk up to the mergepoint collecting what patches we have
792 my $branchtip = git_rev_parse($ps->{branch});
793 my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
794 my %have; # collected merges this branch has
795 foreach my $merge (@{$ps->{merges}}) {
796 $have{$merge} = 1;
797 }
798 my %ancestorshave;
799 foreach my $par (@ancestors) {
800 $par = commitid2pset($par);
801 if (defined $par->{merges}) {
802 foreach my $merge (@{$par->{merges}}) {
803 $ancestorshave{$merge}=1;
804 }
805 }
806 }
807 # print "++++ Merges in $ps->{id} are....\n";
808 # my @have = sort keys %have; print Dumper(\@have);
809
810 # merge what we have with what ancestors have
811 %have = (%have, %ancestorshave);
812
813 # see what the remote branch has - these are the merges we
814 # will want to have in a consecutive series from the mergebase
815 my $otherbranchtip = git_rev_parse($branch);
816 my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
817 my @need;
818 foreach my $needps (@needraw) { # get the psets
819 $needps = commitid2pset($needps);
820 # git-rev-list will also
821 # list commits merged in via earlier
822 # merges. we are only interested in commits
823 # from the branch we're looking at
824 if ($branch eq $needps->{branch}) {
825 push @need, $needps->{id};
826 }
827 }
828
829 # print "++++ Merges from $branch we want are....\n";
830 # print Dumper(\@need);
831
832 my $newparent;
833 while (my $needed_commit = pop @need) {
834 if ($have{$needed_commit}) {
835 $newparent = $needed_commit;
836 } else {
837 last; # break out of the while
838 }
839 }
840 if ($newparent) {
841 push @parents, $newparent;
842 }
843
844
845 } # end foreach branch
846
847 # prune redundant parents
848 my %parents;
849 foreach my $p (@parents) {
850 $parents{$p} = 1;
851 }
852 foreach my $p (@parents) {
853 next unless exists $psets{$p}{merges};
854 next unless ref $psets{$p}{merges};
855 my @merges = @{$psets{$p}{merges}};
856 foreach my $merge (@merges) {
857 if ($parents{$merge}) {
858 delete $parents{$merge};
859 }
860 }
861 }
42f44b08 862
f88961a8
EW
863 @parents = ();
864 foreach (keys %parents) {
865 push @parents, '-p', ptag($_);
866 }
b779d5f0
ML
867 return @parents;
868}
869
870sub git_rev_parse {
871 my $name = shift;
872 my $val = `git-rev-parse $name`;
873 die "Error: git-rev-parse $name" if $?;
874 chomp $val;
875 return $val;
876}
877
878# resolve a SHA1 to a known patchset
879sub commitid2pset {
880 my $commitid = shift;
881 chomp $commitid;
882 my $name = $rptags{$commitid}
883 || die "Cannot find reverse tag mapping for $commitid";
a7fb51d3 884 $name =~ s|,|/|;
b779d5f0
ML
885 my $ps = $psets{$name}
886 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
887 return $ps;
888}
2777ef76 889
42f44b08 890
2777ef76
EW
891# an alterative to `command` that allows input to be passed as an array
892# to work around shell problems with weird characters in arguments
893sub safe_pipe_capture {
894 my @output;
895 if (my $pid = open my $child, '-|') {
896 @output = (<$child>);
897 close $child or die join(' ',@_).": $! $?";
898 } else {
899 exec(@_) or die $?; # exec() can fail the executable can't be found
900 }
901 return wantarray ? @output : join('',@output);
902}
903
42f44b08
EW
904# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
905sub arch_tree_id {
906 my $dir = shift;
907 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
908 return $ret;
909}
910
911sub archive_reachable {
912 my $archive = shift;
913 return 1 if $reachable{$archive};
914 return 0 if $unreachable{$archive};
915
916 if (system "$TLA whereis-archive $archive >/dev/null") {
917 if ($opt_a && (system($TLA,'register-archive',
918 "http://mirrors.sourcecontrol.net/$archive") == 0)) {
919 $reachable{$archive} = 1;
920 return 1;
921 }
922 print STDERR "Archive is unreachable: $archive\n";
923 $unreachable{$archive} = 1;
924 return 0;
925 } else {
926 $reachable{$archive} = 1;
927 return 1;
928 }
929}
2777ef76 930