Make git-reset delete empty directories
[git/git.git] / git-archimport.perl
CommitLineData
d3968363
ML
1#!/usr/bin/perl -w
2#
3# This tool is copyright (c) 2005, Martin Langhoff.
4# It is released under the Gnu Public License, version 2.
5#
6# The basic idea is to walk the output of tla abrowse,
7# fetch the changesets and apply them.
8#
241b5967 9
d3968363
ML
10=head1 Invocation
11
42f4570c
EW
12 git-archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ]
13 [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
d3968363 14
241b5967
ML
15Imports a project from one or more Arch repositories. It will follow branches
16and repositories within the namespaces defined by the <archive/branch>
17parameters suppplied. If it cannot find the remote branch a merge comes from
18it will just import it as a regular commit. If it can find it, it will mark it
19as a merge whenever possible.
d3968363 20
241b5967 21See man (1) git-archimport for more details.
d3968363 22
241b5967 23=head1 TODO
d3968363 24
241b5967 25 - create tag objects instead of ref tags
d3968363 26 - audit shell-escaping of filenames
241b5967
ML
27 - hide our private tags somewhere smarter
28 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
3e525e67
EW
29 - sort and apply patches by graphing ancestry relations instead of just
30 relying in dates supplied in the changeset itself.
31 tla ancestry-graph -m could be helpful here...
d3968363
ML
32
33=head1 Devel tricks
34
35Add print in front of the shell commands invoked via backticks.
36
22ff00fc
EW
37=head1 Devel Notes
38
39There are several places where Arch and git terminology are intermixed
40and potentially confused.
41
42The notion of a "branch" in git is approximately equivalent to
43a "archive/category--branch--version" in Arch. Also, it should be noted
44that the "--branch" portion of "archive/category--branch--version" is really
45optional in Arch although not many people (nor tools!) seem to know this.
46This means that "archive/category--version" is also a valid "branch"
47in git terms.
48
49We always refer to Arch names by their fully qualified variant (which
50means the "archive" name is prefixed.
51
52For people unfamiliar with Arch, an "archive" is the term for "repository",
53and can contain multiple, unrelated branches.
54
d3968363
ML
55=cut
56
57use strict;
58use warnings;
59use Getopt::Std;
42f44b08 60use File::Temp qw(tempdir);
f88961a8 61use File::Path qw(mkpath rmtree);
d3968363 62use File::Basename qw(basename dirname);
d3968363
ML
63use Data::Dumper qw/ Dumper /;
64use IPC::Open2;
65
66$SIG{'PIPE'}="IGNORE";
67$ENV{'TZ'}="UTC";
68
1d4710d0
ML
69my $git_dir = $ENV{"GIT_DIR"} || ".git";
70$ENV{"GIT_DIR"} = $git_dir;
a7fb51d3 71my $ptag_dir = "$git_dir/archimport/tags";
1d4710d0 72
3e525e67 73our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
d3968363
ML
74
75sub usage() {
76 print STDERR <<END;
77Usage: ${\basename $0} # fetch/update GIT from Arch
42f4570c 78 [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ]
d3968363
ML
79 repository/arch-branch [ repository/arch-branch] ...
80END
81 exit(1);
82}
83
3e525e67 84getopts("fThvat:D:") or usage();
d3968363
ML
85usage if $opt_h;
86
87@ARGV >= 1 or usage();
42f44b08
EW
88# $arch_branches:
89# values associated with keys:
90# =1 - Arch version / git 'branch' detected via abrowse on a limit
91# >1 - Arch version / git 'branch' of an auxilliary branch we've merged
92my %arch_branches = map { $_ => 1 } @ARGV;
d3968363 93
5744f277
EW
94$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
95my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
127bf00f 96$opt_v && print "+ Using $tmp as temporary directory\n";
d3968363 97
42f44b08
EW
98my %reachable = (); # Arch repositories we can access
99my %unreachable = (); # Arch repositories we can't access :<
d3968363 100my @psets = (); # the collection
b779d5f0 101my %psets = (); # the collection, by name
3e525e67
EW
102my %stats = ( # Track which strategy we used to import:
103 get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
104 simple_changeset => 0, import_or_tag => 0
105);
b779d5f0
ML
106
107my %rptags = (); # my reverse private tags
108 # to map a SHA1 to a commitid
2777ef76 109my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
d3968363 110
42f44b08
EW
111sub do_abrowse {
112 my $stage = shift;
113 while (my ($limit, $level) = each %arch_branches) {
114 next unless $level == $stage;
115
116 open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
117 or die "Problems with tla abrowse: $!";
d3968363 118
42f44b08
EW
119 my %ps = (); # the current one
120 my $lastseen = '';
d3968363 121
42f44b08
EW
122 while (<ABROWSE>) {
123 chomp;
d3968363 124
42f44b08
EW
125 # first record padded w 8 spaces
126 if (s/^\s{8}\b//) {
127 my ($id, $type) = split(m/\s+/, $_, 2);
128
129 my %last_ps;
130 # store the record we just captured
131 if (%ps && !exists $psets{ $ps{id} }) {
132 %last_ps = %ps; # break references
133 push (@psets, \%last_ps);
134 $psets{ $last_ps{id} } = \%last_ps;
135 }
d3968363 136
42f44b08
EW
137 my $branch = extract_versionname($id);
138 %ps = ( id => $id, branch => $branch );
139 if (%last_ps && ($last_ps{branch} eq $branch)) {
140 $ps{parent_id} = $last_ps{id};
141 }
142
143 $arch_branches{$branch} = 1;
144 $lastseen = 'id';
145
146 # deal with types (should work with baz or tla):
147 if ($type =~ m/\(.*changeset\)/) {
148 $ps{type} = 's';
149 } elsif ($type =~ /\(.*import\)/) {
150 $ps{type} = 'i';
6df896b5 151 } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
42f44b08
EW
152 $ps{type} = 't';
153 # read which revision we've tagged when we parse the log
6df896b5 154 $ps{tag} = $1;
42f44b08
EW
155 } else {
156 warn "Unknown type $type";
157 }
158
159 $arch_branches{$branch} = 1;
160 $lastseen = 'id';
161 } elsif (s/^\s{10}//) {
162 # 10 leading spaces or more
163 # indicate commit metadata
164
165 # date
166 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
167 $ps{date} = $1;
168 $lastseen = 'date';
169 } elsif ($_ eq 'merges in:') {
170 $ps{merges} = [];
171 $lastseen = 'merges';
172 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
173 my $id = $_;
174 push (@{$ps{merges}}, $id);
175
176 # aggressive branch finding:
177 if ($opt_D) {
178 my $branch = extract_versionname($id);
179 my $repo = extract_reponame($branch);
180
181 if (archive_reachable($repo) &&
182 !defined $arch_branches{$branch}) {
183 $arch_branches{$branch} = $stage + 1;
184 }
185 }
186 } else {
187 warn "more metadata after merges!?: $_\n" unless /^\s*$/;
d3968363 188 }
d3968363 189 }
d3968363 190 }
d3968363 191
42f44b08
EW
192 if (%ps && !exists $psets{ $ps{id} }) {
193 my %temp = %ps; # break references
194 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
195 $temp{parent_id} = $psets[$#psets]{id};
196 }
197 push (@psets, \%temp);
198 $psets{ $temp{id} } = \%temp;
199 }
200
201 close ABROWSE or die "$TLA abrowse failed on $limit\n";
202 }
d3968363
ML
203} # end foreach $root
204
42f44b08
EW
205do_abrowse(1);
206my $depth = 2;
207$opt_D ||= 0;
208while ($depth <= $opt_D) {
209 do_abrowse($depth);
210 $depth++;
211}
212
d3968363 213## Order patches by time
42f44b08
EW
214# FIXME see if we can find a more optimal way to do this by graphing
215# the ancestry data and walking it, that way we won't have to rely on
216# client-supplied dates
d3968363
ML
217@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
218
219#print Dumper \@psets;
220
221##
222## TODO cleanup irrelevant patches
223## and put an initial import
224## or a full tag
3292ae47 225my $import = 0;
1d4710d0 226unless (-d $git_dir) { # initial import
d3968363
ML
227 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
228 print "Starting import from $psets[0]{id}\n";
3292ae47
ML
229 `git-init-db`;
230 die $! if $?;
231 $import = 1;
d3968363
ML
232 } else {
233 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
234 }
b779d5f0
ML
235} else { # progressing an import
236 # load the rptags
42f44b08 237 opendir(DIR, $ptag_dir)
b779d5f0
ML
238 || die "can't opendir: $!";
239 while (my $file = readdir(DIR)) {
a7fb51d3
EW
240 # skip non-interesting-files
241 next unless -f "$ptag_dir/$file";
242
243 # convert first '--' to '/' from old git-archimport to use
244 # as an archivename/c--b--v private tag
245 if ($file !~ m!,!) {
246 my $oldfile = $file;
247 $file =~ s!--!,!;
248 print STDERR "converting old tag $oldfile to $file\n";
249 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
250 }
b779d5f0
ML
251 my $sha = ptag($file);
252 chomp $sha;
b779d5f0
ML
253 $rptags{$sha} = $file;
254 }
255 closedir DIR;
d3968363
ML
256}
257
3292ae47 258# process patchsets
22ff00fc
EW
259# extract the Arch repository name (Arch "archive" in Arch-speak)
260sub extract_reponame {
261 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
262 return (split(/\//, $fq_cvbr))[0];
263}
264
265sub extract_versionname {
266 my $name = shift;
267 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
268 return $name;
269}
d3968363 270
22ff00fc
EW
271# convert a fully-qualified revision or version to a unique dirname:
272# normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
273# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
274#
275# the git notion of a branch is closer to
276# archive/category--branch--version than archive/category--branch, so we
277# use this to convert to git branch names.
278# Also, keep archive names but replace '/' with ',' since it won't require
279# subdirectories, and is safer than swapping '--' which could confuse
280# reverse-mapping when dealing with bastard branches that
281# are just archive/category--version (no --branch)
282sub tree_dirname {
283 my $revision = shift;
284 my $name = extract_versionname($revision);
285 $name =~ s#/#,#;
286 return $name;
287}
288
fee3365f
ML
289# old versions of git-archimport just use the <category--branch> part:
290sub old_style_branchname {
291 my $id = shift;
292 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
293 chomp $ret;
294 return $ret;
295}
296
297*git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
22ff00fc 298
3e525e67
EW
299sub process_patchset_accurate {
300 my $ps = shift;
301
302 # switch to that branch if we're not already in that branch:
303 if (-e "$git_dir/refs/heads/$ps->{branch}") {
304 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
d3968363 305
3e525e67
EW
306 # remove any old stuff that got leftover:
307 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
308 rmtree(split(/\0/,$rm)) if $rm;
3292ae47 309 }
3e525e67
EW
310
311 # Apply the import/changeset/merge into the working tree
312 my $dir = sync_to_ps($ps);
313 # read the new log entry:
314 my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
315 die "Error in cat-log: $!" if $?;
316 chomp @commitlog;
317
318 # grab variables we want from the log, new fields get added to $ps:
319 # (author, date, email, summary, message body ...)
320 parselog($ps, \@commitlog);
321
322 if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
323 # this should work when importing continuations
324 if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
325
326 # find where we are supposed to branch from
327 system('git-checkout','-f','-b',$ps->{branch},
328 $branchpoint) == 0 or die "$! $?\n";
329
330 # remove any old stuff that got leftover:
331 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
332 rmtree(split(/\0/,$rm)) if $rm;
3292ae47 333
3e525e67
EW
334 # If we trust Arch with the fact that this is just
335 # a tag, and it does not affect the state of the tree
336 # then we just tag and move on
337 tag($ps->{id}, $branchpoint);
338 ptag($ps->{id}, $branchpoint);
339 print " * Tagged $ps->{id} at $branchpoint\n";
340 return 0;
341 } else {
342 warn "Tagging from unknown id unsupported\n" if $ps->{tag};
343 }
344 # allow multiple bases/imports here since Arch supports cherry-picks
345 # from unrelated trees
346 }
347
348 # update the index with all the changes we got
349 system('git-ls-files --others -z | '.
350 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
351 system('git-ls-files --deleted -z | '.
352 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
353 system('git-ls-files -z | '.
354 'git-update-index -z --stdin') == 0 or die "$! $?\n";
355 return 1;
356}
37f15d50 357
3e525e67
EW
358# the native changeset processing strategy. This is very fast, but
359# does not handle permissions or any renames involving directories
360sub process_patchset_fast {
361 my $ps = shift;
d3968363
ML
362 #
363 # create the branch if needed
364 #
3292ae47
ML
365 if ($ps->{type} eq 'i' && !$import) {
366 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
d3968363
ML
367 }
368
3292ae47 369 unless ($import) { # skip for import
1d4710d0 370 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
d3968363 371 # we know about this branch
f88961a8 372 system('git-checkout',$ps->{branch});
d3968363
ML
373 } else {
374 # new branch! we need to verify a few things
375 die "Branch on a non-tag!" unless $ps->{type} eq 't';
376 my $branchpoint = ptag($ps->{tag});
377 die "Tagging from unknown id unsupported: $ps->{tag}"
378 unless $branchpoint;
379
380 # find where we are supposed to branch from
f88961a8 381 system('git-checkout','-b',$ps->{branch},$branchpoint);
52586ecb
ML
382
383 # If we trust Arch with the fact that this is just
384 # a tag, and it does not affect the state of the tree
385 # then we just tag and move on
386 tag($ps->{id}, $branchpoint);
387 ptag($ps->{id}, $branchpoint);
388 print " * Tagged $ps->{id} at $branchpoint\n";
3e525e67 389 return 0;
d3968363
ML
390 }
391 die $! if $?;
392 }
393
d3968363
ML
394 #
395 # Apply the import/changeset/merge into the working tree
396 #
397 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
d3968363 398 apply_import($ps) or die $!;
3e525e67 399 $stats{import_or_tag}++;
3292ae47 400 $import=0;
d3968363
ML
401 } elsif ($ps->{type} eq 's') {
402 apply_cset($ps);
3e525e67 403 $stats{simple_changeset}++;
d3968363
ML
404 }
405
406 #
407 # prepare update git's index, based on what arch knows
408 # about the pset, resolve parents, etc
409 #
d3968363 410
6df896b5 411 my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
d3968363
ML
412 die "Error in cat-archive-log: $!" if $?;
413
6df896b5 414 parselog($ps,\@commitlog);
d3968363
ML
415
416 # imports don't give us good info
417 # on added files. Shame on them
6df896b5
EW
418 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
419 system('git-ls-files --others -z | '.
420 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
421 system('git-ls-files --deleted -z | '.
422 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
d3968363
ML
423 }
424
6df896b5
EW
425 # TODO: handle removed_directories and renamed_directories:
426
427 if (my $add = $ps->{new_files}) {
d3968363
ML
428 while (@$add) {
429 my @slice = splice(@$add, 0, 100);
6df896b5
EW
430 system('git-update-index','--add','--',@slice) == 0 or
431 die "Error in git-update-index --add: $! $?\n";
d3968363
ML
432 }
433 }
6df896b5
EW
434
435 if (my $del = $ps->{removed_files}) {
436 unlink @$del;
d3968363
ML
437 while (@$del) {
438 my @slice = splice(@$del, 0, 100);
6df896b5
EW
439 system('git-update-index','--remove','--',@slice) == 0 or
440 die "Error in git-update-index --remove: $! $?\n";
d3968363
ML
441 }
442 }
6df896b5
EW
443
444 if (my $ren = $ps->{renamed_files}) { # renamed
d3968363
ML
445 if (@$ren % 2) {
446 die "Odd number of entries in rename!?";
447 }
6df896b5 448
d3968363 449 while (@$ren) {
6df896b5
EW
450 my $from = shift @$ren;
451 my $to = shift @$ren;
d3968363
ML
452
453 unless (-d dirname($to)) {
454 mkpath(dirname($to)); # will die on err
455 }
3e525e67 456 # print "moving $from $to";
6df896b5
EW
457 rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
458 system('git-update-index','--remove','--',$from) == 0 or
459 die "Error in git-update-index --remove: $! $?\n";
460 system('git-update-index','--add','--',$to) == 0 or
461 die "Error in git-update-index --add: $! $?\n";
d3968363 462 }
d3968363 463 }
6df896b5
EW
464
465 if (my $mod = $ps->{modified_files}) {
d3968363
ML
466 while (@$mod) {
467 my @slice = splice(@$mod, 0, 100);
6df896b5
EW
468 system('git-update-index','--',@slice) == 0 or
469 die "Error in git-update-index: $! $?\n";
d3968363
ML
470 }
471 }
3e525e67
EW
472 return 1; # we successfully applied the changeset
473}
474
475if ($opt_f) {
476 print "Will import patchsets using the fast strategy\n",
477 "Renamed directories and permission changes will be missed\n";
478 *process_patchset = *process_patchset_fast;
479} else {
480 print "Using the default (accurate) import strategy.\n",
481 "Things may be a bit slow\n";
482 *process_patchset = *process_patchset_accurate;
483}
6df896b5 484
3e525e67
EW
485foreach my $ps (@psets) {
486 # process patchsets
487 $ps->{branch} = git_branchname($ps->{id});
488
489 #
490 # ensure we have a clean state
491 #
492 if (my $dirty = `git-diff-files`) {
493 die "Unclean tree when about to process $ps->{id} " .
494 " - did we fail to commit cleanly before?\n$dirty";
495 }
496 die $! if $?;
497
498 #
499 # skip commits already in repo
500 #
501 if (ptag($ps->{id})) {
502 $opt_v && print " * Skipping already imported: $ps->{id}\n";
10945e00 503 next;
3e525e67
EW
504 }
505
506 print " * Starting to work on $ps->{id}\n";
507
508 process_patchset($ps) or next;
509
215a7ad1 510 # warn "errors when running git-update-index! $!";
3e525e67 511 my $tree = `git-write-tree`;
d3968363
ML
512 die "cannot write tree $!" if $?;
513 chomp $tree;
d3968363
ML
514
515 #
516 # Who's your daddy?
517 #
518 my @par;
1d4710d0 519 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
f88961a8 520 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
d3968363
ML
521 my $p = <HEAD>;
522 close HEAD;
523 chomp $p;
524 push @par, '-p', $p;
525 } else {
526 if ($ps->{type} eq 's') {
527 warn "Could not find the right head for the branch $ps->{branch}";
528 }
529 }
530 }
531
b779d5f0
ML
532 if ($ps->{merges}) {
533 push @par, find_parents($ps);
534 }
d3968363
ML
535
536 #
537 # Commit, tag and clean state
538 #
539 $ENV{TZ} = 'GMT';
540 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
541 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
542 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
543 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
544 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
545 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
546
6df896b5 547 my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
d3968363 548 or die $!;
6df896b5
EW
549 print WRITER $ps->{summary},"\n";
550 print WRITER $ps->{message},"\n";
551
552 # make it easy to backtrack and figure out which Arch revision this was:
553 print WRITER 'git-archimport-id: ',$ps->{id},"\n";
554
d3968363
ML
555 close WRITER;
556 my $commitid = <READER>; # read
557 chomp $commitid;
558 close READER;
559 waitpid $pid,0; # close;
560
561 if (length $commitid != 40) {
562 die "Something went wrong with the commit! $! $commitid";
563 }
564 #
565 # Update the branch
566 #
f88961a8 567 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
d3968363
ML
568 print HEAD $commitid;
569 close HEAD;
8366a10a 570 system('git-update-ref', 'HEAD', "$ps->{branch}");
d3968363
ML
571
572 # tag accordingly
573 ptag($ps->{id}, $commitid); # private tag
574 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
575 tag($ps->{id}, $commitid);
576 }
577 print " * Committed $ps->{id}\n";
578 print " + tree $tree\n";
579 print " + commit $commitid\n";
b779d5f0 580 $opt_v && print " + commit date is $ps->{date} \n";
f88961a8 581 $opt_v && print " + parents: ",join(' ',@par),"\n";
3e525e67
EW
582}
583
584if ($opt_v) {
585 foreach (sort keys %stats) {
586 print" $_: $stats{$_}\n";
587 }
588}
589exit 0;
590
591# used by the accurate strategy:
592sub sync_to_ps {
593 my $ps = shift;
594 my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
595
596 $opt_v && print "sync_to_ps($ps->{id}) method: ";
597
598 if (-d $tree_dir) {
599 if ($ps->{type} eq 't') {
600 $opt_v && print "get (tag)\n";
601 # looks like a tag-only or (worse,) a mixed tags/changeset branch,
602 # can't rely on replay to work correctly on these
603 rmtree($tree_dir);
604 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
605 $stats{get_tag}++;
606 } else {
607 my $tree_id = arch_tree_id($tree_dir);
608 if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
609 # the common case (hopefully)
610 $opt_v && print "replay\n";
611 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
612 $stats{replay}++;
613 } else {
614 # getting one tree is usually faster than getting two trees
615 # and applying the delta ...
616 rmtree($tree_dir);
617 $opt_v && print "apply-delta\n";
618 safe_pipe_capture($TLA,'get','--no-pristine',
619 $ps->{id},$tree_dir);
620 $stats{get_delta}++;
621 }
622 }
623 } else {
624 # new branch work
625 $opt_v && print "get (new tree)\n";
626 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
627 $stats{get_new}++;
628 }
629
630 # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
631 system('rsync','-aI','--delete','--exclude',$git_dir,
632# '--exclude','.arch-inventory',
633 '--exclude','.arch-ids','--exclude','{arch}',
634 '--exclude','+*','--exclude',',*',
635 "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
636 return $tree_dir;
d3968363
ML
637}
638
d3968363
ML
639sub apply_import {
640 my $ps = shift;
22ff00fc 641 my $bname = git_branchname($ps->{id});
d3968363 642
f88961a8 643 mkpath($tmp);
d3968363 644
f88961a8 645 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
d3968363 646 die "Cannot get import: $!" if $?;
f88961a8
EW
647 system('rsync','-aI','--delete', '--exclude',$git_dir,
648 '--exclude','.arch-ids','--exclude','{arch}',
649 "$tmp/import/", './');
d3968363
ML
650 die "Cannot rsync import:$!" if $?;
651
f88961a8 652 rmtree("$tmp/import");
d3968363
ML
653 die "Cannot remove tempdir: $!" if $?;
654
655
656 return 1;
657}
658
659sub apply_cset {
660 my $ps = shift;
661
f88961a8 662 mkpath($tmp);
d3968363
ML
663
664 # get the changeset
f88961a8 665 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
d3968363
ML
666 die "Cannot get changeset: $!" if $?;
667
668 # apply patches
669 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
670 # this can be sped up considerably by doing
671 # (find | xargs cat) | patch
672 # but that cna get mucked up by patches
673 # with missing trailing newlines or the standard
674 # 'missing newline' flag in the patch - possibly
675 # produced with an old/buggy diff.
676 # slow and safe, we invoke patch once per patchfile
677 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
678 die "Problem applying patches! $!" if $?;
679 }
680
681 # apply changed binary files
682 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
683 foreach my $mod (@modified) {
684 chomp $mod;
685 my $orig = $mod;
686 $orig =~ s/\.modified$//; # lazy
687 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
688 #print "rsync -p '$mod' '$orig'";
f88961a8 689 system('rsync','-p',$mod,"./$orig");
d3968363
ML
690 die "Problem applying binary changes! $!" if $?;
691 }
692 }
693
694 # bring in new files
f88961a8
EW
695 system('rsync','-aI','--exclude',$git_dir,
696 '--exclude','.arch-ids',
697 '--exclude', '{arch}',
698 "$tmp/changeset/new-files-archive/",'./');
d3968363
ML
699
700 # deleted files are hinted from the commitlog processing
701
f88961a8 702 rmtree("$tmp/changeset");
d3968363
ML
703}
704
705
706# =for reference
6df896b5
EW
707# notes: *-files/-directories keys cannot have spaces, they're always
708# pika-escaped. Everything after the first newline
709# A log entry looks like:
d3968363
ML
710# Revision: moodle-org--moodle--1.3.3--patch-15
711# Archive: arch-eduforge@catalyst.net.nz--2004
712# Creator: Penny Leach <penny@catalyst.net.nz>
713# Date: Wed May 25 14:15:34 NZST 2005
714# Standard-date: 2005-05-25 02:15:34 GMT
715# New-files: lang/de/.arch-ids/block_glossary_random.php.id
716# lang/de/.arch-ids/block_html.php.id
717# New-directories: lang/de/help/questionnaire
718# lang/de/help/questionnaire/.arch-ids
719# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
720# db_sears.sql db/db_sears.sql
721# Removed-files: lang/be/docs/.arch-ids/release.html.id
722# lang/be/docs/.arch-ids/releaseold.html.id
723# Modified-files: admin/cron.php admin/delete.php
724# admin/editor.html backup/lib.php backup/restore.php
725# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
726# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
6df896b5 727# summary can be multiline with a leading space just like the above fields
d3968363
ML
728# Keywords:
729#
730# Updating yadda tadda tadda madda
731sub parselog {
6df896b5
EW
732 my ($ps, $log) = @_;
733 my $key = undef;
734
735 # headers we want that contain filenames:
736 my %want_headers = (
737 new_files => 1,
738 modified_files => 1,
739 renamed_files => 1,
740 renamed_directories => 1,
741 removed_files => 1,
742 removed_directories => 1,
743 );
d3968363 744
6df896b5
EW
745 chomp (@$log);
746 while ($_ = shift @$log) {
747 if (/^Continuation-of:\s*(.*)/) {
748 $ps->{tag} = $1;
749 $key = undef;
750 } elsif (/^Summary:\s*(.*)$/ ) {
751 # summary can be multiline as long as it has a leading space
752 $ps->{summary} = [ $1 ];
753 $key = 'summary';
754 } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
755 $ps->{author} = $1;
756 $ps->{email} = $2;
757 $key = undef;
758 # any *-files or *-directories can be read here:
759 } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
760 my $val = $2;
761 $key = lc $1;
762 $key =~ tr/-/_/; # too lazy to quote :P
763 if ($want_headers{$key}) {
764 push @{$ps->{$key}}, split(/\s+/, $val);
765 } else {
766 $key = undef;
767 }
768 } elsif (/^$/) {
769 last; # remainder of @$log that didn't get shifted off is message
770 } elsif ($key) {
771 if (/^\s+(.*)$/) {
772 if ($key eq 'summary') {
773 push @{$ps->{$key}}, $1;
774 } else { # files/directories:
775 push @{$ps->{$key}}, split(/\s+/, $1);
776 }
777 } else {
778 $key = undef;
779 }
780 }
d3968363 781 }
6df896b5
EW
782
783 # post-processing:
784 $ps->{summary} = join("\n",@{$ps->{summary}})."\n";
785 $ps->{message} = join("\n",@$log);
d3968363 786
6df896b5
EW
787 # skip Arch control files, unescape pika-escaped files
788 foreach my $k (keys %want_headers) {
789 next unless (defined $ps->{$k});
6e33101a 790 my @tmp = ();
6df896b5
EW
791 foreach my $t (@{$ps->{$k}}) {
792 next unless length ($t);
793 next if $t =~ m!\{arch\}/!;
794 next if $t =~ m!\.arch-ids/!;
795 # should we skip this?
796 next if $t =~ m!\.arch-inventory$!;
f84f9d38
ML
797 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
798 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
6df896b5 799 if ($t =~ /\\/ ){
f88961a8 800 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
f84f9d38 801 }
6df896b5 802 push @tmp, $t;
d3968363 803 }
6e33101a 804 $ps->{$k} = \@tmp;
d3968363 805 }
d3968363
ML
806}
807
808# write/read a tag
809sub tag {
810 my ($tag, $commit) = @_;
a7fb51d3 811
fee3365f
ML
812 if ($opt_o) {
813 $tag =~ s|/|--|g;
814 } else {
815 # don't use subdirs for tags yet, it could screw up other porcelains
816 $tag =~ s|/|,|g;
817 }
d3968363
ML
818
819 if ($commit) {
a7fb51d3 820 open(C,">","$git_dir/refs/tags/$tag")
d3968363
ML
821 or die "Cannot create tag $tag: $!\n";
822 print C "$commit\n"
823 or die "Cannot write tag $tag: $!\n";
824 close(C)
825 or die "Cannot write tag $tag: $!\n";
a7fb51d3 826 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
d3968363 827 } else { # read
a7fb51d3 828 open(C,"<","$git_dir/refs/tags/$tag")
d3968363
ML
829 or die "Cannot read tag $tag: $!\n";
830 $commit = <C>;
831 chomp $commit;
832 die "Error reading tag $tag: $!\n" unless length $commit == 40;
833 close(C)
834 or die "Cannot read tag $tag: $!\n";
835 return $commit;
836 }
837}
838
839# write/read a private tag
840# reads fail softly if the tag isn't there
841sub ptag {
842 my ($tag, $commit) = @_;
a7fb51d3
EW
843
844 # don't use subdirs for tags yet, it could screw up other porcelains
845 $tag =~ s|/|,|g;
d3968363 846
a7fb51d3
EW
847 my $tag_file = "$ptag_dir/$tag";
848 my $tag_branch_dir = dirname($tag_file);
849 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
d3968363
ML
850
851 if ($commit) { # write
a7fb51d3 852 open(C,">",$tag_file)
d3968363
ML
853 or die "Cannot create tag $tag: $!\n";
854 print C "$commit\n"
855 or die "Cannot write tag $tag: $!\n";
856 close(C)
857 or die "Cannot write tag $tag: $!\n";
b779d5f0
ML
858 $rptags{$commit} = $tag
859 unless $tag =~ m/--base-0$/;
d3968363
ML
860 } else { # read
861 # if the tag isn't there, return 0
a7fb51d3 862 unless ( -s $tag_file) {
d3968363
ML
863 return 0;
864 }
a7fb51d3 865 open(C,"<",$tag_file)
d3968363
ML
866 or die "Cannot read tag $tag: $!\n";
867 $commit = <C>;
868 chomp $commit;
869 die "Error reading tag $tag: $!\n" unless length $commit == 40;
870 close(C)
871 or die "Cannot read tag $tag: $!\n";
b779d5f0
ML
872 unless (defined $rptags{$commit}) {
873 $rptags{$commit} = $tag;
874 }
d3968363
ML
875 return $commit;
876 }
877}
b779d5f0
ML
878
879sub find_parents {
880 #
881 # Identify what branches are merging into me
882 # and whether we are fully merged
883 # git-merge-base <headsha> <headsha> should tell
884 # me what the base of the merge should be
885 #
886 my $ps = shift;
887
888 my %branches; # holds an arrayref per branch
889 # the arrayref contains a list of
890 # merged patches between the base
891 # of the merge and the current head
892
893 my @parents; # parents found for this commit
894
895 # simple loop to split the merges
896 # per branch
897 foreach my $merge (@{$ps->{merges}}) {
22ff00fc 898 my $branch = git_branchname($merge);
b779d5f0
ML
899 unless (defined $branches{$branch} ){
900 $branches{$branch} = [];
901 }
902 push @{$branches{$branch}}, $merge;
903 }
904
905 #
906 # foreach branch find a merge base and walk it to the
907 # head where we are, collecting the merged patchsets that
908 # Arch has recorded. Keep that in @have
909 # Compare that with the commits on the other branch
910 # between merge-base and the tip of the branch (@need)
911 # and see if we have a series of consecutive patches
912 # starting from the merge base. The tip of the series
913 # of consecutive patches merged is our new parent for
914 # that branch.
915 #
916 foreach my $branch (keys %branches) {
37f15d50
ML
917
918 # check that we actually know about the branch
919 next unless -e "$git_dir/refs/heads/$branch";
920
b779d5f0 921 my $mergebase = `git-merge-base $branch $ps->{branch}`;
9b626e75
EW
922 if ($?) {
923 # Don't die here, Arch supports one-way cherry-picking
924 # between branches with no common base (or any relationship
925 # at all beforehand)
926 warn "Cannot find merge base for $branch and $ps->{branch}";
927 next;
928 }
b779d5f0
ML
929 chomp $mergebase;
930
931 # now walk up to the mergepoint collecting what patches we have
932 my $branchtip = git_rev_parse($ps->{branch});
933 my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
934 my %have; # collected merges this branch has
935 foreach my $merge (@{$ps->{merges}}) {
936 $have{$merge} = 1;
937 }
938 my %ancestorshave;
939 foreach my $par (@ancestors) {
940 $par = commitid2pset($par);
941 if (defined $par->{merges}) {
942 foreach my $merge (@{$par->{merges}}) {
943 $ancestorshave{$merge}=1;
944 }
945 }
946 }
947 # print "++++ Merges in $ps->{id} are....\n";
948 # my @have = sort keys %have; print Dumper(\@have);
949
950 # merge what we have with what ancestors have
951 %have = (%have, %ancestorshave);
952
953 # see what the remote branch has - these are the merges we
954 # will want to have in a consecutive series from the mergebase
955 my $otherbranchtip = git_rev_parse($branch);
956 my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
957 my @need;
958 foreach my $needps (@needraw) { # get the psets
959 $needps = commitid2pset($needps);
960 # git-rev-list will also
961 # list commits merged in via earlier
962 # merges. we are only interested in commits
963 # from the branch we're looking at
964 if ($branch eq $needps->{branch}) {
965 push @need, $needps->{id};
966 }
967 }
968
969 # print "++++ Merges from $branch we want are....\n";
970 # print Dumper(\@need);
971
972 my $newparent;
973 while (my $needed_commit = pop @need) {
974 if ($have{$needed_commit}) {
975 $newparent = $needed_commit;
976 } else {
977 last; # break out of the while
978 }
979 }
980 if ($newparent) {
981 push @parents, $newparent;
982 }
983
984
985 } # end foreach branch
986
987 # prune redundant parents
988 my %parents;
989 foreach my $p (@parents) {
990 $parents{$p} = 1;
991 }
992 foreach my $p (@parents) {
993 next unless exists $psets{$p}{merges};
994 next unless ref $psets{$p}{merges};
995 my @merges = @{$psets{$p}{merges}};
996 foreach my $merge (@merges) {
997 if ($parents{$merge}) {
998 delete $parents{$merge};
999 }
1000 }
1001 }
42f44b08 1002
f88961a8
EW
1003 @parents = ();
1004 foreach (keys %parents) {
1005 push @parents, '-p', ptag($_);
1006 }
b779d5f0
ML
1007 return @parents;
1008}
1009
1010sub git_rev_parse {
1011 my $name = shift;
1012 my $val = `git-rev-parse $name`;
1013 die "Error: git-rev-parse $name" if $?;
1014 chomp $val;
1015 return $val;
1016}
1017
1018# resolve a SHA1 to a known patchset
1019sub commitid2pset {
1020 my $commitid = shift;
1021 chomp $commitid;
1022 my $name = $rptags{$commitid}
1023 || die "Cannot find reverse tag mapping for $commitid";
a7fb51d3 1024 $name =~ s|,|/|;
b779d5f0
ML
1025 my $ps = $psets{$name}
1026 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1027 return $ps;
1028}
2777ef76 1029
42f44b08 1030
2777ef76
EW
1031# an alterative to `command` that allows input to be passed as an array
1032# to work around shell problems with weird characters in arguments
1033sub safe_pipe_capture {
1034 my @output;
1035 if (my $pid = open my $child, '-|') {
1036 @output = (<$child>);
1037 close $child or die join(' ',@_).": $! $?";
1038 } else {
3e525e67 1039 exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
2777ef76
EW
1040 }
1041 return wantarray ? @output : join('',@output);
1042}
1043
42f44b08
EW
1044# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1045sub arch_tree_id {
1046 my $dir = shift;
1047 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1048 return $ret;
1049}
1050
1051sub archive_reachable {
1052 my $archive = shift;
1053 return 1 if $reachable{$archive};
1054 return 0 if $unreachable{$archive};
1055
1056 if (system "$TLA whereis-archive $archive >/dev/null") {
1057 if ($opt_a && (system($TLA,'register-archive',
1058 "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1059 $reachable{$archive} = 1;
1060 return 1;
1061 }
1062 print STDERR "Archive is unreachable: $archive\n";
1063 $unreachable{$archive} = 1;
1064 return 0;
1065 } else {
1066 $reachable{$archive} = 1;
1067 return 1;
1068 }
1069}
2777ef76 1070