t/t5515-fetch-merge-logic.sh: Add two more tests
[git/git.git] / git-archimport.perl
CommitLineData
d3968363
ML
1#!/usr/bin/perl -w
2#
3# This tool is copyright (c) 2005, Martin Langhoff.
4# It is released under the Gnu Public License, version 2.
5#
6# The basic idea is to walk the output of tla abrowse,
7# fetch the changesets and apply them.
8#
241b5967 9
d3968363
ML
10=head1 Invocation
11
42f4570c
EW
12 git-archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ]
13 [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
d3968363 14
241b5967
ML
15Imports a project from one or more Arch repositories. It will follow branches
16and repositories within the namespaces defined by the <archive/branch>
82e5a82f 17parameters supplied. If it cannot find the remote branch a merge comes from
241b5967
ML
18it will just import it as a regular commit. If it can find it, it will mark it
19as a merge whenever possible.
d3968363 20
241b5967 21See man (1) git-archimport for more details.
d3968363 22
241b5967 23=head1 TODO
d3968363 24
241b5967 25 - create tag objects instead of ref tags
d3968363 26 - audit shell-escaping of filenames
241b5967
ML
27 - hide our private tags somewhere smarter
28 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
3e525e67
EW
29 - sort and apply patches by graphing ancestry relations instead of just
30 relying in dates supplied in the changeset itself.
31 tla ancestry-graph -m could be helpful here...
d3968363
ML
32
33=head1 Devel tricks
34
35Add print in front of the shell commands invoked via backticks.
36
22ff00fc
EW
37=head1 Devel Notes
38
39There are several places where Arch and git terminology are intermixed
40and potentially confused.
41
42The notion of a "branch" in git is approximately equivalent to
43a "archive/category--branch--version" in Arch. Also, it should be noted
44that the "--branch" portion of "archive/category--branch--version" is really
45optional in Arch although not many people (nor tools!) seem to know this.
46This means that "archive/category--version" is also a valid "branch"
47in git terms.
48
49We always refer to Arch names by their fully qualified variant (which
50means the "archive" name is prefixed.
51
52For people unfamiliar with Arch, an "archive" is the term for "repository",
53and can contain multiple, unrelated branches.
54
d3968363
ML
55=cut
56
57use strict;
58use warnings;
59use Getopt::Std;
42f44b08 60use File::Temp qw(tempdir);
f88961a8 61use File::Path qw(mkpath rmtree);
d3968363 62use File::Basename qw(basename dirname);
d3968363
ML
63use Data::Dumper qw/ Dumper /;
64use IPC::Open2;
65
66$SIG{'PIPE'}="IGNORE";
67$ENV{'TZ'}="UTC";
68
1d4710d0
ML
69my $git_dir = $ENV{"GIT_DIR"} || ".git";
70$ENV{"GIT_DIR"} = $git_dir;
a7fb51d3 71my $ptag_dir = "$git_dir/archimport/tags";
1d4710d0 72
3e525e67 73our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
d3968363
ML
74
75sub usage() {
76 print STDERR <<END;
77Usage: ${\basename $0} # fetch/update GIT from Arch
42f4570c 78 [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ]
d3968363
ML
79 repository/arch-branch [ repository/arch-branch] ...
80END
81 exit(1);
82}
83
3e525e67 84getopts("fThvat:D:") or usage();
d3968363
ML
85usage if $opt_h;
86
87@ARGV >= 1 or usage();
42f44b08
EW
88# $arch_branches:
89# values associated with keys:
90# =1 - Arch version / git 'branch' detected via abrowse on a limit
82e5a82f 91# >1 - Arch version / git 'branch' of an auxiliary branch we've merged
42f44b08 92my %arch_branches = map { $_ => 1 } @ARGV;
d3968363 93
5744f277
EW
94$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
95my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
127bf00f 96$opt_v && print "+ Using $tmp as temporary directory\n";
d3968363 97
11dbe9e8
GP
98unless (-d $git_dir) { # initial import needs empty directory
99 opendir DIR, '.' or die "Unable to open current directory: $!\n";
100 while (my $entry = readdir DIR) {
101 $entry =~ /^\.\.?$/ or
102 die "Initial import needs an empty current working directory.\n"
103 }
104 closedir DIR
105}
106
42f44b08
EW
107my %reachable = (); # Arch repositories we can access
108my %unreachable = (); # Arch repositories we can't access :<
d3968363 109my @psets = (); # the collection
b779d5f0 110my %psets = (); # the collection, by name
3e525e67
EW
111my %stats = ( # Track which strategy we used to import:
112 get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
113 simple_changeset => 0, import_or_tag => 0
114);
b779d5f0
ML
115
116my %rptags = (); # my reverse private tags
117 # to map a SHA1 to a commitid
2777ef76 118my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
d3968363 119
42f44b08
EW
120sub do_abrowse {
121 my $stage = shift;
122 while (my ($limit, $level) = each %arch_branches) {
123 next unless $level == $stage;
124
125 open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
126 or die "Problems with tla abrowse: $!";
d3968363 127
42f44b08
EW
128 my %ps = (); # the current one
129 my $lastseen = '';
d3968363 130
42f44b08
EW
131 while (<ABROWSE>) {
132 chomp;
d3968363 133
42f44b08
EW
134 # first record padded w 8 spaces
135 if (s/^\s{8}\b//) {
136 my ($id, $type) = split(m/\s+/, $_, 2);
137
138 my %last_ps;
139 # store the record we just captured
140 if (%ps && !exists $psets{ $ps{id} }) {
141 %last_ps = %ps; # break references
142 push (@psets, \%last_ps);
143 $psets{ $last_ps{id} } = \%last_ps;
144 }
d3968363 145
42f44b08
EW
146 my $branch = extract_versionname($id);
147 %ps = ( id => $id, branch => $branch );
148 if (%last_ps && ($last_ps{branch} eq $branch)) {
149 $ps{parent_id} = $last_ps{id};
150 }
151
152 $arch_branches{$branch} = 1;
153 $lastseen = 'id';
154
155 # deal with types (should work with baz or tla):
156 if ($type =~ m/\(.*changeset\)/) {
157 $ps{type} = 's';
158 } elsif ($type =~ /\(.*import\)/) {
159 $ps{type} = 'i';
6df896b5 160 } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
42f44b08
EW
161 $ps{type} = 't';
162 # read which revision we've tagged when we parse the log
6df896b5 163 $ps{tag} = $1;
42f44b08
EW
164 } else {
165 warn "Unknown type $type";
166 }
167
168 $arch_branches{$branch} = 1;
169 $lastseen = 'id';
170 } elsif (s/^\s{10}//) {
171 # 10 leading spaces or more
172 # indicate commit metadata
173
174 # date
175 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
176 $ps{date} = $1;
177 $lastseen = 'date';
178 } elsif ($_ eq 'merges in:') {
179 $ps{merges} = [];
180 $lastseen = 'merges';
181 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
182 my $id = $_;
183 push (@{$ps{merges}}, $id);
184
185 # aggressive branch finding:
186 if ($opt_D) {
187 my $branch = extract_versionname($id);
188 my $repo = extract_reponame($branch);
189
190 if (archive_reachable($repo) &&
191 !defined $arch_branches{$branch}) {
192 $arch_branches{$branch} = $stage + 1;
193 }
194 }
195 } else {
196 warn "more metadata after merges!?: $_\n" unless /^\s*$/;
d3968363 197 }
d3968363 198 }
d3968363 199 }
d3968363 200
42f44b08
EW
201 if (%ps && !exists $psets{ $ps{id} }) {
202 my %temp = %ps; # break references
203 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
204 $temp{parent_id} = $psets[$#psets]{id};
205 }
206 push (@psets, \%temp);
207 $psets{ $temp{id} } = \%temp;
208 }
209
210 close ABROWSE or die "$TLA abrowse failed on $limit\n";
211 }
d3968363
ML
212} # end foreach $root
213
42f44b08
EW
214do_abrowse(1);
215my $depth = 2;
216$opt_D ||= 0;
217while ($depth <= $opt_D) {
218 do_abrowse($depth);
219 $depth++;
220}
221
d3968363 222## Order patches by time
42f44b08
EW
223# FIXME see if we can find a more optimal way to do this by graphing
224# the ancestry data and walking it, that way we won't have to rely on
225# client-supplied dates
d3968363
ML
226@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
227
228#print Dumper \@psets;
229
230##
231## TODO cleanup irrelevant patches
232## and put an initial import
233## or a full tag
3292ae47 234my $import = 0;
1d4710d0 235unless (-d $git_dir) { # initial import
d3968363
ML
236 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
237 print "Starting import from $psets[0]{id}\n";
5c94f87e 238 `git-init`;
3292ae47
ML
239 die $! if $?;
240 $import = 1;
d3968363
ML
241 } else {
242 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
243 }
b779d5f0
ML
244} else { # progressing an import
245 # load the rptags
42f44b08 246 opendir(DIR, $ptag_dir)
b779d5f0
ML
247 || die "can't opendir: $!";
248 while (my $file = readdir(DIR)) {
a7fb51d3
EW
249 # skip non-interesting-files
250 next unless -f "$ptag_dir/$file";
251
252 # convert first '--' to '/' from old git-archimport to use
253 # as an archivename/c--b--v private tag
254 if ($file !~ m!,!) {
255 my $oldfile = $file;
256 $file =~ s!--!,!;
257 print STDERR "converting old tag $oldfile to $file\n";
258 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
259 }
b779d5f0
ML
260 my $sha = ptag($file);
261 chomp $sha;
b779d5f0
ML
262 $rptags{$sha} = $file;
263 }
264 closedir DIR;
d3968363
ML
265}
266
3292ae47 267# process patchsets
22ff00fc
EW
268# extract the Arch repository name (Arch "archive" in Arch-speak)
269sub extract_reponame {
270 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
271 return (split(/\//, $fq_cvbr))[0];
272}
273
274sub extract_versionname {
275 my $name = shift;
276 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
277 return $name;
278}
d3968363 279
22ff00fc
EW
280# convert a fully-qualified revision or version to a unique dirname:
281# normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
282# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
283#
284# the git notion of a branch is closer to
285# archive/category--branch--version than archive/category--branch, so we
286# use this to convert to git branch names.
287# Also, keep archive names but replace '/' with ',' since it won't require
288# subdirectories, and is safer than swapping '--' which could confuse
289# reverse-mapping when dealing with bastard branches that
290# are just archive/category--version (no --branch)
291sub tree_dirname {
292 my $revision = shift;
293 my $name = extract_versionname($revision);
294 $name =~ s#/#,#;
295 return $name;
296}
297
fee3365f
ML
298# old versions of git-archimport just use the <category--branch> part:
299sub old_style_branchname {
300 my $id = shift;
301 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
302 chomp $ret;
303 return $ret;
304}
305
306*git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
22ff00fc 307
3e525e67
EW
308sub process_patchset_accurate {
309 my $ps = shift;
310
311 # switch to that branch if we're not already in that branch:
312 if (-e "$git_dir/refs/heads/$ps->{branch}") {
313 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
d3968363 314
3e525e67
EW
315 # remove any old stuff that got leftover:
316 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
317 rmtree(split(/\0/,$rm)) if $rm;
3292ae47 318 }
3e525e67
EW
319
320 # Apply the import/changeset/merge into the working tree
321 my $dir = sync_to_ps($ps);
322 # read the new log entry:
323 my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
324 die "Error in cat-log: $!" if $?;
325 chomp @commitlog;
326
327 # grab variables we want from the log, new fields get added to $ps:
328 # (author, date, email, summary, message body ...)
329 parselog($ps, \@commitlog);
330
331 if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
332 # this should work when importing continuations
333 if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
334
335 # find where we are supposed to branch from
336 system('git-checkout','-f','-b',$ps->{branch},
337 $branchpoint) == 0 or die "$! $?\n";
338
339 # remove any old stuff that got leftover:
340 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
341 rmtree(split(/\0/,$rm)) if $rm;
3292ae47 342
3e525e67
EW
343 # If we trust Arch with the fact that this is just
344 # a tag, and it does not affect the state of the tree
345 # then we just tag and move on
346 tag($ps->{id}, $branchpoint);
347 ptag($ps->{id}, $branchpoint);
348 print " * Tagged $ps->{id} at $branchpoint\n";
349 return 0;
350 } else {
351 warn "Tagging from unknown id unsupported\n" if $ps->{tag};
352 }
353 # allow multiple bases/imports here since Arch supports cherry-picks
354 # from unrelated trees
355 }
356
357 # update the index with all the changes we got
3ff903bf
EW
358 system('git-diff-files --name-only -z | '.
359 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
3e525e67
EW
360 system('git-ls-files --others -z | '.
361 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
3e525e67
EW
362 return 1;
363}
37f15d50 364
3e525e67
EW
365# the native changeset processing strategy. This is very fast, but
366# does not handle permissions or any renames involving directories
367sub process_patchset_fast {
368 my $ps = shift;
d3968363
ML
369 #
370 # create the branch if needed
371 #
3292ae47
ML
372 if ($ps->{type} eq 'i' && !$import) {
373 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
d3968363
ML
374 }
375
3292ae47 376 unless ($import) { # skip for import
1d4710d0 377 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
d3968363 378 # we know about this branch
f88961a8 379 system('git-checkout',$ps->{branch});
d3968363
ML
380 } else {
381 # new branch! we need to verify a few things
382 die "Branch on a non-tag!" unless $ps->{type} eq 't';
383 my $branchpoint = ptag($ps->{tag});
384 die "Tagging from unknown id unsupported: $ps->{tag}"
385 unless $branchpoint;
386
387 # find where we are supposed to branch from
f88961a8 388 system('git-checkout','-b',$ps->{branch},$branchpoint);
52586ecb
ML
389
390 # If we trust Arch with the fact that this is just
391 # a tag, and it does not affect the state of the tree
392 # then we just tag and move on
393 tag($ps->{id}, $branchpoint);
394 ptag($ps->{id}, $branchpoint);
395 print " * Tagged $ps->{id} at $branchpoint\n";
3e525e67 396 return 0;
d3968363
ML
397 }
398 die $! if $?;
399 }
400
d3968363
ML
401 #
402 # Apply the import/changeset/merge into the working tree
403 #
404 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
d3968363 405 apply_import($ps) or die $!;
3e525e67 406 $stats{import_or_tag}++;
3292ae47 407 $import=0;
d3968363
ML
408 } elsif ($ps->{type} eq 's') {
409 apply_cset($ps);
3e525e67 410 $stats{simple_changeset}++;
d3968363
ML
411 }
412
413 #
414 # prepare update git's index, based on what arch knows
415 # about the pset, resolve parents, etc
416 #
d3968363 417
6df896b5 418 my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
d3968363
ML
419 die "Error in cat-archive-log: $!" if $?;
420
6df896b5 421 parselog($ps,\@commitlog);
d3968363
ML
422
423 # imports don't give us good info
424 # on added files. Shame on them
6df896b5 425 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
6df896b5
EW
426 system('git-ls-files --deleted -z | '.
427 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
3ff903bf
EW
428 system('git-ls-files --others -z | '.
429 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
d3968363
ML
430 }
431
6df896b5 432 # TODO: handle removed_directories and renamed_directories:
3ff903bf 433
6df896b5
EW
434 if (my $del = $ps->{removed_files}) {
435 unlink @$del;
d3968363
ML
436 while (@$del) {
437 my @slice = splice(@$del, 0, 100);
6df896b5
EW
438 system('git-update-index','--remove','--',@slice) == 0 or
439 die "Error in git-update-index --remove: $! $?\n";
d3968363
ML
440 }
441 }
6df896b5
EW
442
443 if (my $ren = $ps->{renamed_files}) { # renamed
d3968363
ML
444 if (@$ren % 2) {
445 die "Odd number of entries in rename!?";
446 }
6df896b5 447
d3968363 448 while (@$ren) {
6df896b5
EW
449 my $from = shift @$ren;
450 my $to = shift @$ren;
d3968363
ML
451
452 unless (-d dirname($to)) {
453 mkpath(dirname($to)); # will die on err
454 }
3e525e67 455 # print "moving $from $to";
6df896b5
EW
456 rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
457 system('git-update-index','--remove','--',$from) == 0 or
458 die "Error in git-update-index --remove: $! $?\n";
459 system('git-update-index','--add','--',$to) == 0 or
460 die "Error in git-update-index --add: $! $?\n";
d3968363 461 }
d3968363 462 }
6df896b5 463
3ff903bf
EW
464 if (my $add = $ps->{new_files}) {
465 while (@$add) {
466 my @slice = splice(@$add, 0, 100);
467 system('git-update-index','--add','--',@slice) == 0 or
468 die "Error in git-update-index --add: $! $?\n";
469 }
470 }
471
6df896b5 472 if (my $mod = $ps->{modified_files}) {
d3968363
ML
473 while (@$mod) {
474 my @slice = splice(@$mod, 0, 100);
6df896b5
EW
475 system('git-update-index','--',@slice) == 0 or
476 die "Error in git-update-index: $! $?\n";
d3968363
ML
477 }
478 }
3e525e67
EW
479 return 1; # we successfully applied the changeset
480}
481
482if ($opt_f) {
483 print "Will import patchsets using the fast strategy\n",
484 "Renamed directories and permission changes will be missed\n";
485 *process_patchset = *process_patchset_fast;
486} else {
487 print "Using the default (accurate) import strategy.\n",
488 "Things may be a bit slow\n";
489 *process_patchset = *process_patchset_accurate;
490}
6df896b5 491
3e525e67
EW
492foreach my $ps (@psets) {
493 # process patchsets
494 $ps->{branch} = git_branchname($ps->{id});
495
496 #
497 # ensure we have a clean state
498 #
499 if (my $dirty = `git-diff-files`) {
500 die "Unclean tree when about to process $ps->{id} " .
501 " - did we fail to commit cleanly before?\n$dirty";
502 }
503 die $! if $?;
504
505 #
506 # skip commits already in repo
507 #
508 if (ptag($ps->{id})) {
509 $opt_v && print " * Skipping already imported: $ps->{id}\n";
10945e00 510 next;
3e525e67
EW
511 }
512
513 print " * Starting to work on $ps->{id}\n";
514
515 process_patchset($ps) or next;
516
215a7ad1 517 # warn "errors when running git-update-index! $!";
3e525e67 518 my $tree = `git-write-tree`;
d3968363
ML
519 die "cannot write tree $!" if $?;
520 chomp $tree;
d3968363
ML
521
522 #
523 # Who's your daddy?
524 #
525 my @par;
1d4710d0 526 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
f88961a8 527 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
d3968363
ML
528 my $p = <HEAD>;
529 close HEAD;
530 chomp $p;
531 push @par, '-p', $p;
532 } else {
533 if ($ps->{type} eq 's') {
534 warn "Could not find the right head for the branch $ps->{branch}";
535 }
536 }
537 }
538
b779d5f0
ML
539 if ($ps->{merges}) {
540 push @par, find_parents($ps);
541 }
d3968363
ML
542
543 #
544 # Commit, tag and clean state
545 #
546 $ENV{TZ} = 'GMT';
547 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
548 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
549 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
550 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
551 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
552 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
553
6df896b5 554 my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
d3968363 555 or die $!;
a94f457e 556 print WRITER $ps->{summary},"\n\n";
6df896b5
EW
557 print WRITER $ps->{message},"\n";
558
559 # make it easy to backtrack and figure out which Arch revision this was:
560 print WRITER 'git-archimport-id: ',$ps->{id},"\n";
561
d3968363
ML
562 close WRITER;
563 my $commitid = <READER>; # read
564 chomp $commitid;
565 close READER;
566 waitpid $pid,0; # close;
567
568 if (length $commitid != 40) {
569 die "Something went wrong with the commit! $! $commitid";
570 }
571 #
572 # Update the branch
573 #
f88961a8 574 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
d3968363
ML
575 print HEAD $commitid;
576 close HEAD;
8366a10a 577 system('git-update-ref', 'HEAD', "$ps->{branch}");
d3968363
ML
578
579 # tag accordingly
580 ptag($ps->{id}, $commitid); # private tag
581 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
582 tag($ps->{id}, $commitid);
583 }
584 print " * Committed $ps->{id}\n";
585 print " + tree $tree\n";
586 print " + commit $commitid\n";
b779d5f0 587 $opt_v && print " + commit date is $ps->{date} \n";
f88961a8 588 $opt_v && print " + parents: ",join(' ',@par),"\n";
3e525e67
EW
589}
590
591if ($opt_v) {
592 foreach (sort keys %stats) {
593 print" $_: $stats{$_}\n";
594 }
595}
596exit 0;
597
598# used by the accurate strategy:
599sub sync_to_ps {
600 my $ps = shift;
601 my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
602
603 $opt_v && print "sync_to_ps($ps->{id}) method: ";
604
605 if (-d $tree_dir) {
606 if ($ps->{type} eq 't') {
607 $opt_v && print "get (tag)\n";
608 # looks like a tag-only or (worse,) a mixed tags/changeset branch,
609 # can't rely on replay to work correctly on these
610 rmtree($tree_dir);
611 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
612 $stats{get_tag}++;
613 } else {
614 my $tree_id = arch_tree_id($tree_dir);
615 if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
616 # the common case (hopefully)
617 $opt_v && print "replay\n";
618 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
619 $stats{replay}++;
620 } else {
621 # getting one tree is usually faster than getting two trees
622 # and applying the delta ...
623 rmtree($tree_dir);
624 $opt_v && print "apply-delta\n";
625 safe_pipe_capture($TLA,'get','--no-pristine',
626 $ps->{id},$tree_dir);
627 $stats{get_delta}++;
628 }
629 }
630 } else {
631 # new branch work
632 $opt_v && print "get (new tree)\n";
633 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
634 $stats{get_new}++;
635 }
636
637 # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
638 system('rsync','-aI','--delete','--exclude',$git_dir,
639# '--exclude','.arch-inventory',
640 '--exclude','.arch-ids','--exclude','{arch}',
641 '--exclude','+*','--exclude',',*',
642 "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
643 return $tree_dir;
d3968363
ML
644}
645
d3968363
ML
646sub apply_import {
647 my $ps = shift;
22ff00fc 648 my $bname = git_branchname($ps->{id});
d3968363 649
f88961a8 650 mkpath($tmp);
d3968363 651
f88961a8 652 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
d3968363 653 die "Cannot get import: $!" if $?;
f88961a8
EW
654 system('rsync','-aI','--delete', '--exclude',$git_dir,
655 '--exclude','.arch-ids','--exclude','{arch}',
656 "$tmp/import/", './');
d3968363
ML
657 die "Cannot rsync import:$!" if $?;
658
f88961a8 659 rmtree("$tmp/import");
d3968363
ML
660 die "Cannot remove tempdir: $!" if $?;
661
662
663 return 1;
664}
665
666sub apply_cset {
667 my $ps = shift;
668
f88961a8 669 mkpath($tmp);
d3968363
ML
670
671 # get the changeset
f88961a8 672 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
d3968363
ML
673 die "Cannot get changeset: $!" if $?;
674
675 # apply patches
676 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
677 # this can be sped up considerably by doing
678 # (find | xargs cat) | patch
82e5a82f 679 # but that can get mucked up by patches
d3968363
ML
680 # with missing trailing newlines or the standard
681 # 'missing newline' flag in the patch - possibly
682 # produced with an old/buggy diff.
683 # slow and safe, we invoke patch once per patchfile
684 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
685 die "Problem applying patches! $!" if $?;
686 }
687
688 # apply changed binary files
689 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
690 foreach my $mod (@modified) {
691 chomp $mod;
692 my $orig = $mod;
693 $orig =~ s/\.modified$//; # lazy
694 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
695 #print "rsync -p '$mod' '$orig'";
f88961a8 696 system('rsync','-p',$mod,"./$orig");
d3968363
ML
697 die "Problem applying binary changes! $!" if $?;
698 }
699 }
700
701 # bring in new files
f88961a8
EW
702 system('rsync','-aI','--exclude',$git_dir,
703 '--exclude','.arch-ids',
704 '--exclude', '{arch}',
705 "$tmp/changeset/new-files-archive/",'./');
d3968363
ML
706
707 # deleted files are hinted from the commitlog processing
708
f88961a8 709 rmtree("$tmp/changeset");
d3968363
ML
710}
711
712
713# =for reference
6df896b5
EW
714# notes: *-files/-directories keys cannot have spaces, they're always
715# pika-escaped. Everything after the first newline
716# A log entry looks like:
d3968363
ML
717# Revision: moodle-org--moodle--1.3.3--patch-15
718# Archive: arch-eduforge@catalyst.net.nz--2004
719# Creator: Penny Leach <penny@catalyst.net.nz>
720# Date: Wed May 25 14:15:34 NZST 2005
721# Standard-date: 2005-05-25 02:15:34 GMT
722# New-files: lang/de/.arch-ids/block_glossary_random.php.id
723# lang/de/.arch-ids/block_html.php.id
724# New-directories: lang/de/help/questionnaire
725# lang/de/help/questionnaire/.arch-ids
726# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
727# db_sears.sql db/db_sears.sql
728# Removed-files: lang/be/docs/.arch-ids/release.html.id
729# lang/be/docs/.arch-ids/releaseold.html.id
730# Modified-files: admin/cron.php admin/delete.php
731# admin/editor.html backup/lib.php backup/restore.php
732# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
733# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
6df896b5 734# summary can be multiline with a leading space just like the above fields
d3968363
ML
735# Keywords:
736#
737# Updating yadda tadda tadda madda
738sub parselog {
6df896b5
EW
739 my ($ps, $log) = @_;
740 my $key = undef;
741
742 # headers we want that contain filenames:
743 my %want_headers = (
744 new_files => 1,
745 modified_files => 1,
746 renamed_files => 1,
747 renamed_directories => 1,
748 removed_files => 1,
749 removed_directories => 1,
750 );
d3968363 751
6df896b5
EW
752 chomp (@$log);
753 while ($_ = shift @$log) {
754 if (/^Continuation-of:\s*(.*)/) {
755 $ps->{tag} = $1;
756 $key = undef;
757 } elsif (/^Summary:\s*(.*)$/ ) {
a94f457e
PB
758 # summary can be multiline as long as it has a leading space.
759 # we squeeze it onto a single line, though.
6df896b5
EW
760 $ps->{summary} = [ $1 ];
761 $key = 'summary';
762 } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
763 $ps->{author} = $1;
764 $ps->{email} = $2;
765 $key = undef;
766 # any *-files or *-directories can be read here:
767 } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
768 my $val = $2;
769 $key = lc $1;
770 $key =~ tr/-/_/; # too lazy to quote :P
771 if ($want_headers{$key}) {
772 push @{$ps->{$key}}, split(/\s+/, $val);
773 } else {
774 $key = undef;
775 }
776 } elsif (/^$/) {
777 last; # remainder of @$log that didn't get shifted off is message
778 } elsif ($key) {
779 if (/^\s+(.*)$/) {
780 if ($key eq 'summary') {
781 push @{$ps->{$key}}, $1;
782 } else { # files/directories:
783 push @{$ps->{$key}}, split(/\s+/, $1);
784 }
785 } else {
786 $key = undef;
787 }
788 }
d3968363 789 }
6df896b5 790
a94f457e
PB
791 # drop leading empty lines from the log message
792 while (@$log && $log->[0] eq '') {
793 shift @$log;
794 }
795 if (exists $ps->{summary} && @{$ps->{summary}}) {
796 $ps->{summary} = join(' ', @{$ps->{summary}});
797 }
798 elsif (@$log == 0) {
799 $ps->{summary} = 'empty commit message';
800 } else {
801 $ps->{summary} = $log->[0] . '...';
802 }
6df896b5 803 $ps->{message} = join("\n",@$log);
d3968363 804
6df896b5
EW
805 # skip Arch control files, unescape pika-escaped files
806 foreach my $k (keys %want_headers) {
807 next unless (defined $ps->{$k});
6e33101a 808 my @tmp = ();
6df896b5
EW
809 foreach my $t (@{$ps->{$k}}) {
810 next unless length ($t);
811 next if $t =~ m!\{arch\}/!;
812 next if $t =~ m!\.arch-ids/!;
813 # should we skip this?
814 next if $t =~ m!\.arch-inventory$!;
f84f9d38
ML
815 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
816 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
6df896b5 817 if ($t =~ /\\/ ){
f88961a8 818 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
f84f9d38 819 }
6df896b5 820 push @tmp, $t;
d3968363 821 }
6e33101a 822 $ps->{$k} = \@tmp;
d3968363 823 }
d3968363
ML
824}
825
826# write/read a tag
827sub tag {
828 my ($tag, $commit) = @_;
a7fb51d3 829
fee3365f
ML
830 if ($opt_o) {
831 $tag =~ s|/|--|g;
832 } else {
833 # don't use subdirs for tags yet, it could screw up other porcelains
834 $tag =~ s|/|,|g;
835 }
d3968363
ML
836
837 if ($commit) {
a7fb51d3 838 open(C,">","$git_dir/refs/tags/$tag")
d3968363
ML
839 or die "Cannot create tag $tag: $!\n";
840 print C "$commit\n"
841 or die "Cannot write tag $tag: $!\n";
842 close(C)
843 or die "Cannot write tag $tag: $!\n";
a7fb51d3 844 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
d3968363 845 } else { # read
a7fb51d3 846 open(C,"<","$git_dir/refs/tags/$tag")
d3968363
ML
847 or die "Cannot read tag $tag: $!\n";
848 $commit = <C>;
849 chomp $commit;
850 die "Error reading tag $tag: $!\n" unless length $commit == 40;
851 close(C)
852 or die "Cannot read tag $tag: $!\n";
853 return $commit;
854 }
855}
856
857# write/read a private tag
858# reads fail softly if the tag isn't there
859sub ptag {
860 my ($tag, $commit) = @_;
a7fb51d3
EW
861
862 # don't use subdirs for tags yet, it could screw up other porcelains
863 $tag =~ s|/|,|g;
d3968363 864
a7fb51d3
EW
865 my $tag_file = "$ptag_dir/$tag";
866 my $tag_branch_dir = dirname($tag_file);
867 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
d3968363
ML
868
869 if ($commit) { # write
a7fb51d3 870 open(C,">",$tag_file)
d3968363
ML
871 or die "Cannot create tag $tag: $!\n";
872 print C "$commit\n"
873 or die "Cannot write tag $tag: $!\n";
874 close(C)
875 or die "Cannot write tag $tag: $!\n";
b779d5f0
ML
876 $rptags{$commit} = $tag
877 unless $tag =~ m/--base-0$/;
d3968363
ML
878 } else { # read
879 # if the tag isn't there, return 0
a7fb51d3 880 unless ( -s $tag_file) {
d3968363
ML
881 return 0;
882 }
a7fb51d3 883 open(C,"<",$tag_file)
d3968363
ML
884 or die "Cannot read tag $tag: $!\n";
885 $commit = <C>;
886 chomp $commit;
887 die "Error reading tag $tag: $!\n" unless length $commit == 40;
888 close(C)
889 or die "Cannot read tag $tag: $!\n";
b779d5f0
ML
890 unless (defined $rptags{$commit}) {
891 $rptags{$commit} = $tag;
892 }
d3968363
ML
893 return $commit;
894 }
895}
b779d5f0
ML
896
897sub find_parents {
898 #
899 # Identify what branches are merging into me
900 # and whether we are fully merged
901 # git-merge-base <headsha> <headsha> should tell
902 # me what the base of the merge should be
903 #
904 my $ps = shift;
905
906 my %branches; # holds an arrayref per branch
907 # the arrayref contains a list of
908 # merged patches between the base
909 # of the merge and the current head
910
911 my @parents; # parents found for this commit
912
913 # simple loop to split the merges
914 # per branch
915 foreach my $merge (@{$ps->{merges}}) {
22ff00fc 916 my $branch = git_branchname($merge);
b779d5f0
ML
917 unless (defined $branches{$branch} ){
918 $branches{$branch} = [];
919 }
920 push @{$branches{$branch}}, $merge;
921 }
922
923 #
924 # foreach branch find a merge base and walk it to the
925 # head where we are, collecting the merged patchsets that
926 # Arch has recorded. Keep that in @have
927 # Compare that with the commits on the other branch
928 # between merge-base and the tip of the branch (@need)
929 # and see if we have a series of consecutive patches
930 # starting from the merge base. The tip of the series
931 # of consecutive patches merged is our new parent for
932 # that branch.
933 #
934 foreach my $branch (keys %branches) {
37f15d50
ML
935
936 # check that we actually know about the branch
937 next unless -e "$git_dir/refs/heads/$branch";
938
b779d5f0 939 my $mergebase = `git-merge-base $branch $ps->{branch}`;
9b626e75
EW
940 if ($?) {
941 # Don't die here, Arch supports one-way cherry-picking
942 # between branches with no common base (or any relationship
943 # at all beforehand)
944 warn "Cannot find merge base for $branch and $ps->{branch}";
945 next;
946 }
b779d5f0
ML
947 chomp $mergebase;
948
949 # now walk up to the mergepoint collecting what patches we have
950 my $branchtip = git_rev_parse($ps->{branch});
765ac8ec 951 my @ancestors = `git-rev-list --topo-order $branchtip ^$mergebase`;
b779d5f0
ML
952 my %have; # collected merges this branch has
953 foreach my $merge (@{$ps->{merges}}) {
954 $have{$merge} = 1;
955 }
956 my %ancestorshave;
957 foreach my $par (@ancestors) {
958 $par = commitid2pset($par);
959 if (defined $par->{merges}) {
960 foreach my $merge (@{$par->{merges}}) {
961 $ancestorshave{$merge}=1;
962 }
963 }
964 }
965 # print "++++ Merges in $ps->{id} are....\n";
966 # my @have = sort keys %have; print Dumper(\@have);
967
968 # merge what we have with what ancestors have
969 %have = (%have, %ancestorshave);
970
971 # see what the remote branch has - these are the merges we
972 # will want to have in a consecutive series from the mergebase
973 my $otherbranchtip = git_rev_parse($branch);
765ac8ec 974 my @needraw = `git-rev-list --topo-order $otherbranchtip ^$mergebase`;
b779d5f0
ML
975 my @need;
976 foreach my $needps (@needraw) { # get the psets
977 $needps = commitid2pset($needps);
978 # git-rev-list will also
979 # list commits merged in via earlier
980 # merges. we are only interested in commits
981 # from the branch we're looking at
982 if ($branch eq $needps->{branch}) {
983 push @need, $needps->{id};
984 }
985 }
986
987 # print "++++ Merges from $branch we want are....\n";
988 # print Dumper(\@need);
989
990 my $newparent;
991 while (my $needed_commit = pop @need) {
992 if ($have{$needed_commit}) {
993 $newparent = $needed_commit;
994 } else {
995 last; # break out of the while
996 }
997 }
998 if ($newparent) {
999 push @parents, $newparent;
1000 }
1001
1002
1003 } # end foreach branch
1004
1005 # prune redundant parents
1006 my %parents;
1007 foreach my $p (@parents) {
1008 $parents{$p} = 1;
1009 }
1010 foreach my $p (@parents) {
1011 next unless exists $psets{$p}{merges};
1012 next unless ref $psets{$p}{merges};
1013 my @merges = @{$psets{$p}{merges}};
1014 foreach my $merge (@merges) {
1015 if ($parents{$merge}) {
1016 delete $parents{$merge};
1017 }
1018 }
1019 }
42f44b08 1020
f88961a8
EW
1021 @parents = ();
1022 foreach (keys %parents) {
1023 push @parents, '-p', ptag($_);
1024 }
b779d5f0
ML
1025 return @parents;
1026}
1027
1028sub git_rev_parse {
1029 my $name = shift;
1030 my $val = `git-rev-parse $name`;
1031 die "Error: git-rev-parse $name" if $?;
1032 chomp $val;
1033 return $val;
1034}
1035
1036# resolve a SHA1 to a known patchset
1037sub commitid2pset {
1038 my $commitid = shift;
1039 chomp $commitid;
1040 my $name = $rptags{$commitid}
1041 || die "Cannot find reverse tag mapping for $commitid";
a7fb51d3 1042 $name =~ s|,|/|;
b779d5f0
ML
1043 my $ps = $psets{$name}
1044 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1045 return $ps;
1046}
2777ef76 1047
42f44b08 1048
82e5a82f 1049# an alternative to `command` that allows input to be passed as an array
2777ef76
EW
1050# to work around shell problems with weird characters in arguments
1051sub safe_pipe_capture {
1052 my @output;
1053 if (my $pid = open my $child, '-|') {
1054 @output = (<$child>);
1055 close $child or die join(' ',@_).": $! $?";
1056 } else {
3e525e67 1057 exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
2777ef76
EW
1058 }
1059 return wantarray ? @output : join('',@output);
1060}
1061
42f44b08
EW
1062# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1063sub arch_tree_id {
1064 my $dir = shift;
1065 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1066 return $ret;
1067}
1068
1069sub archive_reachable {
1070 my $archive = shift;
1071 return 1 if $reachable{$archive};
1072 return 0 if $unreachable{$archive};
1073
1074 if (system "$TLA whereis-archive $archive >/dev/null") {
1075 if ($opt_a && (system($TLA,'register-archive',
1076 "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1077 $reachable{$archive} = 1;
1078 return 1;
1079 }
1080 print STDERR "Archive is unreachable: $archive\n";
1081 $unreachable{$archive} = 1;
1082 return 0;
1083 } else {
1084 $reachable{$archive} = 1;
1085 return 1;
1086 }
1087}
2777ef76 1088