cvsimport: handle the parsing of uppercase config options
[git/git.git] / git-cvsimport.perl
1 #!/usr/bin/perl -w
2
3 # This tool is copyright (c) 2005, Matthias Urlichs.
4 # It is released under the Gnu Public License, version 2.
5 #
6 # The basic idea is to aggregate CVS check-ins into related changes.
7 # Fortunately, "cvsps" does that for us; all we have to do is to parse
8 # its output.
9 #
10 # Checking out the files is done by a single long-running CVS connection
11 # / server process.
12 #
13 # The head revision is on branch "origin" by default.
14 # You can change that with the '-o' option.
15
16 use strict;
17 use warnings;
18 use Getopt::Long;
19 use File::Spec;
20 use File::Temp qw(tempfile tmpnam);
21 use File::Path qw(mkpath);
22 use File::Basename qw(basename dirname);
23 use Time::Local;
24 use IO::Socket;
25 use IO::Pipe;
26 use POSIX qw(strftime dup2 ENOENT);
27 use IPC::Open2;
28
29 $SIG{'PIPE'}="IGNORE";
30 $ENV{'TZ'}="UTC";
31
32 our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
33 my (%conv_author_name, %conv_author_email);
34
35 sub usage(;$) {
36 my $msg = shift;
37 print(STDERR "Error: $msg\n") if $msg;
38 print STDERR <<END;
39 Usage: git cvsimport # fetch/update GIT from CVS
40 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
41 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
42 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
43 [-r remote] [-R] [CVS_module]
44 END
45 exit(1);
46 }
47
48 sub read_author_info($) {
49 my ($file) = @_;
50 my $user;
51 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
52
53 while (<$f>) {
54 # Expected format is this:
55 # exon=Andreas Ericsson <ae@op5.se>
56 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
57 $user = $1;
58 $conv_author_name{$user} = $2;
59 $conv_author_email{$user} = $3;
60 }
61 # However, we also read from CVSROOT/users format
62 # to ease migration.
63 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
64 my $mapped;
65 ($user, $mapped) = ($1, $3);
66 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
67 $conv_author_name{$user} = $1;
68 $conv_author_email{$user} = $2;
69 }
70 elsif ($mapped =~ /^<?(.*)>?$/) {
71 $conv_author_name{$user} = $user;
72 $conv_author_email{$user} = $1;
73 }
74 }
75 # NEEDSWORK: Maybe warn on unrecognized lines?
76 }
77 close ($f);
78 }
79
80 sub write_author_info($) {
81 my ($file) = @_;
82 open my $f, '>', $file or
83 die("Failed to open $file for writing: $!");
84
85 foreach (keys %conv_author_name) {
86 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
87 }
88 close ($f);
89 }
90
91 # convert getopts specs for use by git config
92 my %longmap = (
93 'A:' => 'authors-file',
94 'M:' => 'merge-regex',
95 'P:' => undef,
96 'R' => 'track-revisions',
97 'S:' => 'ignore-paths',
98 );
99
100 sub read_repo_config {
101 # Split the string between characters, unless there is a ':'
102 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
103 my @opts = split(/ *(?!:)/, shift);
104 foreach my $o (@opts) {
105 my $key = $o;
106 $key =~ s/://g;
107 my $arg = 'git config';
108 $arg .= ' --bool' if ($o !~ /:$/);
109 my $ckey = $key;
110
111 if (exists $longmap{$o}) {
112 # An uppercase option like -R cannot be
113 # expressed in the configuration, as the
114 # variable names are downcased.
115 $ckey = $longmap{$o};
116 next if (! defined $ckey);
117 $ckey =~ s/-//g;
118 }
119 chomp(my $tmp = `$arg --get cvsimport.$ckey`);
120 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
121 no strict 'refs';
122 my $opt_name = "opt_" . $key;
123 if (!$$opt_name) {
124 $$opt_name = $tmp;
125 }
126 }
127 }
128 }
129
130 my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
131 read_repo_config($opts);
132 Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
133
134 # turn the Getopt::Std specification in a Getopt::Long one,
135 # with support for multiple -M options
136 GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
137 or usage();
138 usage if $opt_h;
139
140 if (@ARGV == 0) {
141 chomp(my $module = `git config --get cvsimport.module`);
142 push(@ARGV, $module) if $? == 0;
143 }
144 @ARGV <= 1 or usage("You can't specify more than one CVS module");
145
146 if ($opt_d) {
147 $ENV{"CVSROOT"} = $opt_d;
148 } elsif (-f 'CVS/Root') {
149 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
150 $opt_d = <$f>;
151 chomp $opt_d;
152 close $f;
153 $ENV{"CVSROOT"} = $opt_d;
154 } elsif ($ENV{"CVSROOT"}) {
155 $opt_d = $ENV{"CVSROOT"};
156 } else {
157 usage("CVSROOT needs to be set");
158 }
159 $opt_s ||= "-";
160 $opt_a ||= 0;
161
162 my $git_tree = $opt_C;
163 $git_tree ||= ".";
164
165 my $remote;
166 if (defined $opt_r) {
167 $remote = 'refs/remotes/' . $opt_r;
168 $opt_o ||= "master";
169 } else {
170 $opt_o ||= "origin";
171 $remote = 'refs/heads';
172 }
173
174 my $cvs_tree;
175 if ($#ARGV == 0) {
176 $cvs_tree = $ARGV[0];
177 } elsif (-f 'CVS/Repository') {
178 open my $f, '<', 'CVS/Repository' or
179 die 'Failed to open CVS/Repository';
180 $cvs_tree = <$f>;
181 chomp $cvs_tree;
182 close $f;
183 } else {
184 usage("CVS module has to be specified");
185 }
186
187 our @mergerx = ();
188 if ($opt_m) {
189 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
190 }
191 if (@opt_M) {
192 push (@mergerx, map { qr/$_/ } @opt_M);
193 }
194
195 # Remember UTC of our starting time
196 # we'll want to avoid importing commits
197 # that are too recent
198 our $starttime = time();
199
200 select(STDERR); $|=1; select(STDOUT);
201
202
203 package CVSconn;
204 # Basic CVS dialog.
205 # We're only interested in connecting and downloading, so ...
206
207 use File::Spec;
208 use File::Temp qw(tempfile);
209 use POSIX qw(strftime dup2);
210
211 sub new {
212 my ($what,$repo,$subdir) = @_;
213 $what=ref($what) if ref($what);
214
215 my $self = {};
216 $self->{'buffer'} = "";
217 bless($self,$what);
218
219 $repo =~ s#/+$##;
220 $self->{'fullrep'} = $repo;
221 $self->conn();
222
223 $self->{'subdir'} = $subdir;
224 $self->{'lines'} = undef;
225
226 return $self;
227 }
228
229 sub conn {
230 my $self = shift;
231 my $repo = $self->{'fullrep'};
232 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
233 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
234
235 my ($proxyhost,$proxyport);
236 if ($param && ($param =~ m/proxy=([^;]+)/)) {
237 $proxyhost = $1;
238 # Default proxyport, if not specified, is 8080.
239 $proxyport = 8080;
240 if ($ENV{"CVS_PROXY_PORT"}) {
241 $proxyport = $ENV{"CVS_PROXY_PORT"};
242 }
243 if ($param =~ m/proxyport=([^;]+)/) {
244 $proxyport = $1;
245 }
246 }
247 $repo ||= '/';
248
249 # if username is not explicit in CVSROOT, then use current user, as cvs would
250 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
251 my $rr2 = "-";
252 unless ($port) {
253 $rr2 = ":pserver:$user\@$serv:$repo";
254 $port=2401;
255 }
256 my $rr = ":pserver:$user\@$serv:$port$repo";
257
258 if ($pass) {
259 $pass = $self->_scramble($pass);
260 } else {
261 open(H,$ENV{'HOME'}."/.cvspass") and do {
262 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
263 while (<H>) {
264 chomp;
265 s/^\/\d+\s+//;
266 my ($w,$p) = split(/\s/,$_,2);
267 if ($w eq $rr or $w eq $rr2) {
268 $pass = $p;
269 last;
270 }
271 }
272 };
273 $pass = "A" unless $pass;
274 }
275
276 my ($s, $rep);
277 if ($proxyhost) {
278
279 # Use a HTTP Proxy. Only works for HTTP proxies that
280 # don't require user authentication
281 #
282 # See: http://www.ietf.org/rfc/rfc2817.txt
283
284 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
285 die "Socket to $proxyhost: $!\n" unless defined $s;
286 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
287 or die "Write to $proxyhost: $!\n";
288 $s->flush();
289
290 $rep = <$s>;
291
292 # The answer should look like 'HTTP/1.x 2yy ....'
293 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
294 die "Proxy connect: $rep\n";
295 }
296 # Skip up to the empty line of the proxy server output
297 # including the response headers.
298 while ($rep = <$s>) {
299 last if (!defined $rep ||
300 $rep eq "\n" ||
301 $rep eq "\r\n");
302 }
303 } else {
304 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
305 die "Socket to $serv: $!\n" unless defined $s;
306 }
307
308 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
309 or die "Write to $serv: $!\n";
310 $s->flush();
311
312 $rep = <$s>;
313
314 if ($rep ne "I LOVE YOU\n") {
315 $rep="<unknown>" unless $rep;
316 die "AuthReply: $rep\n";
317 }
318 $self->{'socketo'} = $s;
319 $self->{'socketi'} = $s;
320 } else { # local or ext: Fork off our own cvs server.
321 my $pr = IO::Pipe->new();
322 my $pw = IO::Pipe->new();
323 my $pid = fork();
324 die "Fork: $!\n" unless defined $pid;
325 my $cvs = 'cvs';
326 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
327 my $rsh = 'rsh';
328 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
329
330 my @cvs = ($cvs, 'server');
331 my ($local, $user, $host);
332 $local = $repo =~ s/:local://;
333 if (!$local) {
334 $repo =~ s/:ext://;
335 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
336 ($user, $host) = ($1, $2);
337 }
338 if (!$local) {
339 if ($user) {
340 unshift @cvs, $rsh, '-l', $user, $host;
341 } else {
342 unshift @cvs, $rsh, $host;
343 }
344 }
345
346 unless ($pid) {
347 $pr->writer();
348 $pw->reader();
349 dup2($pw->fileno(),0);
350 dup2($pr->fileno(),1);
351 $pr->close();
352 $pw->close();
353 exec(@cvs);
354 }
355 $pw->writer();
356 $pr->reader();
357 $self->{'socketo'} = $pw;
358 $self->{'socketi'} = $pr;
359 }
360 $self->{'socketo'}->write("Root $repo\n");
361
362 # Trial and error says that this probably is the minimum set
363 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
364
365 $self->{'socketo'}->write("valid-requests\n");
366 $self->{'socketo'}->flush();
367
368 chomp(my $rep=$self->readline());
369 if ($rep !~ s/^Valid-requests\s*//) {
370 $rep="<unknown>" unless $rep;
371 die "Expected Valid-requests from server, but got: $rep\n";
372 }
373 chomp(my $res=$self->readline());
374 die "validReply: $res\n" if $res ne "ok";
375
376 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
377 $self->{'repo'} = $repo;
378 }
379
380 sub readline {
381 my ($self) = @_;
382 return $self->{'socketi'}->getline();
383 }
384
385 sub _file {
386 # Request a file with a given revision.
387 # Trial and error says this is a good way to do it. :-/
388 my ($self,$fn,$rev) = @_;
389 $self->{'socketo'}->write("Argument -N\n") or return undef;
390 $self->{'socketo'}->write("Argument -P\n") or return undef;
391 # -kk: Linus' version doesn't use it - defaults to off
392 if ($opt_k) {
393 $self->{'socketo'}->write("Argument -kk\n") or return undef;
394 }
395 $self->{'socketo'}->write("Argument -r\n") or return undef;
396 $self->{'socketo'}->write("Argument $rev\n") or return undef;
397 $self->{'socketo'}->write("Argument --\n") or return undef;
398 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
399 $self->{'socketo'}->write("Directory .\n") or return undef;
400 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
401 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
402 $self->{'socketo'}->write("co\n") or return undef;
403 $self->{'socketo'}->flush() or return undef;
404 $self->{'lines'} = 0;
405 return 1;
406 }
407 sub _line {
408 # Read a line from the server.
409 # ... except that 'line' may be an entire file. ;-)
410 my ($self, $fh) = @_;
411 die "Not in lines" unless defined $self->{'lines'};
412
413 my $line;
414 my $res=0;
415 while (defined($line = $self->readline())) {
416 # M U gnupg-cvs-rep/AUTHORS
417 # Updated gnupg-cvs-rep/
418 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
419 # /AUTHORS/1.1///T1.1
420 # u=rw,g=rw,o=rw
421 # 0
422 # ok
423
424 if ($line =~ s/^(?:Created|Updated) //) {
425 $line = $self->readline(); # path
426 $line = $self->readline(); # Entries line
427 my $mode = $self->readline(); chomp $mode;
428 $self->{'mode'} = $mode;
429 defined (my $cnt = $self->readline())
430 or die "EOF from server after 'Changed'\n";
431 chomp $cnt;
432 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
433 $line="";
434 $res = $self->_fetchfile($fh, $cnt);
435 } elsif ($line =~ s/^ //) {
436 print $fh $line;
437 $res += length($line);
438 } elsif ($line =~ /^M\b/) {
439 # output, do nothing
440 } elsif ($line =~ /^Mbinary\b/) {
441 my $cnt;
442 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
443 chomp $cnt;
444 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
445 $line="";
446 $res += $self->_fetchfile($fh, $cnt);
447 } else {
448 chomp $line;
449 if ($line eq "ok") {
450 # print STDERR "S: ok (".length($res).")\n";
451 return $res;
452 } elsif ($line =~ s/^E //) {
453 # print STDERR "S: $line\n";
454 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
455 $line = $self->readline(); # filename
456 $line = $self->readline(); # OK
457 chomp $line;
458 die "Unknown: $line" if $line ne "ok";
459 return -1;
460 } else {
461 die "Unknown: $line\n";
462 }
463 }
464 }
465 return undef;
466 }
467 sub file {
468 my ($self,$fn,$rev) = @_;
469 my $res;
470
471 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
472 DIR => File::Spec->tmpdir(), UNLINK => 1);
473
474 $self->_file($fn,$rev) and $res = $self->_line($fh);
475
476 if (!defined $res) {
477 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
478 truncate $fh, 0;
479 $self->conn();
480 $self->_file($fn,$rev) or die "No file command send";
481 $res = $self->_line($fh);
482 die "Retry failed" unless defined $res;
483 }
484 close ($fh);
485
486 return ($name, $res);
487 }
488 sub _fetchfile {
489 my ($self, $fh, $cnt) = @_;
490 my $res = 0;
491 my $bufsize = 1024 * 1024;
492 while ($cnt) {
493 if ($bufsize > $cnt) {
494 $bufsize = $cnt;
495 }
496 my $buf;
497 my $num = $self->{'socketi'}->read($buf,$bufsize);
498 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
499 print $fh $buf;
500 $res += $num;
501 $cnt -= $num;
502 }
503 return $res;
504 }
505
506 sub _scramble {
507 my ($self, $pass) = @_;
508 my $scrambled = "A";
509
510 return $scrambled unless $pass;
511
512 my $pass_len = length($pass);
513 my @pass_arr = split("", $pass);
514 my $i;
515
516 # from cvs/src/scramble.c
517 my @shifts = (
518 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
519 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
520 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
521 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
522 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
523 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
524 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
525 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
526 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
527 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
528 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
529 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
530 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
531 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
532 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
533 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
534 );
535
536 for ($i = 0; $i < $pass_len; $i++) {
537 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
538 }
539
540 return $scrambled;
541 }
542
543 package main;
544
545 my $cvs = CVSconn->new($opt_d, $cvs_tree);
546
547
548 sub pdate($) {
549 my ($d) = @_;
550 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
551 or die "Unparseable date: $d\n";
552 my $y=$1; $y-=1900 if $y>1900;
553 return timegm($6||0,$5,$4,$3,$2-1,$y);
554 }
555
556 sub pmode($) {
557 my ($mode) = @_;
558 my $m = 0;
559 my $mm = 0;
560 my $um = 0;
561 for my $x(split(//,$mode)) {
562 if ($x eq ",") {
563 $m |= $mm&$um;
564 $mm = 0;
565 $um = 0;
566 } elsif ($x eq "u") { $um |= 0700;
567 } elsif ($x eq "g") { $um |= 0070;
568 } elsif ($x eq "o") { $um |= 0007;
569 } elsif ($x eq "r") { $mm |= 0444;
570 } elsif ($x eq "w") { $mm |= 0222;
571 } elsif ($x eq "x") { $mm |= 0111;
572 } elsif ($x eq "=") { # do nothing
573 } else { die "Unknown mode: $mode\n";
574 }
575 }
576 $m |= $mm&$um;
577 return $m;
578 }
579
580 sub getwd() {
581 my $pwd = `pwd`;
582 chomp $pwd;
583 return $pwd;
584 }
585
586 sub is_sha1 {
587 my $s = shift;
588 return $s =~ /^[a-f0-9]{40}$/;
589 }
590
591 sub get_headref ($) {
592 my $name = shift;
593 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
594 return undef unless $? == 0;
595 chomp $r;
596 return $r;
597 }
598
599 my $user_filename_prepend = '';
600 sub munge_user_filename {
601 my $name = shift;
602 return File::Spec->file_name_is_absolute($name) ?
603 $name :
604 $user_filename_prepend . $name;
605 }
606
607 -d $git_tree
608 or mkdir($git_tree,0777)
609 or die "Could not create $git_tree: $!";
610 if ($git_tree ne '.') {
611 $user_filename_prepend = getwd() . '/';
612 chdir($git_tree);
613 }
614
615 my $last_branch = "";
616 my $orig_branch = "";
617 my %branch_date;
618 my $tip_at_start = undef;
619
620 my $git_dir = $ENV{"GIT_DIR"} || ".git";
621 $git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
622 $ENV{"GIT_DIR"} = $git_dir;
623 my $orig_git_index;
624 $orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
625
626 my %index; # holds filenames of one index per branch
627
628 unless (-d $git_dir) {
629 system(qw(git init));
630 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
631 system(qw(git read-tree --empty));
632 die "Cannot init an empty tree: $?\n" if $?;
633
634 $last_branch = $opt_o;
635 $orig_branch = "";
636 } else {
637 open(F, "-|", qw(git symbolic-ref HEAD)) or
638 die "Cannot run git symbolic-ref: $!\n";
639 chomp ($last_branch = <F>);
640 $last_branch = basename($last_branch);
641 close(F);
642 unless ($last_branch) {
643 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
644 $last_branch = "master";
645 }
646 $orig_branch = $last_branch;
647 $tip_at_start = `git rev-parse --verify HEAD`;
648
649 # Get the last import timestamps
650 my $fmt = '($ref, $author) = (%(refname), %(author));';
651 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
652 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
653 while (defined(my $entry = <H>)) {
654 my ($ref, $author);
655 eval($entry) || die "cannot eval refs list: $@";
656 my ($head) = ($ref =~ m|^$remote/(.*)|);
657 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
658 $branch_date{$head} = $1;
659 }
660 close(H);
661 if (!exists $branch_date{$opt_o}) {
662 die "Branch '$opt_o' does not exist.\n".
663 "Either use the correct '-o branch' option,\n".
664 "or import to a new repository.\n";
665 }
666 }
667
668 -d $git_dir
669 or die "Could not create git subdir ($git_dir).\n";
670
671 # now we read (and possibly save) author-info as well
672 -f "$git_dir/cvs-authors" and
673 read_author_info("$git_dir/cvs-authors");
674 if ($opt_A) {
675 read_author_info(munge_user_filename($opt_A));
676 write_author_info("$git_dir/cvs-authors");
677 }
678
679 # open .git/cvs-revisions, if requested
680 open my $revision_map, '>>', "$git_dir/cvs-revisions"
681 or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
682 if defined $opt_R;
683
684
685 #
686 # run cvsps into a file unless we are getting
687 # it passed as a file via $opt_P
688 #
689 my $cvspsfile;
690 unless ($opt_P) {
691 print "Running cvsps...\n" if $opt_v;
692 my $pid = open(CVSPS,"-|");
693 my $cvspsfh;
694 die "Cannot fork: $!\n" unless defined $pid;
695 unless ($pid) {
696 my @opt;
697 @opt = split(/,/,$opt_p) if defined $opt_p;
698 unshift @opt, '-z', $opt_z if defined $opt_z;
699 unshift @opt, '-q' unless defined $opt_v;
700 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
701 push @opt, '--cvs-direct';
702 }
703 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
704 die "Could not start cvsps: $!\n";
705 }
706 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
707 DIR => File::Spec->tmpdir());
708 while (<CVSPS>) {
709 print $cvspsfh $_;
710 }
711 close CVSPS;
712 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
713 close $cvspsfh;
714 } else {
715 $cvspsfile = munge_user_filename($opt_P);
716 }
717
718 open(CVS, "<$cvspsfile") or die $!;
719
720 ## cvsps output:
721 #---------------------
722 #PatchSet 314
723 #Date: 1999/09/18 13:03:59
724 #Author: wkoch
725 #Branch: STABLE-BRANCH-1-0
726 #Ancestor branch: HEAD
727 #Tag: (none)
728 #Log:
729 # See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
730 #Members:
731 # README:1.57->1.57.2.1
732 # VERSION:1.96->1.96.2.1
733 #
734 #---------------------
735
736 my $state = 0;
737
738 sub update_index (\@\@) {
739 my $old = shift;
740 my $new = shift;
741 open(my $fh, '|-', qw(git update-index -z --index-info))
742 or die "unable to open git update-index: $!";
743 print $fh
744 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
745 @$old),
746 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
747 @$new)
748 or die "unable to write to git update-index: $!";
749 close $fh
750 or die "unable to write to git update-index: $!";
751 $? and die "git update-index reported error: $?";
752 }
753
754 sub write_tree () {
755 open(my $fh, '-|', qw(git write-tree))
756 or die "unable to open git write-tree: $!";
757 chomp(my $tree = <$fh>);
758 is_sha1($tree)
759 or die "Cannot get tree id ($tree): $!";
760 close($fh)
761 or die "Error running git write-tree: $?\n";
762 print "Tree ID $tree\n" if $opt_v;
763 return $tree;
764 }
765
766 my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
767 my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
768
769 # commits that cvsps cannot place anywhere...
770 $ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
771
772 sub commit {
773 if ($branch eq $opt_o && !$index{branch} &&
774 !get_headref("$remote/$branch")) {
775 # looks like an initial commit
776 # use the index primed by git init
777 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
778 $index{$branch} = "$git_dir/index";
779 } else {
780 # use an index per branch to speed up
781 # imports of projects with many branches
782 unless ($index{$branch}) {
783 $index{$branch} = tmpnam();
784 $ENV{GIT_INDEX_FILE} = $index{$branch};
785 if ($ancestor) {
786 system("git", "read-tree", "$remote/$ancestor");
787 } else {
788 system("git", "read-tree", "$remote/$branch");
789 }
790 die "read-tree failed: $?\n" if $?;
791 }
792 }
793 $ENV{GIT_INDEX_FILE} = $index{$branch};
794
795 update_index(@old, @new);
796 @old = @new = ();
797 my $tree = write_tree();
798 my $parent = get_headref("$remote/$last_branch");
799 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
800
801 my @commit_args;
802 push @commit_args, ("-p", $parent) if $parent;
803
804 # loose detection of merges
805 # based on the commit msg
806 foreach my $rx (@mergerx) {
807 next unless $logmsg =~ $rx && $1;
808 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
809 if (my $sha1 = get_headref("$remote/$mparent")) {
810 push @commit_args, '-p', "$remote/$mparent";
811 print "Merge parent branch: $mparent\n" if $opt_v;
812 }
813 }
814
815 my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
816 $ENV{GIT_AUTHOR_NAME} = $author_name;
817 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
818 $ENV{GIT_AUTHOR_DATE} = $commit_date;
819 $ENV{GIT_COMMITTER_NAME} = $author_name;
820 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
821 $ENV{GIT_COMMITTER_DATE} = $commit_date;
822 my $pid = open2(my $commit_read, my $commit_write,
823 'git', 'commit-tree', $tree, @commit_args);
824
825 # compatibility with git2cvs
826 substr($logmsg,32767) = "" if length($logmsg) > 32767;
827 $logmsg =~ s/[\s\n]+\z//;
828
829 if (@skipped) {
830 $logmsg .= "\n\n\nSKIPPED:\n\t";
831 $logmsg .= join("\n\t", @skipped) . "\n";
832 @skipped = ();
833 }
834
835 print($commit_write "$logmsg\n") && close($commit_write)
836 or die "Error writing to git commit-tree: $!\n";
837
838 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
839 chomp(my $cid = <$commit_read>);
840 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
841 print "Commit ID $cid\n" if $opt_v;
842 close($commit_read);
843
844 waitpid($pid,0);
845 die "Error running git commit-tree: $?\n" if $?;
846
847 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
848 or die "Cannot write branch $branch for update: $!\n";
849
850 if ($revision_map) {
851 print $revision_map "@$_ $cid\n" for @commit_revisions;
852 }
853 @commit_revisions = ();
854
855 if ($tag) {
856 my ($xtag) = $tag;
857 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
858 $xtag =~ tr/_/\./ if ( $opt_u );
859 $xtag =~ s/[\/]/$opt_s/g;
860 $xtag =~ s/\[//g;
861
862 system('git' , 'tag', '-f', $xtag, $cid) == 0
863 or die "Cannot create tag $xtag: $!\n";
864
865 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
866 }
867 };
868
869 my $commitcount = 1;
870 while (<CVS>) {
871 chomp;
872 if ($state == 0 and /^-+$/) {
873 $state = 1;
874 } elsif ($state == 0) {
875 $state = 1;
876 redo;
877 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
878 $patchset = 0+$_;
879 $state=2;
880 } elsif ($state == 2 and s/^Date:\s+//) {
881 $date = pdate($_);
882 unless ($date) {
883 print STDERR "Could not parse date: $_\n";
884 $state=0;
885 next;
886 }
887 $state=3;
888 } elsif ($state == 3 and s/^Author:\s+//) {
889 s/\s+$//;
890 if (/^(.*?)\s+<(.*)>/) {
891 ($author_name, $author_email) = ($1, $2);
892 } elsif ($conv_author_name{$_}) {
893 $author_name = $conv_author_name{$_};
894 $author_email = $conv_author_email{$_};
895 } else {
896 $author_name = $author_email = $_;
897 }
898 $state = 4;
899 } elsif ($state == 4 and s/^Branch:\s+//) {
900 s/\s+$//;
901 tr/_/\./ if ( $opt_u );
902 s/[\/]/$opt_s/g;
903 $branch = $_;
904 $state = 5;
905 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
906 s/\s+$//;
907 $ancestor = $_;
908 $ancestor = $opt_o if $ancestor eq "HEAD";
909 $state = 6;
910 } elsif ($state == 5) {
911 $ancestor = undef;
912 $state = 6;
913 redo;
914 } elsif ($state == 6 and s/^Tag:\s+//) {
915 s/\s+$//;
916 if ($_ eq "(none)") {
917 $tag = undef;
918 } else {
919 $tag = $_;
920 }
921 $state = 7;
922 } elsif ($state == 7 and /^Log:/) {
923 $logmsg = "";
924 $state = 8;
925 } elsif ($state == 8 and /^Members:/) {
926 $branch = $opt_o if $branch eq "HEAD";
927 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
928 # skip
929 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
930 $state = 11;
931 next;
932 }
933 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
934 # skip if the commit is too recent
935 # given that the cvsps default fuzz is 300s, we give ourselves another
936 # 300s just in case -- this also prevents skipping commits
937 # due to server clock drift
938 print "skip patchset $patchset: $date too recent\n" if $opt_v;
939 $state = 11;
940 next;
941 }
942 if (exists $ignorebranch{$branch}) {
943 print STDERR "Skipping $branch\n";
944 $state = 11;
945 next;
946 }
947 if ($ancestor) {
948 if ($ancestor eq $branch) {
949 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
950 $ancestor = $opt_o;
951 }
952 if (defined get_headref("$remote/$branch")) {
953 print STDERR "Branch $branch already exists!\n";
954 $state=11;
955 next;
956 }
957 my $id = get_headref("$remote/$ancestor");
958 if (!$id) {
959 print STDERR "Branch $ancestor does not exist!\n";
960 $ignorebranch{$branch} = 1;
961 $state=11;
962 next;
963 }
964
965 system(qw(git update-ref -m cvsimport),
966 "$remote/$branch", $id);
967 if($? != 0) {
968 print STDERR "Could not create branch $branch\n";
969 $ignorebranch{$branch} = 1;
970 $state=11;
971 next;
972 }
973 }
974 $last_branch = $branch if $branch ne $last_branch;
975 $state = 9;
976 } elsif ($state == 8) {
977 $logmsg .= "$_\n";
978 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
979 # VERSION:1.96->1.96.2.1
980 my $init = ($2 eq "INITIAL");
981 my $fn = $1;
982 my $rev = $3;
983 $fn =~ s#^/+##;
984 if ($opt_S && $fn =~ m/$opt_S/) {
985 print "SKIPPING $fn v $rev\n";
986 push(@skipped, $fn);
987 next;
988 }
989 push @commit_revisions, [$fn, $rev];
990 print "Fetching $fn v $rev\n" if $opt_v;
991 my ($tmpname, $size) = $cvs->file($fn,$rev);
992 if ($size == -1) {
993 push(@old,$fn);
994 print "Drop $fn\n" if $opt_v;
995 } else {
996 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
997 my $pid = open(my $F, '-|');
998 die $! unless defined $pid;
999 if (!$pid) {
1000 exec("git", "hash-object", "-w", $tmpname)
1001 or die "Cannot create object: $!\n";
1002 }
1003 my $sha = <$F>;
1004 chomp $sha;
1005 close $F;
1006 my $mode = pmode($cvs->{'mode'});
1007 push(@new,[$mode, $sha, $fn]); # may be resurrected!
1008 }
1009 unlink($tmpname);
1010 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1011 my $fn = $1;
1012 my $rev = $2;
1013 $fn =~ s#^/+##;
1014 push @commit_revisions, [$fn, $rev];
1015 push(@old,$fn);
1016 print "Delete $fn\n" if $opt_v;
1017 } elsif ($state == 9 and /^\s*$/) {
1018 $state = 10;
1019 } elsif (($state == 9 or $state == 10) and /^-+$/) {
1020 $commitcount++;
1021 if ($opt_L && $commitcount > $opt_L) {
1022 last;
1023 }
1024 commit();
1025 if (($commitcount & 1023) == 0) {
1026 system(qw(git repack -a -d));
1027 }
1028 $state = 1;
1029 } elsif ($state == 11 and /^-+$/) {
1030 $state = 1;
1031 } elsif (/^-+$/) { # end of unknown-line processing
1032 $state = 1;
1033 } elsif ($state != 11) { # ignore stuff when skipping
1034 print STDERR "* UNKNOWN LINE * $_\n";
1035 }
1036 }
1037 commit() if $branch and $state != 11;
1038
1039 unless ($opt_P) {
1040 unlink($cvspsfile);
1041 }
1042
1043 # The heuristic of repacking every 1024 commits can leave a
1044 # lot of unpacked data. If there is more than 1MB worth of
1045 # not-packed objects, repack once more.
1046 my $line = `git count-objects`;
1047 if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1048 my ($n_objects, $kb) = ($1, $2);
1049 1024 < $kb
1050 and system(qw(git repack -a -d));
1051 }
1052
1053 foreach my $git_index (values %index) {
1054 if ($git_index ne "$git_dir/index") {
1055 unlink($git_index);
1056 }
1057 }
1058
1059 if (defined $orig_git_index) {
1060 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1061 } else {
1062 delete $ENV{GIT_INDEX_FILE};
1063 }
1064
1065 # Now switch back to the branch we were in before all of this happened
1066 if ($orig_branch) {
1067 print "DONE.\n" if $opt_v;
1068 if ($opt_i) {
1069 exit 0;
1070 }
1071 my $tip_at_end = `git rev-parse --verify HEAD`;
1072 if ($tip_at_start ne $tip_at_end) {
1073 for ($tip_at_start, $tip_at_end) { chomp; }
1074 print "Fetched into the current branch.\n" if $opt_v;
1075 system(qw(git read-tree -u -m),
1076 $tip_at_start, $tip_at_end);
1077 die "Fast-forward update failed: $?\n" if $?;
1078 }
1079 else {
1080 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1081 die "Could not merge $opt_o into the current branch.\n" if $?;
1082 }
1083 } else {
1084 $orig_branch = "master";
1085 print "DONE; creating $orig_branch branch\n" if $opt_v;
1086 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1087 unless defined get_headref('refs/heads/master');
1088 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1089 if ($opt_r && $opt_o ne 'HEAD');
1090 system('git', 'update-ref', 'HEAD', "$orig_branch");
1091 unless ($opt_i) {
1092 system(qw(git checkout -f));
1093 die "checkout failed: $?\n" if $?;
1094 }
1095 }