cvsimport: standarize open() calls to external git tools
[git/git.git] / git-cvsimport.perl
CommitLineData
a57a9493 1#!/usr/bin/perl -w
9718a00b 2
a57a9493
MU
3# This tool is copyright (c) 2005, Matthias Urlichs.
4# It is released under the Gnu Public License, version 2.
5#
6# The basic idea is to aggregate CVS check-ins into related changes.
7# Fortunately, "cvsps" does that for us; all we have to do is to parse
8# its output.
9#
10# Checking out the files is done by a single long-running CVS connection
11# / server process.
12#
13# The head revision is on branch "origin" by default.
14# You can change that with the '-o' option.
15
16use strict;
17use warnings;
bc434e82 18use Getopt::Long;
79ee456c 19use File::Spec;
7ccd9009 20use File::Temp qw(tempfile tmpnam);
a57a9493
MU
21use File::Path qw(mkpath);
22use File::Basename qw(basename dirname);
23use Time::Local;
2a3e1a85
MU
24use IO::Socket;
25use IO::Pipe;
e49289df 26use POSIX qw(strftime dup2 ENOENT);
0d821d4d 27use IPC::Open2;
a57a9493
MU
28
29$SIG{'PIPE'}="IGNORE";
30$ENV{'TZ'}="UTC";
31
bc434e82 32our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r);
ffd97f3a 33my (%conv_author_name, %conv_author_email);
a57a9493 34
7bf77644
FL
35sub usage(;$) {
36 my $msg = shift;
37 print(STDERR "Error: $msg\n") if $msg;
a57a9493 38 print STDERR <<END;
1b1dd23f 39Usage: git cvsimport # fetch/update GIT from CVS
ffd97f3a 40 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
edbe4466
FL
41 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
42 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
cbc9be5c 43 [-r remote] [CVS_module]
a57a9493
MU
44END
45 exit(1);
46}
47
ffd97f3a
AE
48sub read_author_info($) {
49 my ($file) = @_;
50 my $user;
51 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
52
53 while (<$f>) {
8cd16211 54 # Expected format is this:
ffd97f3a 55 # exon=Andreas Ericsson <ae@op5.se>
8cd16211 56 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
ffd97f3a 57 $user = $1;
8cd16211
JH
58 $conv_author_name{$user} = $2;
59 $conv_author_email{$user} = $3;
ffd97f3a 60 }
8cd16211
JH
61 # However, we also read from CVSROOT/users format
62 # to ease migration.
63 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
64 my $mapped;
65 ($user, $mapped) = ($1, $3);
66 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
67 $conv_author_name{$user} = $1;
68 $conv_author_email{$user} = $2;
69 }
70 elsif ($mapped =~ /^<?(.*)>?$/) {
71 $conv_author_name{$user} = $user;
72 $conv_author_email{$user} = $1;
73 }
74 }
75 # NEEDSWORK: Maybe warn on unrecognized lines?
ffd97f3a
AE
76 }
77 close ($f);
78}
79
80sub write_author_info($) {
81 my ($file) = @_;
82 open my $f, '>', $file or
83 die("Failed to open $file for writing: $!");
84
85 foreach (keys %conv_author_name) {
8cd16211 86 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
ffd97f3a
AE
87 }
88 close ($f);
89}
90
cfc44a12 91# convert getopts specs for use by git config
ed35dece
JB
92sub read_repo_config {
93 # Split the string between characters, unless there is a ':'
94 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
95 my @opts = split(/ *(?!:)/, shift);
96 foreach my $o (@opts) {
97 my $key = $o;
98 $key =~ s/://g;
cfc44a12 99 my $arg = 'git config';
ed35dece
JB
100 $arg .= ' --bool' if ($o !~ /:$/);
101
102 chomp(my $tmp = `$arg --get cvsimport.$key`);
103 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
104 no strict 'refs';
105 my $opt_name = "opt_" . $key;
106 if (!$$opt_name) {
107 $$opt_name = $tmp;
108 }
109 }
110 }
ed35dece
JB
111}
112
8b7f5fc1 113my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:";
ed35dece 114read_repo_config($opts);
bc434e82
PB
115Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
116
117# turn the Getopt::Std specification in a Getopt::Long one,
118# with support for multiple -M options
119GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
120 or usage();
a57a9493
MU
121usage if $opt_h;
122
67d23242 123if (@ARGV == 0) {
cfc44a12 124 chomp(my $module = `git config --get cvsimport.module`);
67d23242
JK
125 push(@ARGV, $module) if $? == 0;
126}
7bf77644 127@ARGV <= 1 or usage("You can't specify more than one CVS module");
a57a9493 128
86d11cf2 129if ($opt_d) {
2a3e1a85 130 $ENV{"CVSROOT"} = $opt_d;
86d11cf2 131} elsif (-f 'CVS/Root') {
f9714a4a
SV
132 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
133 $opt_d = <$f>;
134 chomp $opt_d;
135 close $f;
136 $ENV{"CVSROOT"} = $opt_d;
86d11cf2 137} elsif ($ENV{"CVSROOT"}) {
2a3e1a85
MU
138 $opt_d = $ENV{"CVSROOT"};
139} else {
7bf77644 140 usage("CVSROOT needs to be set");
2a3e1a85 141}
fbfd60d6 142$opt_s ||= "-";
ded9f400
ML
143$opt_a ||= 0;
144
f9714a4a 145my $git_tree = $opt_C;
2a3e1a85
MU
146$git_tree ||= ".";
147
8b7f5fc1
AW
148my $remote;
149if (defined $opt_r) {
150 $remote = 'refs/remotes/' . $opt_r;
151 $opt_o ||= "master";
152} else {
153 $opt_o ||= "origin";
154 $remote = 'refs/heads';
155}
156
f9714a4a
SV
157my $cvs_tree;
158if ($#ARGV == 0) {
159 $cvs_tree = $ARGV[0];
160} elsif (-f 'CVS/Repository') {
a6080a0a 161 open my $f, '<', 'CVS/Repository' or
f9714a4a
SV
162 die 'Failed to open CVS/Repository';
163 $cvs_tree = <$f>;
164 chomp $cvs_tree;
db4b6582 165 close $f;
f9714a4a 166} else {
7bf77644 167 usage("CVS module has to be specified");
f9714a4a
SV
168}
169
db4b6582
ML
170our @mergerx = ();
171if ($opt_m) {
fbbbc362 172 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
db4b6582 173}
bc434e82
PB
174if (@opt_M) {
175 push (@mergerx, map { qr/$_/ } @opt_M);
db4b6582
ML
176}
177
6211988f
ML
178# Remember UTC of our starting time
179# we'll want to avoid importing commits
180# that are too recent
181our $starttime = time();
182
a57a9493
MU
183select(STDERR); $|=1; select(STDOUT);
184
185
186package CVSconn;
187# Basic CVS dialog.
2a3e1a85 188# We're only interested in connecting and downloading, so ...
a57a9493 189
2eb6d82e
SV
190use File::Spec;
191use File::Temp qw(tempfile);
f65ae603
MU
192use POSIX qw(strftime dup2);
193
a57a9493 194sub new {
86d11cf2 195 my ($what,$repo,$subdir) = @_;
a57a9493
MU
196 $what=ref($what) if ref($what);
197
198 my $self = {};
199 $self->{'buffer'} = "";
200 bless($self,$what);
201
202 $repo =~ s#/+$##;
203 $self->{'fullrep'} = $repo;
204 $self->conn();
205
206 $self->{'subdir'} = $subdir;
207 $self->{'lines'} = undef;
208
209 return $self;
210}
211
212sub conn {
213 my $self = shift;
214 my $repo = $self->{'fullrep'};
86d11cf2
JH
215 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
216 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
73bcf533 217
86d11cf2
JH
218 my ($proxyhost,$proxyport);
219 if ($param && ($param =~ m/proxy=([^;]+)/)) {
73bcf533
IA
220 $proxyhost = $1;
221 # Default proxyport, if not specified, is 8080.
222 $proxyport = 8080;
86d11cf2 223 if ($ENV{"CVS_PROXY_PORT"}) {
73bcf533
IA
224 $proxyport = $ENV{"CVS_PROXY_PORT"};
225 }
86d11cf2 226 if ($param =~ m/proxyport=([^;]+)/) {
73bcf533
IA
227 $proxyport = $1;
228 }
229 }
8c372fb0 230 $repo ||= '/';
73bcf533 231
2e458e05
GH
232 # if username is not explicit in CVSROOT, then use current user, as cvs would
233 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
2a3e1a85 234 my $rr2 = "-";
86d11cf2 235 unless ($port) {
a57a9493
MU
236 $rr2 = ":pserver:$user\@$serv:$repo";
237 $port=2401;
238 }
239 my $rr = ":pserver:$user\@$serv:$port$repo";
240
3fb9d582
PO
241 if ($pass) {
242 $pass = $self->_scramble($pass);
243 } else {
a57a9493
MU
244 open(H,$ENV{'HOME'}."/.cvspass") and do {
245 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
86d11cf2 246 while (<H>) {
a57a9493
MU
247 chomp;
248 s/^\/\d+\s+//;
249 my ($w,$p) = split(/\s/,$_,2);
86d11cf2 250 if ($w eq $rr or $w eq $rr2) {
a57a9493
MU
251 $pass = $p;
252 last;
253 }
254 }
255 };
e481b1d8 256 $pass = "A" unless $pass;
a57a9493 257 }
b2139dbd 258
73bcf533 259 my ($s, $rep);
86d11cf2 260 if ($proxyhost) {
73bcf533
IA
261
262 # Use a HTTP Proxy. Only works for HTTP proxies that
263 # don't require user authentication
264 #
265 # See: http://www.ietf.org/rfc/rfc2817.txt
266
267 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
268 die "Socket to $proxyhost: $!\n" unless defined $s;
269 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
270 or die "Write to $proxyhost: $!\n";
271 $s->flush();
272
273 $rep = <$s>;
274
275 # The answer should look like 'HTTP/1.x 2yy ....'
86d11cf2 276 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
73bcf533
IA
277 die "Proxy connect: $rep\n";
278 }
279 # Skip up to the empty line of the proxy server output
280 # including the response headers.
281 while ($rep = <$s>) {
282 last if (!defined $rep ||
283 $rep eq "\n" ||
284 $rep eq "\r\n");
285 }
286 } else {
287 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
288 die "Socket to $serv: $!\n" unless defined $s;
289 }
290
a57a9493
MU
291 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
292 or die "Write to $serv: $!\n";
293 $s->flush();
294
73bcf533 295 $rep = <$s>;
a57a9493 296
86d11cf2 297 if ($rep ne "I LOVE YOU\n") {
a57a9493
MU
298 $rep="<unknown>" unless $rep;
299 die "AuthReply: $rep\n";
300 }
301 $self->{'socketo'} = $s;
302 $self->{'socketi'} = $s;
34155390 303 } else { # local or ext: Fork off our own cvs server.
a57a9493
MU
304 my $pr = IO::Pipe->new();
305 my $pw = IO::Pipe->new();
306 my $pid = fork();
307 die "Fork: $!\n" unless defined $pid;
8d0ea311
SV
308 my $cvs = 'cvs';
309 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
34155390
SV
310 my $rsh = 'rsh';
311 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
312
313 my @cvs = ($cvs, 'server');
314 my ($local, $user, $host);
315 $local = $repo =~ s/:local://;
316 if (!$local) {
317 $repo =~ s/:ext://;
318 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
319 ($user, $host) = ($1, $2);
320 }
321 if (!$local) {
322 if ($user) {
323 unshift @cvs, $rsh, '-l', $user, $host;
324 } else {
325 unshift @cvs, $rsh, $host;
326 }
327 }
328
86d11cf2 329 unless ($pid) {
a57a9493
MU
330 $pr->writer();
331 $pw->reader();
a57a9493
MU
332 dup2($pw->fileno(),0);
333 dup2($pr->fileno(),1);
334 $pr->close();
335 $pw->close();
34155390 336 exec(@cvs);
a57a9493
MU
337 }
338 $pw->writer();
339 $pr->reader();
340 $self->{'socketo'} = $pw;
341 $self->{'socketi'} = $pr;
342 }
343 $self->{'socketo'}->write("Root $repo\n");
344
345 # Trial and error says that this probably is the minimum set
b0921331 346 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
a57a9493
MU
347
348 $self->{'socketo'}->write("valid-requests\n");
349 $self->{'socketo'}->flush();
350
351 chomp(my $rep=$self->readline());
86d11cf2 352 if ($rep !~ s/^Valid-requests\s*//) {
a57a9493
MU
353 $rep="<unknown>" unless $rep;
354 die "Expected Valid-requests from server, but got: $rep\n";
355 }
356 chomp(my $res=$self->readline());
357 die "validReply: $res\n" if $res ne "ok";
358
359 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
360 $self->{'repo'} = $repo;
361}
362
363sub readline {
86d11cf2 364 my ($self) = @_;
a57a9493
MU
365 return $self->{'socketi'}->getline();
366}
367
368sub _file {
369 # Request a file with a given revision.
370 # Trial and error says this is a good way to do it. :-/
86d11cf2 371 my ($self,$fn,$rev) = @_;
a57a9493
MU
372 $self->{'socketo'}->write("Argument -N\n") or return undef;
373 $self->{'socketo'}->write("Argument -P\n") or return undef;
abe05822
ML
374 # -kk: Linus' version doesn't use it - defaults to off
375 if ($opt_k) {
376 $self->{'socketo'}->write("Argument -kk\n") or return undef;
377 }
a57a9493
MU
378 $self->{'socketo'}->write("Argument -r\n") or return undef;
379 $self->{'socketo'}->write("Argument $rev\n") or return undef;
380 $self->{'socketo'}->write("Argument --\n") or return undef;
381 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
382 $self->{'socketo'}->write("Directory .\n") or return undef;
383 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
4f7c0caa 384 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
a57a9493
MU
385 $self->{'socketo'}->write("co\n") or return undef;
386 $self->{'socketo'}->flush() or return undef;
387 $self->{'lines'} = 0;
388 return 1;
389}
390sub _line {
391 # Read a line from the server.
392 # ... except that 'line' may be an entire file. ;-)
86d11cf2 393 my ($self, $fh) = @_;
a57a9493
MU
394 die "Not in lines" unless defined $self->{'lines'};
395
396 my $line;
2eb6d82e 397 my $res=0;
86d11cf2 398 while (defined($line = $self->readline())) {
a57a9493
MU
399 # M U gnupg-cvs-rep/AUTHORS
400 # Updated gnupg-cvs-rep/
401 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
402 # /AUTHORS/1.1///T1.1
403 # u=rw,g=rw,o=rw
404 # 0
405 # ok
406
86d11cf2 407 if ($line =~ s/^(?:Created|Updated) //) {
a57a9493
MU
408 $line = $self->readline(); # path
409 $line = $self->readline(); # Entries line
410 my $mode = $self->readline(); chomp $mode;
411 $self->{'mode'} = $mode;
412 defined (my $cnt = $self->readline())
413 or die "EOF from server after 'Changed'\n";
414 chomp $cnt;
415 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
416 $line="";
55cad842 417 $res = $self->_fetchfile($fh, $cnt);
86d11cf2 418 } elsif ($line =~ s/^ //) {
2eb6d82e
SV
419 print $fh $line;
420 $res += length($line);
86d11cf2 421 } elsif ($line =~ /^M\b/) {
a57a9493 422 # output, do nothing
86d11cf2 423 } elsif ($line =~ /^Mbinary\b/) {
a57a9493
MU
424 my $cnt;
425 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
426 chomp $cnt;
427 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
428 $line="";
55cad842 429 $res += $self->_fetchfile($fh, $cnt);
a57a9493
MU
430 } else {
431 chomp $line;
86d11cf2 432 if ($line eq "ok") {
a57a9493
MU
433 # print STDERR "S: ok (".length($res).")\n";
434 return $res;
86d11cf2 435 } elsif ($line =~ s/^E //) {
a57a9493 436 # print STDERR "S: $line\n";
86d11cf2 437 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
8b8840e0
MU
438 $line = $self->readline(); # filename
439 $line = $self->readline(); # OK
440 chomp $line;
441 die "Unknown: $line" if $line ne "ok";
442 return -1;
a57a9493
MU
443 } else {
444 die "Unknown: $line\n";
445 }
446 }
447 }
39ba7d54 448 return undef;
a57a9493
MU
449}
450sub file {
86d11cf2 451 my ($self,$fn,$rev) = @_;
a57a9493
MU
452 my $res;
453
a6080a0a 454 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
2eb6d82e
SV
455 DIR => File::Spec->tmpdir(), UNLINK => 1);
456
457 $self->_file($fn,$rev) and $res = $self->_line($fh);
458
459 if (!defined $res) {
39ba7d54
MM
460 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
461 truncate $fh, 0;
2eb6d82e 462 $self->conn();
39ba7d54 463 $self->_file($fn,$rev) or die "No file command send";
2eb6d82e 464 $res = $self->_line($fh);
39ba7d54 465 die "Retry failed" unless defined $res;
a57a9493 466 }
c619ad51 467 close ($fh);
a57a9493 468
2eb6d82e 469 return ($name, $res);
a57a9493 470}
55cad842
ML
471sub _fetchfile {
472 my ($self, $fh, $cnt) = @_;
61efa5e3 473 my $res = 0;
55cad842 474 my $bufsize = 1024 * 1024;
86d11cf2 475 while ($cnt) {
55cad842
ML
476 if ($bufsize > $cnt) {
477 $bufsize = $cnt;
478 }
479 my $buf;
480 my $num = $self->{'socketi'}->read($buf,$bufsize);
481 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
482 print $fh $buf;
483 $res += $num;
484 $cnt -= $num;
485 }
486 return $res;
487}
a57a9493 488
b2139dbd
DH
489sub _scramble {
490 my ($self, $pass) = @_;
491 my $scrambled = "A";
492
493 return $scrambled unless $pass;
494
495 my $pass_len = length($pass);
496 my @pass_arr = split("", $pass);
497 my $i;
498
499 # from cvs/src/scramble.c
500 my @shifts = (
501 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
502 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
503 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
504 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
505 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
506 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
507 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
508 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
509 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
510 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
511 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
512 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
513 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
514 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
515 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
516 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
517 );
518
519 for ($i = 0; $i < $pass_len; $i++) {
520 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
521 }
522
523 return $scrambled;
524}
a57a9493
MU
525
526package main;
527
2a3e1a85 528my $cvs = CVSconn->new($opt_d, $cvs_tree);
a57a9493
MU
529
530
531sub pdate($) {
86d11cf2 532 my ($d) = @_;
a57a9493
MU
533 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
534 or die "Unparseable date: $d\n";
535 my $y=$1; $y-=1900 if $y>1900;
536 return timegm($6||0,$5,$4,$3,$2-1,$y);
9718a00b
TM
537}
538
a57a9493 539sub pmode($) {
86d11cf2 540 my ($mode) = @_;
a57a9493
MU
541 my $m = 0;
542 my $mm = 0;
543 my $um = 0;
544 for my $x(split(//,$mode)) {
86d11cf2 545 if ($x eq ",") {
a57a9493
MU
546 $m |= $mm&$um;
547 $mm = 0;
548 $um = 0;
86d11cf2
JH
549 } elsif ($x eq "u") { $um |= 0700;
550 } elsif ($x eq "g") { $um |= 0070;
551 } elsif ($x eq "o") { $um |= 0007;
552 } elsif ($x eq "r") { $mm |= 0444;
553 } elsif ($x eq "w") { $mm |= 0222;
554 } elsif ($x eq "x") { $mm |= 0111;
555 } elsif ($x eq "=") { # do nothing
a57a9493
MU
556 } else { die "Unknown mode: $mode\n";
557 }
558 }
559 $m |= $mm&$um;
560 return $m;
561}
d4f8b390 562
a57a9493
MU
563sub getwd() {
564 my $pwd = `pwd`;
565 chomp $pwd;
566 return $pwd;
d4f8b390
LT
567}
568
e73aefe4
JK
569sub is_sha1 {
570 my $s = shift;
571 return $s =~ /^[a-f0-9]{40}$/;
572}
db4b6582 573
9da0dabc
JK
574sub get_headref ($) {
575 my $name = shift;
576 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
577 return undef unless $? == 0;
578 chomp $r;
579 return $r;
db4b6582
ML
580}
581
f6fdbb68
JK
582my $user_filename_prepend = '';
583sub munge_user_filename {
584 my $name = shift;
585 return File::Spec->file_name_is_absolute($name) ?
586 $name :
587 $user_filename_prepend . $name;
588}
589
a57a9493
MU
590-d $git_tree
591 or mkdir($git_tree,0777)
592 or die "Could not create $git_tree: $!";
f6fdbb68
JK
593if ($git_tree ne '.') {
594 $user_filename_prepend = getwd() . '/';
595 chdir($git_tree);
596}
d4f8b390 597
a57a9493 598my $last_branch = "";
46541669 599my $orig_branch = "";
a57a9493 600my %branch_date;
8a5f2eac 601my $tip_at_start = undef;
a57a9493
MU
602
603my $git_dir = $ENV{"GIT_DIR"} || ".git";
604$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
605$ENV{"GIT_DIR"} = $git_dir;
79ee456c
SV
606my $orig_git_index;
607$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
8f732649
ML
608
609my %index; # holds filenames of one index per branch
061303f0 610
86d11cf2 611unless (-d $git_dir) {
640d9d08 612 system("git init");
a57a9493 613 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
640d9d08 614 system("git read-tree");
a57a9493
MU
615 die "Cannot init an empty tree: $?\n" if $?;
616
617 $last_branch = $opt_o;
46541669 618 $orig_branch = "";
a57a9493 619} else {
a12477db 620 open(F, "-|", qw(git symbolic-ref HEAD)) or
640d9d08 621 die "Cannot run git symbolic-ref: $!\n";
8366a10a
PR
622 chomp ($last_branch = <F>);
623 $last_branch = basename($last_branch);
624 close(F);
86d11cf2 625 unless ($last_branch) {
46541669
MU
626 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
627 $last_branch = "master";
628 }
629 $orig_branch = $last_branch;
640d9d08 630 $tip_at_start = `git rev-parse --verify HEAD`;
a57a9493
MU
631
632 # Get the last import timestamps
1f24c587 633 my $fmt = '($ref, $author) = (%(refname), %(author));';
a12477db
BW
634 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
635 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
86d11cf2 636 while (defined(my $entry = <H>)) {
1f24c587
AW
637 my ($ref, $author);
638 eval($entry) || die "cannot eval refs list: $@";
8b7f5fc1 639 my ($head) = ($ref =~ m|^$remote/(.*)|);
1f24c587
AW
640 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
641 $branch_date{$head} = $1;
a57a9493 642 }
1f24c587 643 close(H);
7ca055f7
SS
644 if (!exists $branch_date{$opt_o}) {
645 die "Branch '$opt_o' does not exist.\n".
646 "Either use the correct '-o branch' option,\n".
647 "or import to a new repository.\n";
648 }
a57a9493
MU
649}
650
651-d $git_dir
652 or die "Could not create git subdir ($git_dir).\n";
653
ffd97f3a
AE
654# now we read (and possibly save) author-info as well
655-f "$git_dir/cvs-authors" and
656 read_author_info("$git_dir/cvs-authors");
657if ($opt_A) {
f6fdbb68 658 read_author_info(munge_user_filename($opt_A));
ffd97f3a
AE
659 write_author_info("$git_dir/cvs-authors");
660}
661
2f57c697
ML
662
663#
664# run cvsps into a file unless we are getting
665# it passed as a file via $opt_P
666#
4083c2fc 667my $cvspsfile;
2f57c697
ML
668unless ($opt_P) {
669 print "Running cvsps...\n" if $opt_v;
670 my $pid = open(CVSPS,"-|");
4083c2fc 671 my $cvspsfh;
2f57c697 672 die "Cannot fork: $!\n" unless defined $pid;
86d11cf2 673 unless ($pid) {
2f57c697
ML
674 my @opt;
675 @opt = split(/,/,$opt_p) if defined $opt_p;
676 unshift @opt, '-z', $opt_z if defined $opt_z;
677 unshift @opt, '-q' unless defined $opt_v;
678 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
679 push @opt, '--cvs-direct';
680 }
681 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
682 die "Could not start cvsps: $!\n";
df73e9c6 683 }
4083c2fc
ML
684 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
685 DIR => File::Spec->tmpdir());
2f57c697
ML
686 while (<CVSPS>) {
687 print $cvspsfh $_;
211dcac6 688 }
2f57c697 689 close CVSPS;
640d9d08 690 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
2f57c697 691 close $cvspsfh;
4083c2fc 692} else {
f6fdbb68 693 $cvspsfile = munge_user_filename($opt_P);
a57a9493
MU
694}
695
4083c2fc 696open(CVS, "<$cvspsfile") or die $!;
2f57c697 697
a57a9493
MU
698## cvsps output:
699#---------------------
700#PatchSet 314
701#Date: 1999/09/18 13:03:59
702#Author: wkoch
703#Branch: STABLE-BRANCH-1-0
704#Ancestor branch: HEAD
705#Tag: (none)
706#Log:
707# See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
708#Members:
709# README:1.57->1.57.2.1
710# VERSION:1.96->1.96.2.1
711#
712#---------------------
713
714my $state = 0;
715
e73aefe4
JK
716sub update_index (\@\@) {
717 my $old = shift;
718 my $new = shift;
640d9d08
BW
719 open(my $fh, '|-', qw(git update-index -z --index-info))
720 or die "unable to open git update-index: $!";
6a1871e1
JK
721 print $fh
722 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
e73aefe4 723 @$old),
6a1871e1 724 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
e73aefe4 725 @$new)
640d9d08 726 or die "unable to write to git update-index: $!";
6a1871e1 727 close $fh
640d9d08
BW
728 or die "unable to write to git update-index: $!";
729 $? and die "git update-index reported error: $?";
e73aefe4 730}
a57a9493 731
e73aefe4 732sub write_tree () {
a12477db 733 open(my $fh, '-|', qw(git write-tree))
640d9d08 734 or die "unable to open git write-tree: $!";
e73aefe4
JK
735 chomp(my $tree = <$fh>);
736 is_sha1($tree)
737 or die "Cannot get tree id ($tree): $!";
738 close($fh)
640d9d08 739 or die "Error running git write-tree: $?\n";
a57a9493 740 print "Tree ID $tree\n" if $opt_v;
e73aefe4
JK
741 return $tree;
742}
a57a9493 743
86d11cf2
JH
744my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
745my (@old,@new,@skipped,%ignorebranch);
71b08148
ML
746
747# commits that cvsps cannot place anywhere...
748$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
749
e73aefe4 750sub commit {
9da0dabc
JK
751 if ($branch eq $opt_o && !$index{branch} &&
752 !get_headref("$remote/$branch")) {
c5f448b0 753 # looks like an initial commit
640d9d08 754 # use the index primed by git init
23fcdc79
MM
755 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
756 $index{$branch} = "$git_dir/index";
c5f448b0
ML
757 } else {
758 # use an index per branch to speed up
759 # imports of projects with many branches
760 unless ($index{$branch}) {
761 $index{$branch} = tmpnam();
762 $ENV{GIT_INDEX_FILE} = $index{$branch};
763 if ($ancestor) {
640d9d08 764 system("git", "read-tree", "$remote/$ancestor");
c5f448b0 765 } else {
640d9d08 766 system("git", "read-tree", "$remote/$branch");
c5f448b0
ML
767 }
768 die "read-tree failed: $?\n" if $?;
769 }
770 }
771 $ENV{GIT_INDEX_FILE} = $index{$branch};
772
e73aefe4
JK
773 update_index(@old, @new);
774 @old = @new = ();
775 my $tree = write_tree();
9da0dabc 776 my $parent = get_headref("$remote/$last_branch");
e73aefe4
JK
777 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
778
779 my @commit_args;
780 push @commit_args, ("-p", $parent) if $parent;
781
782 # loose detection of merges
783 # based on the commit msg
784 foreach my $rx (@mergerx) {
785 next unless $logmsg =~ $rx && $1;
786 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
9da0dabc 787 if (my $sha1 = get_headref("$remote/$mparent")) {
c36c5b84 788 push @commit_args, '-p', "$remote/$mparent";
e73aefe4 789 print "Merge parent branch: $mparent\n" if $opt_v;
db4b6582 790 }
a57a9493 791 }
e73aefe4
JK
792
793 my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
62bf0d96
JK
794 $ENV{GIT_AUTHOR_NAME} = $author_name;
795 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
796 $ENV{GIT_AUTHOR_DATE} = $commit_date;
797 $ENV{GIT_COMMITTER_NAME} = $author_name;
798 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
799 $ENV{GIT_COMMITTER_DATE} = $commit_date;
e73aefe4 800 my $pid = open2(my $commit_read, my $commit_write,
640d9d08 801 'git', 'commit-tree', $tree, @commit_args);
e371046b
MU
802
803 # compatibility with git2cvs
804 substr($logmsg,32767) = "" if length($logmsg) > 32767;
805 $logmsg =~ s/[\s\n]+\z//;
806
5179c8a5
ML
807 if (@skipped) {
808 $logmsg .= "\n\n\nSKIPPED:\n\t";
809 $logmsg .= join("\n\t", @skipped) . "\n";
f396f01f 810 @skipped = ();
5179c8a5
ML
811 }
812
e73aefe4 813 print($commit_write "$logmsg\n") && close($commit_write)
640d9d08 814 or die "Error writing to git commit-tree: $!\n";
2a3e1a85 815
e73aefe4
JK
816 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
817 chomp(my $cid = <$commit_read>);
818 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
a57a9493 819 print "Commit ID $cid\n" if $opt_v;
e73aefe4 820 close($commit_read);
2a3e1a85
MU
821
822 waitpid($pid,0);
640d9d08 823 die "Error running git commit-tree: $?\n" if $?;
a57a9493 824
640d9d08 825 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
a57a9493
MU
826 or die "Cannot write branch $branch for update: $!\n";
827
86d11cf2 828 if ($tag) {
86d11cf2 829 my ($xtag) = $tag;
0d821d4d
PA
830 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
831 $xtag =~ tr/_/\./ if ( $opt_u );
34c99da2 832 $xtag =~ s/[\/]/$opt_s/g;
509792b9 833 $xtag =~ s/\[//g;
a6080a0a 834
640d9d08 835 system('git' , 'tag', '-f', $xtag, $cid) == 0
0d821d4d 836 or die "Cannot create tag $xtag: $!\n";
0d821d4d
PA
837
838 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
a57a9493 839 }
a57a9493
MU
840};
841
06918348 842my $commitcount = 1;
86d11cf2 843while (<CVS>) {
a57a9493 844 chomp;
86d11cf2 845 if ($state == 0 and /^-+$/) {
a57a9493 846 $state = 1;
86d11cf2 847 } elsif ($state == 0) {
a57a9493
MU
848 $state = 1;
849 redo;
86d11cf2 850 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
a57a9493
MU
851 $patchset = 0+$_;
852 $state=2;
86d11cf2 853 } elsif ($state == 2 and s/^Date:\s+//) {
a57a9493 854 $date = pdate($_);
86d11cf2 855 unless ($date) {
a57a9493
MU
856 print STDERR "Could not parse date: $_\n";
857 $state=0;
858 next;
859 }
860 $state=3;
86d11cf2 861 } elsif ($state == 3 and s/^Author:\s+//) {
a57a9493 862 s/\s+$//;
94c23343
JH
863 if (/^(.*?)\s+<(.*)>/) {
864 ($author_name, $author_email) = ($1, $2);
ffd97f3a
AE
865 } elsif ($conv_author_name{$_}) {
866 $author_name = $conv_author_name{$_};
867 $author_email = $conv_author_email{$_};
94c23343
JH
868 } else {
869 $author_name = $author_email = $_;
870 }
a57a9493 871 $state = 4;
86d11cf2 872 } elsif ($state == 4 and s/^Branch:\s+//) {
a57a9493 873 s/\s+$//;
a0554224 874 tr/_/\./ if ( $opt_u );
fbfd60d6 875 s/[\/]/$opt_s/g;
a57a9493
MU
876 $branch = $_;
877 $state = 5;
86d11cf2 878 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
a57a9493
MU
879 s/\s+$//;
880 $ancestor = $_;
0fa2824f 881 $ancestor = $opt_o if $ancestor eq "HEAD";
a57a9493 882 $state = 6;
86d11cf2 883 } elsif ($state == 5) {
a57a9493
MU
884 $ancestor = undef;
885 $state = 6;
886 redo;
86d11cf2 887 } elsif ($state == 6 and s/^Tag:\s+//) {
a57a9493 888 s/\s+$//;
86d11cf2 889 if ($_ eq "(none)") {
a57a9493
MU
890 $tag = undef;
891 } else {
892 $tag = $_;
893 }
894 $state = 7;
86d11cf2 895 } elsif ($state == 7 and /^Log:/) {
a57a9493
MU
896 $logmsg = "";
897 $state = 8;
86d11cf2 898 } elsif ($state == 8 and /^Members:/) {
a57a9493 899 $branch = $opt_o if $branch eq "HEAD";
86d11cf2 900 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
a57a9493 901 # skip
9da07f34 902 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
a57a9493
MU
903 $state = 11;
904 next;
905 }
ded9f400 906 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
6211988f 907 # skip if the commit is too recent
77190eb9 908 # given that the cvsps default fuzz is 300s, we give ourselves another
6211988f
ML
909 # 300s just in case -- this also prevents skipping commits
910 # due to server clock drift
911 print "skip patchset $patchset: $date too recent\n" if $opt_v;
912 $state = 11;
913 next;
914 }
71b08148
ML
915 if (exists $ignorebranch{$branch}) {
916 print STDERR "Skipping $branch\n";
917 $state = 11;
918 next;
919 }
86d11cf2
JH
920 if ($ancestor) {
921 if ($ancestor eq $branch) {
71b08148
ML
922 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
923 $ancestor = $opt_o;
924 }
0750d751 925 if (defined get_headref("$remote/$branch")) {
a57a9493
MU
926 print STDERR "Branch $branch already exists!\n";
927 $state=11;
928 next;
929 }
0750d751
JK
930 my $id = get_headref("$remote/$ancestor");
931 if (!$id) {
a57a9493 932 print STDERR "Branch $ancestor does not exist!\n";
71b08148 933 $ignorebranch{$branch} = 1;
a57a9493
MU
934 $state=11;
935 next;
936 }
0750d751
JK
937
938 system(qw(git update-ref -m cvsimport),
939 "$remote/$branch", $id);
940 if($? != 0) {
941 print STDERR "Could not create branch $branch\n";
71b08148 942 $ignorebranch{$branch} = 1;
a57a9493
MU
943 $state=11;
944 next;
945 }
a57a9493 946 }
46e63efc 947 $last_branch = $branch if $branch ne $last_branch;
a57a9493 948 $state = 9;
86d11cf2 949 } elsif ($state == 8) {
a57a9493 950 $logmsg .= "$_\n";
86d11cf2 951 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
a57a9493 952# VERSION:1.96->1.96.2.1
2a3e1a85 953 my $init = ($2 eq "INITIAL");
a57a9493 954 my $fn = $1;
f65ae603
MU
955 my $rev = $3;
956 $fn =~ s#^/+##;
5179c8a5
ML
957 if ($opt_S && $fn =~ m/$opt_S/) {
958 print "SKIPPING $fn v $rev\n";
959 push(@skipped, $fn);
960 next;
961 }
962 print "Fetching $fn v $rev\n" if $opt_v;
2eb6d82e 963 my ($tmpname, $size) = $cvs->file($fn,$rev);
86d11cf2 964 if ($size == -1) {
8b8840e0
MU
965 push(@old,$fn);
966 print "Drop $fn\n" if $opt_v;
967 } else {
968 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
dd27478f
JH
969 my $pid = open(my $F, '-|');
970 die $! unless defined $pid;
971 if (!$pid) {
640d9d08 972 exec("git", "hash-object", "-w", $tmpname)
8b8840e0 973 or die "Cannot create object: $!\n";
dd27478f 974 }
8b8840e0
MU
975 my $sha = <$F>;
976 chomp $sha;
977 close $F;
978 my $mode = pmode($cvs->{'mode'});
979 push(@new,[$mode, $sha, $fn]); # may be resurrected!
980 }
2eb6d82e 981 unlink($tmpname);
86d11cf2 982 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
f65ae603
MU
983 my $fn = $1;
984 $fn =~ s#^/+##;
985 push(@old,$fn);
8b8840e0 986 print "Delete $fn\n" if $opt_v;
86d11cf2 987 } elsif ($state == 9 and /^\s*$/) {
a57a9493 988 $state = 10;
86d11cf2 989 } elsif (($state == 9 or $state == 10) and /^-+$/) {
4adcea99
LT
990 $commitcount++;
991 if ($opt_L && $commitcount > $opt_L) {
06918348
ML
992 last;
993 }
c4b16f8d 994 commit();
4adcea99
LT
995 if (($commitcount & 1023) == 0) {
996 system("git repack -a -d");
997 }
a57a9493 998 $state = 1;
86d11cf2 999 } elsif ($state == 11 and /^-+$/) {
a57a9493 1000 $state = 1;
86d11cf2 1001 } elsif (/^-+$/) { # end of unknown-line processing
a57a9493 1002 $state = 1;
86d11cf2 1003 } elsif ($state != 11) { # ignore stuff when skipping
3be39998 1004 print STDERR "* UNKNOWN LINE * $_\n";
a57a9493
MU
1005 }
1006}
c4b16f8d 1007commit() if $branch and $state != 11;
d4f8b390 1008
4083c2fc
ML
1009unless ($opt_P) {
1010 unlink($cvspsfile);
1011}
1012
efe4abd1
JM
1013# The heuristic of repacking every 1024 commits can leave a
1014# lot of unpacked data. If there is more than 1MB worth of
1015# not-packed objects, repack once more.
640d9d08 1016my $line = `git count-objects`;
efe4abd1
JM
1017if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1018 my ($n_objects, $kb) = ($1, $2);
1019 1024 < $kb
1020 and system("git repack -a -d");
1021}
1022
8f732649 1023foreach my $git_index (values %index) {
23fcdc79 1024 if ($git_index ne "$git_dir/index") {
c5f448b0
ML
1025 unlink($git_index);
1026 }
8f732649 1027}
79ee456c 1028
210569f9
SV
1029if (defined $orig_git_index) {
1030 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1031} else {
1032 delete $ENV{GIT_INDEX_FILE};
1033}
1034
46541669 1035# Now switch back to the branch we were in before all of this happened
86d11cf2 1036if ($orig_branch) {
8a5f2eac
JH
1037 print "DONE.\n" if $opt_v;
1038 if ($opt_i) {
1039 exit 0;
1040 }
640d9d08 1041 my $tip_at_end = `git rev-parse --verify HEAD`;
8a5f2eac 1042 if ($tip_at_start ne $tip_at_end) {
cb9594e2 1043 for ($tip_at_start, $tip_at_end) { chomp; }
8a5f2eac 1044 print "Fetched into the current branch.\n" if $opt_v;
640d9d08 1045 system(qw(git read-tree -u -m),
8a5f2eac
JH
1046 $tip_at_start, $tip_at_end);
1047 die "Fast-forward update failed: $?\n" if $?;
1048 }
1049 else {
640d9d08 1050 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
8a5f2eac
JH
1051 die "Could not merge $opt_o into the current branch.\n" if $?;
1052 }
46541669
MU
1053} else {
1054 $orig_branch = "master";
1055 print "DONE; creating $orig_branch branch\n" if $opt_v;
640d9d08 1056 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
0750d751 1057 unless defined get_headref('refs/heads/master');
640d9d08 1058 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
06baffd3 1059 if ($opt_r && $opt_o ne 'HEAD');
640d9d08 1060 system('git', 'update-ref', 'HEAD', "$orig_branch");
c1c774e7 1061 unless ($opt_i) {
7051c3b1 1062 system('git checkout -f');
c1c774e7
SV
1063 die "checkout failed: $?\n" if $?;
1064 }
46541669 1065}