Implement blob ID validation in fast-import.
[git/git.git] / fast-import.c
CommitLineData
463acbe1
SP
1/*
2Format of STDIN stream:
3
4 stream ::= cmd*;
5
6 cmd ::= new_blob
7 | new_commit
8 | new_branch
9 | new_tag
10 ;
11
12 new_blob ::= 'blob' blob_data;
13
14 new_commit ::= 'comt' ref_name author_committer_msg
15 file_change*
16 '0';
17
18 new_branch ::= 'brch' dst_ref_name src_ref_name;
19 dst_ref_name ::= ref_name;
20 src_ref_name ::= ref_name | sha1_exp;
21
22 new_tag ::= 'tagg' ref_name tag_name tagger_msg;
23
24 file_change ::= 'M' path_name hexsha1
25 | 'D' path_name
26 ;
27
28 author_committer_msg ::= len32
29 'author' sp name '<' email '>' ts tz lf
30 'committer' sp name '<' email '>' ts tz lf
31 lf
32 binary_data;
33
34 tagger_msg ::= len32
35 'tagger' sp name '<' email '>' ts tz lf
36 lf
37 binary_data;
38
39 blob_data ::= len32 binary_data; # max len is 2^32-1
40 path_name ::= len32 path; # max len is PATH_MAX-1
41 ref_name ::= len32 ref; # max len is PATH_MAX-1
42 tag_name ::= len32 tag; # max len is PATH_MAX-1
43 sha1_exp ::= len32 sha1exp; # max len is PATH_MAX-1
44
45 len32 ::= # unsigned 32 bit value, native format;
46 binary_data ::= # file content, not interpreted;
47 sp ::= # ASCII space character;
48 lf ::= # ASCII newline (LF) character;
49 path ::= # GIT style file path, e.g. "a/b/c";
50 ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
51 tag ::= # GIT tag name, e.g. "FIREFOX_1_5";
52 sha1exp ::= # Any valid GIT SHA1 expression;
53 hexsha1 ::= # SHA1 in hexadecimal format;
54 name ::= # valid GIT author/committer name;
55 email ::= # valid GIT author/committer email;
56 ts ::= # time since the epoch in seconds, ascii decimal;
57 tz ::= # GIT style timezone;
58*/
59
db5e523f
SP
60#include "builtin.h"
61#include "cache.h"
62#include "object.h"
63#include "blob.h"
463acbe1 64#include "tree.h"
db5e523f
SP
65#include "delta.h"
66#include "pack.h"
463acbe1 67#include "refs.h"
db5e523f
SP
68#include "csum-file.h"
69
27d6d290
SP
70struct object_entry
71{
72 struct object_entry *next;
7111feed 73 enum object_type type;
27d6d290
SP
74 unsigned long offset;
75 unsigned char sha1[20];
76};
77
463acbe1 78struct object_entry_pool
27d6d290 79{
463acbe1 80 struct object_entry_pool *next_pool;
27d6d290
SP
81 struct object_entry *next_free;
82 struct object_entry *end;
ac47a738 83 struct object_entry entries[FLEX_ARRAY]; /* more */
27d6d290
SP
84};
85
ac47a738
SP
86struct last_object
87{
88 void *data;
6bb5b329
SP
89 unsigned int len;
90 unsigned int depth;
ac47a738
SP
91 unsigned char sha1[20];
92};
93
463acbe1
SP
94struct mem_pool
95{
96 struct mem_pool *next_pool;
97 char *next_free;
98 char *end;
99 char space[FLEX_ARRAY]; /* more */
100};
101
102struct atom_str
103{
104 struct atom_str *next_atom;
105 int str_len;
106 char str_dat[FLEX_ARRAY]; /* more */
107};
108
109struct tree_content;
6bb5b329
SP
110struct tree_entry
111{
463acbe1
SP
112 struct tree_content *tree;
113 struct atom_str* name;
114 unsigned int mode;
6bb5b329 115 unsigned char sha1[20];
6bb5b329
SP
116};
117
463acbe1 118struct tree_content
6bb5b329 119{
463acbe1
SP
120 unsigned int entry_capacity; /* must match avail_tree_content */
121 unsigned int entry_count;
122 struct tree_entry *entries[FLEX_ARRAY]; /* more */
123};
124
125struct avail_tree_content
126{
127 unsigned int entry_capacity; /* must match tree_content */
128 struct avail_tree_content *next_avail;
6bb5b329
SP
129};
130
131struct branch
132{
463acbe1
SP
133 struct branch *table_next_branch;
134 struct branch *active_next_branch;
6bb5b329 135 const char *name;
463acbe1
SP
136 unsigned long last_commit;
137 struct tree_entry branch_tree;
138 unsigned char sha1[20];
6bb5b329
SP
139};
140
463acbe1
SP
141
142/* Stats and misc. counters */
db5e523f 143static int max_depth = 10;
27d6d290 144static unsigned long alloc_count;
6bb5b329 145static unsigned long branch_count;
db5e523f 146static unsigned long object_count;
8bcce301 147static unsigned long duplicate_count;
6143f064
SP
148static unsigned long object_count_by_type[9];
149static unsigned long duplicate_count_by_type[9];
ac47a738 150
463acbe1
SP
151/* Memory pools */
152static size_t mem_pool_alloc = 2*1024*1024 - sizeof(struct mem_pool);
153static size_t total_allocd;
154static struct mem_pool *mem_pool;
155
156/* atom management */
157static unsigned int atom_table_sz = 4451;
158static unsigned int atom_cnt;
159static struct atom_str **atom_table;
160
161/* The .pack file being generated */
ac47a738
SP
162static int pack_fd;
163static unsigned long pack_offset;
164static unsigned char pack_sha1[20];
165
166/* Table of objects we've written. */
463acbe1
SP
167static unsigned int object_entry_alloc = 1000;
168static struct object_entry_pool *blocks;
169static struct object_entry *object_table[1 << 16];
ac47a738
SP
170
171/* Our last blob */
463acbe1
SP
172static struct last_object last_blob;
173
174/* Tree management */
175static unsigned int tree_entry_alloc = 1000;
176static void *avail_tree_entry;
177static unsigned int avail_tree_table_sz = 100;
178static struct avail_tree_content **avail_tree_table;
8bcce301 179
6bb5b329 180/* Branch data */
463acbe1
SP
181static unsigned int max_active_branches = 5;
182static unsigned int cur_active_branches;
183static unsigned int branch_table_sz = 1039;
184static struct branch **branch_table;
185static struct branch *active_branches;
186
6bb5b329 187
27d6d290 188static void alloc_objects(int cnt)
8bcce301 189{
463acbe1 190 struct object_entry_pool *b;
27d6d290 191
463acbe1 192 b = xmalloc(sizeof(struct object_entry_pool)
27d6d290 193 + cnt * sizeof(struct object_entry));
463acbe1 194 b->next_pool = blocks;
27d6d290
SP
195 b->next_free = b->entries;
196 b->end = b->entries + cnt;
197 blocks = b;
198 alloc_count += cnt;
199}
8bcce301 200
27d6d290 201static struct object_entry* new_object(unsigned char *sha1)
8bcce301 202{
27d6d290 203 struct object_entry *e;
8bcce301 204
27d6d290 205 if (blocks->next_free == blocks->end)
463acbe1 206 alloc_objects(object_entry_alloc);
8bcce301 207
27d6d290
SP
208 e = blocks->next_free++;
209 memcpy(e->sha1, sha1, sizeof(e->sha1));
210 return e;
8bcce301
SP
211}
212
463acbe1
SP
213static struct object_entry* find_object(unsigned char *sha1)
214{
215 unsigned int h = sha1[0] << 8 | sha1[1];
216 struct object_entry *e;
217 for (e = object_table[h]; e; e = e->next)
218 if (!memcmp(sha1, e->sha1, sizeof(e->sha1)))
219 return e;
220 return NULL;
221}
222
8bcce301
SP
223static struct object_entry* insert_object(unsigned char *sha1)
224{
225 unsigned int h = sha1[0] << 8 | sha1[1];
27d6d290 226 struct object_entry *e = object_table[h];
463acbe1 227 struct object_entry *p = NULL;
8bcce301
SP
228
229 while (e) {
230 if (!memcmp(sha1, e->sha1, sizeof(e->sha1)))
231 return e;
232 p = e;
233 e = e->next;
234 }
235
236 e = new_object(sha1);
463acbe1 237 e->next = NULL;
8bcce301
SP
238 e->offset = 0;
239 if (p)
240 p->next = e;
241 else
27d6d290 242 object_table[h] = e;
8bcce301
SP
243 return e;
244}
db5e523f 245
463acbe1
SP
246static unsigned int hc_str(const char *s, size_t len)
247{
248 unsigned int r = 0;
249 while (len-- > 0)
250 r = r * 31 + *s++;
251 return r;
252}
253
254static void* pool_alloc(size_t len)
255{
256 struct mem_pool *p;
257 void *r;
258
259 for (p = mem_pool; p; p = p->next_pool)
260 if ((p->end - p->next_free >= len))
261 break;
262
263 if (!p) {
264 if (len >= (mem_pool_alloc/2)) {
265 total_allocd += len;
266 return xmalloc(len);
267 }
268 total_allocd += sizeof(struct mem_pool) + mem_pool_alloc;
269 p = xmalloc(sizeof(struct mem_pool) + mem_pool_alloc);
270 p->next_pool = mem_pool;
271 p->next_free = p->space;
272 p->end = p->next_free + mem_pool_alloc;
273 mem_pool = p;
274 }
275
276 r = p->next_free;
277 p->next_free += len;
278 return r;
279}
280
281static void* pool_calloc(size_t count, size_t size)
282{
283 size_t len = count * size;
284 void *r = pool_alloc(len);
285 memset(r, 0, len);
286 return r;
287}
288
289static char* pool_strdup(const char *s)
290{
291 char *r = pool_alloc(strlen(s) + 1);
292 strcpy(r, s);
293 return r;
294}
295
296static struct atom_str* to_atom(const char *s, size_t len)
297{
298 unsigned int hc = hc_str(s, len) % atom_table_sz;
299 struct atom_str *c;
300
301 for (c = atom_table[hc]; c; c = c->next_atom)
302 if (c->str_len == len && !strncmp(s, c->str_dat, len))
303 return c;
304
305 c = pool_alloc(sizeof(struct atom_str) + len + 1);
306 c->str_len = len;
307 strncpy(c->str_dat, s, len);
308 c->str_dat[len] = 0;
309 c->next_atom = atom_table[hc];
310 atom_table[hc] = c;
311 atom_cnt++;
312 return c;
313}
314
315static struct branch* lookup_branch(const char *name)
316{
317 unsigned int hc = hc_str(name, strlen(name)) % branch_table_sz;
318 struct branch *b;
319
320 for (b = branch_table[hc]; b; b = b->table_next_branch)
321 if (!strcmp(name, b->name))
322 return b;
323 return NULL;
324}
325
326static struct branch* new_branch(const char *name)
327{
328 unsigned int hc = hc_str(name, strlen(name)) % branch_table_sz;
329 struct branch* b = lookup_branch(name);
330
331 if (b)
332 die("Invalid attempt to create duplicate branch: %s", name);
333
334 b = pool_calloc(1, sizeof(struct branch));
335 b->name = pool_strdup(name);
336 b->table_next_branch = branch_table[hc];
337 branch_table[hc] = b;
338 branch_count++;
339 return b;
340}
341
342static unsigned int hc_entries(unsigned int cnt)
343{
344 cnt = cnt & 7 ? (cnt / 8) + 1 : cnt / 8;
345 return cnt < avail_tree_table_sz ? cnt : avail_tree_table_sz - 1;
346}
347
348static struct tree_content* new_tree_content(unsigned int cnt)
349{
350 struct avail_tree_content *f, *l = NULL;
351 struct tree_content *t;
352 unsigned int hc = hc_entries(cnt);
353
354 for (f = avail_tree_table[hc]; f; l = f, f = f->next_avail)
355 if (f->entry_capacity >= cnt)
356 break;
357
358 if (f) {
359 if (l)
360 l->next_avail = f->next_avail;
361 else
362 avail_tree_table[hc] = f->next_avail;
363 } else {
364 cnt = cnt & 7 ? ((cnt / 8) + 1) * 8 : cnt;
365 f = pool_alloc(sizeof(*t) + sizeof(t->entries[0]) * cnt);
366 f->entry_capacity = cnt;
367 }
368
369 t = (struct tree_content*)f;
370 t->entry_count = 0;
371 return t;
372}
373
374static void release_tree_entry(struct tree_entry *e);
375static void release_tree_content(struct tree_content *t)
376{
377 struct avail_tree_content *f = (struct avail_tree_content*)t;
378 unsigned int hc = hc_entries(f->entry_capacity);
379 unsigned int i;
380 for (i = 0; i < t->entry_count; i++)
381 release_tree_entry(t->entries[i]);
382 f->next_avail = avail_tree_table[hc];
383 avail_tree_table[hc] = f;
384}
385
386static struct tree_content* grow_tree_content(
387 struct tree_content *t,
388 int amt)
389{
390 struct tree_content *r = new_tree_content(t->entry_count + amt);
391 r->entry_count = t->entry_count;
392 memcpy(r->entries,t->entries,t->entry_count*sizeof(t->entries[0]));
393 release_tree_content(t);
394 return r;
395}
396
397static struct tree_entry* new_tree_entry()
398{
399 struct tree_entry *e;
400
401 if (!avail_tree_entry) {
402 unsigned int n = tree_entry_alloc;
403 avail_tree_entry = e = xmalloc(n * sizeof(struct tree_entry));
404 while (n--) {
405 *((void**)e) = e + 1;
406 e++;
407 }
408 }
409
410 e = avail_tree_entry;
411 avail_tree_entry = *((void**)e);
412 return e;
413}
414
415static void release_tree_entry(struct tree_entry *e)
416{
417 if (e->tree)
418 release_tree_content(e->tree);
419 *((void**)e) = avail_tree_entry;
420 avail_tree_entry = e;
421}
422
423static void yread(int fd, void *buffer, size_t length)
db5e523f
SP
424{
425 ssize_t ret = 0;
426 while (ret < length) {
427 ssize_t size = xread(fd, (char *) buffer + ret, length - ret);
463acbe1
SP
428 if (!size)
429 die("Read from descriptor %i: end of stream", fd);
430 if (size < 0)
431 die("Read from descriptor %i: %s", fd, strerror(errno));
432 ret += size;
433 }
434}
435
436static int optional_read(int fd, void *buffer, size_t length)
437{
438 ssize_t ret = 0;
439 while (ret < length) {
440 ssize_t size = xread(fd, (char *) buffer + ret, length - ret);
441 if (!size && !ret)
442 return 1;
443 if (!size)
444 die("Read from descriptor %i: end of stream", fd);
445 if (size < 0)
446 die("Read from descriptor %i: %s", fd, strerror(errno));
db5e523f
SP
447 ret += size;
448 }
463acbe1 449 return 0;
db5e523f
SP
450}
451
463acbe1 452static void ywrite(int fd, void *buffer, size_t length)
db5e523f
SP
453{
454 ssize_t ret = 0;
455 while (ret < length) {
456 ssize_t size = xwrite(fd, (char *) buffer + ret, length - ret);
463acbe1
SP
457 if (!size)
458 die("Write to descriptor %i: end of file", fd);
459 if (size < 0)
460 die("Write to descriptor %i: %s", fd, strerror(errno));
db5e523f
SP
461 ret += size;
462 }
db5e523f
SP
463}
464
463acbe1 465static const char* read_path()
6bb5b329
SP
466{
467 static char sn[PATH_MAX];
468 unsigned long slen;
469
463acbe1 470 yread(0, &slen, 4);
6bb5b329 471 if (!slen)
463acbe1 472 die("Expected string command parameter, didn't find one");
6bb5b329
SP
473 if (slen > (PATH_MAX - 1))
474 die("Can't handle excessive string length %lu", slen);
463acbe1 475 yread(0, sn, slen);
6bb5b329
SP
476 sn[slen] = 0;
477 return sn;
478}
479
ac47a738
SP
480static unsigned long encode_header(
481 enum object_type type,
482 unsigned long size,
483 unsigned char *hdr)
db5e523f
SP
484{
485 int n = 1;
486 unsigned char c;
487
488 if (type < OBJ_COMMIT || type > OBJ_DELTA)
489 die("bad type %d", type);
490
491 c = (type << 4) | (size & 15);
492 size >>= 4;
493 while (size) {
494 *hdr++ = c | 0x80;
495 c = size & 0x7f;
496 size >>= 7;
497 n++;
498 }
499 *hdr = c;
500 return n;
501}
502
ac47a738
SP
503static int store_object(
504 enum object_type type,
505 void *dat,
506 unsigned long datlen,
6bb5b329
SP
507 struct last_object *last,
508 unsigned char *sha1out)
db5e523f 509{
db5e523f 510 void *out, *delta;
ac47a738
SP
511 struct object_entry *e;
512 unsigned char hdr[96];
513 unsigned char sha1[20];
db5e523f 514 unsigned long hdrlen, deltalen;
ac47a738
SP
515 SHA_CTX c;
516 z_stream s;
517
518 hdrlen = sprintf((char*)hdr,"%s %lu",type_names[type],datlen) + 1;
519 SHA1_Init(&c);
520 SHA1_Update(&c, hdr, hdrlen);
521 SHA1_Update(&c, dat, datlen);
522 SHA1_Final(sha1, &c);
6bb5b329
SP
523 if (sha1out)
524 memcpy(sha1out, sha1, sizeof(sha1));
ac47a738
SP
525
526 e = insert_object(sha1);
527 if (e->offset) {
528 duplicate_count++;
6143f064 529 duplicate_count_by_type[type]++;
463acbe1 530 return 1;
ac47a738 531 }
7111feed 532 e->type = type;
ac47a738
SP
533 e->offset = pack_offset;
534 object_count++;
6143f064 535 object_count_by_type[type]++;
db5e523f 536
463acbe1 537 if (last && last->data && last->depth < max_depth)
ac47a738 538 delta = diff_delta(last->data, last->len,
db5e523f
SP
539 dat, datlen,
540 &deltalen, 0);
ac47a738 541 else
db5e523f
SP
542 delta = 0;
543
544 memset(&s, 0, sizeof(s));
545 deflateInit(&s, zlib_compression_level);
546
547 if (delta) {
ac47a738 548 last->depth++;
db5e523f
SP
549 s.next_in = delta;
550 s.avail_in = deltalen;
551 hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
463acbe1
SP
552 ywrite(pack_fd, hdr, hdrlen);
553 ywrite(pack_fd, last->sha1, sizeof(sha1));
ac47a738 554 pack_offset += hdrlen + sizeof(sha1);
db5e523f 555 } else {
463acbe1
SP
556 if (last)
557 last->depth = 0;
db5e523f
SP
558 s.next_in = dat;
559 s.avail_in = datlen;
ac47a738 560 hdrlen = encode_header(type, datlen, hdr);
463acbe1 561 ywrite(pack_fd, hdr, hdrlen);
ac47a738 562 pack_offset += hdrlen;
db5e523f
SP
563 }
564
565 s.avail_out = deflateBound(&s, s.avail_in);
566 s.next_out = out = xmalloc(s.avail_out);
567 while (deflate(&s, Z_FINISH) == Z_OK)
568 /* nothing */;
569 deflateEnd(&s);
570
463acbe1 571 ywrite(pack_fd, out, s.total_out);
ac47a738 572 pack_offset += s.total_out;
db5e523f
SP
573
574 free(out);
575 if (delta)
576 free(delta);
463acbe1
SP
577 if (last) {
578 if (last->data)
579 free(last->data);
580 last->data = dat;
581 last->len = datlen;
582 memcpy(last->sha1, sha1, sizeof(sha1));
583 }
584 return 0;
585}
586
587static const char *get_mode(const char *str, unsigned int *modep)
588{
589 unsigned char c;
590 unsigned int mode = 0;
591
592 while ((c = *str++) != ' ') {
593 if (c < '0' || c > '7')
594 return NULL;
595 mode = (mode << 3) + (c - '0');
596 }
597 *modep = mode;
598 return str;
599}
600
601static void load_tree(struct tree_entry *root)
602{
603 struct object_entry *myoe;
604 struct tree_content *t;
605 unsigned long size;
606 char *buf;
607 const char *c;
608 char type[20];
609
610 root->tree = t = new_tree_content(8);
611 if (!memcmp(root->sha1, null_sha1, 20))
612 return;
613
614 myoe = find_object(root->sha1);
615 if (myoe) {
616 die("FIXME");
617 } else {
618 buf = read_sha1_file(root->sha1, type, &size);
619 if (!buf || strcmp(type, tree_type))
620 die("Can't load existing tree %s", sha1_to_hex(root->sha1));
621 }
622
623 c = buf;
624 while (c != (buf + size)) {
625 struct tree_entry *e = new_tree_entry();
626
627 if (t->entry_count == t->entry_capacity)
628 root->tree = t = grow_tree_content(t, 8);
629 t->entries[t->entry_count++] = e;
630
631 e->tree = NULL;
632 c = get_mode(c, &e->mode);
633 if (!c)
634 die("Corrupt mode in %s", sha1_to_hex(root->sha1));
635 e->name = to_atom(c, strlen(c));
636 c += e->name->str_len + 1;
637 memcpy(e->sha1, c, sizeof(e->sha1));
638 c += 20;
639 }
640 free(buf);
641}
642
643static int tecmp (const void *_a, const void *_b)
644{
645 struct tree_entry *a = *((struct tree_entry**)_a);
646 struct tree_entry *b = *((struct tree_entry**)_b);
647 return base_name_compare(
648 a->name->str_dat, a->name->str_len, a->mode,
649 b->name->str_dat, b->name->str_len, b->mode);
650}
651
652static void store_tree(struct tree_entry *root)
653{
654 struct tree_content *t = root->tree;
655 unsigned int i;
656 size_t maxlen;
657 char *buf, *c;
658
659 if (memcmp(root->sha1, null_sha1, 20))
660 return;
661
662 maxlen = 0;
663 for (i = 0; i < t->entry_count; i++) {
664 maxlen += t->entries[i]->name->str_len + 34;
665 if (t->entries[i]->tree)
666 store_tree(t->entries[i]);
667 }
668
669 qsort(t->entries, t->entry_count, sizeof(t->entries[0]), tecmp);
670 buf = c = xmalloc(maxlen);
671 for (i = 0; i < t->entry_count; i++) {
672 struct tree_entry *e = t->entries[i];
673 c += sprintf(c, "%o", e->mode);
674 *c++ = ' ';
675 strcpy(c, e->name->str_dat);
676 c += e->name->str_len + 1;
677 memcpy(c, e->sha1, 20);
678 c += 20;
679 }
680 store_object(OBJ_TREE, buf, c - buf, NULL, root->sha1);
681 free(buf);
682}
683
684static int tree_content_set(
685 struct tree_entry *root,
686 const char *p,
687 const unsigned char *sha1,
688 const unsigned int mode)
689{
690 struct tree_content *t = root->tree;
691 const char *slash1;
692 unsigned int i, n;
693 struct tree_entry *e;
694
695 slash1 = strchr(p, '/');
696 if (slash1)
697 n = slash1 - p;
698 else
699 n = strlen(p);
700
701 for (i = 0; i < t->entry_count; i++) {
702 e = t->entries[i];
703 if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) {
704 if (!slash1) {
705 if (e->mode == mode && !memcmp(e->sha1, sha1, 20))
706 return 0;
707 e->mode = mode;
708 memcpy(e->sha1, sha1, 20);
709 if (e->tree) {
710 release_tree_content(e->tree);
711 e->tree = NULL;
712 }
713 memcpy(root->sha1, null_sha1, 20);
714 return 1;
715 }
716 if (!S_ISDIR(e->mode)) {
717 e->tree = new_tree_content(8);
7111feed 718 e->mode = S_IFDIR;
463acbe1
SP
719 }
720 if (!e->tree)
721 load_tree(e);
722 if (tree_content_set(e, slash1 + 1, sha1, mode)) {
723 memcpy(root->sha1, null_sha1, 20);
724 return 1;
725 }
726 return 0;
727 }
728 }
729
730 if (t->entry_count == t->entry_capacity)
731 root->tree = t = grow_tree_content(t, 8);
732 e = new_tree_entry();
733 e->name = to_atom(p, n);
734 t->entries[t->entry_count++] = e;
735 if (slash1) {
736 e->tree = new_tree_content(8);
7111feed 737 e->mode = S_IFDIR;
463acbe1
SP
738 tree_content_set(e, slash1 + 1, sha1, mode);
739 } else {
740 e->tree = NULL;
741 e->mode = mode;
742 memcpy(e->sha1, sha1, 20);
743 }
744 memcpy(root->sha1, null_sha1, 20);
745 return 1;
746}
747
748static int tree_content_remove(struct tree_entry *root, const char *p)
749{
750 struct tree_content *t = root->tree;
751 const char *slash1;
752 unsigned int i, n;
753 struct tree_entry *e;
754
755 slash1 = strchr(p, '/');
756 if (slash1)
757 n = slash1 - p;
758 else
759 n = strlen(p);
760
761 for (i = 0; i < t->entry_count; i++) {
762 e = t->entries[i];
763 if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) {
764 if (!slash1 || !S_ISDIR(e->mode))
765 goto del_entry;
766 if (!e->tree)
767 load_tree(e);
768 if (tree_content_remove(e, slash1 + 1)) {
769 if (!e->tree->entry_count)
770 goto del_entry;
771 memcpy(root->sha1, null_sha1, 20);
772 return 1;
773 }
774 return 0;
775 }
776 }
777 return 0;
778
779del_entry:
780 for (i++; i < t->entry_count; i++)
781 t->entries[i-1] = t->entries[i];
782 t->entry_count--;
783 release_tree_entry(e);
784 memcpy(root->sha1, null_sha1, 20);
ac47a738 785 return 1;
db5e523f
SP
786}
787
8bcce301 788static void init_pack_header()
db5e523f
SP
789{
790 const char* magic = "PACK";
6143f064 791 unsigned long version = 3;
db5e523f
SP
792 unsigned long zero = 0;
793
794 version = htonl(version);
463acbe1
SP
795 ywrite(pack_fd, (char*)magic, 4);
796 ywrite(pack_fd, &version, 4);
797 ywrite(pack_fd, &zero, 4);
ac47a738 798 pack_offset = 4 * 3;
db5e523f
SP
799}
800
8bcce301 801static void fixup_header_footer()
db5e523f
SP
802{
803 SHA_CTX c;
804 char hdr[8];
db5e523f
SP
805 unsigned long cnt;
806 char *buf;
807 size_t n;
808
ac47a738 809 if (lseek(pack_fd, 0, SEEK_SET) != 0)
db5e523f
SP
810 die("Failed seeking to start: %s", strerror(errno));
811
812 SHA1_Init(&c);
463acbe1 813 yread(pack_fd, hdr, 8);
db5e523f
SP
814 SHA1_Update(&c, hdr, 8);
815
db5e523f
SP
816 cnt = htonl(object_count);
817 SHA1_Update(&c, &cnt, 4);
463acbe1 818 ywrite(pack_fd, &cnt, 4);
db5e523f
SP
819
820 buf = xmalloc(128 * 1024);
821 for (;;) {
ac47a738 822 n = xread(pack_fd, buf, 128 * 1024);
db5e523f
SP
823 if (n <= 0)
824 break;
825 SHA1_Update(&c, buf, n);
826 }
827 free(buf);
828
ac47a738 829 SHA1_Final(pack_sha1, &c);
463acbe1 830 ywrite(pack_fd, pack_sha1, sizeof(pack_sha1));
db5e523f
SP
831}
832
8bcce301 833static int oecmp (const void *_a, const void *_b)
db5e523f 834{
8bcce301
SP
835 struct object_entry *a = *((struct object_entry**)_a);
836 struct object_entry *b = *((struct object_entry**)_b);
837 return memcmp(a->sha1, b->sha1, sizeof(a->sha1));
838}
839
840static void write_index(const char *idx_name)
841{
842 struct sha1file *f;
843 struct object_entry **idx, **c, **last;
844 struct object_entry *e;
463acbe1 845 struct object_entry_pool *o;
8bcce301
SP
846 unsigned int array[256];
847 int i;
848
849 /* Build the sorted table of object IDs. */
850 idx = xmalloc(object_count * sizeof(struct object_entry*));
851 c = idx;
463acbe1 852 for (o = blocks; o; o = o->next_pool)
27d6d290
SP
853 for (e = o->entries; e != o->next_free; e++)
854 *c++ = e;
8bcce301
SP
855 last = idx + object_count;
856 qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
857
858 /* Generate the fan-out array. */
859 c = idx;
860 for (i = 0; i < 256; i++) {
861 struct object_entry **next = c;;
862 while (next < last) {
863 if ((*next)->sha1[0] != i)
864 break;
865 next++;
866 }
867 array[i] = htonl(next - idx);
868 c = next;
869 }
870
871 f = sha1create("%s", idx_name);
872 sha1write(f, array, 256 * sizeof(int));
873 for (c = idx; c != last; c++) {
874 unsigned int offset = htonl((*c)->offset);
875 sha1write(f, &offset, 4);
876 sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
877 }
ac47a738 878 sha1write(f, pack_sha1, sizeof(pack_sha1));
8bcce301
SP
879 sha1close(f, NULL, 1);
880 free(idx);
881}
882
463acbe1
SP
883static void dump_branches()
884{
885 static const char *msg = "fast-import";
886 unsigned int i;
887 struct branch *b;
888 struct ref_lock *lock;
889
890 for (i = 0; i < branch_table_sz; i++) {
891 for (b = branch_table[i]; b; b = b->table_next_branch) {
892 lock = lock_any_ref_for_update(b->name, NULL, 0);
893 if (!lock || write_ref_sha1(lock, b->sha1, msg) < 0)
894 die("Can't write %s", b->name);
895 }
896 }
897}
898
899static void cmd_new_blob()
6143f064
SP
900{
901 unsigned long datlen;
463acbe1 902 unsigned char sha1[20];
6143f064
SP
903 void *dat;
904
463acbe1 905 yread(0, &datlen, 4);
6143f064 906 dat = xmalloc(datlen);
463acbe1
SP
907 yread(0, dat, datlen);
908 if (store_object(OBJ_BLOB, dat, datlen, &last_blob, sha1))
6143f064
SP
909 free(dat);
910}
911
463acbe1 912static void unload_one_branch()
6bb5b329 913{
463acbe1
SP
914 while (cur_active_branches >= max_active_branches) {
915 unsigned long min_commit = ULONG_MAX;
916 struct branch *e, *l = NULL, *p = NULL;
917
918 for (e = active_branches; e; e = e->active_next_branch) {
919 if (e->last_commit < min_commit) {
920 p = l;
921 min_commit = e->last_commit;
922 }
923 l = e;
924 }
925
926 if (p) {
927 e = p->active_next_branch;
928 p->active_next_branch = e->active_next_branch;
929 } else {
930 e = active_branches;
931 active_branches = e->active_next_branch;
932 }
933 e->active_next_branch = NULL;
934 if (e->branch_tree.tree) {
935 release_tree_content(e->branch_tree.tree);
936 e->branch_tree.tree = NULL;
937 }
938 cur_active_branches--;
6bb5b329 939 }
6bb5b329
SP
940}
941
463acbe1 942static void load_branch(struct branch *b)
6bb5b329 943{
463acbe1
SP
944 load_tree(&b->branch_tree);
945 b->active_next_branch = active_branches;
946 active_branches = b;
947 cur_active_branches++;
6bb5b329
SP
948}
949
463acbe1 950static void file_change_m(struct branch *b)
6bb5b329 951{
463acbe1 952 const char *path = read_path();
7111feed 953 struct object_entry *oe;
463acbe1
SP
954 char hexsha1[41];
955 unsigned char sha1[20];
7111feed 956 char type[20];
6bb5b329 957
463acbe1
SP
958 yread(0, hexsha1, 40);
959 hexsha1[40] = 0;
6bb5b329 960
463acbe1
SP
961 if (get_sha1_hex(hexsha1, sha1))
962 die("Invalid sha1 %s for %s", hexsha1, path);
7111feed
SP
963 oe = find_object(sha1);
964 if (oe) {
965 if (oe->type != OBJ_BLOB)
966 die("%s is a %s not a blob (for %s)", hexsha1, type_names[oe->type], path);
967 } else {
968 if (sha1_object_info(sha1, type, NULL))
969 die("No blob %s for %s", hexsha1, path);
970 if (strcmp(blob_type, type))
971 die("%s is a %s not a blob (for %s)", hexsha1, type, path);
972 }
6bb5b329 973
7111feed 974 tree_content_set(&b->branch_tree, path, sha1, S_IFREG | 0644);
463acbe1 975}
6bb5b329 976
463acbe1
SP
977static void file_change_d(struct branch *b)
978{
979 tree_content_remove(&b->branch_tree, read_path());
6bb5b329
SP
980}
981
463acbe1 982static void cmd_new_commit()
6bb5b329 983{
463acbe1
SP
984 static const unsigned int max_hdr_len = 94;
985 const char *name = read_path();
986 struct branch *b = lookup_branch(name);
987 unsigned int acmsglen;
988 char *body, *c;
989
990 if (!b)
991 die("Branch not declared: %s", name);
992 if (!b->branch_tree.tree) {
993 unload_one_branch();
994 load_branch(b);
995 }
6bb5b329 996
463acbe1
SP
997 /* author_committer_msg */
998 yread(0, &acmsglen, 4);
999 body = xmalloc(acmsglen + max_hdr_len);
1000 c = body + max_hdr_len;
1001 yread(0, c, acmsglen);
1002
7111feed
SP
1003 /* oddly enough this is all that fsck-objects cares about */
1004 if (memcmp(c, "author ", 7))
1005 die("Invalid commit format on branch %s", name);
1006
463acbe1
SP
1007 /* file_change* */
1008 for (;;) {
1009 unsigned char cmd;
1010 yread(0, &cmd, 1);
1011 if (cmd == '0')
1012 break;
1013 else if (cmd == 'M')
1014 file_change_m(b);
1015 else if (cmd == 'D')
1016 file_change_d(b);
1017 else
1018 die("Unsupported file_change: %c", cmd);
6bb5b329 1019 }
6bb5b329 1020
463acbe1
SP
1021 if (memcmp(b->sha1, null_sha1, 20)) {
1022 sprintf(c - 48, "parent %s", sha1_to_hex(b->sha1));
1023 *(c - 1) = '\n';
1024 c -= 48;
1025 }
1026 store_tree(&b->branch_tree);
1027 sprintf(c - 46, "tree %s", sha1_to_hex(b->branch_tree.sha1));
1028 *(c - 1) = '\n';
1029 c -= 46;
1030
1031 store_object(OBJ_COMMIT,
1032 c, (body + max_hdr_len + acmsglen) - c,
1033 NULL, b->sha1);
1034 free(body);
1035 b->last_commit = object_count_by_type[OBJ_COMMIT];
6bb5b329
SP
1036}
1037
463acbe1 1038static void cmd_new_branch()
6bb5b329 1039{
463acbe1
SP
1040 struct branch *b = new_branch(read_path());
1041 const char *base = read_path();
1042 struct branch *s = lookup_branch(base);
1043
1044 if (!strcmp(b->name, base))
1045 die("Can't create a branch from itself: %s", base);
1046 else if (s) {
1047 memcpy(b->sha1, s->sha1, 20);
1048 memcpy(b->branch_tree.sha1, s->branch_tree.sha1, 20);
1049 }
1050 else if (!get_sha1(base, b->sha1)) {
1051 if (!memcmp(b->sha1, null_sha1, 20))
1052 memcpy(b->branch_tree.sha1, null_sha1, 20);
1053 else {
1054 unsigned long size;
1055 char *buf;
1056
1057 buf = read_object_with_reference(b->sha1,
1058 type_names[OBJ_COMMIT], &size, b->sha1);
1059 if (!buf || size < 46)
1060 die("Not a valid commit: %s", base);
1061 if (memcmp("tree ", buf, 5)
1062 || get_sha1_hex(buf + 5, b->branch_tree.sha1))
1063 die("The commit %s is corrupt", sha1_to_hex(b->sha1));
1064 free(buf);
1065 }
1066 } else
1067 die("Not a SHA1 or branch: %s", base);
6bb5b329
SP
1068}
1069
8bcce301
SP
1070int main(int argc, const char **argv)
1071{
1072 const char *base_name = argv[1];
1073 int est_obj_cnt = atoi(argv[2]);
1074 char *pack_name;
1075 char *idx_name;
6143f064 1076 struct stat sb;
8bcce301 1077
463acbe1
SP
1078 setup_ident();
1079 git_config(git_default_config);
1080
8bcce301
SP
1081 pack_name = xmalloc(strlen(base_name) + 6);
1082 sprintf(pack_name, "%s.pack", base_name);
1083 idx_name = xmalloc(strlen(base_name) + 5);
1084 sprintf(idx_name, "%s.idx", base_name);
1085
ac47a738
SP
1086 pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
1087 if (pack_fd < 0)
6143f064 1088 die("Can't create %s: %s", pack_name, strerror(errno));
8bcce301 1089
27d6d290 1090 alloc_objects(est_obj_cnt);
463acbe1
SP
1091
1092 atom_table = xcalloc(atom_table_sz, sizeof(struct atom_str*));
1093 branch_table = xcalloc(branch_table_sz, sizeof(struct branch*));
1094 avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*));
1095
db5e523f
SP
1096 init_pack_header();
1097 for (;;) {
6143f064 1098 unsigned long cmd;
463acbe1 1099 if (optional_read(0, &cmd, 4))
db5e523f
SP
1100 break;
1101
463acbe1
SP
1102 switch (ntohl(cmd)) {
1103 case 'blob': cmd_new_blob(); break;
1104 case 'comt': cmd_new_commit(); break;
1105 case 'brch': cmd_new_branch(); break;
6143f064
SP
1106 default:
1107 die("Invalid command %lu", cmd);
1108 }
db5e523f
SP
1109 }
1110 fixup_header_footer();
ac47a738 1111 close(pack_fd);
8bcce301 1112 write_index(idx_name);
463acbe1 1113 dump_branches();
8bcce301 1114
6143f064
SP
1115 fprintf(stderr, "%s statistics:\n", argv[0]);
1116 fprintf(stderr, "---------------------------------------------------\n");
1117 fprintf(stderr, "Alloc'd objects: %10lu (%10lu overflow )\n", alloc_count, alloc_count - est_obj_cnt);
1118 fprintf(stderr, "Total objects: %10lu (%10lu duplicates)\n", object_count, duplicate_count);
1119 fprintf(stderr, " blobs : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB]);
1120 fprintf(stderr, " trees : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE]);
1121 fprintf(stderr, " commits: %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT]);
1122 fprintf(stderr, " tags : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG]);
6bb5b329 1123 fprintf(stderr, "Total branches: %10lu\n", branch_count);
463acbe1 1124 fprintf(stderr, "Total atoms: %10u\n", atom_cnt);
7111feed
SP
1125 fprintf(stderr, "Memory total: %10lu KiB\n", (total_allocd + alloc_count*sizeof(struct object_entry))/1024);
1126 fprintf(stderr, " pools: %10lu KiB\n", total_allocd/1024);
1127 fprintf(stderr, " objects: %10lu KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
6143f064
SP
1128 fprintf(stderr, "---------------------------------------------------\n");
1129
1130 stat(pack_name, &sb);
1131 fprintf(stderr, "Pack size: %10lu KiB\n", (unsigned long)(sb.st_size/1024));
1132 stat(idx_name, &sb);
1133 fprintf(stderr, "Index size: %10lu KiB\n", (unsigned long)(sb.st_size/1024));
1134
1135 fprintf(stderr, "\n");
db5e523f
SP
1136
1137 return 0;
1138}