Added basic command handler to fast-import.
[git/git.git] / fast-import.c
CommitLineData
db5e523f
SP
1#include "builtin.h"
2#include "cache.h"
3#include "object.h"
4#include "blob.h"
5#include "delta.h"
6#include "pack.h"
7#include "csum-file.h"
8
27d6d290
SP
9struct object_entry
10{
11 struct object_entry *next;
12 unsigned long offset;
13 unsigned char sha1[20];
14};
15
16struct object_entry_block
17{
18 struct object_entry_block *next_block;
19 struct object_entry *next_free;
20 struct object_entry *end;
ac47a738 21 struct object_entry entries[FLEX_ARRAY]; /* more */
27d6d290
SP
22};
23
ac47a738
SP
24struct last_object
25{
26 void *data;
27 unsigned long len;
28 int depth;
29 unsigned char sha1[20];
30};
31
32/* Stats and misc. counters. */
db5e523f 33static int max_depth = 10;
27d6d290 34static unsigned long alloc_count;
db5e523f 35static unsigned long object_count;
8bcce301 36static unsigned long duplicate_count;
6143f064
SP
37static unsigned long object_count_by_type[9];
38static unsigned long duplicate_count_by_type[9];
ac47a738
SP
39
40/* The .pack file */
41static int pack_fd;
42static unsigned long pack_offset;
43static unsigned char pack_sha1[20];
44
45/* Table of objects we've written. */
27d6d290 46struct object_entry_block *blocks;
ac47a738
SP
47struct object_entry *object_table[1 << 16];
48
49/* Our last blob */
50struct last_object last_blob;
8bcce301 51
27d6d290 52static void alloc_objects(int cnt)
8bcce301 53{
27d6d290
SP
54 struct object_entry_block *b;
55
56 b = xmalloc(sizeof(struct object_entry_block)
57 + cnt * sizeof(struct object_entry));
58 b->next_block = blocks;
59 b->next_free = b->entries;
60 b->end = b->entries + cnt;
61 blocks = b;
62 alloc_count += cnt;
63}
8bcce301 64
27d6d290 65static struct object_entry* new_object(unsigned char *sha1)
8bcce301 66{
27d6d290 67 struct object_entry *e;
8bcce301 68
27d6d290
SP
69 if (blocks->next_free == blocks->end)
70 alloc_objects(1000);
8bcce301 71
27d6d290
SP
72 e = blocks->next_free++;
73 memcpy(e->sha1, sha1, sizeof(e->sha1));
74 return e;
8bcce301
SP
75}
76
77static struct object_entry* insert_object(unsigned char *sha1)
78{
79 unsigned int h = sha1[0] << 8 | sha1[1];
27d6d290 80 struct object_entry *e = object_table[h];
8bcce301
SP
81 struct object_entry *p = 0;
82
83 while (e) {
84 if (!memcmp(sha1, e->sha1, sizeof(e->sha1)))
85 return e;
86 p = e;
87 e = e->next;
88 }
89
90 e = new_object(sha1);
91 e->next = 0;
92 e->offset = 0;
93 if (p)
94 p->next = e;
95 else
27d6d290 96 object_table[h] = e;
8bcce301
SP
97 return e;
98}
db5e523f
SP
99
100static ssize_t yread(int fd, void *buffer, size_t length)
101{
102 ssize_t ret = 0;
103 while (ret < length) {
104 ssize_t size = xread(fd, (char *) buffer + ret, length - ret);
105 if (size < 0) {
106 return size;
107 }
108 if (size == 0) {
109 return ret;
110 }
111 ret += size;
112 }
113 return ret;
114}
115
116static ssize_t ywrite(int fd, void *buffer, size_t length)
117{
118 ssize_t ret = 0;
119 while (ret < length) {
120 ssize_t size = xwrite(fd, (char *) buffer + ret, length - ret);
121 if (size < 0) {
122 return size;
123 }
124 if (size == 0) {
125 return ret;
126 }
127 ret += size;
128 }
129 return ret;
130}
131
ac47a738
SP
132static unsigned long encode_header(
133 enum object_type type,
134 unsigned long size,
135 unsigned char *hdr)
db5e523f
SP
136{
137 int n = 1;
138 unsigned char c;
139
140 if (type < OBJ_COMMIT || type > OBJ_DELTA)
141 die("bad type %d", type);
142
143 c = (type << 4) | (size & 15);
144 size >>= 4;
145 while (size) {
146 *hdr++ = c | 0x80;
147 c = size & 0x7f;
148 size >>= 7;
149 n++;
150 }
151 *hdr = c;
152 return n;
153}
154
ac47a738
SP
155static int store_object(
156 enum object_type type,
157 void *dat,
158 unsigned long datlen,
159 struct last_object *last)
db5e523f 160{
db5e523f 161 void *out, *delta;
ac47a738
SP
162 struct object_entry *e;
163 unsigned char hdr[96];
164 unsigned char sha1[20];
db5e523f 165 unsigned long hdrlen, deltalen;
ac47a738
SP
166 SHA_CTX c;
167 z_stream s;
168
169 hdrlen = sprintf((char*)hdr,"%s %lu",type_names[type],datlen) + 1;
170 SHA1_Init(&c);
171 SHA1_Update(&c, hdr, hdrlen);
172 SHA1_Update(&c, dat, datlen);
173 SHA1_Final(sha1, &c);
174
175 e = insert_object(sha1);
176 if (e->offset) {
177 duplicate_count++;
6143f064 178 duplicate_count_by_type[type]++;
ac47a738
SP
179 return 0;
180 }
181 e->offset = pack_offset;
182 object_count++;
6143f064 183 object_count_by_type[type]++;
db5e523f 184
ac47a738
SP
185 if (last->data && last->depth < max_depth)
186 delta = diff_delta(last->data, last->len,
db5e523f
SP
187 dat, datlen,
188 &deltalen, 0);
ac47a738 189 else
db5e523f
SP
190 delta = 0;
191
192 memset(&s, 0, sizeof(s));
193 deflateInit(&s, zlib_compression_level);
194
195 if (delta) {
ac47a738 196 last->depth++;
db5e523f
SP
197 s.next_in = delta;
198 s.avail_in = deltalen;
199 hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
ac47a738 200 if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
db5e523f 201 die("Can't write object header: %s", strerror(errno));
ac47a738 202 if (ywrite(pack_fd, last->sha1, sizeof(sha1)) != sizeof(sha1))
db5e523f 203 die("Can't write object base: %s", strerror(errno));
ac47a738 204 pack_offset += hdrlen + sizeof(sha1);
db5e523f 205 } else {
ac47a738 206 last->depth = 0;
db5e523f
SP
207 s.next_in = dat;
208 s.avail_in = datlen;
ac47a738
SP
209 hdrlen = encode_header(type, datlen, hdr);
210 if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
db5e523f 211 die("Can't write object header: %s", strerror(errno));
ac47a738 212 pack_offset += hdrlen;
db5e523f
SP
213 }
214
215 s.avail_out = deflateBound(&s, s.avail_in);
216 s.next_out = out = xmalloc(s.avail_out);
217 while (deflate(&s, Z_FINISH) == Z_OK)
218 /* nothing */;
219 deflateEnd(&s);
220
ac47a738 221 if (ywrite(pack_fd, out, s.total_out) != s.total_out)
db5e523f 222 die("Failed writing compressed data %s", strerror(errno));
ac47a738 223 pack_offset += s.total_out;
db5e523f
SP
224
225 free(out);
226 if (delta)
227 free(delta);
ac47a738
SP
228 if (last->data)
229 free(last->data);
230 last->data = dat;
231 last->len = datlen;
232 memcpy(last->sha1, sha1, sizeof(sha1));
233 return 1;
db5e523f
SP
234}
235
8bcce301 236static void init_pack_header()
db5e523f
SP
237{
238 const char* magic = "PACK";
6143f064 239 unsigned long version = 3;
db5e523f
SP
240 unsigned long zero = 0;
241
242 version = htonl(version);
243
ac47a738 244 if (ywrite(pack_fd, (char*)magic, 4) != 4)
db5e523f 245 die("Can't write pack magic: %s", strerror(errno));
ac47a738 246 if (ywrite(pack_fd, &version, 4) != 4)
db5e523f 247 die("Can't write pack version: %s", strerror(errno));
ac47a738 248 if (ywrite(pack_fd, &zero, 4) != 4)
db5e523f 249 die("Can't write 0 object count: %s", strerror(errno));
ac47a738 250 pack_offset = 4 * 3;
db5e523f
SP
251}
252
8bcce301 253static void fixup_header_footer()
db5e523f
SP
254{
255 SHA_CTX c;
256 char hdr[8];
db5e523f
SP
257 unsigned long cnt;
258 char *buf;
259 size_t n;
260
ac47a738 261 if (lseek(pack_fd, 0, SEEK_SET) != 0)
db5e523f
SP
262 die("Failed seeking to start: %s", strerror(errno));
263
264 SHA1_Init(&c);
ac47a738 265 if (yread(pack_fd, hdr, 8) != 8)
db5e523f
SP
266 die("Failed reading header: %s", strerror(errno));
267 SHA1_Update(&c, hdr, 8);
268
db5e523f
SP
269 cnt = htonl(object_count);
270 SHA1_Update(&c, &cnt, 4);
ac47a738 271 if (ywrite(pack_fd, &cnt, 4) != 4)
db5e523f
SP
272 die("Failed writing object count: %s", strerror(errno));
273
274 buf = xmalloc(128 * 1024);
275 for (;;) {
ac47a738 276 n = xread(pack_fd, buf, 128 * 1024);
db5e523f
SP
277 if (n <= 0)
278 break;
279 SHA1_Update(&c, buf, n);
280 }
281 free(buf);
282
ac47a738
SP
283 SHA1_Final(pack_sha1, &c);
284 if (ywrite(pack_fd, pack_sha1, sizeof(pack_sha1)) != sizeof(pack_sha1))
db5e523f
SP
285 die("Failed writing pack checksum: %s", strerror(errno));
286}
287
8bcce301 288static int oecmp (const void *_a, const void *_b)
db5e523f 289{
8bcce301
SP
290 struct object_entry *a = *((struct object_entry**)_a);
291 struct object_entry *b = *((struct object_entry**)_b);
292 return memcmp(a->sha1, b->sha1, sizeof(a->sha1));
293}
294
295static void write_index(const char *idx_name)
296{
297 struct sha1file *f;
298 struct object_entry **idx, **c, **last;
299 struct object_entry *e;
27d6d290 300 struct object_entry_block *o;
8bcce301
SP
301 unsigned int array[256];
302 int i;
303
304 /* Build the sorted table of object IDs. */
305 idx = xmalloc(object_count * sizeof(struct object_entry*));
306 c = idx;
27d6d290
SP
307 for (o = blocks; o; o = o->next_block)
308 for (e = o->entries; e != o->next_free; e++)
309 *c++ = e;
8bcce301
SP
310 last = idx + object_count;
311 qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
312
313 /* Generate the fan-out array. */
314 c = idx;
315 for (i = 0; i < 256; i++) {
316 struct object_entry **next = c;;
317 while (next < last) {
318 if ((*next)->sha1[0] != i)
319 break;
320 next++;
321 }
322 array[i] = htonl(next - idx);
323 c = next;
324 }
325
326 f = sha1create("%s", idx_name);
327 sha1write(f, array, 256 * sizeof(int));
328 for (c = idx; c != last; c++) {
329 unsigned int offset = htonl((*c)->offset);
330 sha1write(f, &offset, 4);
331 sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
332 }
ac47a738 333 sha1write(f, pack_sha1, sizeof(pack_sha1));
8bcce301
SP
334 sha1close(f, NULL, 1);
335 free(idx);
336}
337
6143f064
SP
338static void new_blob()
339{
340 unsigned long datlen;
341 void *dat;
342
343 if (yread(0, &datlen, 4) != 4)
344 die("Can't obtain blob length");
345
346 dat = xmalloc(datlen);
347 if (yread(0, dat, datlen) != datlen)
348 die("Con't obtain %lu bytes of blob data", datlen);
349
350 if (!store_object(OBJ_BLOB, dat, datlen, &last_blob))
351 free(dat);
352}
353
8bcce301
SP
354int main(int argc, const char **argv)
355{
356 const char *base_name = argv[1];
357 int est_obj_cnt = atoi(argv[2]);
358 char *pack_name;
359 char *idx_name;
6143f064 360 struct stat sb;
8bcce301
SP
361
362 pack_name = xmalloc(strlen(base_name) + 6);
363 sprintf(pack_name, "%s.pack", base_name);
364 idx_name = xmalloc(strlen(base_name) + 5);
365 sprintf(idx_name, "%s.idx", base_name);
366
ac47a738
SP
367 pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
368 if (pack_fd < 0)
6143f064 369 die("Can't create %s: %s", pack_name, strerror(errno));
8bcce301 370
27d6d290 371 alloc_objects(est_obj_cnt);
db5e523f
SP
372 init_pack_header();
373 for (;;) {
6143f064
SP
374 unsigned long cmd;
375 if (yread(0, &cmd, 4) != 4)
db5e523f
SP
376 break;
377
6143f064
SP
378 switch (cmd) {
379 case 'blob': new_blob(); break;
380 default:
381 die("Invalid command %lu", cmd);
382 }
db5e523f
SP
383 }
384 fixup_header_footer();
ac47a738 385 close(pack_fd);
8bcce301
SP
386 write_index(idx_name);
387
6143f064
SP
388 fprintf(stderr, "%s statistics:\n", argv[0]);
389 fprintf(stderr, "---------------------------------------------------\n");
390 fprintf(stderr, "Alloc'd objects: %10lu (%10lu overflow )\n", alloc_count, alloc_count - est_obj_cnt);
391 fprintf(stderr, "Total objects: %10lu (%10lu duplicates)\n", object_count, duplicate_count);
392 fprintf(stderr, " blobs : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB]);
393 fprintf(stderr, " trees : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE]);
394 fprintf(stderr, " commits: %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT]);
395 fprintf(stderr, " tags : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG]);
396 fprintf(stderr, "---------------------------------------------------\n");
397
398 stat(pack_name, &sb);
399 fprintf(stderr, "Pack size: %10lu KiB\n", (unsigned long)(sb.st_size/1024));
400 stat(idx_name, &sb);
401 fprintf(stderr, "Index size: %10lu KiB\n", (unsigned long)(sb.st_size/1024));
402
403 fprintf(stderr, "\n");
db5e523f
SP
404
405 return 0;
406}