Refactored fast-import's internals for future additions.
[git/git.git] / fast-import.c
CommitLineData
db5e523f
SP
1#include "builtin.h"
2#include "cache.h"
3#include "object.h"
4#include "blob.h"
5#include "delta.h"
6#include "pack.h"
7#include "csum-file.h"
8
27d6d290
SP
9struct object_entry
10{
11 struct object_entry *next;
12 unsigned long offset;
13 unsigned char sha1[20];
14};
15
16struct object_entry_block
17{
18 struct object_entry_block *next_block;
19 struct object_entry *next_free;
20 struct object_entry *end;
ac47a738 21 struct object_entry entries[FLEX_ARRAY]; /* more */
27d6d290
SP
22};
23
ac47a738
SP
24struct last_object
25{
26 void *data;
27 unsigned long len;
28 int depth;
29 unsigned char sha1[20];
30};
31
32/* Stats and misc. counters. */
db5e523f 33static int max_depth = 10;
27d6d290 34static unsigned long alloc_count;
db5e523f 35static unsigned long object_count;
8bcce301 36static unsigned long duplicate_count;
ac47a738
SP
37
38/* The .pack file */
39static int pack_fd;
40static unsigned long pack_offset;
41static unsigned char pack_sha1[20];
42
43/* Table of objects we've written. */
27d6d290 44struct object_entry_block *blocks;
ac47a738
SP
45struct object_entry *object_table[1 << 16];
46
47/* Our last blob */
48struct last_object last_blob;
8bcce301 49
27d6d290 50static void alloc_objects(int cnt)
8bcce301 51{
27d6d290
SP
52 struct object_entry_block *b;
53
54 b = xmalloc(sizeof(struct object_entry_block)
55 + cnt * sizeof(struct object_entry));
56 b->next_block = blocks;
57 b->next_free = b->entries;
58 b->end = b->entries + cnt;
59 blocks = b;
60 alloc_count += cnt;
61}
8bcce301 62
27d6d290 63static struct object_entry* new_object(unsigned char *sha1)
8bcce301 64{
27d6d290 65 struct object_entry *e;
8bcce301 66
27d6d290
SP
67 if (blocks->next_free == blocks->end)
68 alloc_objects(1000);
8bcce301 69
27d6d290
SP
70 e = blocks->next_free++;
71 memcpy(e->sha1, sha1, sizeof(e->sha1));
72 return e;
8bcce301
SP
73}
74
75static struct object_entry* insert_object(unsigned char *sha1)
76{
77 unsigned int h = sha1[0] << 8 | sha1[1];
27d6d290 78 struct object_entry *e = object_table[h];
8bcce301
SP
79 struct object_entry *p = 0;
80
81 while (e) {
82 if (!memcmp(sha1, e->sha1, sizeof(e->sha1)))
83 return e;
84 p = e;
85 e = e->next;
86 }
87
88 e = new_object(sha1);
89 e->next = 0;
90 e->offset = 0;
91 if (p)
92 p->next = e;
93 else
27d6d290 94 object_table[h] = e;
8bcce301
SP
95 return e;
96}
db5e523f
SP
97
98static ssize_t yread(int fd, void *buffer, size_t length)
99{
100 ssize_t ret = 0;
101 while (ret < length) {
102 ssize_t size = xread(fd, (char *) buffer + ret, length - ret);
103 if (size < 0) {
104 return size;
105 }
106 if (size == 0) {
107 return ret;
108 }
109 ret += size;
110 }
111 return ret;
112}
113
114static ssize_t ywrite(int fd, void *buffer, size_t length)
115{
116 ssize_t ret = 0;
117 while (ret < length) {
118 ssize_t size = xwrite(fd, (char *) buffer + ret, length - ret);
119 if (size < 0) {
120 return size;
121 }
122 if (size == 0) {
123 return ret;
124 }
125 ret += size;
126 }
127 return ret;
128}
129
ac47a738
SP
130static unsigned long encode_header(
131 enum object_type type,
132 unsigned long size,
133 unsigned char *hdr)
db5e523f
SP
134{
135 int n = 1;
136 unsigned char c;
137
138 if (type < OBJ_COMMIT || type > OBJ_DELTA)
139 die("bad type %d", type);
140
141 c = (type << 4) | (size & 15);
142 size >>= 4;
143 while (size) {
144 *hdr++ = c | 0x80;
145 c = size & 0x7f;
146 size >>= 7;
147 n++;
148 }
149 *hdr = c;
150 return n;
151}
152
ac47a738
SP
153static int store_object(
154 enum object_type type,
155 void *dat,
156 unsigned long datlen,
157 struct last_object *last)
db5e523f 158{
db5e523f 159 void *out, *delta;
ac47a738
SP
160 struct object_entry *e;
161 unsigned char hdr[96];
162 unsigned char sha1[20];
db5e523f 163 unsigned long hdrlen, deltalen;
ac47a738
SP
164 SHA_CTX c;
165 z_stream s;
166
167 hdrlen = sprintf((char*)hdr,"%s %lu",type_names[type],datlen) + 1;
168 SHA1_Init(&c);
169 SHA1_Update(&c, hdr, hdrlen);
170 SHA1_Update(&c, dat, datlen);
171 SHA1_Final(sha1, &c);
172
173 e = insert_object(sha1);
174 if (e->offset) {
175 duplicate_count++;
176 return 0;
177 }
178 e->offset = pack_offset;
179 object_count++;
db5e523f 180
ac47a738
SP
181 if (last->data && last->depth < max_depth)
182 delta = diff_delta(last->data, last->len,
db5e523f
SP
183 dat, datlen,
184 &deltalen, 0);
ac47a738 185 else
db5e523f
SP
186 delta = 0;
187
188 memset(&s, 0, sizeof(s));
189 deflateInit(&s, zlib_compression_level);
190
191 if (delta) {
ac47a738 192 last->depth++;
db5e523f
SP
193 s.next_in = delta;
194 s.avail_in = deltalen;
195 hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
ac47a738 196 if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
db5e523f 197 die("Can't write object header: %s", strerror(errno));
ac47a738 198 if (ywrite(pack_fd, last->sha1, sizeof(sha1)) != sizeof(sha1))
db5e523f 199 die("Can't write object base: %s", strerror(errno));
ac47a738 200 pack_offset += hdrlen + sizeof(sha1);
db5e523f 201 } else {
ac47a738 202 last->depth = 0;
db5e523f
SP
203 s.next_in = dat;
204 s.avail_in = datlen;
ac47a738
SP
205 hdrlen = encode_header(type, datlen, hdr);
206 if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
db5e523f 207 die("Can't write object header: %s", strerror(errno));
ac47a738 208 pack_offset += hdrlen;
db5e523f
SP
209 }
210
211 s.avail_out = deflateBound(&s, s.avail_in);
212 s.next_out = out = xmalloc(s.avail_out);
213 while (deflate(&s, Z_FINISH) == Z_OK)
214 /* nothing */;
215 deflateEnd(&s);
216
ac47a738 217 if (ywrite(pack_fd, out, s.total_out) != s.total_out)
db5e523f 218 die("Failed writing compressed data %s", strerror(errno));
ac47a738 219 pack_offset += s.total_out;
db5e523f
SP
220
221 free(out);
222 if (delta)
223 free(delta);
ac47a738
SP
224 if (last->data)
225 free(last->data);
226 last->data = dat;
227 last->len = datlen;
228 memcpy(last->sha1, sha1, sizeof(sha1));
229 return 1;
db5e523f
SP
230}
231
8bcce301 232static void init_pack_header()
db5e523f
SP
233{
234 const char* magic = "PACK";
235 unsigned long version = 2;
236 unsigned long zero = 0;
237
238 version = htonl(version);
239
ac47a738 240 if (ywrite(pack_fd, (char*)magic, 4) != 4)
db5e523f 241 die("Can't write pack magic: %s", strerror(errno));
ac47a738 242 if (ywrite(pack_fd, &version, 4) != 4)
db5e523f 243 die("Can't write pack version: %s", strerror(errno));
ac47a738 244 if (ywrite(pack_fd, &zero, 4) != 4)
db5e523f 245 die("Can't write 0 object count: %s", strerror(errno));
ac47a738 246 pack_offset = 4 * 3;
db5e523f
SP
247}
248
8bcce301 249static void fixup_header_footer()
db5e523f
SP
250{
251 SHA_CTX c;
252 char hdr[8];
db5e523f
SP
253 unsigned long cnt;
254 char *buf;
255 size_t n;
256
ac47a738 257 if (lseek(pack_fd, 0, SEEK_SET) != 0)
db5e523f
SP
258 die("Failed seeking to start: %s", strerror(errno));
259
260 SHA1_Init(&c);
ac47a738 261 if (yread(pack_fd, hdr, 8) != 8)
db5e523f
SP
262 die("Failed reading header: %s", strerror(errno));
263 SHA1_Update(&c, hdr, 8);
264
db5e523f
SP
265 cnt = htonl(object_count);
266 SHA1_Update(&c, &cnt, 4);
ac47a738 267 if (ywrite(pack_fd, &cnt, 4) != 4)
db5e523f
SP
268 die("Failed writing object count: %s", strerror(errno));
269
270 buf = xmalloc(128 * 1024);
271 for (;;) {
ac47a738 272 n = xread(pack_fd, buf, 128 * 1024);
db5e523f
SP
273 if (n <= 0)
274 break;
275 SHA1_Update(&c, buf, n);
276 }
277 free(buf);
278
ac47a738
SP
279 SHA1_Final(pack_sha1, &c);
280 if (ywrite(pack_fd, pack_sha1, sizeof(pack_sha1)) != sizeof(pack_sha1))
db5e523f
SP
281 die("Failed writing pack checksum: %s", strerror(errno));
282}
283
8bcce301 284static int oecmp (const void *_a, const void *_b)
db5e523f 285{
8bcce301
SP
286 struct object_entry *a = *((struct object_entry**)_a);
287 struct object_entry *b = *((struct object_entry**)_b);
288 return memcmp(a->sha1, b->sha1, sizeof(a->sha1));
289}
290
291static void write_index(const char *idx_name)
292{
293 struct sha1file *f;
294 struct object_entry **idx, **c, **last;
295 struct object_entry *e;
27d6d290 296 struct object_entry_block *o;
8bcce301
SP
297 unsigned int array[256];
298 int i;
299
300 /* Build the sorted table of object IDs. */
301 idx = xmalloc(object_count * sizeof(struct object_entry*));
302 c = idx;
27d6d290
SP
303 for (o = blocks; o; o = o->next_block)
304 for (e = o->entries; e != o->next_free; e++)
305 *c++ = e;
8bcce301
SP
306 last = idx + object_count;
307 qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
308
309 /* Generate the fan-out array. */
310 c = idx;
311 for (i = 0; i < 256; i++) {
312 struct object_entry **next = c;;
313 while (next < last) {
314 if ((*next)->sha1[0] != i)
315 break;
316 next++;
317 }
318 array[i] = htonl(next - idx);
319 c = next;
320 }
321
322 f = sha1create("%s", idx_name);
323 sha1write(f, array, 256 * sizeof(int));
324 for (c = idx; c != last; c++) {
325 unsigned int offset = htonl((*c)->offset);
326 sha1write(f, &offset, 4);
327 sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
328 }
ac47a738 329 sha1write(f, pack_sha1, sizeof(pack_sha1));
8bcce301
SP
330 sha1close(f, NULL, 1);
331 free(idx);
332}
333
334int main(int argc, const char **argv)
335{
336 const char *base_name = argv[1];
337 int est_obj_cnt = atoi(argv[2]);
338 char *pack_name;
339 char *idx_name;
340
341 pack_name = xmalloc(strlen(base_name) + 6);
342 sprintf(pack_name, "%s.pack", base_name);
343 idx_name = xmalloc(strlen(base_name) + 5);
344 sprintf(idx_name, "%s.idx", base_name);
345
ac47a738
SP
346 pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
347 if (pack_fd < 0)
8bcce301
SP
348 die("Can't create pack file %s: %s", pack_name, strerror(errno));
349
27d6d290 350 alloc_objects(est_obj_cnt);
db5e523f
SP
351 init_pack_header();
352 for (;;) {
353 unsigned long datlen;
db5e523f 354 void *dat;
db5e523f
SP
355
356 if (yread(0, &datlen, 4) != 4)
357 break;
358
359 dat = xmalloc(datlen);
360 if (yread(0, dat, datlen) != datlen)
361 break;
362
ac47a738 363 if (!store_object(OBJ_BLOB, dat, datlen, &last_blob))
8bcce301 364 free(dat);
db5e523f
SP
365 }
366 fixup_header_footer();
ac47a738 367 close(pack_fd);
8bcce301
SP
368 write_index(idx_name);
369
27d6d290
SP
370 fprintf(stderr, "%lu objects, %lu duplicates, %lu allocated (%lu overflow)\n",
371 object_count, duplicate_count, alloc_count, alloc_count - est_obj_cnt);
db5e523f
SP
372
373 return 0;
374}