Commit | Line | Data |
---|---|---|
db5e523f SP |
1 | #include "builtin.h" |
2 | #include "cache.h" | |
3 | #include "object.h" | |
4 | #include "blob.h" | |
5 | #include "delta.h" | |
6 | #include "pack.h" | |
7 | #include "csum-file.h" | |
8 | ||
27d6d290 SP |
9 | struct object_entry |
10 | { | |
11 | struct object_entry *next; | |
12 | unsigned long offset; | |
13 | unsigned char sha1[20]; | |
14 | }; | |
15 | ||
16 | struct object_entry_block | |
17 | { | |
18 | struct object_entry_block *next_block; | |
19 | struct object_entry *next_free; | |
20 | struct object_entry *end; | |
ac47a738 | 21 | struct object_entry entries[FLEX_ARRAY]; /* more */ |
27d6d290 SP |
22 | }; |
23 | ||
ac47a738 SP |
24 | struct last_object |
25 | { | |
26 | void *data; | |
27 | unsigned long len; | |
28 | int depth; | |
29 | unsigned char sha1[20]; | |
30 | }; | |
31 | ||
32 | /* Stats and misc. counters. */ | |
db5e523f | 33 | static int max_depth = 10; |
27d6d290 | 34 | static unsigned long alloc_count; |
db5e523f | 35 | static unsigned long object_count; |
8bcce301 | 36 | static unsigned long duplicate_count; |
ac47a738 SP |
37 | |
38 | /* The .pack file */ | |
39 | static int pack_fd; | |
40 | static unsigned long pack_offset; | |
41 | static unsigned char pack_sha1[20]; | |
42 | ||
43 | /* Table of objects we've written. */ | |
27d6d290 | 44 | struct object_entry_block *blocks; |
ac47a738 SP |
45 | struct object_entry *object_table[1 << 16]; |
46 | ||
47 | /* Our last blob */ | |
48 | struct last_object last_blob; | |
8bcce301 | 49 | |
27d6d290 | 50 | static void alloc_objects(int cnt) |
8bcce301 | 51 | { |
27d6d290 SP |
52 | struct object_entry_block *b; |
53 | ||
54 | b = xmalloc(sizeof(struct object_entry_block) | |
55 | + cnt * sizeof(struct object_entry)); | |
56 | b->next_block = blocks; | |
57 | b->next_free = b->entries; | |
58 | b->end = b->entries + cnt; | |
59 | blocks = b; | |
60 | alloc_count += cnt; | |
61 | } | |
8bcce301 | 62 | |
27d6d290 | 63 | static struct object_entry* new_object(unsigned char *sha1) |
8bcce301 | 64 | { |
27d6d290 | 65 | struct object_entry *e; |
8bcce301 | 66 | |
27d6d290 SP |
67 | if (blocks->next_free == blocks->end) |
68 | alloc_objects(1000); | |
8bcce301 | 69 | |
27d6d290 SP |
70 | e = blocks->next_free++; |
71 | memcpy(e->sha1, sha1, sizeof(e->sha1)); | |
72 | return e; | |
8bcce301 SP |
73 | } |
74 | ||
75 | static struct object_entry* insert_object(unsigned char *sha1) | |
76 | { | |
77 | unsigned int h = sha1[0] << 8 | sha1[1]; | |
27d6d290 | 78 | struct object_entry *e = object_table[h]; |
8bcce301 SP |
79 | struct object_entry *p = 0; |
80 | ||
81 | while (e) { | |
82 | if (!memcmp(sha1, e->sha1, sizeof(e->sha1))) | |
83 | return e; | |
84 | p = e; | |
85 | e = e->next; | |
86 | } | |
87 | ||
88 | e = new_object(sha1); | |
89 | e->next = 0; | |
90 | e->offset = 0; | |
91 | if (p) | |
92 | p->next = e; | |
93 | else | |
27d6d290 | 94 | object_table[h] = e; |
8bcce301 SP |
95 | return e; |
96 | } | |
db5e523f SP |
97 | |
98 | static ssize_t yread(int fd, void *buffer, size_t length) | |
99 | { | |
100 | ssize_t ret = 0; | |
101 | while (ret < length) { | |
102 | ssize_t size = xread(fd, (char *) buffer + ret, length - ret); | |
103 | if (size < 0) { | |
104 | return size; | |
105 | } | |
106 | if (size == 0) { | |
107 | return ret; | |
108 | } | |
109 | ret += size; | |
110 | } | |
111 | return ret; | |
112 | } | |
113 | ||
114 | static ssize_t ywrite(int fd, void *buffer, size_t length) | |
115 | { | |
116 | ssize_t ret = 0; | |
117 | while (ret < length) { | |
118 | ssize_t size = xwrite(fd, (char *) buffer + ret, length - ret); | |
119 | if (size < 0) { | |
120 | return size; | |
121 | } | |
122 | if (size == 0) { | |
123 | return ret; | |
124 | } | |
125 | ret += size; | |
126 | } | |
127 | return ret; | |
128 | } | |
129 | ||
ac47a738 SP |
130 | static unsigned long encode_header( |
131 | enum object_type type, | |
132 | unsigned long size, | |
133 | unsigned char *hdr) | |
db5e523f SP |
134 | { |
135 | int n = 1; | |
136 | unsigned char c; | |
137 | ||
138 | if (type < OBJ_COMMIT || type > OBJ_DELTA) | |
139 | die("bad type %d", type); | |
140 | ||
141 | c = (type << 4) | (size & 15); | |
142 | size >>= 4; | |
143 | while (size) { | |
144 | *hdr++ = c | 0x80; | |
145 | c = size & 0x7f; | |
146 | size >>= 7; | |
147 | n++; | |
148 | } | |
149 | *hdr = c; | |
150 | return n; | |
151 | } | |
152 | ||
ac47a738 SP |
153 | static int store_object( |
154 | enum object_type type, | |
155 | void *dat, | |
156 | unsigned long datlen, | |
157 | struct last_object *last) | |
db5e523f | 158 | { |
db5e523f | 159 | void *out, *delta; |
ac47a738 SP |
160 | struct object_entry *e; |
161 | unsigned char hdr[96]; | |
162 | unsigned char sha1[20]; | |
db5e523f | 163 | unsigned long hdrlen, deltalen; |
ac47a738 SP |
164 | SHA_CTX c; |
165 | z_stream s; | |
166 | ||
167 | hdrlen = sprintf((char*)hdr,"%s %lu",type_names[type],datlen) + 1; | |
168 | SHA1_Init(&c); | |
169 | SHA1_Update(&c, hdr, hdrlen); | |
170 | SHA1_Update(&c, dat, datlen); | |
171 | SHA1_Final(sha1, &c); | |
172 | ||
173 | e = insert_object(sha1); | |
174 | if (e->offset) { | |
175 | duplicate_count++; | |
176 | return 0; | |
177 | } | |
178 | e->offset = pack_offset; | |
179 | object_count++; | |
db5e523f | 180 | |
ac47a738 SP |
181 | if (last->data && last->depth < max_depth) |
182 | delta = diff_delta(last->data, last->len, | |
db5e523f SP |
183 | dat, datlen, |
184 | &deltalen, 0); | |
ac47a738 | 185 | else |
db5e523f SP |
186 | delta = 0; |
187 | ||
188 | memset(&s, 0, sizeof(s)); | |
189 | deflateInit(&s, zlib_compression_level); | |
190 | ||
191 | if (delta) { | |
ac47a738 | 192 | last->depth++; |
db5e523f SP |
193 | s.next_in = delta; |
194 | s.avail_in = deltalen; | |
195 | hdrlen = encode_header(OBJ_DELTA, deltalen, hdr); | |
ac47a738 | 196 | if (ywrite(pack_fd, hdr, hdrlen) != hdrlen) |
db5e523f | 197 | die("Can't write object header: %s", strerror(errno)); |
ac47a738 | 198 | if (ywrite(pack_fd, last->sha1, sizeof(sha1)) != sizeof(sha1)) |
db5e523f | 199 | die("Can't write object base: %s", strerror(errno)); |
ac47a738 | 200 | pack_offset += hdrlen + sizeof(sha1); |
db5e523f | 201 | } else { |
ac47a738 | 202 | last->depth = 0; |
db5e523f SP |
203 | s.next_in = dat; |
204 | s.avail_in = datlen; | |
ac47a738 SP |
205 | hdrlen = encode_header(type, datlen, hdr); |
206 | if (ywrite(pack_fd, hdr, hdrlen) != hdrlen) | |
db5e523f | 207 | die("Can't write object header: %s", strerror(errno)); |
ac47a738 | 208 | pack_offset += hdrlen; |
db5e523f SP |
209 | } |
210 | ||
211 | s.avail_out = deflateBound(&s, s.avail_in); | |
212 | s.next_out = out = xmalloc(s.avail_out); | |
213 | while (deflate(&s, Z_FINISH) == Z_OK) | |
214 | /* nothing */; | |
215 | deflateEnd(&s); | |
216 | ||
ac47a738 | 217 | if (ywrite(pack_fd, out, s.total_out) != s.total_out) |
db5e523f | 218 | die("Failed writing compressed data %s", strerror(errno)); |
ac47a738 | 219 | pack_offset += s.total_out; |
db5e523f SP |
220 | |
221 | free(out); | |
222 | if (delta) | |
223 | free(delta); | |
ac47a738 SP |
224 | if (last->data) |
225 | free(last->data); | |
226 | last->data = dat; | |
227 | last->len = datlen; | |
228 | memcpy(last->sha1, sha1, sizeof(sha1)); | |
229 | return 1; | |
db5e523f SP |
230 | } |
231 | ||
8bcce301 | 232 | static void init_pack_header() |
db5e523f SP |
233 | { |
234 | const char* magic = "PACK"; | |
235 | unsigned long version = 2; | |
236 | unsigned long zero = 0; | |
237 | ||
238 | version = htonl(version); | |
239 | ||
ac47a738 | 240 | if (ywrite(pack_fd, (char*)magic, 4) != 4) |
db5e523f | 241 | die("Can't write pack magic: %s", strerror(errno)); |
ac47a738 | 242 | if (ywrite(pack_fd, &version, 4) != 4) |
db5e523f | 243 | die("Can't write pack version: %s", strerror(errno)); |
ac47a738 | 244 | if (ywrite(pack_fd, &zero, 4) != 4) |
db5e523f | 245 | die("Can't write 0 object count: %s", strerror(errno)); |
ac47a738 | 246 | pack_offset = 4 * 3; |
db5e523f SP |
247 | } |
248 | ||
8bcce301 | 249 | static void fixup_header_footer() |
db5e523f SP |
250 | { |
251 | SHA_CTX c; | |
252 | char hdr[8]; | |
db5e523f SP |
253 | unsigned long cnt; |
254 | char *buf; | |
255 | size_t n; | |
256 | ||
ac47a738 | 257 | if (lseek(pack_fd, 0, SEEK_SET) != 0) |
db5e523f SP |
258 | die("Failed seeking to start: %s", strerror(errno)); |
259 | ||
260 | SHA1_Init(&c); | |
ac47a738 | 261 | if (yread(pack_fd, hdr, 8) != 8) |
db5e523f SP |
262 | die("Failed reading header: %s", strerror(errno)); |
263 | SHA1_Update(&c, hdr, 8); | |
264 | ||
db5e523f SP |
265 | cnt = htonl(object_count); |
266 | SHA1_Update(&c, &cnt, 4); | |
ac47a738 | 267 | if (ywrite(pack_fd, &cnt, 4) != 4) |
db5e523f SP |
268 | die("Failed writing object count: %s", strerror(errno)); |
269 | ||
270 | buf = xmalloc(128 * 1024); | |
271 | for (;;) { | |
ac47a738 | 272 | n = xread(pack_fd, buf, 128 * 1024); |
db5e523f SP |
273 | if (n <= 0) |
274 | break; | |
275 | SHA1_Update(&c, buf, n); | |
276 | } | |
277 | free(buf); | |
278 | ||
ac47a738 SP |
279 | SHA1_Final(pack_sha1, &c); |
280 | if (ywrite(pack_fd, pack_sha1, sizeof(pack_sha1)) != sizeof(pack_sha1)) | |
db5e523f SP |
281 | die("Failed writing pack checksum: %s", strerror(errno)); |
282 | } | |
283 | ||
8bcce301 | 284 | static int oecmp (const void *_a, const void *_b) |
db5e523f | 285 | { |
8bcce301 SP |
286 | struct object_entry *a = *((struct object_entry**)_a); |
287 | struct object_entry *b = *((struct object_entry**)_b); | |
288 | return memcmp(a->sha1, b->sha1, sizeof(a->sha1)); | |
289 | } | |
290 | ||
291 | static void write_index(const char *idx_name) | |
292 | { | |
293 | struct sha1file *f; | |
294 | struct object_entry **idx, **c, **last; | |
295 | struct object_entry *e; | |
27d6d290 | 296 | struct object_entry_block *o; |
8bcce301 SP |
297 | unsigned int array[256]; |
298 | int i; | |
299 | ||
300 | /* Build the sorted table of object IDs. */ | |
301 | idx = xmalloc(object_count * sizeof(struct object_entry*)); | |
302 | c = idx; | |
27d6d290 SP |
303 | for (o = blocks; o; o = o->next_block) |
304 | for (e = o->entries; e != o->next_free; e++) | |
305 | *c++ = e; | |
8bcce301 SP |
306 | last = idx + object_count; |
307 | qsort(idx, object_count, sizeof(struct object_entry*), oecmp); | |
308 | ||
309 | /* Generate the fan-out array. */ | |
310 | c = idx; | |
311 | for (i = 0; i < 256; i++) { | |
312 | struct object_entry **next = c;; | |
313 | while (next < last) { | |
314 | if ((*next)->sha1[0] != i) | |
315 | break; | |
316 | next++; | |
317 | } | |
318 | array[i] = htonl(next - idx); | |
319 | c = next; | |
320 | } | |
321 | ||
322 | f = sha1create("%s", idx_name); | |
323 | sha1write(f, array, 256 * sizeof(int)); | |
324 | for (c = idx; c != last; c++) { | |
325 | unsigned int offset = htonl((*c)->offset); | |
326 | sha1write(f, &offset, 4); | |
327 | sha1write(f, (*c)->sha1, sizeof((*c)->sha1)); | |
328 | } | |
ac47a738 | 329 | sha1write(f, pack_sha1, sizeof(pack_sha1)); |
8bcce301 SP |
330 | sha1close(f, NULL, 1); |
331 | free(idx); | |
332 | } | |
333 | ||
334 | int main(int argc, const char **argv) | |
335 | { | |
336 | const char *base_name = argv[1]; | |
337 | int est_obj_cnt = atoi(argv[2]); | |
338 | char *pack_name; | |
339 | char *idx_name; | |
340 | ||
341 | pack_name = xmalloc(strlen(base_name) + 6); | |
342 | sprintf(pack_name, "%s.pack", base_name); | |
343 | idx_name = xmalloc(strlen(base_name) + 5); | |
344 | sprintf(idx_name, "%s.idx", base_name); | |
345 | ||
ac47a738 SP |
346 | pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666); |
347 | if (pack_fd < 0) | |
8bcce301 SP |
348 | die("Can't create pack file %s: %s", pack_name, strerror(errno)); |
349 | ||
27d6d290 | 350 | alloc_objects(est_obj_cnt); |
db5e523f SP |
351 | init_pack_header(); |
352 | for (;;) { | |
353 | unsigned long datlen; | |
db5e523f | 354 | void *dat; |
db5e523f SP |
355 | |
356 | if (yread(0, &datlen, 4) != 4) | |
357 | break; | |
358 | ||
359 | dat = xmalloc(datlen); | |
360 | if (yread(0, dat, datlen) != datlen) | |
361 | break; | |
362 | ||
ac47a738 | 363 | if (!store_object(OBJ_BLOB, dat, datlen, &last_blob)) |
8bcce301 | 364 | free(dat); |
db5e523f SP |
365 | } |
366 | fixup_header_footer(); | |
ac47a738 | 367 | close(pack_fd); |
8bcce301 SP |
368 | write_index(idx_name); |
369 | ||
27d6d290 SP |
370 | fprintf(stderr, "%lu objects, %lu duplicates, %lu allocated (%lu overflow)\n", |
371 | object_count, duplicate_count, alloc_count, alloc_count - est_obj_cnt); | |
db5e523f SP |
372 | |
373 | return 0; | |
374 | } |