pack: move clear_delta_base_cache(), packed_object_info(), unpack_entry()
[git/git.git] / packfile.c
CommitLineData
4f39cd82 1#include "cache.h"
6d6a80e0 2#include "mru.h"
0317f455 3#include "pack.h"
0abe14f6
JT
4#include "dir.h"
5#include "mergesort.h"
6#include "packfile.h"
7b3aa75d 7#include "delta.h"
f1d8130b
JT
8#include "list.h"
9#include "streaming.h"
4f39cd82
JT
10
11char *odb_pack_name(struct strbuf *buf,
12 const unsigned char *sha1,
13 const char *ext)
14{
15 strbuf_reset(buf);
16 strbuf_addf(buf, "%s/pack/pack-%s.%s", get_object_directory(),
17 sha1_to_hex(sha1), ext);
18 return buf->buf;
19}
20
21char *sha1_pack_name(const unsigned char *sha1)
22{
23 static struct strbuf buf = STRBUF_INIT;
24 return odb_pack_name(&buf, sha1, "pack");
25}
26
27char *sha1_pack_index_name(const unsigned char *sha1)
28{
29 static struct strbuf buf = STRBUF_INIT;
30 return odb_pack_name(&buf, sha1, "idx");
31}
6d6a80e0 32
84f80ad5
JT
33static unsigned int pack_used_ctr;
34static unsigned int pack_mmap_calls;
35static unsigned int peak_pack_open_windows;
36static unsigned int pack_open_windows;
e65f1862 37static unsigned int pack_open_fds;
84f80ad5
JT
38static unsigned int pack_max_fds;
39static size_t peak_pack_mapped;
40static size_t pack_mapped;
6d6a80e0
JT
41struct packed_git *packed_git;
42
43static struct mru packed_git_mru_storage;
44struct mru *packed_git_mru = &packed_git_mru_storage;
8e21176c
JT
45
#define SZ_FMT PRIuMAX
/* Widen a size_t to uintmax_t so it matches the SZ_FMT conversion. */
static inline uintmax_t sz_fmt(size_t s)
{
	uintmax_t widened = s;

	return widened;
}
48
49void pack_report(void)
50{
51 fprintf(stderr,
52 "pack_report: getpagesize() = %10" SZ_FMT "\n"
53 "pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
54 "pack_report: core.packedGitLimit = %10" SZ_FMT "\n",
55 sz_fmt(getpagesize()),
56 sz_fmt(packed_git_window_size),
57 sz_fmt(packed_git_limit));
58 fprintf(stderr,
59 "pack_report: pack_used_ctr = %10u\n"
60 "pack_report: pack_mmap_calls = %10u\n"
61 "pack_report: pack_open_windows = %10u / %10u\n"
62 "pack_report: pack_mapped = "
63 "%10" SZ_FMT " / %10" SZ_FMT "\n",
64 pack_used_ctr,
65 pack_mmap_calls,
66 pack_open_windows, peak_pack_open_windows,
67 sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));
68}
0317f455
JT
69
70/*
71 * Open and mmap the index file at path, perform a couple of
72 * consistency checks, then record its information to p. Return 0 on
73 * success.
74 */
75static int check_packed_git_idx(const char *path, struct packed_git *p)
76{
77 void *idx_map;
78 struct pack_idx_header *hdr;
79 size_t idx_size;
80 uint32_t version, nr, i, *index;
81 int fd = git_open(path);
82 struct stat st;
83
84 if (fd < 0)
85 return -1;
86 if (fstat(fd, &st)) {
87 close(fd);
88 return -1;
89 }
90 idx_size = xsize_t(st.st_size);
91 if (idx_size < 4 * 256 + 20 + 20) {
92 close(fd);
93 return error("index file %s is too small", path);
94 }
95 idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
96 close(fd);
97
98 hdr = idx_map;
99 if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
100 version = ntohl(hdr->idx_version);
101 if (version < 2 || version > 2) {
102 munmap(idx_map, idx_size);
103 return error("index file %s is version %"PRIu32
104 " and is not supported by this binary"
105 " (try upgrading GIT to a newer version)",
106 path, version);
107 }
108 } else
109 version = 1;
110
111 nr = 0;
112 index = idx_map;
113 if (version > 1)
114 index += 2; /* skip index header */
115 for (i = 0; i < 256; i++) {
116 uint32_t n = ntohl(index[i]);
117 if (n < nr) {
118 munmap(idx_map, idx_size);
119 return error("non-monotonic index %s", path);
120 }
121 nr = n;
122 }
123
124 if (version == 1) {
125 /*
126 * Total size:
127 * - 256 index entries 4 bytes each
128 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
129 * - 20-byte SHA1 of the packfile
130 * - 20-byte SHA1 file checksum
131 */
132 if (idx_size != 4*256 + nr * 24 + 20 + 20) {
133 munmap(idx_map, idx_size);
134 return error("wrong index v1 file size in %s", path);
135 }
136 } else if (version == 2) {
137 /*
138 * Minimum size:
139 * - 8 bytes of header
140 * - 256 index entries 4 bytes each
141 * - 20-byte sha1 entry * nr
142 * - 4-byte crc entry * nr
143 * - 4-byte offset entry * nr
144 * - 20-byte SHA1 of the packfile
145 * - 20-byte SHA1 file checksum
146 * And after the 4-byte offset table might be a
147 * variable sized table containing 8-byte entries
148 * for offsets larger than 2^31.
149 */
150 unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
151 unsigned long max_size = min_size;
152 if (nr)
153 max_size += (nr - 1)*8;
154 if (idx_size < min_size || idx_size > max_size) {
155 munmap(idx_map, idx_size);
156 return error("wrong index v2 file size in %s", path);
157 }
158 if (idx_size != min_size &&
159 /*
160 * make sure we can deal with large pack offsets.
161 * 31-bit signed offset won't be enough, neither
162 * 32-bit unsigned one will be.
163 */
164 (sizeof(off_t) <= 4)) {
165 munmap(idx_map, idx_size);
166 return error("pack too large for current definition of off_t in %s", path);
167 }
168 }
169
170 p->index_version = version;
171 p->index_data = idx_map;
172 p->index_size = idx_size;
173 p->num_objects = nr;
174 return 0;
175}
176
177int open_pack_index(struct packed_git *p)
178{
179 char *idx_name;
180 size_t len;
181 int ret;
182
183 if (p->index_data)
184 return 0;
185
186 if (!strip_suffix(p->pack_name, ".pack", &len))
187 die("BUG: pack_name does not end in .pack");
188 idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name);
189 ret = check_packed_git_idx(idx_name, p);
190 free(idx_name);
191 return ret;
192}
193
194static struct packed_git *alloc_packed_git(int extra)
195{
196 struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
197 memset(p, 0, sizeof(*p));
198 p->pack_fd = -1;
199 return p;
200}
201
202struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
203{
204 const char *path = sha1_pack_name(sha1);
205 size_t alloc = st_add(strlen(path), 1);
206 struct packed_git *p = alloc_packed_git(alloc);
207
208 memcpy(p->pack_name, path, alloc); /* includes NUL */
209 hashcpy(p->sha1, sha1);
210 if (check_packed_git_idx(idx_path, p)) {
211 free(p);
212 return NULL;
213 }
214
215 return p;
216}
f0e17e86
JT
217
218static void scan_windows(struct packed_git *p,
219 struct packed_git **lru_p,
220 struct pack_window **lru_w,
221 struct pack_window **lru_l)
222{
223 struct pack_window *w, *w_l;
224
225 for (w_l = NULL, w = p->windows; w; w = w->next) {
226 if (!w->inuse_cnt) {
227 if (!*lru_w || w->last_used < (*lru_w)->last_used) {
228 *lru_p = p;
229 *lru_w = w;
230 *lru_l = w_l;
231 }
232 }
233 w_l = w;
234 }
235}
236
84f80ad5 237static int unuse_one_window(struct packed_git *current)
f0e17e86
JT
238{
239 struct packed_git *p, *lru_p = NULL;
240 struct pack_window *lru_w = NULL, *lru_l = NULL;
241
242 if (current)
243 scan_windows(current, &lru_p, &lru_w, &lru_l);
244 for (p = packed_git; p; p = p->next)
245 scan_windows(p, &lru_p, &lru_w, &lru_l);
246 if (lru_p) {
247 munmap(lru_w->base, lru_w->len);
248 pack_mapped -= lru_w->len;
249 if (lru_l)
250 lru_l->next = lru_w->next;
251 else
252 lru_p->windows = lru_w->next;
253 free(lru_w);
254 pack_open_windows--;
255 return 1;
256 }
257 return 0;
258}
259
260void release_pack_memory(size_t need)
261{
262 size_t cur = pack_mapped;
263 while (need >= (cur - pack_mapped) && unuse_one_window(NULL))
264 ; /* nothing */
265}
3836d88a
JT
266
267void close_pack_windows(struct packed_git *p)
268{
269 while (p->windows) {
270 struct pack_window *w = p->windows;
271
272 if (w->inuse_cnt)
273 die("pack '%s' still has open windows to it",
274 p->pack_name);
275 munmap(w->base, w->len);
276 pack_mapped -= w->len;
277 pack_open_windows--;
278 p->windows = w->next;
279 free(w);
280 }
281}
282
84f80ad5 283static int close_pack_fd(struct packed_git *p)
3836d88a
JT
284{
285 if (p->pack_fd < 0)
286 return 0;
287
288 close(p->pack_fd);
289 pack_open_fds--;
290 p->pack_fd = -1;
291
292 return 1;
293}
294
295void close_pack_index(struct packed_git *p)
296{
297 if (p->index_data) {
298 munmap((void *)p->index_data, p->index_size);
299 p->index_data = NULL;
300 }
301}
302
/*
 * Release everything held open for p: its mapped windows, its file
 * descriptor and its mapped index, in that order.
 */
static void close_pack(struct packed_git *p)
{
	close_pack_windows(p);	/* dies if a window is still in use */
	close_pack_fd(p);
	close_pack_index(p);
}
309
310void close_all_packs(void)
311{
312 struct packed_git *p;
313
314 for (p = packed_git; p; p = p->next)
315 if (p->do_not_close)
316 die("BUG: want to close pack marked 'do-not-close'");
317 else
318 close_pack(p);
319}
84f80ad5
JT
320
321/*
322 * The LRU pack is the one with the oldest MRU window, preferring packs
323 * with no used windows, or the oldest mtime if it has no windows allocated.
324 */
325static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struct pack_window **mru_w, int *accept_windows_inuse)
326{
327 struct pack_window *w, *this_mru_w;
328 int has_windows_inuse = 0;
329
330 /*
331 * Reject this pack if it has windows and the previously selected
332 * one does not. If this pack does not have windows, reject
333 * it if the pack file is newer than the previously selected one.
334 */
335 if (*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime))
336 return;
337
338 for (w = this_mru_w = p->windows; w; w = w->next) {
339 /*
340 * Reject this pack if any of its windows are in use,
341 * but the previously selected pack did not have any
342 * inuse windows. Otherwise, record that this pack
343 * has windows in use.
344 */
345 if (w->inuse_cnt) {
346 if (*accept_windows_inuse)
347 has_windows_inuse = 1;
348 else
349 return;
350 }
351
352 if (w->last_used > this_mru_w->last_used)
353 this_mru_w = w;
354
355 /*
356 * Reject this pack if it has windows that have been
357 * used more recently than the previously selected pack.
358 * If the previously selected pack had windows inuse and
359 * we have not encountered a window in this pack that is
360 * inuse, skip this check since we prefer a pack with no
361 * inuse windows to one that has inuse windows.
362 */
363 if (*mru_w && *accept_windows_inuse == has_windows_inuse &&
364 this_mru_w->last_used > (*mru_w)->last_used)
365 return;
366 }
367
368 /*
369 * Select this pack.
370 */
371 *mru_w = this_mru_w;
372 *lru_p = p;
373 *accept_windows_inuse = has_windows_inuse;
374}
375
376static int close_one_pack(void)
377{
378 struct packed_git *p, *lru_p = NULL;
379 struct pack_window *mru_w = NULL;
380 int accept_windows_inuse = 1;
381
382 for (p = packed_git; p; p = p->next) {
383 if (p->pack_fd == -1)
384 continue;
385 find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
386 }
387
388 if (lru_p)
389 return close_pack_fd(lru_p);
390
391 return 0;
392}
393
/*
 * Return the limit on open file descriptors, trying in turn
 * getrlimit(RLIMIT_NOFILE), sysconf(_SC_OPEN_MAX) and the OPEN_MAX
 * constant, each only where the platform defines it.  Falls back to 1
 * when nothing is available.
 */
static unsigned int get_max_fd_limit(void)
{
#ifdef RLIMIT_NOFILE
	{
		struct rlimit nofile;

		if (getrlimit(RLIMIT_NOFILE, &nofile) == 0)
			return nofile.rlim_cur;
	}
#endif

#ifdef _SC_OPEN_MAX
	{
		long from_sysconf = sysconf(_SC_OPEN_MAX);

		if (from_sysconf > 0)
			return from_sysconf;
		/*
		 * A result of -1 means one of two things:
		 *
		 * (1) sysconf() does not know _SC_OPEN_MAX and is
		 *     reporting an error; or
		 * (2) sysconf() is saying there is no limit.
		 *
		 * Clearing errno before the call would let us tell the
		 * two apart and hand back a huge number in case (2) for
		 * the caller to cap, but simply falling through to the
		 * OPEN_MAX code below handles both and is much simpler.
		 */
	}
#endif

#ifdef OPEN_MAX
	return OPEN_MAX;
#else
	return 1; /* see the caller ;-) */
#endif
}
434
435/*
436 * Do not call this directly as this leaks p->pack_fd on error return;
437 * call open_packed_git() instead.
438 */
439static int open_packed_git_1(struct packed_git *p)
440{
441 struct stat st;
442 struct pack_header hdr;
443 unsigned char sha1[20];
444 unsigned char *idx_sha1;
445 long fd_flag;
446
447 if (!p->index_data && open_pack_index(p))
448 return error("packfile %s index unavailable", p->pack_name);
449
450 if (!pack_max_fds) {
451 unsigned int max_fds = get_max_fd_limit();
452
453 /* Save 3 for stdin/stdout/stderr, 22 for work */
454 if (25 < max_fds)
455 pack_max_fds = max_fds - 25;
456 else
457 pack_max_fds = 1;
458 }
459
460 while (pack_max_fds <= pack_open_fds && close_one_pack())
461 ; /* nothing */
462
463 p->pack_fd = git_open(p->pack_name);
464 if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
465 return -1;
466 pack_open_fds++;
467
468 /* If we created the struct before we had the pack we lack size. */
469 if (!p->pack_size) {
470 if (!S_ISREG(st.st_mode))
471 return error("packfile %s not a regular file", p->pack_name);
472 p->pack_size = st.st_size;
473 } else if (p->pack_size != st.st_size)
474 return error("packfile %s size changed", p->pack_name);
475
476 /* We leave these file descriptors open with sliding mmap;
477 * there is no point keeping them open across exec(), though.
478 */
479 fd_flag = fcntl(p->pack_fd, F_GETFD, 0);
480 if (fd_flag < 0)
481 return error("cannot determine file descriptor flags");
482 fd_flag |= FD_CLOEXEC;
483 if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
484 return error("cannot set FD_CLOEXEC");
485
486 /* Verify we recognize this pack file format. */
487 if (read_in_full(p->pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
488 return error("file %s is far too short to be a packfile", p->pack_name);
489 if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
490 return error("file %s is not a GIT packfile", p->pack_name);
491 if (!pack_version_ok(hdr.hdr_version))
492 return error("packfile %s is version %"PRIu32" and not"
493 " supported (try upgrading GIT to a newer version)",
494 p->pack_name, ntohl(hdr.hdr_version));
495
496 /* Verify the pack matches its index. */
497 if (p->num_objects != ntohl(hdr.hdr_entries))
498 return error("packfile %s claims to have %"PRIu32" objects"
499 " while index indicates %"PRIu32" objects",
500 p->pack_name, ntohl(hdr.hdr_entries),
501 p->num_objects);
502 if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1)
503 return error("end of packfile %s is unavailable", p->pack_name);
504 if (read_in_full(p->pack_fd, sha1, sizeof(sha1)) != sizeof(sha1))
505 return error("packfile %s signature is unavailable", p->pack_name);
506 idx_sha1 = ((unsigned char *)p->index_data) + p->index_size - 40;
507 if (hashcmp(sha1, idx_sha1))
508 return error("packfile %s does not match index", p->pack_name);
509 return 0;
510}
511
/*
 * Open and verify p's pack file.  Returns 0 on success; on failure any
 * descriptor left open by open_packed_git_1() is closed and -1 is
 * returned.
 */
int open_packed_git(struct packed_git *p)
{
	if (open_packed_git_1(p)) {
		close_pack_fd(p);
		return -1;
	}
	return 0;
}
519
520static int in_window(struct pack_window *win, off_t offset)
521{
522 /* We must promise at least 20 bytes (one hash) after the
523 * offset is available from this window, otherwise the offset
524 * is not actually in this window and a different window (which
525 * has that one hash excess) must be used. This is to support
526 * the object header and delta base parsing routines below.
527 */
528 off_t win_off = win->offset;
529 return win_off <= offset
530 && (offset + 20) <= (win_off + win->len);
531}
532
533unsigned char *use_pack(struct packed_git *p,
534 struct pack_window **w_cursor,
535 off_t offset,
536 unsigned long *left)
537{
538 struct pack_window *win = *w_cursor;
539
540 /* Since packfiles end in a hash of their content and it's
541 * pointless to ask for an offset into the middle of that
542 * hash, and the in_window function above wouldn't match
543 * don't allow an offset too close to the end of the file.
544 */
545 if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
546 die("packfile %s cannot be accessed", p->pack_name);
547 if (offset > (p->pack_size - 20))
548 die("offset beyond end of packfile (truncated pack?)");
549 if (offset < 0)
550 die(_("offset before end of packfile (broken .idx?)"));
551
552 if (!win || !in_window(win, offset)) {
553 if (win)
554 win->inuse_cnt--;
555 for (win = p->windows; win; win = win->next) {
556 if (in_window(win, offset))
557 break;
558 }
559 if (!win) {
560 size_t window_align = packed_git_window_size / 2;
561 off_t len;
562
563 if (p->pack_fd == -1 && open_packed_git(p))
564 die("packfile %s cannot be accessed", p->pack_name);
565
566 win = xcalloc(1, sizeof(*win));
567 win->offset = (offset / window_align) * window_align;
568 len = p->pack_size - win->offset;
569 if (len > packed_git_window_size)
570 len = packed_git_window_size;
571 win->len = (size_t)len;
572 pack_mapped += win->len;
573 while (packed_git_limit < pack_mapped
574 && unuse_one_window(p))
575 ; /* nothing */
576 win->base = xmmap(NULL, win->len,
577 PROT_READ, MAP_PRIVATE,
578 p->pack_fd, win->offset);
579 if (win->base == MAP_FAILED)
580 die_errno("packfile %s cannot be mapped",
581 p->pack_name);
582 if (!win->offset && win->len == p->pack_size
583 && !p->do_not_close)
584 close_pack_fd(p);
585 pack_mmap_calls++;
586 pack_open_windows++;
587 if (pack_mapped > peak_pack_mapped)
588 peak_pack_mapped = pack_mapped;
589 if (pack_open_windows > peak_pack_open_windows)
590 peak_pack_open_windows = pack_open_windows;
591 win->next = p->windows;
592 p->windows = win;
593 }
594 }
595 if (win != *w_cursor) {
596 win->last_used = pack_used_ctr++;
597 win->inuse_cnt++;
598 *w_cursor = win;
599 }
600 offset -= win->offset;
601 if (left)
602 *left = win->len - xsize_t(offset);
603 return win->base + offset;
604}
97de1803
JT
605
606void unuse_pack(struct pack_window **w_cursor)
607{
608 struct pack_window *w = *w_cursor;
609 if (w) {
610 w->inuse_cnt--;
611 *w_cursor = NULL;
612 }
613}
9a428653
JT
614
/*
 * Callback registered through set_try_to_free_routine() by
 * add_packed_git(); forwards the request to release_pack_memory().
 */
static void try_to_free_pack_memory(size_t size)
{
	release_pack_memory(size);
}
619
620struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
621{
622 static int have_set_try_to_free_routine;
623 struct stat st;
624 size_t alloc;
625 struct packed_git *p;
626
627 if (!have_set_try_to_free_routine) {
628 have_set_try_to_free_routine = 1;
629 set_try_to_free_routine(try_to_free_pack_memory);
630 }
631
632 /*
633 * Make sure a corresponding .pack file exists and that
634 * the index looks sane.
635 */
636 if (!strip_suffix_mem(path, &path_len, ".idx"))
637 return NULL;
638
639 /*
640 * ".pack" is long enough to hold any suffix we're adding (and
641 * the use xsnprintf double-checks that)
642 */
643 alloc = st_add3(path_len, strlen(".pack"), 1);
644 p = alloc_packed_git(alloc);
645 memcpy(p->pack_name, path, path_len);
646
647 xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
648 if (!access(p->pack_name, F_OK))
649 p->pack_keep = 1;
650
651 xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
652 if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
653 free(p);
654 return NULL;
655 }
656
657 /* ok, it looks sane as far as we can check without
658 * actually mapping the pack file.
659 */
660 p->pack_size = st.st_size;
661 p->pack_local = local;
662 p->mtime = st.st_mtime;
663 if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1))
664 hashclr(p->sha1);
665 return p;
666}
e65f1862
JT
667
668void install_packed_git(struct packed_git *pack)
669{
670 if (pack->pack_fd != -1)
671 pack_open_fds++;
672
673 pack->next = packed_git;
674 packed_git = pack;
675}
0abe14f6
JT
676
677void (*report_garbage)(unsigned seen_bits, const char *path);
678
679static void report_helper(const struct string_list *list,
680 int seen_bits, int first, int last)
681{
682 if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
683 return;
684
685 for (; first < last; first++)
686 report_garbage(seen_bits, list->items[first].string);
687}
688
689static void report_pack_garbage(struct string_list *list)
690{
691 int i, baselen = -1, first = 0, seen_bits = 0;
692
693 if (!report_garbage)
694 return;
695
696 string_list_sort(list);
697
698 for (i = 0; i < list->nr; i++) {
699 const char *path = list->items[i].string;
700 if (baselen != -1 &&
701 strncmp(path, list->items[first].string, baselen)) {
702 report_helper(list, seen_bits, first, i);
703 baselen = -1;
704 seen_bits = 0;
705 }
706 if (baselen == -1) {
707 const char *dot = strrchr(path, '.');
708 if (!dot) {
709 report_garbage(PACKDIR_FILE_GARBAGE, path);
710 continue;
711 }
712 baselen = dot - path + 1;
713 first = i;
714 }
715 if (!strcmp(path + baselen, "pack"))
716 seen_bits |= 1;
717 else if (!strcmp(path + baselen, "idx"))
718 seen_bits |= 2;
719 }
720 report_helper(list, seen_bits, first, list->nr);
721}
722
723static void prepare_packed_git_one(char *objdir, int local)
724{
725 struct strbuf path = STRBUF_INIT;
726 size_t dirnamelen;
727 DIR *dir;
728 struct dirent *de;
729 struct string_list garbage = STRING_LIST_INIT_DUP;
730
731 strbuf_addstr(&path, objdir);
732 strbuf_addstr(&path, "/pack");
733 dir = opendir(path.buf);
734 if (!dir) {
735 if (errno != ENOENT)
736 error_errno("unable to open object pack directory: %s",
737 path.buf);
738 strbuf_release(&path);
739 return;
740 }
741 strbuf_addch(&path, '/');
742 dirnamelen = path.len;
743 while ((de = readdir(dir)) != NULL) {
744 struct packed_git *p;
745 size_t base_len;
746
747 if (is_dot_or_dotdot(de->d_name))
748 continue;
749
750 strbuf_setlen(&path, dirnamelen);
751 strbuf_addstr(&path, de->d_name);
752
753 base_len = path.len;
754 if (strip_suffix_mem(path.buf, &base_len, ".idx")) {
755 /* Don't reopen a pack we already have. */
756 for (p = packed_git; p; p = p->next) {
757 size_t len;
758 if (strip_suffix(p->pack_name, ".pack", &len) &&
759 len == base_len &&
760 !memcmp(p->pack_name, path.buf, len))
761 break;
762 }
763 if (p == NULL &&
764 /*
765 * See if it really is a valid .idx file with
766 * corresponding .pack file that we can map.
767 */
768 (p = add_packed_git(path.buf, path.len, local)) != NULL)
769 install_packed_git(p);
770 }
771
772 if (!report_garbage)
773 continue;
774
775 if (ends_with(de->d_name, ".idx") ||
776 ends_with(de->d_name, ".pack") ||
777 ends_with(de->d_name, ".bitmap") ||
778 ends_with(de->d_name, ".keep"))
779 string_list_append(&garbage, path.buf);
780 else
781 report_garbage(PACKDIR_FILE_GARBAGE, path.buf);
782 }
783 closedir(dir);
784 report_pack_garbage(&garbage);
785 string_list_clear(&garbage, 0);
786 strbuf_release(&path);
787}
788
789static int approximate_object_count_valid;
790
791/*
792 * Give a fast, rough count of the number of objects in the repository. This
793 * ignores loose objects completely. If you have a lot of them, then either
794 * you should repack because your performance will be awful, or they are
795 * all unreachable objects about to be pruned, in which case they're not really
796 * interesting as a measure of repo size in the first place.
797 */
798unsigned long approximate_object_count(void)
799{
800 static unsigned long count;
801 if (!approximate_object_count_valid) {
802 struct packed_git *p;
803
804 prepare_packed_git();
805 count = 0;
806 for (p = packed_git; p; p = p->next) {
807 if (open_pack_index(p))
808 continue;
809 count += p->num_objects;
810 }
811 }
812 return count;
813}
814
815static void *get_next_packed_git(const void *p)
816{
817 return ((const struct packed_git *)p)->next;
818}
819
820static void set_next_packed_git(void *p, void *next)
821{
822 ((struct packed_git *)p)->next = next;
823}
824
825static int sort_pack(const void *a_, const void *b_)
826{
827 const struct packed_git *a = a_;
828 const struct packed_git *b = b_;
829 int st;
830
831 /*
832 * Local packs tend to contain objects specific to our
833 * variant of the project than remote ones. In addition,
834 * remote ones could be on a network mounted filesystem.
835 * Favor local ones for these reasons.
836 */
837 st = a->pack_local - b->pack_local;
838 if (st)
839 return -st;
840
841 /*
842 * Younger packs tend to contain more recent objects,
843 * and more recent objects tend to get accessed more
844 * often.
845 */
846 if (a->mtime < b->mtime)
847 return 1;
848 else if (a->mtime == b->mtime)
849 return 0;
850 return -1;
851}
852
853static void rearrange_packed_git(void)
854{
855 packed_git = llist_mergesort(packed_git, get_next_packed_git,
856 set_next_packed_git, sort_pack);
857}
858
859static void prepare_packed_git_mru(void)
860{
861 struct packed_git *p;
862
863 mru_clear(packed_git_mru);
864 for (p = packed_git; p; p = p->next)
865 mru_append(packed_git_mru, p);
866}
867
868static int prepare_packed_git_run_once = 0;
869void prepare_packed_git(void)
870{
871 struct alternate_object_database *alt;
872
873 if (prepare_packed_git_run_once)
874 return;
875 prepare_packed_git_one(get_object_directory(), 1);
876 prepare_alt_odb();
877 for (alt = alt_odb_list; alt; alt = alt->next)
878 prepare_packed_git_one(alt->path, 0);
879 rearrange_packed_git();
880 prepare_packed_git_mru();
881 prepare_packed_git_run_once = 1;
882}
883
884void reprepare_packed_git(void)
885{
886 approximate_object_count_valid = 0;
887 prepare_packed_git_run_once = 0;
888 prepare_packed_git();
889}
32b42e15
JT
890
891unsigned long unpack_object_header_buffer(const unsigned char *buf,
892 unsigned long len, enum object_type *type, unsigned long *sizep)
893{
894 unsigned shift;
895 unsigned long size, c;
896 unsigned long used = 0;
897
898 c = buf[used++];
899 *type = (c >> 4) & 7;
900 size = c & 15;
901 shift = 4;
902 while (c & 0x80) {
903 if (len <= used || bitsizeof(long) <= shift) {
904 error("bad object header");
905 size = used = 0;
906 break;
907 }
908 c = buf[used++];
909 size += (c & 0x7f) << shift;
910 shift += 7;
911 }
912 *sizep = size;
913 return used;
914}
7b3aa75d
JT
915
916unsigned long get_size_from_delta(struct packed_git *p,
917 struct pack_window **w_curs,
918 off_t curpos)
919{
920 const unsigned char *data;
921 unsigned char delta_head[20], *in;
922 git_zstream stream;
923 int st;
924
925 memset(&stream, 0, sizeof(stream));
926 stream.next_out = delta_head;
927 stream.avail_out = sizeof(delta_head);
928
929 git_inflate_init(&stream);
930 do {
931 in = use_pack(p, w_curs, curpos, &stream.avail_in);
932 stream.next_in = in;
933 st = git_inflate(&stream, Z_FINISH);
934 curpos += stream.next_in - in;
935 } while ((st == Z_OK || st == Z_BUF_ERROR) &&
936 stream.total_out < sizeof(delta_head));
937 git_inflate_end(&stream);
938 if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
939 error("delta data unpack-initial failed");
940 return 0;
941 }
942
943 /* Examine the initial part of the delta to figure out
944 * the result size.
945 */
946 data = delta_head;
947
948 /* ignore base size */
949 get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
950
951 /* Read the result size */
952 return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
953}
3588dd6e
JT
954
955int unpack_object_header(struct packed_git *p,
956 struct pack_window **w_curs,
957 off_t *curpos,
958 unsigned long *sizep)
959{
960 unsigned char *base;
961 unsigned long left;
962 unsigned long used;
963 enum object_type type;
964
965 /* use_pack() assures us we have [base, base + 20) available
966 * as a range that we can look at. (Its actually the hash
967 * size that is assured.) With our object header encoding
968 * the maximum deflated object size is 2^137, which is just
969 * insane, so we know won't exceed what we have been given.
970 */
971 base = use_pack(p, w_curs, *curpos, &left);
972 used = unpack_object_header_buffer(base, left, &type, sizep);
973 if (!used) {
974 type = OBJ_BAD;
975 } else
976 *curpos += used;
977
978 return type;
979}
f1d8130b
JT
980
981void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1)
982{
983 unsigned i;
984 for (i = 0; i < p->num_bad_objects; i++)
985 if (!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i))
986 return;
987 p->bad_object_sha1 = xrealloc(p->bad_object_sha1,
988 st_mult(GIT_MAX_RAWSZ,
989 st_add(p->num_bad_objects, 1)));
990 hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);
991 p->num_bad_objects++;
992}
993
994const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
995{
996 struct packed_git *p;
997 unsigned i;
998
999 for (p = packed_git; p; p = p->next)
1000 for (i = 0; i < p->num_bad_objects; i++)
1001 if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1002 return p;
1003 return NULL;
1004}
1005
1006static off_t get_delta_base(struct packed_git *p,
1007 struct pack_window **w_curs,
1008 off_t *curpos,
1009 enum object_type type,
1010 off_t delta_obj_offset)
1011{
1012 unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
1013 off_t base_offset;
1014
1015 /* use_pack() assured us we have [base_info, base_info + 20)
1016 * as a range that we can look at without walking off the
1017 * end of the mapped window. Its actually the hash size
1018 * that is assured. An OFS_DELTA longer than the hash size
1019 * is stupid, as then a REF_DELTA would be smaller to store.
1020 */
1021 if (type == OBJ_OFS_DELTA) {
1022 unsigned used = 0;
1023 unsigned char c = base_info[used++];
1024 base_offset = c & 127;
1025 while (c & 128) {
1026 base_offset += 1;
1027 if (!base_offset || MSB(base_offset, 7))
1028 return 0; /* overflow */
1029 c = base_info[used++];
1030 base_offset = (base_offset << 7) + (c & 127);
1031 }
1032 base_offset = delta_obj_offset - base_offset;
1033 if (base_offset <= 0 || base_offset >= delta_obj_offset)
1034 return 0; /* out of bound */
1035 *curpos += used;
1036 } else if (type == OBJ_REF_DELTA) {
1037 /* The base entry _must_ be in the same pack */
1038 base_offset = find_pack_entry_one(base_info, p);
1039 *curpos += 20;
1040 } else
1041 die("I am totally screwed");
1042 return base_offset;
1043}
1044
1045/*
1046 * Like get_delta_base above, but we return the sha1 instead of the pack
1047 * offset. This means it is cheaper for REF deltas (we do not have to do
1048 * the final object lookup), but more expensive for OFS deltas (we
1049 * have to load the revidx to convert the offset back into a sha1).
1050 */
1051static const unsigned char *get_delta_base_sha1(struct packed_git *p,
1052 struct pack_window **w_curs,
1053 off_t curpos,
1054 enum object_type type,
1055 off_t delta_obj_offset)
1056{
1057 if (type == OBJ_REF_DELTA) {
1058 unsigned char *base = use_pack(p, w_curs, curpos, NULL);
1059 return base;
1060 } else if (type == OBJ_OFS_DELTA) {
1061 struct revindex_entry *revidx;
1062 off_t base_offset = get_delta_base(p, w_curs, &curpos,
1063 type, delta_obj_offset);
1064
1065 if (!base_offset)
1066 return NULL;
1067
1068 revidx = find_pack_revindex(p, base_offset);
1069 if (!revidx)
1070 return NULL;
1071
1072 return nth_packed_object_sha1(p, revidx->nr);
1073 } else
1074 return NULL;
1075}
1076
1077static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
1078{
1079 int type;
1080 struct revindex_entry *revidx;
1081 const unsigned char *sha1;
1082 revidx = find_pack_revindex(p, obj_offset);
1083 if (!revidx)
1084 return OBJ_BAD;
1085 sha1 = nth_packed_object_sha1(p, revidx->nr);
1086 mark_bad_packed_object(p, sha1);
1087 type = sha1_object_info(sha1, NULL);
1088 if (type <= OBJ_NONE)
1089 return OBJ_BAD;
1090 return type;
1091}
1092
1093#define POI_STACK_PREALLOC 64
1094
1095static enum object_type packed_to_object_type(struct packed_git *p,
1096 off_t obj_offset,
1097 enum object_type type,
1098 struct pack_window **w_curs,
1099 off_t curpos)
1100{
1101 off_t small_poi_stack[POI_STACK_PREALLOC];
1102 off_t *poi_stack = small_poi_stack;
1103 int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
1104
1105 while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1106 off_t base_offset;
1107 unsigned long size;
1108 /* Push the object we're going to leave behind */
1109 if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
1110 poi_stack_alloc = alloc_nr(poi_stack_nr);
1111 ALLOC_ARRAY(poi_stack, poi_stack_alloc);
1112 memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr);
1113 } else {
1114 ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);
1115 }
1116 poi_stack[poi_stack_nr++] = obj_offset;
1117 /* If parsing the base offset fails, just unwind */
1118 base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
1119 if (!base_offset)
1120 goto unwind;
1121 curpos = obj_offset = base_offset;
1122 type = unpack_object_header(p, w_curs, &curpos, &size);
1123 if (type <= OBJ_NONE) {
1124 /* If getting the base itself fails, we first
1125 * retry the base, otherwise unwind */
1126 type = retry_bad_packed_offset(p, base_offset);
1127 if (type > OBJ_NONE)
1128 goto out;
1129 goto unwind;
1130 }
1131 }
1132
1133 switch (type) {
1134 case OBJ_BAD:
1135 case OBJ_COMMIT:
1136 case OBJ_TREE:
1137 case OBJ_BLOB:
1138 case OBJ_TAG:
1139 break;
1140 default:
1141 error("unknown object type %i at offset %"PRIuMAX" in %s",
1142 type, (uintmax_t)obj_offset, p->pack_name);
1143 type = OBJ_BAD;
1144 }
1145
1146out:
1147 if (poi_stack != small_poi_stack)
1148 free(poi_stack);
1149 return type;
1150
1151unwind:
1152 while (poi_stack_nr) {
1153 obj_offset = poi_stack[--poi_stack_nr];
1154 type = retry_bad_packed_offset(p, obj_offset);
1155 if (type > OBJ_NONE)
1156 goto out;
1157 }
1158 type = OBJ_BAD;
1159 goto out;
1160}
1161
1162static struct hashmap delta_base_cache;
1163static size_t delta_base_cached;
1164
1165static LIST_HEAD(delta_base_cache_lru);
1166
1167struct delta_base_cache_key {
1168 struct packed_git *p;
1169 off_t base_offset;
1170};
1171
1172struct delta_base_cache_entry {
1173 struct hashmap hash;
1174 struct delta_base_cache_key key;
1175 struct list_head lru;
1176 void *data;
1177 unsigned long size;
1178 enum object_type type;
1179};
1180
/*
 * Hash a (pack, offset) pair for the delta base cache: add the pack
 * pointer and the offset (both truncated to unsigned int), then fold
 * some of the higher bits back into the low ones.
 */
static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset)
{
	unsigned int mixed = (unsigned int)(intptr_t)p;

	mixed += (unsigned int)base_offset;
	return mixed + (mixed >> 8) + (mixed >> 16);
}
1189
1190static struct delta_base_cache_entry *
1191get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)
1192{
1193 struct hashmap_entry entry;
1194 struct delta_base_cache_key key;
1195
1196 if (!delta_base_cache.cmpfn)
1197 return NULL;
1198
1199 hashmap_entry_init(&entry, pack_entry_hash(p, base_offset));
1200 key.p = p;
1201 key.base_offset = base_offset;
1202 return hashmap_get(&delta_base_cache, &entry, &key);
1203}
1204
1205static int delta_base_cache_key_eq(const struct delta_base_cache_key *a,
1206 const struct delta_base_cache_key *b)
1207{
1208 return a->p == b->p && a->base_offset == b->base_offset;
1209}
1210
1211static int delta_base_cache_hash_cmp(const void *unused_cmp_data,
1212 const void *va, const void *vb,
1213 const void *vkey)
1214{
1215 const struct delta_base_cache_entry *a = va, *b = vb;
1216 const struct delta_base_cache_key *key = vkey;
1217 if (key)
1218 return !delta_base_cache_key_eq(&a->key, key);
1219 else
1220 return !delta_base_cache_key_eq(&a->key, &b->key);
1221}
1222
/* Returns 1 if the object at base_offset in pack p is cached, else 0. */
static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
{
	return get_delta_base_cache_entry(p, base_offset) != NULL;
}
1227
1228/*
1229 * Remove the entry from the cache, but do _not_ free the associated
1230 * entry data. The caller takes ownership of the "data" buffer, and
1231 * should copy out any fields it wants before detaching.
1232 */
1233static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
1234{
1235 hashmap_remove(&delta_base_cache, ent, &ent->key);
1236 list_del(&ent->lru);
1237 delta_base_cached -= ent->size;
1238 free(ent);
1239}
1240
1241static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
1242 unsigned long *base_size, enum object_type *type)
1243{
1244 struct delta_base_cache_entry *ent;
1245
1246 ent = get_delta_base_cache_entry(p, base_offset);
1247 if (!ent)
1248 return unpack_entry(p, base_offset, type, base_size);
1249
1250 if (type)
1251 *type = ent->type;
1252 if (base_size)
1253 *base_size = ent->size;
1254 return xmemdupz(ent->data, ent->size);
1255}
1256
1257static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)
1258{
1259 free(ent->data);
1260 detach_delta_base_cache_entry(ent);
1261}
1262
1263void clear_delta_base_cache(void)
1264{
1265 struct list_head *lru, *tmp;
1266 list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1267 struct delta_base_cache_entry *entry =
1268 list_entry(lru, struct delta_base_cache_entry, lru);
1269 release_delta_base_cache(entry);
1270 }
1271}
1272
1273static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
1274 void *base, unsigned long base_size, enum object_type type)
1275{
1276 struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
1277 struct list_head *lru, *tmp;
1278
1279 delta_base_cached += base_size;
1280
1281 list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1282 struct delta_base_cache_entry *f =
1283 list_entry(lru, struct delta_base_cache_entry, lru);
1284 if (delta_base_cached <= delta_base_cache_limit)
1285 break;
1286 release_delta_base_cache(f);
1287 }
1288
1289 ent->key.p = p;
1290 ent->key.base_offset = base_offset;
1291 ent->type = type;
1292 ent->data = base;
1293 ent->size = base_size;
1294 list_add_tail(&ent->lru, &delta_base_cache_lru);
1295
1296 if (!delta_base_cache.cmpfn)
1297 hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, NULL, 0);
1298 hashmap_entry_init(ent, pack_entry_hash(p, base_offset));
1299 hashmap_add(&delta_base_cache, ent);
1300}
1301
1302int packed_object_info(struct packed_git *p, off_t obj_offset,
1303 struct object_info *oi)
1304{
1305 struct pack_window *w_curs = NULL;
1306 unsigned long size;
1307 off_t curpos = obj_offset;
1308 enum object_type type;
1309
1310 /*
1311 * We always get the representation type, but only convert it to
1312 * a "real" type later if the caller is interested.
1313 */
1314 if (oi->contentp) {
1315 *oi->contentp = cache_or_unpack_entry(p, obj_offset, oi->sizep,
1316 &type);
1317 if (!*oi->contentp)
1318 type = OBJ_BAD;
1319 } else {
1320 type = unpack_object_header(p, &w_curs, &curpos, &size);
1321 }
1322
1323 if (!oi->contentp && oi->sizep) {
1324 if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1325 off_t tmp_pos = curpos;
1326 off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
1327 type, obj_offset);
1328 if (!base_offset) {
1329 type = OBJ_BAD;
1330 goto out;
1331 }
1332 *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
1333 if (*oi->sizep == 0) {
1334 type = OBJ_BAD;
1335 goto out;
1336 }
1337 } else {
1338 *oi->sizep = size;
1339 }
1340 }
1341
1342 if (oi->disk_sizep) {
1343 struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1344 *oi->disk_sizep = revidx[1].offset - obj_offset;
1345 }
1346
1347 if (oi->typep || oi->typename) {
1348 enum object_type ptot;
1349 ptot = packed_to_object_type(p, obj_offset, type, &w_curs,
1350 curpos);
1351 if (oi->typep)
1352 *oi->typep = ptot;
1353 if (oi->typename) {
1354 const char *tn = typename(ptot);
1355 if (tn)
1356 strbuf_addstr(oi->typename, tn);
1357 }
1358 if (ptot < 0) {
1359 type = OBJ_BAD;
1360 goto out;
1361 }
1362 }
1363
1364 if (oi->delta_base_sha1) {
1365 if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1366 const unsigned char *base;
1367
1368 base = get_delta_base_sha1(p, &w_curs, curpos,
1369 type, obj_offset);
1370 if (!base) {
1371 type = OBJ_BAD;
1372 goto out;
1373 }
1374
1375 hashcpy(oi->delta_base_sha1, base);
1376 } else
1377 hashclr(oi->delta_base_sha1);
1378 }
1379
1380 oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
1381 OI_PACKED;
1382
1383out:
1384 unuse_pack(&w_curs);
1385 return type;
1386}
1387
1388static void *unpack_compressed_entry(struct packed_git *p,
1389 struct pack_window **w_curs,
1390 off_t curpos,
1391 unsigned long size)
1392{
1393 int st;
1394 git_zstream stream;
1395 unsigned char *buffer, *in;
1396
1397 buffer = xmallocz_gently(size);
1398 if (!buffer)
1399 return NULL;
1400 memset(&stream, 0, sizeof(stream));
1401 stream.next_out = buffer;
1402 stream.avail_out = size + 1;
1403
1404 git_inflate_init(&stream);
1405 do {
1406 in = use_pack(p, w_curs, curpos, &stream.avail_in);
1407 stream.next_in = in;
1408 st = git_inflate(&stream, Z_FINISH);
1409 if (!stream.avail_out)
1410 break; /* the payload is larger than it should be */
1411 curpos += stream.next_in - in;
1412 } while (st == Z_OK || st == Z_BUF_ERROR);
1413 git_inflate_end(&stream);
1414 if ((st != Z_STREAM_END) || stream.total_out != size) {
1415 free(buffer);
1416 return NULL;
1417 }
1418
1419 return buffer;
1420}
1421
1422static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
1423{
1424 static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS);
1425 trace_printf_key(&pack_access, "%s %"PRIuMAX"\n",
1426 p->pack_name, (uintmax_t)obj_offset);
1427}
1428
/*
 * When non-zero (and the pack index version is greater than 1),
 * unpack_entry() checks each object's CRC before using it.
 */
int do_check_packed_object_crc;

/* Delta-chain depth unpack_entry() handles without a heap allocation. */
#define UNPACK_ENTRY_STACK_PREALLOC 64

/* One delta whose application is postponed until its base is available. */
struct unpack_entry_stack_ent {
	off_t obj_offset;	/* offset of the delta object */
	off_t curpos;		/* position saved once the base reference was parsed */
	unsigned long size;	/* size taken from the delta's object header */
};
1437
1438static void *read_object(const unsigned char *sha1, enum object_type *type,
1439 unsigned long *size)
1440{
1441 struct object_info oi = OBJECT_INFO_INIT;
1442 void *content;
1443 oi.typep = type;
1444 oi.sizep = size;
1445 oi.contentp = &content;
1446
1447 if (sha1_object_info_extended(sha1, &oi, 0) < 0)
1448 return NULL;
1449 return content;
1450}
1451
1452void *unpack_entry(struct packed_git *p, off_t obj_offset,
1453 enum object_type *final_type, unsigned long *final_size)
1454{
1455 struct pack_window *w_curs = NULL;
1456 off_t curpos = obj_offset;
1457 void *data = NULL;
1458 unsigned long size;
1459 enum object_type type;
1460 struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
1461 struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
1462 int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
1463 int base_from_cache = 0;
1464
1465 write_pack_access_log(p, obj_offset);
1466
1467 /* PHASE 1: drill down to the innermost base object */
1468 for (;;) {
1469 off_t base_offset;
1470 int i;
1471 struct delta_base_cache_entry *ent;
1472
1473 ent = get_delta_base_cache_entry(p, curpos);
1474 if (ent) {
1475 type = ent->type;
1476 data = ent->data;
1477 size = ent->size;
1478 detach_delta_base_cache_entry(ent);
1479 base_from_cache = 1;
1480 break;
1481 }
1482
1483 if (do_check_packed_object_crc && p->index_version > 1) {
1484 struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1485 off_t len = revidx[1].offset - obj_offset;
1486 if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
1487 const unsigned char *sha1 =
1488 nth_packed_object_sha1(p, revidx->nr);
1489 error("bad packed object CRC for %s",
1490 sha1_to_hex(sha1));
1491 mark_bad_packed_object(p, sha1);
1492 data = NULL;
1493 goto out;
1494 }
1495 }
1496
1497 type = unpack_object_header(p, &w_curs, &curpos, &size);
1498 if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
1499 break;
1500
1501 base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
1502 if (!base_offset) {
1503 error("failed to validate delta base reference "
1504 "at offset %"PRIuMAX" from %s",
1505 (uintmax_t)curpos, p->pack_name);
1506 /* bail to phase 2, in hopes of recovery */
1507 data = NULL;
1508 break;
1509 }
1510
1511 /* push object, proceed to base */
1512 if (delta_stack_nr >= delta_stack_alloc
1513 && delta_stack == small_delta_stack) {
1514 delta_stack_alloc = alloc_nr(delta_stack_nr);
1515 ALLOC_ARRAY(delta_stack, delta_stack_alloc);
1516 memcpy(delta_stack, small_delta_stack,
1517 sizeof(*delta_stack)*delta_stack_nr);
1518 } else {
1519 ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);
1520 }
1521 i = delta_stack_nr++;
1522 delta_stack[i].obj_offset = obj_offset;
1523 delta_stack[i].curpos = curpos;
1524 delta_stack[i].size = size;
1525
1526 curpos = obj_offset = base_offset;
1527 }
1528
1529 /* PHASE 2: handle the base */
1530 switch (type) {
1531 case OBJ_OFS_DELTA:
1532 case OBJ_REF_DELTA:
1533 if (data)
1534 die("BUG: unpack_entry: left loop at a valid delta");
1535 break;
1536 case OBJ_COMMIT:
1537 case OBJ_TREE:
1538 case OBJ_BLOB:
1539 case OBJ_TAG:
1540 if (!base_from_cache)
1541 data = unpack_compressed_entry(p, &w_curs, curpos, size);
1542 break;
1543 default:
1544 data = NULL;
1545 error("unknown object type %i at offset %"PRIuMAX" in %s",
1546 type, (uintmax_t)obj_offset, p->pack_name);
1547 }
1548
1549 /* PHASE 3: apply deltas in order */
1550
1551 /* invariants:
1552 * 'data' holds the base data, or NULL if there was corruption
1553 */
1554 while (delta_stack_nr) {
1555 void *delta_data;
1556 void *base = data;
1557 void *external_base = NULL;
1558 unsigned long delta_size, base_size = size;
1559 int i;
1560
1561 data = NULL;
1562
1563 if (base)
1564 add_delta_base_cache(p, obj_offset, base, base_size, type);
1565
1566 if (!base) {
1567 /*
1568 * We're probably in deep shit, but let's try to fetch
1569 * the required base anyway from another pack or loose.
1570 * This is costly but should happen only in the presence
1571 * of a corrupted pack, and is better than failing outright.
1572 */
1573 struct revindex_entry *revidx;
1574 const unsigned char *base_sha1;
1575 revidx = find_pack_revindex(p, obj_offset);
1576 if (revidx) {
1577 base_sha1 = nth_packed_object_sha1(p, revidx->nr);
1578 error("failed to read delta base object %s"
1579 " at offset %"PRIuMAX" from %s",
1580 sha1_to_hex(base_sha1), (uintmax_t)obj_offset,
1581 p->pack_name);
1582 mark_bad_packed_object(p, base_sha1);
1583 base = read_object(base_sha1, &type, &base_size);
1584 external_base = base;
1585 }
1586 }
1587
1588 i = --delta_stack_nr;
1589 obj_offset = delta_stack[i].obj_offset;
1590 curpos = delta_stack[i].curpos;
1591 delta_size = delta_stack[i].size;
1592
1593 if (!base)
1594 continue;
1595
1596 delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size);
1597
1598 if (!delta_data) {
1599 error("failed to unpack compressed delta "
1600 "at offset %"PRIuMAX" from %s",
1601 (uintmax_t)curpos, p->pack_name);
1602 data = NULL;
1603 free(external_base);
1604 continue;
1605 }
1606
1607 data = patch_delta(base, base_size,
1608 delta_data, delta_size,
1609 &size);
1610
1611 /*
1612 * We could not apply the delta; warn the user, but keep going.
1613 * Our failure will be noticed either in the next iteration of
1614 * the loop, or if this is the final delta, in the caller when
1615 * we return NULL. Those code paths will take care of making
1616 * a more explicit warning and retrying with another copy of
1617 * the object.
1618 */
1619 if (!data)
1620 error("failed to apply delta");
1621
1622 free(delta_data);
1623 free(external_base);
1624 }
1625
1626 if (final_type)
1627 *final_type = type;
1628 if (final_size)
1629 *final_size = size;
1630
1631out:
1632 unuse_pack(&w_curs);
1633
1634 if (delta_stack != small_delta_stack)
1635 free(delta_stack);
1636
1637 return data;
1638}