[PATCH] Use enhanced diff_delta() in the similarity estimator.
[git/git.git] / unpack-objects.c
CommitLineData
bad50dc8 1#include "cache.h"
74536958 2#include "object.h"
8ee378a0 3#include "delta.h"
a733cb60 4#include "pack.h"
bad50dc8 5
/* Set by the "-n" option: list the pack contents without writing objects. */
static int dry_run;

/* Number of records in the index file; filled in by check_index(). */
static int nr_entries;

/* Path prefix shared by the "<base>.idx" and "<base>.pack" files. */
static const char *base_name;
static const char unpack_usage[] = "git-unpack-objects basename";

/* One record of the index file: a pack offset followed by the object name. */
struct pack_entry {
	unsigned int offset; /* network byte order */
	unsigned char sha1[20];
};

/* Read-only mappings of the pack file and the index file, with their sizes. */
static void *pack_base;
static unsigned long pack_size;
static void *index_base;
static unsigned long index_size;

/* Pointers to the index records, sorted by pack offset in check_index(). */
static struct pack_entry **pack_list;
22
23static void *map_file(const char *suffix, unsigned long *sizep)
24{
25 static char pathname[PATH_MAX];
26 unsigned long len;
27 int fd;
28 struct stat st;
29 void *map;
30
31 len = snprintf(pathname, PATH_MAX, "%s.%s", base_name, suffix);
32 if (len >= PATH_MAX)
33 die("bad pack base-name");
34 fd = open(pathname, O_RDONLY);
35 if (fd < 0 || fstat(fd, &st))
36 die("unable to open '%s'", pathname);
37 len = st.st_size;
38 if (!len)
39 die("bad pack file '%s'", pathname);
40 map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
41 if (-1 == (int)(long)map)
42 die("unable to mmap '%s'", pathname);
43 close(fd);
44 *sizep = len;
45 return map;
46}
47
48static int sort_by_offset(const void *_a, const void *_b)
49{
50 struct pack_entry *a = *(struct pack_entry **)_a;
51 struct pack_entry *b = *(struct pack_entry **)_b;
52 unsigned int o1, o2;
53
54 o1 = ntohl(a->offset);
55 o2 = ntohl(b->offset);
56 return o1 < o2 ? -1 : 1;
57}
58
8ee378a0 59static int check_index(void)
bad50dc8 60{
8ee378a0 61 unsigned int *array = index_base;
bad50dc8
LT
62 unsigned int nr;
63 int i;
64
c38138cd 65 if (index_size < 4*256 + 20)
bad50dc8
LT
66 return error("index file too small");
67 nr = 0;
68 for (i = 0; i < 256; i++) {
69 unsigned int n = ntohl(array[i]);
70 if (n < nr)
71 return error("non-monotonic index");
72 nr = n;
73 }
c38138cd
LT
74 /*
75 * Total size:
76 * - 256 index entries 4 bytes each
77 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
e1808845 78 * - 20-byte SHA1 of the packfile
c38138cd
LT
79 * - 20-byte SHA1 file checksum
80 */
e1808845 81 if (index_size != 4*256 + nr * 24 + 20 + 20)
bad50dc8 82 return error("wrong index file size");
bad50dc8
LT
83
84 nr_entries = nr;
85 pack_list = xmalloc(nr * sizeof(struct pack_entry *));
86 for (i = 0; i < nr; i++)
8ee378a0 87 pack_list[i] = index_base + 4*256 + i*24;
bad50dc8
LT
88
89 qsort(pack_list, nr, sizeof(*pack_list), sort_by_offset);
90
91 printf("%d entries\n", nr);
92 return 0;
93}
94
8ee378a0 95static int unpack_non_delta_entry(struct pack_entry *entry,
a733cb60 96 enum object_type kind,
c4fb06c0
LT
97 unsigned char *data,
98 unsigned long size,
99 unsigned long left)
74536958 100{
c4fb06c0 101 int st;
8ee378a0
JH
102 z_stream stream;
103 char *buffer;
104 unsigned char sha1[20];
a733cb60 105 char *type;
8ee378a0 106
a733cb60 107 printf("%s %c %lu\n", sha1_to_hex(entry->sha1), ".CTBGD"[kind], size);
c4fb06c0
LT
108 if (dry_run)
109 return 0;
8ee378a0
JH
110
111 buffer = xmalloc(size + 1);
112 buffer[size] = 0;
113 memset(&stream, 0, sizeof(stream));
c4fb06c0
LT
114 stream.next_in = data;
115 stream.avail_in = left;
8ee378a0
JH
116 stream.next_out = buffer;
117 stream.avail_out = size;
118
119 inflateInit(&stream);
120 st = inflate(&stream, Z_FINISH);
121 inflateEnd(&stream);
122 if ((st != Z_STREAM_END) || stream.total_out != size)
123 goto err_finish;
124 switch (kind) {
a733cb60
LT
125 case OBJ_COMMIT: type = "commit"; break;
126 case OBJ_TREE: type = "tree"; break;
127 case OBJ_BLOB: type = "blob"; break;
128 case OBJ_TAG: type = "tag"; break;
8ee378a0
JH
129 default: goto err_finish;
130 }
a733cb60 131 if (write_sha1_file(buffer, size, type, sha1) < 0)
8ee378a0 132 die("failed to write %s (%s)",
a733cb60
LT
133 sha1_to_hex(entry->sha1), type);
134 printf("%s %s\n", sha1_to_hex(sha1), type);
8ee378a0 135 if (memcmp(sha1, entry->sha1, 20))
a733cb60 136 die("resulting %s have wrong SHA1", type);
8ee378a0
JH
137
138 finish:
139 st = 0;
140 free(buffer);
141 return st;
142 err_finish:
143 st = -1;
144 goto finish;
145}
146
147static int find_pack_entry(unsigned char *sha1, struct pack_entry **ent)
148{
149 int *level1_ofs = index_base;
150 int hi = ntohl(level1_ofs[*sha1]);
151 int lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
152 void *index = index_base + 4*256;
153
154 do {
155 int mi = (lo + hi) / 2;
156 int cmp = memcmp(index + 24 * mi + 4, sha1, 20);
157 if (!cmp) {
158 *ent = index + 24 * mi;
159 return 1;
160 }
c4fb06c0 161 if (cmp > 0)
8ee378a0
JH
162 hi = mi;
163 else
c4fb06c0 164 lo = mi+1;
8ee378a0
JH
165 } while (lo < hi);
166 return 0;
167}
168
169/* forward declaration for a mutually recursive function */
170static void unpack_entry(struct pack_entry *);
171
c4fb06c0
LT
172static int unpack_delta_entry(struct pack_entry *entry,
173 unsigned char *base_sha1,
174 unsigned long delta_size,
175 unsigned long left)
8ee378a0 176{
c4fb06c0
LT
177 void *data, *delta_data, *result, *base;
178 unsigned long data_size, result_size, base_size;
8ee378a0
JH
179 z_stream stream;
180 int st;
181 char type[20];
182 unsigned char sha1[20];
183
c4fb06c0
LT
184 if (left < 20)
185 die("truncated pack file");
186 data = base_sha1 + 20;
187 data_size = left - 20;
188 printf("%s D %lu", sha1_to_hex(entry->sha1), delta_size);
189 printf(" %s\n", sha1_to_hex(base_sha1));
190
191 if (dry_run)
192 return 0;
8ee378a0 193
c4fb06c0 194 /* pack+5 is the base sha1, unless we have it, we need to
8ee378a0
JH
195 * unpack it first.
196 */
c4fb06c0 197 if (!has_sha1_file(base_sha1)) {
8ee378a0 198 struct pack_entry *base;
c4fb06c0 199 if (!find_pack_entry(base_sha1, &base))
8ee378a0
JH
200 die("cannot find delta-pack base object");
201 unpack_entry(base);
202 }
c4fb06c0 203 delta_data = xmalloc(delta_size);
8ee378a0
JH
204
205 memset(&stream, 0, sizeof(stream));
206
c4fb06c0
LT
207 stream.next_in = data;
208 stream.avail_in = data_size;
8ee378a0 209 stream.next_out = delta_data;
c4fb06c0 210 stream.avail_out = delta_size;
8ee378a0
JH
211
212 inflateInit(&stream);
c4fb06c0 213 st = inflate(&stream, Z_FINISH);
8ee378a0 214 inflateEnd(&stream);
c4fb06c0
LT
215 if ((st != Z_STREAM_END) || stream.total_out != delta_size)
216 die("delta data unpack failed");
8ee378a0 217
c4fb06c0 218 base = read_sha1_file(base_sha1, type, &base_size);
8ee378a0 219 if (!base)
c4fb06c0 220 die("failed to read delta-pack base object %s", sha1_to_hex(base_sha1));
8ee378a0
JH
221 result = patch_delta(base, base_size,
222 delta_data, delta_size,
223 &result_size);
224 if (!result)
225 die("failed to apply delta");
226 free(delta_data);
227
228 if (write_sha1_file(result, result_size, type, sha1) < 0)
229 die("failed to write %s (%s)",
230 sha1_to_hex(entry->sha1), type);
231 free(result);
232 printf("%s %s\n", sha1_to_hex(sha1), type);
233 if (memcmp(sha1, entry->sha1, 20))
234 die("resulting %s have wrong SHA1", type);
235 return 0;
236}
237
238static void unpack_entry(struct pack_entry *entry)
239{
c4fb06c0 240 unsigned long offset, size, left;
a733cb60
LT
241 unsigned char *pack, c;
242 int type;
74536958
LT
243
244 /* Have we done this one already due to deltas based on it? */
245 if (lookup_object(entry->sha1))
246 return;
247
248 offset = ntohl(entry->offset);
a733cb60
LT
249 if (offset >= pack_size)
250 goto bad;
251
74536958 252 pack = pack_base + offset;
a733cb60
LT
253 c = *pack++;
254 offset++;
255 type = (c >> 4) & 7;
256 size = (c & 15);
257 while (c & 0x80) {
258 if (offset >= pack_size)
259 goto bad;
260 offset++;
261 c = *pack++;
262 size = (size << 7) + (c & 0x7f);
263
264 }
265 left = pack_size - offset;
266 switch (type) {
267 case OBJ_COMMIT:
268 case OBJ_TREE:
269 case OBJ_BLOB:
270 case OBJ_TAG:
271 unpack_non_delta_entry(entry, type, pack, size, left);
272 return;
273 case OBJ_DELTA:
e1ddc976 274 unpack_delta_entry(entry, pack, size, left);
a733cb60 275 return;
74536958 276 }
a733cb60
LT
277bad:
278 die("corrupted pack file");
74536958
LT
279}
280
281/*
282 * We unpack from the end, older files first. Now, usually
283 * there are deltas etc, so we'll not actually write the
284 * objects in that order, but we might as well try..
285 */
286static void unpack_all(void)
287{
288 int i = nr_entries;
289
290 while (--i >= 0) {
291 struct pack_entry *entry = pack_list[i];
292 unpack_entry(entry);
293 }
294}
295
bad50dc8
LT
296int main(int argc, char **argv)
297{
298 int i;
bad50dc8
LT
299
300 for (i = 1 ; i < argc; i++) {
301 const char *arg = argv[i];
302
303 if (*arg == '-') {
74536958
LT
304 if (!strcmp(arg, "-n")) {
305 dry_run = 1;
306 continue;
307 }
bad50dc8
LT
308 usage(unpack_usage);
309 }
310 if (base_name)
311 usage(unpack_usage);
312 base_name = arg;
313 }
314 if (!base_name)
315 usage(unpack_usage);
8ee378a0 316 index_base = map_file("idx", &index_size);
74536958 317 pack_base = map_file("pack", &pack_size);
8ee378a0 318 if (check_index() < 0)
bad50dc8 319 die("bad index file");
74536958 320 unpack_all();
bad50dc8
LT
321 return 0;
322}