Merge branch 'maint'
[git/git.git] / diff-delta.c
CommitLineData
a310d434
NP
1/*
2 * diff-delta.c: generate a delta between two buffers
3 *
366b53c1
NP
4 * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5 * http://www.xmailserver.org/xdiff-lib.html
a310d434 6 *
366b53c1 7 * Rewritten for GIT by Nicolas Pitre <nico@cam.org>, (C) 2005-2007
a310d434 8 *
366b53c1
NP
9 * This code is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
a310d434
NP
12 */
13
63f17569 14#include "git-compat-util.h"
85023577 15#include "delta.h"
a310d434 16
08abe669
NP
17/* maximum hash entry list for the same hash bucket */
18#define HASH_LIMIT 64
a310d434 19
3dc5a9e4
NP
20#define RABIN_SHIFT 23
21#define RABIN_WINDOW 16
22
23static const unsigned int T[256] = {
24 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
25 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
26 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
27 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
28 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
29 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
30 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
31 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
32 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
33 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
34 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
35 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
36 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
37 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
38 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
39 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
40 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
41 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
42 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
43 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
44 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
45 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
46 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
47 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
48 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
49 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
50 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
51 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
52 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
53 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
54 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
55 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
56 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
57 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
58 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
59 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
60 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
61 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
62 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
63 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
64 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
65 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
66 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
67};
68
69static const unsigned int U[256] = {
70 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
71 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
72 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
73 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
74 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
75 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
76 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
77 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
78 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
79 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
80 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
81 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
82 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
83 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
84 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
85 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
86 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
87 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
88 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
89 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
90 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
91 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
92 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
93 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
94 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
95 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
96 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
97 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
98 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
99 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
100 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
101 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
102 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
103 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
104 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
105 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
106 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
107 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
108 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
109 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
110 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
111 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
112 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
113};
a310d434 114
08abe669 115struct index_entry {
a310d434 116 const unsigned char *ptr;
8e1454b5 117 unsigned int val;
08abe669 118 struct index_entry *next;
8e1454b5 119};
a310d434 120
08abe669 121struct delta_index {
11779e79 122 unsigned long memsize;
08abe669
NP
123 const void *src_buf;
124 unsigned long src_size;
3dc5a9e4 125 unsigned int hash_mask;
63f17569 126 struct index_entry *hash[FLEX_ARRAY];
08abe669
NP
127};
128
129struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
a310d434 130{
06a9f920 131 unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
08abe669
NP
132 const unsigned char *data, *buffer = buf;
133 struct delta_index *index;
134 struct index_entry *entry, **hash;
8e1454b5 135 void *mem;
06a9f920 136 unsigned long memsize;
8e1454b5 137
08abe669
NP
138 if (!buf || !bufsize)
139 return NULL;
140
3dc5a9e4 141 /* Determine index hash size. Note that indexing skips the
82e5a82f 142 first byte to allow for optimizing the Rabin's polynomial
3dc5a9e4
NP
143 initialization in create_delta(). */
144 entries = (bufsize - 1) / RABIN_WINDOW;
8e1454b5 145 hsize = entries / 4;
b05faa2d 146 for (i = 4; (1u << i) < hsize && i < 31; i++);
8e1454b5 147 hsize = 1 << i;
3dc5a9e4 148 hmask = hsize - 1;
8e1454b5
NP
149
150 /* allocate lookup index */
06a9f920
NP
151 memsize = sizeof(*index) +
152 sizeof(*hash) * hsize +
153 sizeof(*entry) * entries;
154 mem = malloc(memsize);
8e1454b5
NP
155 if (!mem)
156 return NULL;
08abe669
NP
157 index = mem;
158 mem = index + 1;
8e1454b5 159 hash = mem;
08abe669
NP
160 mem = hash + hsize;
161 entry = mem;
162
11779e79 163 index->memsize = memsize;
08abe669
NP
164 index->src_buf = buf;
165 index->src_size = bufsize;
3dc5a9e4 166 index->hash_mask = hmask;
8e1454b5
NP
167 memset(hash, 0, hsize * sizeof(*hash));
168
c13c6bf7
NP
169 /* allocate an array to count hash entries */
170 hash_count = calloc(hsize, sizeof(*hash_count));
171 if (!hash_count) {
08abe669 172 free(index);
c13c6bf7
NP
173 return NULL;
174 }
175
176 /* then populate the index */
06a9f920
NP
177 prev_val = ~0;
178 for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
179 data >= buffer;
180 data -= RABIN_WINDOW) {
3dc5a9e4
NP
181 unsigned int val = 0;
182 for (i = 1; i <= RABIN_WINDOW; i++)
183 val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
06a9f920
NP
184 if (val == prev_val) {
185 /* keep the lowest of consecutive identical blocks */
186 entry[-1].ptr = data + RABIN_WINDOW;
187 } else {
188 prev_val = val;
189 i = val & hmask;
190 entry->ptr = data + RABIN_WINDOW;
191 entry->val = val;
192 entry->next = hash[i];
193 hash[i] = entry++;
194 hash_count[i]++;
06a9f920 195 }
3dc5a9e4 196 }
a310d434 197
c13c6bf7
NP
198 /*
199 * Determine a limit on the number of entries in the same hash
82e5a82f 200 * bucket. This guards us against pathological data sets causing
c13c6bf7
NP
201 * really bad hash distribution with most entries in the same hash
202 * bucket that would bring us to O(m*n) computing costs (m and n
203 * corresponding to reference and target buffer sizes).
204 *
08abe669 205 * Make sure none of the hash buckets has more entries than
c13c6bf7
NP
206 * we're willing to test. Otherwise we cull the entry list
207 * uniformly to still preserve a good repartition across
208 * the reference buffer.
209 */
210 for (i = 0; i < hsize; i++) {
08abe669 211 if (hash_count[i] < HASH_LIMIT)
c13c6bf7
NP
212 continue;
213 entry = hash[i];
214 do {
08abe669 215 struct index_entry *keep = entry;
b1d884a9 216 int skip = hash_count[i] / HASH_LIMIT;
c13c6bf7
NP
217 do {
218 entry = entry->next;
219 } while(--skip && entry);
220 keep->next = entry;
221 } while(entry);
222 }
223 free(hash_count);
224
08abe669
NP
225 return index;
226}
227
228void free_delta_index(struct delta_index *index)
229{
230 free(index);
a310d434
NP
231}
232
11779e79
BD
233unsigned long sizeof_delta_index(struct delta_index *index)
234{
235 if (index)
236 return index->memsize;
237 else
238 return 0;
239}
240
3dc5a9e4
NP
241/*
242 * The maximum size for any opcode sequence, including the initial header
82e5a82f 243 * plus Rabin window plus biggest copy.
3dc5a9e4
NP
244 */
245#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
fe474b58 246
08abe669
NP
247void *
248create_delta(const struct delta_index *index,
249 const void *trg_buf, unsigned long trg_size,
250 unsigned long *delta_size, unsigned long max_size)
a310d434 251{
84336696 252 unsigned int i, outpos, outsize, moff, msize, val;
5a1fb2ca 253 int inscnt;
8e1454b5
NP
254 const unsigned char *ref_data, *ref_top, *data, *top;
255 unsigned char *out;
a310d434 256
08abe669 257 if (!trg_buf || !trg_size)
8e1454b5
NP
258 return NULL;
259
a310d434
NP
260 outpos = 0;
261 outsize = 8192;
fe474b58
NP
262 if (max_size && outsize >= max_size)
263 outsize = max_size + MAX_OP_SIZE + 1;
a310d434 264 out = malloc(outsize);
08abe669 265 if (!out)
a310d434 266 return NULL;
a310d434
NP
267
268 /* store reference buffer size */
08abe669
NP
269 i = index->src_size;
270 while (i >= 0x80) {
271 out[outpos++] = i | 0x80;
272 i >>= 7;
69a2d426 273 }
08abe669 274 out[outpos++] = i;
a310d434
NP
275
276 /* store target buffer size */
08abe669
NP
277 i = trg_size;
278 while (i >= 0x80) {
279 out[outpos++] = i | 0x80;
280 i >>= 7;
69a2d426 281 }
08abe669 282 out[outpos++] = i;
a310d434 283
08abe669
NP
284 ref_data = index->src_buf;
285 ref_top = ref_data + index->src_size;
286 data = trg_buf;
1d7f171c 287 top = (const unsigned char *) trg_buf + trg_size;
3dc5a9e4
NP
288
289 outpos++;
290 val = 0;
291 for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
292 out[outpos++] = *data;
293 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
294 }
295 inscnt = i;
a310d434 296
84336696
NP
297 moff = 0;
298 msize = 0;
8e1454b5 299 while (data < top) {
84336696
NP
300 if (msize < 4096) {
301 struct index_entry *entry;
302 val ^= U[data[-RABIN_WINDOW]];
303 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
304 i = val & index->hash_mask;
305 for (entry = index->hash[i]; entry; entry = entry->next) {
306 const unsigned char *ref = entry->ptr;
307 const unsigned char *src = data;
308 unsigned int ref_size = ref_top - ref;
309 if (entry->val != val)
310 continue;
311 if (ref_size > top - src)
312 ref_size = top - src;
313 if (ref_size <= msize)
314 break;
315 while (ref_size-- && *src++ == *ref)
316 ref++;
317 if (msize < ref - entry->ptr) {
318 /* this is our best match so far */
319 msize = ref - entry->ptr;
320 moff = entry->ptr - ref_data;
321 if (msize >= 4096) /* good enough */
322 break;
323 }
a310d434
NP
324 }
325 }
326
3dc5a9e4 327 if (msize < 4) {
a310d434
NP
328 if (!inscnt)
329 outpos++;
330 out[outpos++] = *data++;
331 inscnt++;
332 if (inscnt == 0x7f) {
333 out[outpos - inscnt - 1] = inscnt;
334 inscnt = 0;
335 }
84336696 336 msize = 0;
a310d434 337 } else {
84336696 338 unsigned int left;
8e1454b5
NP
339 unsigned char *op;
340
a310d434 341 if (inscnt) {
5a1fb2ca 342 while (moff && ref_data[moff-1] == data[-1]) {
5a1fb2ca
NP
343 /* we can match one byte back */
344 msize++;
345 moff--;
346 data--;
347 outpos--;
348 if (--inscnt)
349 continue;
350 outpos--; /* remove count slot */
351 inscnt--; /* make it -1 */
352 break;
353 }
a310d434
NP
354 out[outpos - inscnt - 1] = inscnt;
355 inscnt = 0;
356 }
357
84336696
NP
358 /* A copy op is currently limited to 64KB (pack v2) */
359 left = (msize < 0x10000) ? 0 : (msize - 0x10000);
360 msize -= left;
361
8e1454b5 362 op = out + outpos++;
a310d434
NP
363 i = 0x80;
364
84336696
NP
365 if (moff & 0x000000ff)
366 out[outpos++] = moff >> 0, i |= 0x01;
367 if (moff & 0x0000ff00)
368 out[outpos++] = moff >> 8, i |= 0x02;
369 if (moff & 0x00ff0000)
370 out[outpos++] = moff >> 16, i |= 0x04;
371 if (moff & 0xff000000)
372 out[outpos++] = moff >> 24, i |= 0x08;
a310d434 373
84336696
NP
374 if (msize & 0x00ff)
375 out[outpos++] = msize >> 0, i |= 0x10;
376 if (msize & 0xff00)
377 out[outpos++] = msize >> 8, i |= 0x20;
a310d434 378
8e1454b5 379 *op = i;
84336696
NP
380
381 data += msize;
382 moff += msize;
383 msize = left;
384
385 if (msize < 4096) {
386 int j;
387 val = 0;
388 for (j = -RABIN_WINDOW; j < 0; j++)
389 val = ((val << 8) | data[j])
390 ^ T[val >> RABIN_SHIFT];
391 }
a310d434
NP
392 }
393
fe474b58 394 if (outpos >= outsize - MAX_OP_SIZE) {
a310d434
NP
395 void *tmp = out;
396 outsize = outsize * 3 / 2;
fe474b58
NP
397 if (max_size && outsize >= max_size)
398 outsize = max_size + MAX_OP_SIZE + 1;
399 if (max_size && outpos > max_size)
3dc5a9e4 400 break;
b75c6c6d 401 out = realloc(out, outsize);
a310d434
NP
402 if (!out) {
403 free(tmp);
a310d434
NP
404 return NULL;
405 }
406 }
407 }
408
409 if (inscnt)
410 out[outpos - inscnt - 1] = inscnt;
411
3dc5a9e4
NP
412 if (max_size && outpos > max_size) {
413 free(out);
414 return NULL;
415 }
416
a310d434
NP
417 *delta_size = outpos;
418 return out;
419}