Add delta-islands.{c,h}
[git/git.git] / delta-islands.c
1 #include "cache.h"
2 #include "attr.h"
3 #include "object.h"
4 #include "blob.h"
5 #include "commit.h"
6 #include "tag.h"
7 #include "tree.h"
8 #include "delta.h"
9 #include "pack.h"
10 #include "tree-walk.h"
11 #include "diff.h"
12 #include "revision.h"
13 #include "list-objects.h"
14 #include "progress.h"
15 #include "refs.h"
16 #include "khash.h"
17 #include "pack-bitmap.h"
18 #include "pack-objects.h"
19 #include "delta-islands.h"
20 #include "sha1-array.h"
21 #include "config.h"
22
23 KHASH_INIT(str, const char *, void *, 1, kh_str_hash_func, kh_str_hash_equal)
24
25 static khash_sha1 *island_marks;
26 static unsigned island_counter;
27 static unsigned island_counter_core;
28
29 static kh_str_t *remote_islands;
30
31 struct remote_island {
32 uint64_t hash;
33 struct oid_array oids;
34 };
35
36 struct island_bitmap {
37 uint32_t refcount;
38 uint32_t bits[FLEX_ARRAY];
39 };
40
41 static uint32_t island_bitmap_size;
42
43 /*
44 * Allocate a new bitmap; if "old" is not NULL, the new bitmap will be a copy
45 * of "old". Otherwise, the new bitmap is empty.
46 */
47 static struct island_bitmap *island_bitmap_new(const struct island_bitmap *old)
48 {
49 size_t size = sizeof(struct island_bitmap) + (island_bitmap_size * 4);
50 struct island_bitmap *b = xcalloc(1, size);
51
52 if (old)
53 memcpy(b, old, size);
54
55 b->refcount = 1;
56 return b;
57 }
58
59 static void island_bitmap_or(struct island_bitmap *a, const struct island_bitmap *b)
60 {
61 uint32_t i;
62
63 for (i = 0; i < island_bitmap_size; ++i)
64 a->bits[i] |= b->bits[i];
65 }
66
67 static int island_bitmap_is_subset(struct island_bitmap *self,
68 struct island_bitmap *super)
69 {
70 uint32_t i;
71
72 if (self == super)
73 return 1;
74
75 for (i = 0; i < island_bitmap_size; ++i) {
76 if ((self->bits[i] & super->bits[i]) != self->bits[i])
77 return 0;
78 }
79
80 return 1;
81 }
82
83 #define ISLAND_BITMAP_BLOCK(x) (x / 32)
84 #define ISLAND_BITMAP_MASK(x) (1 << (x % 32))
85
86 static void island_bitmap_set(struct island_bitmap *self, uint32_t i)
87 {
88 self->bits[ISLAND_BITMAP_BLOCK(i)] |= ISLAND_BITMAP_MASK(i);
89 }
90
91 static int island_bitmap_get(struct island_bitmap *self, uint32_t i)
92 {
93 return (self->bits[ISLAND_BITMAP_BLOCK(i)] & ISLAND_BITMAP_MASK(i)) != 0;
94 }
95
96 int in_same_island(const struct object_id *trg_oid, const struct object_id *src_oid)
97 {
98 khiter_t trg_pos, src_pos;
99
100 /* If we aren't using islands, assume everything goes together. */
101 if (!island_marks)
102 return 1;
103
104 /*
105 * If we don't have a bitmap for the target, we can delta it
106 * against anything -- it's not an important object
107 */
108 trg_pos = kh_get_sha1(island_marks, trg_oid->hash);
109 if (trg_pos >= kh_end(island_marks))
110 return 1;
111
112 /*
113 * if the source (our delta base) doesn't have a bitmap,
114 * we don't want to base any deltas on it!
115 */
116 src_pos = kh_get_sha1(island_marks, src_oid->hash);
117 if (src_pos >= kh_end(island_marks))
118 return 0;
119
120 return island_bitmap_is_subset(kh_value(island_marks, trg_pos),
121 kh_value(island_marks, src_pos));
122 }
123
124 int island_delta_cmp(const struct object_id *a, const struct object_id *b)
125 {
126 khiter_t a_pos, b_pos;
127 struct island_bitmap *a_bitmap = NULL, *b_bitmap = NULL;
128
129 if (!island_marks)
130 return 0;
131
132 a_pos = kh_get_sha1(island_marks, a->hash);
133 if (a_pos < kh_end(island_marks))
134 a_bitmap = kh_value(island_marks, a_pos);
135
136 b_pos = kh_get_sha1(island_marks, b->hash);
137 if (b_pos < kh_end(island_marks))
138 b_bitmap = kh_value(island_marks, b_pos);
139
140 if (a_bitmap) {
141 if (!b_bitmap || !island_bitmap_is_subset(a_bitmap, b_bitmap))
142 return -1;
143 }
144 if (b_bitmap) {
145 if (!a_bitmap || !island_bitmap_is_subset(b_bitmap, a_bitmap))
146 return 1;
147 }
148
149 return 0;
150 }
151
152 static struct island_bitmap *create_or_get_island_marks(struct object *obj)
153 {
154 khiter_t pos;
155 int hash_ret;
156
157 pos = kh_put_sha1(island_marks, obj->oid.hash, &hash_ret);
158 if (hash_ret)
159 kh_value(island_marks, pos) = island_bitmap_new(NULL);
160
161 return kh_value(island_marks, pos);
162 }
163
164 static void set_island_marks(struct object *obj, struct island_bitmap *marks)
165 {
166 struct island_bitmap *b;
167 khiter_t pos;
168 int hash_ret;
169
170 pos = kh_put_sha1(island_marks, obj->oid.hash, &hash_ret);
171 if (hash_ret) {
172 /*
173 * We don't have one yet; make a copy-on-write of the
174 * parent.
175 */
176 marks->refcount++;
177 kh_value(island_marks, pos) = marks;
178 return;
179 }
180
181 /*
182 * We do have it. Make sure we split any copy-on-write before
183 * updating.
184 */
185 b = kh_value(island_marks, pos);
186 if (b->refcount > 1) {
187 b->refcount--;
188 b = kh_value(island_marks, pos) = island_bitmap_new(b);
189 }
190 island_bitmap_or(b, marks);
191 }
192
193 static void mark_remote_island_1(struct remote_island *rl, int is_core_island)
194 {
195 uint32_t i;
196
197 for (i = 0; i < rl->oids.nr; ++i) {
198 struct island_bitmap *marks;
199 struct object *obj = parse_object(the_repository, &rl->oids.oid[i]);
200
201 if (!obj)
202 continue;
203
204 marks = create_or_get_island_marks(obj);
205 island_bitmap_set(marks, island_counter);
206
207 if (is_core_island && obj->type == OBJ_COMMIT)
208 obj->flags |= NEEDS_BITMAP;
209
210 /* If it was a tag, also make sure we hit the underlying object. */
211 while (obj && obj->type == OBJ_TAG) {
212 obj = ((struct tag *)obj)->tagged;
213 if (obj) {
214 parse_object(the_repository, &obj->oid);
215 marks = create_or_get_island_marks(obj);
216 island_bitmap_set(marks, island_counter);
217 }
218 }
219 }
220
221 if (is_core_island)
222 island_counter_core = island_counter;
223
224 island_counter++;
225 }
226
227 static int cmp_tree_depth(const void *va, const void *vb)
228 {
229 struct object_entry *a = *(struct object_entry **)va;
230 struct object_entry *b = *(struct object_entry **)vb;
231 return a->tree_depth - b->tree_depth;
232 }
233
234 void resolve_tree_islands(int progress, struct packing_data *to_pack)
235 {
236 struct progress *progress_state = NULL;
237 struct object_entry **todo;
238 int nr = 0;
239 int i;
240
241 if (!island_marks)
242 return;
243
244 /*
245 * We process only trees, as commits and tags have already been handled
246 * (and passed their marks on to root trees, as well. We must make sure
247 * to process them in descending tree-depth order so that marks
248 * propagate down the tree properly, even if a sub-tree is found in
249 * multiple parent trees.
250 */
251 ALLOC_ARRAY(todo, to_pack->nr_objects);
252 for (i = 0; i < to_pack->nr_objects; i++) {
253 if (oe_type(&to_pack->objects[i]) == OBJ_TREE)
254 todo[nr++] = &to_pack->objects[i];
255 }
256 QSORT(todo, nr, cmp_tree_depth);
257
258 if (progress)
259 progress_state = start_progress(_("Propagating island marks"), nr);
260
261 for (i = 0; i < nr; i++) {
262 struct object_entry *ent = todo[i];
263 struct island_bitmap *root_marks;
264 struct tree *tree;
265 struct tree_desc desc;
266 struct name_entry entry;
267 khiter_t pos;
268
269 pos = kh_get_sha1(island_marks, ent->idx.oid.hash);
270 if (pos >= kh_end(island_marks))
271 continue;
272
273 root_marks = kh_value(island_marks, pos);
274
275 tree = lookup_tree(the_repository, &ent->idx.oid);
276 if (!tree || parse_tree(tree) < 0)
277 die(_("bad tree object %s"), oid_to_hex(&ent->idx.oid));
278
279 init_tree_desc(&desc, tree->buffer, tree->size);
280 while (tree_entry(&desc, &entry)) {
281 struct object *obj;
282
283 if (S_ISGITLINK(entry.mode))
284 continue;
285
286 obj = lookup_object(the_repository, entry.oid->hash);
287 if (!obj)
288 continue;
289
290 set_island_marks(obj, root_marks);
291 }
292
293 free_tree_buffer(tree);
294
295 display_progress(progress_state, i+1);
296 }
297
298 stop_progress(&progress_state);
299 free(todo);
300 }
301
302 static regex_t *island_regexes;
303 static unsigned int island_regexes_alloc, island_regexes_nr;
304 static const char *core_island_name;
305
306 static int island_config_callback(const char *k, const char *v, void *cb)
307 {
308 if (!strcmp(k, "pack.island")) {
309 struct strbuf re = STRBUF_INIT;
310
311 if (!v)
312 return config_error_nonbool(k);
313
314 ALLOC_GROW(island_regexes, island_regexes_nr + 1, island_regexes_alloc);
315
316 if (*v != '^')
317 strbuf_addch(&re, '^');
318 strbuf_addstr(&re, v);
319
320 if (regcomp(&island_regexes[island_regexes_nr], re.buf, REG_EXTENDED))
321 die(_("failed to load island regex for '%s': %s"), k, re.buf);
322
323 strbuf_release(&re);
324 island_regexes_nr++;
325 return 0;
326 }
327
328 if (!strcmp(k, "pack.islandcore"))
329 return git_config_string(&core_island_name, k, v);
330
331 return 0;
332 }
333
334 static void add_ref_to_island(const char *island_name, const struct object_id *oid)
335 {
336 uint64_t sha_core;
337 struct remote_island *rl = NULL;
338
339 int hash_ret;
340 khiter_t pos = kh_put_str(remote_islands, island_name, &hash_ret);
341
342 if (hash_ret) {
343 kh_key(remote_islands, pos) = xstrdup(island_name);
344 kh_value(remote_islands, pos) = xcalloc(1, sizeof(struct remote_island));
345 }
346
347 rl = kh_value(remote_islands, pos);
348 oid_array_append(&rl->oids, oid);
349
350 memcpy(&sha_core, oid->hash, sizeof(uint64_t));
351 rl->hash += sha_core;
352 }
353
354 static int find_island_for_ref(const char *refname, const struct object_id *oid,
355 int flags, void *data)
356 {
357 /*
358 * We should advertise 'ARRAY_SIZE(matches) - 2' as the max,
359 * so we can diagnose below a config with more capture groups
360 * than we support.
361 */
362 regmatch_t matches[16];
363 int i, m;
364 struct strbuf island_name = STRBUF_INIT;
365
366 /* walk backwards to get last-one-wins ordering */
367 for (i = island_regexes_nr - 1; i >= 0; i--) {
368 if (!regexec(&island_regexes[i], refname,
369 ARRAY_SIZE(matches), matches, 0))
370 break;
371 }
372
373 if (i < 0)
374 return 0;
375
376 if (matches[ARRAY_SIZE(matches) - 1].rm_so != -1)
377 warning(_("island regex from config has "
378 "too many capture groups (max=%d)"),
379 (int)ARRAY_SIZE(matches) - 2);
380
381 for (m = 1; m < ARRAY_SIZE(matches); m++) {
382 regmatch_t *match = &matches[m];
383
384 if (match->rm_so == -1)
385 continue;
386
387 if (island_name.len)
388 strbuf_addch(&island_name, '-');
389
390 strbuf_add(&island_name, refname + match->rm_so, match->rm_eo - match->rm_so);
391 }
392
393 add_ref_to_island(island_name.buf, oid);
394 strbuf_release(&island_name);
395 return 0;
396 }
397
398 static struct remote_island *get_core_island(void)
399 {
400 if (core_island_name) {
401 khiter_t pos = kh_get_str(remote_islands, core_island_name);
402 if (pos < kh_end(remote_islands))
403 return kh_value(remote_islands, pos);
404 }
405
406 return NULL;
407 }
408
409 static void deduplicate_islands(void)
410 {
411 struct remote_island *island, *core = NULL, **list;
412 unsigned int island_count, dst, src, ref, i = 0;
413
414 island_count = kh_size(remote_islands);
415 ALLOC_ARRAY(list, island_count);
416
417 kh_foreach_value(remote_islands, island, {
418 list[i++] = island;
419 });
420
421 for (ref = 0; ref + 1 < island_count; ref++) {
422 for (src = ref + 1, dst = src; src < island_count; src++) {
423 if (list[ref]->hash == list[src]->hash)
424 continue;
425
426 if (src != dst)
427 list[dst] = list[src];
428
429 dst++;
430 }
431 island_count = dst;
432 }
433
434 island_bitmap_size = (island_count / 32) + 1;
435 core = get_core_island();
436
437 for (i = 0; i < island_count; ++i) {
438 mark_remote_island_1(list[i], core && list[i]->hash == core->hash);
439 }
440
441 free(list);
442 }
443
444 void load_delta_islands(void)
445 {
446 island_marks = kh_init_sha1();
447 remote_islands = kh_init_str();
448
449 git_config(island_config_callback, NULL);
450 for_each_ref(find_island_for_ref, NULL);
451 deduplicate_islands();
452
453 fprintf(stderr, _("Marked %d islands, done.\n"), island_counter);
454 }
455
456 void propagate_island_marks(struct commit *commit)
457 {
458 khiter_t pos = kh_get_sha1(island_marks, commit->object.oid.hash);
459
460 if (pos < kh_end(island_marks)) {
461 struct commit_list *p;
462 struct island_bitmap *root_marks = kh_value(island_marks, pos);
463
464 parse_commit(commit);
465 set_island_marks(&get_commit_tree(commit)->object, root_marks);
466 for (p = commit->parents; p; p = p->next)
467 set_island_marks(&p->item->object, root_marks);
468 }
469 }
470
471 int compute_pack_layers(struct packing_data *to_pack)
472 {
473 uint32_t i;
474
475 if (!core_island_name || !island_marks)
476 return 1;
477
478 for (i = 0; i < to_pack->nr_objects; ++i) {
479 struct object_entry *entry = &to_pack->objects[i];
480 khiter_t pos = kh_get_sha1(island_marks, entry->idx.oid.hash);
481
482 entry->layer = 1;
483
484 if (pos < kh_end(island_marks)) {
485 struct island_bitmap *bitmap = kh_value(island_marks, pos);
486
487 if (island_bitmap_get(bitmap, island_counter_core))
488 entry->layer = 0;
489 }
490 }
491
492 return 2;
493 }