Add a new merge strategy by Fredrik Kuivinen.
[git/git.git] / read-tree.c
/*
 * GIT - The information manager from hell
 *
 * Copyright (C) Linus Torvalds, 2005
 */
6 #define DBRT_DEBUG 1
7
8 #include "cache.h"
9
10 #include "object.h"
11 #include "tree.h"
12
13 static int merge = 0;
14 static int update = 0;
15 static int index_only = 0;
16
17 static int head_idx = -1;
18 static int merge_size = 0;
19
20 static struct object_list *trees = NULL;
21
22 static struct cache_entry df_conflict_entry = {
23 };
24
25 static struct tree_entry_list df_conflict_list = {
26 .name = NULL,
27 .next = &df_conflict_list
28 };
29
30 typedef int (*merge_fn_t)(struct cache_entry **src);
31
/*
 * Compare two entry names the way they sort inside a tree object:
 * a directory name is compared as if it had a trailing '/', so the
 * directory "foo" sorts like "foo/" relative to the file "foo".
 *
 * dir1/dir2 are nonzero when the corresponding name is a directory.
 * Returns <0, 0 or >0 in the usual strcmp() sense.
 */
static int entcmp(const char *name1, int dir1, const char *name2, int dir2)
{
	int len1 = strlen(name1);
	int len2 = strlen(name2);
	int len = len1 < len2 ? len1 : len2;
	int ret = memcmp(name1, name2, len);
	unsigned char c1, c2;

	if (ret)
		return ret;
	/*
	 * The shorter name is a prefix of the longer one; compare the
	 * first byte past the common prefix, substituting the implied
	 * '/' for a directory's terminating NUL.
	 */
	c1 = name1[len];
	c2 = name2[len];
	if (!c1 && dir1)
		c1 = '/';
	if (!c2 && dir2)
		c2 = '/';
	ret = (c1 < c2) ? -1 : (c1 > c2) ? 1 : 0;
	/* Both are directories with the same implied '/': shorter first. */
	if (c1 && c2 && !ret)
		ret = len1 - len2;
	return ret;
}
52
53 static int unpack_trees_rec(struct tree_entry_list **posns, int len,
54 const char *base, merge_fn_t fn, int *indpos)
55 {
56 int baselen = strlen(base);
57 int src_size = len + 1;
58 do {
59 int i;
60 char *first;
61 int firstdir = 0;
62 int pathlen;
63 unsigned ce_size;
64 struct tree_entry_list **subposns;
65 struct cache_entry **src;
66 int any_files = 0;
67 int any_dirs = 0;
68 char *cache_name;
69 int ce_stage;
70
71 /* Find the first name in the input. */
72
73 first = NULL;
74 cache_name = NULL;
75
76 /* Check the cache */
77 if (merge && *indpos < active_nr) {
78 /* This is a bit tricky: */
79 /* If the index has a subdirectory (with
80 * contents) as the first name, it'll get a
81 * filename like "foo/bar". But that's after
82 * "foo", so the entry in trees will get
83 * handled first, at which point we'll go into
84 * "foo", and deal with "bar" from the index,
85 * because the base will be "foo/". The only
86 * way we can actually have "foo/bar" first of
87 * all the things is if the trees don't
88 * contain "foo" at all, in which case we'll
89 * handle "foo/bar" without going into the
90 * directory, but that's fine (and will return
91 * an error anyway, with the added unknown
92 * file case.
93 */
94
95 cache_name = active_cache[*indpos]->name;
96 if (strlen(cache_name) > baselen &&
97 !memcmp(cache_name, base, baselen)) {
98 cache_name += baselen;
99 first = cache_name;
100 } else {
101 cache_name = NULL;
102 }
103 }
104
105 #if DBRT_DEBUG > 1
106 if (first)
107 printf("index %s\n", first);
108 #endif
109 for (i = 0; i < len; i++) {
110 if (!posns[i] || posns[i] == &df_conflict_list)
111 continue;
112 #if DBRT_DEBUG > 1
113 printf("%d %s\n", i + 1, posns[i]->name);
114 #endif
115 if (!first || entcmp(first, firstdir,
116 posns[i]->name,
117 posns[i]->directory) > 0) {
118 first = posns[i]->name;
119 firstdir = posns[i]->directory;
120 }
121 }
122 /* No name means we're done */
123 if (!first)
124 return 0;
125
126 pathlen = strlen(first);
127 ce_size = cache_entry_size(baselen + pathlen);
128
129 src = xmalloc(sizeof(struct cache_entry *) * src_size);
130 memset(src, 0, sizeof(struct cache_entry *) * src_size);
131
132 subposns = xmalloc(sizeof(struct tree_list_entry *) * len);
133 memset(subposns, 0, sizeof(struct tree_list_entry *) * len);
134
135 if (cache_name && !strcmp(cache_name, first)) {
136 any_files = 1;
137 src[0] = active_cache[*indpos];
138 remove_cache_entry_at(*indpos);
139 }
140
141 for (i = 0; i < len; i++) {
142 struct cache_entry *ce;
143
144 if (!posns[i] ||
145 (posns[i] != &df_conflict_list &&
146 strcmp(first, posns[i]->name))) {
147 continue;
148 }
149
150 if (posns[i] == &df_conflict_list) {
151 src[i + merge] = &df_conflict_entry;
152 continue;
153 }
154
155 if (posns[i]->directory) {
156 any_dirs = 1;
157 parse_tree(posns[i]->item.tree);
158 subposns[i] = posns[i]->item.tree->entries;
159 posns[i] = posns[i]->next;
160 src[i + merge] = &df_conflict_entry;
161 continue;
162 }
163
164 if (!merge)
165 ce_stage = 0;
166 else if (i + 1 < head_idx)
167 ce_stage = 1;
168 else if (i + 1 > head_idx)
169 ce_stage = 3;
170 else
171 ce_stage = 2;
172
173 ce = xmalloc(ce_size);
174 memset(ce, 0, ce_size);
175 ce->ce_mode = create_ce_mode(posns[i]->mode);
176 ce->ce_flags = create_ce_flags(baselen + pathlen,
177 ce_stage);
178 memcpy(ce->name, base, baselen);
179 memcpy(ce->name + baselen, first, pathlen + 1);
180
181 any_files = 1;
182
183 memcpy(ce->sha1, posns[i]->item.any->sha1, 20);
184 src[i + merge] = ce;
185 subposns[i] = &df_conflict_list;
186 posns[i] = posns[i]->next;
187 }
188 if (any_files) {
189 if (merge) {
190 int ret;
191
192 #if DBRT_DEBUG > 1
193 printf("%s:\n", first);
194 for (i = 0; i < src_size; i++) {
195 printf(" %d ", i);
196 if (src[i])
197 printf("%s\n", sha1_to_hex(src[i]->sha1));
198 else
199 printf("\n");
200 }
201 #endif
202 ret = fn(src);
203
204 #if DBRT_DEBUG > 1
205 printf("Added %d entries\n", ret);
206 #endif
207 *indpos += ret;
208 } else {
209 for (i = 0; i < src_size; i++) {
210 if (src[i]) {
211 add_cache_entry(src[i], ADD_CACHE_OK_TO_ADD|ADD_CACHE_SKIP_DFCHECK);
212 }
213 }
214 }
215 }
216 if (any_dirs) {
217 char *newbase = xmalloc(baselen + 2 + pathlen);
218 memcpy(newbase, base, baselen);
219 memcpy(newbase + baselen, first, pathlen);
220 newbase[baselen + pathlen] = '/';
221 newbase[baselen + pathlen + 1] = '\0';
222 if (unpack_trees_rec(subposns, len, newbase, fn,
223 indpos))
224 return -1;
225 free(newbase);
226 }
227 free(subposns);
228 free(src);
229 } while (1);
230 }
231
232 static void reject_merge(struct cache_entry *ce)
233 {
234 die("Entry '%s' would be overwritten by merge. Cannot merge.",
235 ce->name);
236 }
237
238 static void check_updates(struct cache_entry **src, int nr)
239 {
240 static struct checkout state = {
241 .base_dir = "",
242 .force = 1,
243 .quiet = 1,
244 .refresh_cache = 1,
245 };
246 unsigned short mask = htons(CE_UPDATE);
247 while (nr--) {
248 struct cache_entry *ce = *src++;
249 if (!ce->ce_mode) {
250 if (update)
251 unlink(ce->name);
252 continue;
253 }
254 if (ce->ce_flags & mask) {
255 ce->ce_flags &= ~mask;
256 if (update)
257 checkout_entry(ce, &state);
258 }
259 }
260 }
261
262 static int unpack_trees(merge_fn_t fn)
263 {
264 int indpos = 0;
265 unsigned len = object_list_length(trees);
266 struct tree_entry_list **posns =
267 xmalloc(len * sizeof(struct tree_entry_list *));
268 int i;
269 struct object_list *posn = trees;
270 merge_size = len;
271 for (i = 0; i < len; i++) {
272 posns[i] = ((struct tree *) posn->item)->entries;
273 posn = posn->next;
274 }
275 if (unpack_trees_rec(posns, len, "", fn, &indpos))
276 return -1;
277
278 check_updates(active_cache, active_nr);
279 return 0;
280 }
281
282 static int list_tree(unsigned char *sha1)
283 {
284 struct tree *tree = parse_tree_indirect(sha1);
285 if (!tree)
286 return -1;
287 object_list_append(&tree->object, &trees);
288 return 0;
289 }
290
291 static int same(struct cache_entry *a, struct cache_entry *b)
292 {
293 if (!!a != !!b)
294 return 0;
295 if (!a && !b)
296 return 1;
297 return a->ce_mode == b->ce_mode &&
298 !memcmp(a->sha1, b->sha1, 20);
299 }
300
301
302 /*
303 * When a CE gets turned into an unmerged entry, we
304 * want it to be up-to-date
305 */
306 static void verify_uptodate(struct cache_entry *ce)
307 {
308 struct stat st;
309
310 if (index_only)
311 return;
312
313 if (!lstat(ce->name, &st)) {
314 unsigned changed = ce_match_stat(ce, &st);
315 if (!changed)
316 return;
317 errno = 0;
318 }
319 if (errno == ENOENT)
320 return;
321 die("Entry '%s' not uptodate. Cannot merge.", ce->name);
322 }
323
324 static int merged_entry(struct cache_entry *merge, struct cache_entry *old)
325 {
326 merge->ce_flags |= htons(CE_UPDATE);
327 if (old) {
328 /*
329 * See if we can re-use the old CE directly?
330 * That way we get the uptodate stat info.
331 *
332 * This also removes the UPDATE flag on
333 * a match.
334 */
335 if (same(old, merge)) {
336 *merge = *old;
337 } else {
338 verify_uptodate(old);
339 }
340 }
341 merge->ce_flags &= ~htons(CE_STAGEMASK);
342 add_cache_entry(merge, ADD_CACHE_OK_TO_ADD);
343 return 1;
344 }
345
346 static int deleted_entry(struct cache_entry *ce, struct cache_entry *old)
347 {
348 if (old)
349 verify_uptodate(old);
350 ce->ce_mode = 0;
351 add_cache_entry(ce, ADD_CACHE_OK_TO_ADD);
352 return 1;
353 }
354
355 static int keep_entry(struct cache_entry *ce)
356 {
357 add_cache_entry(ce, ADD_CACHE_OK_TO_ADD);
358 return 1;
359 }
360
#if DBRT_DEBUG
/*
 * Debug helper: print one stage entry (mode, sha1, stage, name),
 * prefixed with "label", to the given stream.
 *
 * Fix: the original ignored the "o" parameter and wrote to stderr
 * unconditionally (behavior-compatible fix, as all callers pass
 * stderr).
 */
static void show_stage_entry(FILE *o,
			     const char *label, const struct cache_entry *ce)
{
	fprintf(o, "%s%06o %s %d\t%s\n",
		label,
		ntohl(ce->ce_mode),
		sha1_to_hex(ce->sha1),
		ce_stage(ce),
		ce->name);
}
#endif
373
374 static int threeway_merge(struct cache_entry **stages)
375 {
376 struct cache_entry *index;
377 struct cache_entry *head;
378 struct cache_entry *remote = stages[head_idx + 1];
379 int count;
380 int head_match = 0;
381 int remote_match = 0;
382
383 int df_conflict_head = 0;
384 int df_conflict_remote = 0;
385
386 int any_anc_missing = 0;
387 int i;
388
389 for (i = 1; i < head_idx; i++) {
390 if (!stages[i])
391 any_anc_missing = 1;
392 }
393
394 index = stages[0];
395 head = stages[head_idx];
396
397 if (head == &df_conflict_entry) {
398 df_conflict_head = 1;
399 head = NULL;
400 }
401
402 if (remote == &df_conflict_entry) {
403 df_conflict_remote = 1;
404 remote = NULL;
405 }
406
407 /* First, if there's a #16 situation, note that to prevent #13
408 * and #14.
409 */
410 if (!same(remote, head)) {
411 for (i = 1; i < head_idx; i++) {
412 if (same(stages[i], head)) {
413 head_match = i;
414 }
415 if (same(stages[i], remote)) {
416 remote_match = i;
417 }
418 }
419 }
420
421 /* We start with cases where the index is allowed to match
422 * something other than the head: #14(ALT) and #2ALT, where it
423 * is permitted to match the result instead.
424 */
425 /* #14, #14ALT, #2ALT */
426 if (remote && !df_conflict_head && head_match && !remote_match) {
427 if (index && !same(index, remote) && !same(index, head))
428 reject_merge(index);
429 return merged_entry(remote, index);
430 }
431 /*
432 * If we have an entry in the index cache, then we want to
433 * make sure that it matches head.
434 */
435 if (index && !same(index, head)) {
436 reject_merge(index);
437 }
438
439 if (head) {
440 /* #5ALT, #15 */
441 if (same(head, remote))
442 return merged_entry(head, index);
443 /* #13, #3ALT */
444 if (!df_conflict_remote && remote_match && !head_match)
445 return merged_entry(head, index);
446 }
447
448 /* #1 */
449 if (!head && !remote && any_anc_missing)
450 return 0;
451
452 /* Below are "no merge" cases, which require that the index be
453 * up-to-date to avoid the files getting overwritten with
454 * conflict resolution files.
455 */
456 if (index) {
457 verify_uptodate(index);
458 }
459
460 /* #2, #3, #4, #6, #7, #9, #11. */
461 count = 0;
462 if (!head_match || !remote_match) {
463 for (i = 1; i < head_idx; i++) {
464 if (stages[i]) {
465 keep_entry(stages[i]);
466 count++;
467 break;
468 }
469 }
470 }
471 #if DBRT_DEBUG
472 else {
473 fprintf(stderr, "read-tree: warning #16 detected\n");
474 show_stage_entry(stderr, "head ", stages[head_match]);
475 show_stage_entry(stderr, "remote ", stages[remote_match]);
476 }
477 #endif
478 if (head) { count += keep_entry(head); }
479 if (remote) { count += keep_entry(remote); }
480 return count;
481 }
482
483 /*
484 * Two-way merge.
485 *
486 * The rule is to "carry forward" what is in the index without losing
487 * information across a "fast forward", favoring a successful merge
488 * over a merge failure when it makes sense. For details of the
489 * "carry forward" rule, please see <Documentation/git-read-tree.txt>.
490 *
491 */
492 static int twoway_merge(struct cache_entry **src)
493 {
494 struct cache_entry *current = src[0];
495 struct cache_entry *oldtree = src[1], *newtree = src[2];
496
497 if (merge_size != 2)
498 return error("Cannot do a twoway merge of %d trees\n",
499 merge_size);
500
501 if (current) {
502 if ((!oldtree && !newtree) || /* 4 and 5 */
503 (!oldtree && newtree &&
504 same(current, newtree)) || /* 6 and 7 */
505 (oldtree && newtree &&
506 same(oldtree, newtree)) || /* 14 and 15 */
507 (oldtree && newtree &&
508 !same(oldtree, newtree) && /* 18 and 19*/
509 same(current, newtree))) {
510 return keep_entry(current);
511 }
512 else if (oldtree && !newtree && same(current, oldtree)) {
513 /* 10 or 11 */
514 return deleted_entry(oldtree, current);
515 }
516 else if (oldtree && newtree &&
517 same(current, oldtree) && !same(current, newtree)) {
518 /* 20 or 21 */
519 return merged_entry(newtree, current);
520 }
521 else {
522 /* all other failures */
523 if (oldtree)
524 reject_merge(oldtree);
525 if (current)
526 reject_merge(current);
527 if (newtree)
528 reject_merge(newtree);
529 return -1;
530 }
531 }
532 else if (newtree)
533 return merged_entry(newtree, current);
534 else
535 return deleted_entry(oldtree, current);
536 }
537
538 /*
539 * One-way merge.
540 *
541 * The rule is:
542 * - take the stat information from stage0, take the data from stage1
543 */
544 static int oneway_merge(struct cache_entry **src)
545 {
546 struct cache_entry *old = src[0];
547 struct cache_entry *a = src[1];
548
549 if (merge_size != 1)
550 return error("Cannot do a oneway merge of %d trees\n",
551 merge_size);
552
553 if (!a)
554 return 0;
555 if (old && same(old, a)) {
556 return keep_entry(old);
557 }
558 return merged_entry(a, NULL);
559 }
560
561 static int read_cache_unmerged(void)
562 {
563 int i, deleted;
564 struct cache_entry **dst;
565
566 read_cache();
567 dst = active_cache;
568 deleted = 0;
569 for (i = 0; i < active_nr; i++) {
570 struct cache_entry *ce = active_cache[i];
571 if (ce_stage(ce)) {
572 deleted++;
573 continue;
574 }
575 if (deleted)
576 *dst = ce;
577 dst++;
578 }
579 active_nr -= deleted;
580 return deleted;
581 }
582
583 static const char read_tree_usage[] = "git-read-tree (<sha> | -m [-u | -i] <sha1> [<sha2> [<sha3>]])";
584
585 static struct cache_file cache_file;
586
587 int main(int argc, char **argv)
588 {
589 int i, newfd, reset, stage = 0;
590 unsigned char sha1[20];
591 merge_fn_t fn = NULL;
592
593 newfd = hold_index_file_for_update(&cache_file, get_index_file());
594 if (newfd < 0)
595 die("unable to create new cachefile");
596
597 merge = 0;
598 reset = 0;
599 for (i = 1; i < argc; i++) {
600 const char *arg = argv[i];
601
602 /* "-u" means "update", meaning that a merge will update
603 * the working tree.
604 */
605 if (!strcmp(arg, "-u")) {
606 update = 1;
607 continue;
608 }
609
610 /* "-i" means "index only", meaning that a merge will
611 * not even look at the working tree.
612 */
613 if (!strcmp(arg, "-i")) {
614 index_only = 1;
615 continue;
616 }
617
618 /* This differs from "-m" in that we'll silently ignore unmerged entries */
619 if (!strcmp(arg, "--reset")) {
620 if (stage || merge)
621 usage(read_tree_usage);
622 reset = 1;
623 merge = 1;
624 stage = 1;
625 read_cache_unmerged();
626 continue;
627 }
628
629 if (!strcmp(arg, "--head")) {
630 head_idx = stage - 1;
631 fn = threeway_merge;
632 }
633
634 /* "-m" stands for "merge", meaning we start in stage 1 */
635 if (!strcmp(arg, "-m")) {
636 if (stage || merge)
637 usage(read_tree_usage);
638 if (read_cache_unmerged())
639 die("you need to resolve your current index first");
640 stage = 1;
641 merge = 1;
642 continue;
643 }
644
645 /* using -u and -i at the same time makes no sense */
646 if (1 < index_only + update)
647 usage(read_tree_usage);
648
649 if (get_sha1(arg, sha1) < 0)
650 usage(read_tree_usage);
651 if (list_tree(sha1) < 0)
652 die("failed to unpack tree object %s", arg);
653 stage++;
654 }
655 if (update && !merge)
656 usage(read_tree_usage);
657 if (merge && !fn) {
658 if (stage < 2)
659 die("just how do you expect me to merge %d trees?", stage-1);
660 switch (stage - 1) {
661 case 1:
662 fn = oneway_merge;
663 break;
664 case 2:
665 fn = twoway_merge;
666 break;
667 case 3:
668 fn = threeway_merge;
669 break;
670 default:
671 fn = threeway_merge;
672 break;
673 }
674 }
675
676 if (head_idx < 0) {
677 if (stage - 1 >= 3)
678 head_idx = stage - 2;
679 else
680 head_idx = 1;
681 }
682
683 unpack_trees(fn);
684 if (write_cache(newfd, active_cache, active_nr) ||
685 commit_index_file(&cache_file))
686 die("unable to write new index file");
687 return 0;
688 }