diff: make default rename detection limit configurable.
[git/git.git] / diff.c
1 /*
2 * Copyright (C) 2005 Junio C Hamano
3 */
4 #include <sys/types.h>
5 #include <sys/wait.h>
6 #include <signal.h>
7 #include "cache.h"
8 #include "quote.h"
9 #include "diff.h"
10 #include "diffcore.h"
11
/* Options handed to the "diff" command line built by builtin_diff();
 * external_diff() replaces this with $GIT_DIFF_OPTS when that is set.
 */
static const char *diff_opts = "-pu";

/* Turned on by diff_setup_done() when DIFF_SETUP_USE_SIZE_CACHE is set;
 * while it is zero diff_populate_filespec() ignores its size_only argument.
 */
static int use_size_cache;

/* Value diff_setup_done() copies into options->rename_limit when rename
 * detection is enabled and the caller left the limit negative.
 */
int diff_rename_limit_default = -1;
17
/*
 * Return a newly allocated rendering of str for output.  When
 * quote_c_style() reports a non-zero length the quoted form is
 * produced into a buffer of that length plus one; otherwise the
 * string is duplicated unchanged.  A NULL str yields NULL.
 */
static char *quote_one(const char *str)
{
	int quoted_len;
	char *result;

	if (str == NULL)
		return NULL;

	quoted_len = quote_c_style(str, NULL, NULL, 0);
	if (quoted_len == 0)
		return strdup(str);

	result = xmalloc(quoted_len + 1);
	quote_c_style(str, result, NULL, 0);
	return result;
}
32
/*
 * Return a newly allocated concatenation of one and two.
 *
 * quote_c_style() is asked (with its last argument set to 1) how many
 * bytes each part needs.  If either part reports a non-zero length the
 * result is wrapped in a single pair of double quotes and both parts
 * are emitted through quote_c_style(); otherwise the two strings are
 * simply joined.
 */
static char *quote_two(const char *one, const char *two)
{
	int need_one = quote_c_style(one, NULL, NULL, 1);
	int need_two = quote_c_style(two, NULL, NULL, 1);
	char *xp;

	if (need_one + need_two) {
		/* A part that reported zero still occupies its plain
		 * length in the output buffer.
		 */
		if (!need_one)
			need_one = strlen(one);
		if (!need_two)
			need_two = strlen(two);

		/* opening quote + both parts + closing quote + NUL */
		xp = xmalloc(need_one + need_two + 3);
		xp[0] = '"';
		quote_c_style(one, xp + 1, NULL, 1);
		quote_c_style(two, xp + need_one + 1, NULL, 1);
		strcpy(xp + need_one + need_two + 1, "\"");
		return xp;
	}
	need_one = strlen(one);
	need_two = strlen(two);
	xp = xmalloc(need_one + need_two + 1);
	strcpy(xp, one);
	strcpy(xp + need_one, two);
	return xp;
}
57
58 static const char *external_diff(void)
59 {
60 static const char *external_diff_cmd = NULL;
61 static int done_preparing = 0;
62 const char *env_diff_opts;
63
64 if (done_preparing)
65 return external_diff_cmd;
66
67 /*
68 * Default values above are meant to match the
69 * Linux kernel development style. Examples of
70 * alternative styles you can specify via environment
71 * variables are:
72 *
73 * GIT_DIFF_OPTS="-c";
74 */
75 external_diff_cmd = getenv("GIT_EXTERNAL_DIFF");
76
77 /* In case external diff fails... */
78 env_diff_opts = getenv("GIT_DIFF_OPTS");
79 if (env_diff_opts) diff_opts = env_diff_opts;
80
81 done_preparing = 1;
82 return external_diff_cmd;
83 }
84
/* Size of the buffer that receives the path of a generated temporary file. */
#define TEMPFILE_PATH_LEN		50

/* Description of one side of a comparison as handed to the diff program;
 * diff_temp[0] and diff_temp[1] are the two sides.
 */
static struct diff_tempfile {
	const char *name; /* filename external diff should read from */
	char hex[41]; /* hex object name, or "." for the /dev/null side */
	char mode[10]; /* mode printed with "%06o", or "." for /dev/null */
	char tmp_path[TEMPFILE_PATH_LEN]; /* name points here when a temporary file was created */
} diff_temp[2];
93
/*
 * Count the lines in the named file.  A final line that lacks a
 * trailing newline still counts as a line; a file with no bytes at
 * all has zero lines.
 *
 * A file that cannot be opened is reported on stderr and treated as
 * empty (0 is returned) instead of crashing inside fgetc().
 */
static int count_lines(const char *filename)
{
	FILE *in;
	int ch;
	int count = 0;
	int seen_any = 0, nl_just_seen = 0;

	in = fopen(filename, "r");
	if (!in) {
		fprintf(stderr, "cannot open %s: %s\n",
			filename, strerror(errno));
		return 0;
	}
	while ((ch = fgetc(in)) != EOF) {
		seen_any = 1;
		nl_just_seen = (ch == '\n');
		if (nl_just_seen)
			count++;
	}
	fclose(in);
	if (seen_any && !nl_just_seen)
		count++; /* no trailing newline */
	return count;
}
117
/*
 * Print the range part of a hunk header for a file of count lines
 * that starts at line 1: "0,0" for no lines, "1" for a single line
 * and "1,<count>" otherwise.
 */
static void print_line_count(int count)
{
	if (count == 0)
		printf("0,0");
	else if (count == 1)
		printf("1");
	else
		printf("1,%d", count);
}
132
/*
 * Copy the named file to stdout, writing the prefix character at the
 * start of every line.  When the file does not end with a newline the
 * "\ No newline at end of file" marker is appended.
 *
 * A file that cannot be opened is reported on stderr and nothing is
 * copied, instead of crashing inside fgetc().
 */
static void copy_file(int prefix, const char *filename)
{
	FILE *in;
	int ch, nl_just_seen = 1;

	in = fopen(filename, "r");
	if (!in) {
		fprintf(stderr, "cannot open %s: %s\n",
			filename, strerror(errno));
		return;
	}
	while ((ch = fgetc(in)) != EOF) {
		if (nl_just_seen)
			putchar(prefix);
		putchar(ch);
		nl_just_seen = (ch == '\n');
	}
	fclose(in);
	if (!nl_just_seen)
		printf("\n\\ No newline at end of file\n");
}
151
152 static void emit_rewrite_diff(const char *name_a,
153 const char *name_b,
154 struct diff_tempfile *temp)
155 {
156 /* Use temp[i].name as input, name_a and name_b as labels */
157 int lc_a, lc_b;
158 lc_a = count_lines(temp[0].name);
159 lc_b = count_lines(temp[1].name);
160 printf("--- %s\n+++ %s\n@@ -", name_a, name_b);
161 print_line_count(lc_a);
162 printf(" +");
163 print_line_count(lc_b);
164 printf(" @@\n");
165 if (lc_a)
166 copy_file('-', temp[0].name);
167 if (lc_b)
168 copy_file('+', temp[1].name);
169 }
170
/*
 * Print the "diff --git" header and the extended header lines for
 * one file pair, then either emit the body directly (complete
 * rewrite), exit (the first three characters of the two mode strings
 * differ), or replace the process with a shell running "diff".
 *
 * name_a, name_b: paths used for the labels ("a/" and "b/" prefixed).
 * temp: the two sides; temp[i].name is the file diff reads.
 * xfrm_msg: extra header text printed with puts() when non-empty.
 * complete_rewrite: non-zero to print the whole-file rewrite form.
 *
 * The exit(0) calls below terminate the calling process.
 */
static void builtin_diff(const char *name_a,
			 const char *name_b,
			 struct diff_tempfile *temp,
			 const char *xfrm_msg,
			 int complete_rewrite)
{
	int i, next_at, cmd_size;
	const char *const diff_cmd = "diff -L%s -L%s";
	const char *const diff_arg = "-- %s %s||:"; /* "||:" is to return 0 */
	const char *input_name_sq[2];
	const char *label_path[2];
	char *cmd;

	/* diff_cmd and diff_arg have 4 %s in total which makes
	 * the sum of these strings 8 bytes larger than required.
	 * we use 2 spaces around diff-opts, and we need to count
	 * terminating NUL; we used to subtract 5 here, but we do not
	 * care about small leaks in this subprocess that is about
	 * to exec "diff" anymore.
	 */
	cmd_size = (strlen(diff_cmd) + strlen(diff_opts) + strlen(diff_arg)
		    + 128);

	/* Shell-quote both input names and both labels, growing
	 * cmd_size by the length of each quoted string.
	 */
	for (i = 0; i < 2; i++) {
		input_name_sq[i] = sq_quote(temp[i].name);
		if (!strcmp(temp[i].name, "/dev/null"))
			label_path[i] = "/dev/null";
		else if (!i)
			label_path[i] = sq_quote(quote_two("a/", name_a));
		else
			label_path[i] = sq_quote(quote_two("b/", name_b));
		cmd_size += (strlen(label_path[i]) + strlen(input_name_sq[i]));
	}

	cmd = xmalloc(cmd_size);

	/* Assemble: diff -L<a> -L<b> <opts> -- <file1> <file2>||: */
	next_at = 0;
	next_at += snprintf(cmd+next_at, cmd_size-next_at,
			    diff_cmd, label_path[0], label_path[1]);
	next_at += snprintf(cmd+next_at, cmd_size-next_at,
			    " %s ", diff_opts);
	next_at += snprintf(cmd+next_at, cmd_size-next_at,
			    diff_arg, input_name_sq[0], input_name_sq[1]);

	printf("diff --git %s %s\n",
	       quote_two("a/", name_a), quote_two("b/", name_b));
	/* Only the literal "/dev/null" label starts with '/'; the
	 * other labels come from sq_quote().
	 */
	if (label_path[0][0] == '/') {
		/* dev/null */
		printf("new file mode %s\n", temp[1].mode);
		if (xfrm_msg && xfrm_msg[0])
			puts(xfrm_msg);
	}
	else if (label_path[1][0] == '/') {
		printf("deleted file mode %s\n", temp[0].mode);
		if (xfrm_msg && xfrm_msg[0])
			puts(xfrm_msg);
	}
	else {
		if (strcmp(temp[0].mode, temp[1].mode)) {
			printf("old mode %s\n", temp[0].mode);
			printf("new mode %s\n", temp[1].mode);
		}
		if (xfrm_msg && xfrm_msg[0])
			puts(xfrm_msg);
		if (strncmp(temp[0].mode, temp[1].mode, 3))
			/* we do not run diff between different kind
			 * of objects.
			 */
			exit(0);
		if (complete_rewrite) {
			fflush(NULL);
			emit_rewrite_diff(name_a, name_b, temp);
			exit(0);
		}
	}
	/* Flush our own output before the shell starts writing. */
	fflush(NULL);
	/* NOTE(review): if execlp() fails this function simply returns. */
	execlp("/bin/sh","sh", "-c", cmd, NULL);
}
249
250 struct diff_filespec *alloc_filespec(const char *path)
251 {
252 int namelen = strlen(path);
253 struct diff_filespec *spec = xmalloc(sizeof(*spec) + namelen + 1);
254
255 memset(spec, 0, sizeof(*spec));
256 spec->path = (char *)(spec + 1);
257 memcpy(spec->path, path, namelen+1);
258 return spec;
259 }
260
261 void fill_filespec(struct diff_filespec *spec, const unsigned char *sha1,
262 unsigned short mode)
263 {
264 if (mode) {
265 spec->mode = DIFF_FILE_CANON_MODE(mode);
266 memcpy(spec->sha1, sha1, 20);
267 spec->sha1_valid = !!memcmp(sha1, null_sha1, 20);
268 }
269 }
270
271 /*
272 * Given a name and sha1 pair, if the dircache tells us the file in
273 * the work tree has that object contents, return true, so that
274 * prepare_temp_file() does not have to inflate and extract.
275 */
276 static int work_tree_matches(const char *name, const unsigned char *sha1)
277 {
278 struct cache_entry *ce;
279 struct stat st;
280 int pos, len;
281
282 /* We do not read the cache ourselves here, because the
283 * benchmark with my previous version that always reads cache
284 * shows that it makes things worse for diff-tree comparing
285 * two linux-2.6 kernel trees in an already checked out work
286 * tree. This is because most diff-tree comparisons deal with
287 * only a small number of files, while reading the cache is
288 * expensive for a large project, and its cost outweighs the
289 * savings we get by not inflating the object to a temporary
290 * file. Practically, this code only helps when we are used
291 * by diff-cache --cached, which does read the cache before
292 * calling us.
293 */
294 if (!active_cache)
295 return 0;
296
297 len = strlen(name);
298 pos = cache_name_pos(name, len);
299 if (pos < 0)
300 return 0;
301 ce = active_cache[pos];
302 if ((lstat(name, &st) < 0) ||
303 !S_ISREG(st.st_mode) || /* careful! */
304 ce_match_stat(ce, &st) ||
305 memcmp(sha1, ce->sha1, 20))
306 return 0;
307 /* we return 1 only when we can stat, it is a regular file,
308 * stat information matches, and sha1 recorded in the cache
309 * matches. I.e. we know the file in the work tree really is
310 * the same as the <name, sha1> pair.
311 */
312 return 1;
313 }
314
/* Table remembering the size recorded for an object name.  The array
 * holds pointers to individually allocated entries and is searched
 * and extended by locate_size_cache().
 */
static struct sha1_size_cache {
	unsigned char sha1[20]; /* lookup key */
	unsigned long size; /* size stored for that key */
} **sha1_size_cache;
/* Entries in use, and slots currently allocated, in sha1_size_cache. */
static int sha1_size_cache_nr, sha1_size_cache_alloc;
320
321 static struct sha1_size_cache *locate_size_cache(unsigned char *sha1,
322 int find_only,
323 unsigned long size)
324 {
325 int first, last;
326 struct sha1_size_cache *e;
327
328 first = 0;
329 last = sha1_size_cache_nr;
330 while (last > first) {
331 int cmp, next = (last + first) >> 1;
332 e = sha1_size_cache[next];
333 cmp = memcmp(e->sha1, sha1, 20);
334 if (!cmp)
335 return e;
336 if (cmp < 0) {
337 last = next;
338 continue;
339 }
340 first = next+1;
341 }
342 /* not found */
343 if (find_only)
344 return NULL;
345 /* insert to make it at "first" */
346 if (sha1_size_cache_alloc <= sha1_size_cache_nr) {
347 sha1_size_cache_alloc = alloc_nr(sha1_size_cache_alloc);
348 sha1_size_cache = xrealloc(sha1_size_cache,
349 sha1_size_cache_alloc *
350 sizeof(*sha1_size_cache));
351 }
352 sha1_size_cache_nr++;
353 if (first < sha1_size_cache_nr)
354 memmove(sha1_size_cache + first + 1, sha1_size_cache + first,
355 (sha1_size_cache_nr - first - 1) *
356 sizeof(*sha1_size_cache));
357 e = xmalloc(sizeof(struct sha1_size_cache));
358 sha1_size_cache[first] = e;
359 memcpy(e->sha1, sha1, 20);
360 e->size = size;
361 return e;
362 }
363
364 /*
365 * While doing rename detection and pickaxe operation, we may need to
366 * grab the data for the blob (or file) for our own in-core comparison.
367 * diff_filespec has data and size fields for this purpose.
368 */
369 int diff_populate_filespec(struct diff_filespec *s, int size_only)
370 {
371 int err = 0;
372 if (!DIFF_FILE_VALID(s))
373 die("internal error: asking to populate invalid file.");
374 if (S_ISDIR(s->mode))
375 return -1;
376
377 if (!use_size_cache)
378 size_only = 0;
379
380 if (s->data)
381 return err;
382 if (!s->sha1_valid ||
383 work_tree_matches(s->path, s->sha1)) {
384 struct stat st;
385 int fd;
386 if (lstat(s->path, &st) < 0) {
387 if (errno == ENOENT) {
388 err_empty:
389 err = -1;
390 empty:
391 s->data = "";
392 s->size = 0;
393 return err;
394 }
395 }
396 s->size = st.st_size;
397 if (!s->size)
398 goto empty;
399 if (size_only)
400 return 0;
401 if (S_ISLNK(st.st_mode)) {
402 int ret;
403 s->data = xmalloc(s->size);
404 s->should_free = 1;
405 ret = readlink(s->path, s->data, s->size);
406 if (ret < 0) {
407 free(s->data);
408 goto err_empty;
409 }
410 return 0;
411 }
412 fd = open(s->path, O_RDONLY);
413 if (fd < 0)
414 goto err_empty;
415 s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
416 close(fd);
417 if (s->data == MAP_FAILED)
418 goto err_empty;
419 s->should_munmap = 1;
420 }
421 else {
422 char type[20];
423 struct sha1_size_cache *e;
424
425 if (size_only) {
426 e = locate_size_cache(s->sha1, 1, 0);
427 if (e) {
428 s->size = e->size;
429 return 0;
430 }
431 if (!sha1_object_info(s->sha1, type, &s->size))
432 locate_size_cache(s->sha1, 0, s->size);
433 }
434 else {
435 s->data = read_sha1_file(s->sha1, type, &s->size);
436 s->should_free = 1;
437 }
438 }
439 return 0;
440 }
441
442 void diff_free_filespec_data(struct diff_filespec *s)
443 {
444 if (s->should_free)
445 free(s->data);
446 else if (s->should_munmap)
447 munmap(s->data, s->size);
448 s->should_free = s->should_munmap = 0;
449 s->data = NULL;
450 }
451
452 static void prep_temp_blob(struct diff_tempfile *temp,
453 void *blob,
454 unsigned long size,
455 const unsigned char *sha1,
456 int mode)
457 {
458 int fd;
459
460 fd = git_mkstemp(temp->tmp_path, TEMPFILE_PATH_LEN, ".diff_XXXXXX");
461 if (fd < 0)
462 die("unable to create temp-file");
463 if (write(fd, blob, size) != size)
464 die("unable to write temp-file");
465 close(fd);
466 temp->name = temp->tmp_path;
467 strcpy(temp->hex, sha1_to_hex(sha1));
468 temp->hex[40] = 0;
469 sprintf(temp->mode, "%06o", mode);
470 }
471
472 static void prepare_temp_file(const char *name,
473 struct diff_tempfile *temp,
474 struct diff_filespec *one)
475 {
476 if (!DIFF_FILE_VALID(one)) {
477 not_a_valid_file:
478 /* A '-' entry produces this for file-2, and
479 * a '+' entry produces this for file-1.
480 */
481 temp->name = "/dev/null";
482 strcpy(temp->hex, ".");
483 strcpy(temp->mode, ".");
484 return;
485 }
486
487 if (!one->sha1_valid ||
488 work_tree_matches(name, one->sha1)) {
489 struct stat st;
490 if (lstat(name, &st) < 0) {
491 if (errno == ENOENT)
492 goto not_a_valid_file;
493 die("stat(%s): %s", name, strerror(errno));
494 }
495 if (S_ISLNK(st.st_mode)) {
496 int ret;
497 char *buf, buf_[1024];
498 buf = ((sizeof(buf_) < st.st_size) ?
499 xmalloc(st.st_size) : buf_);
500 ret = readlink(name, buf, st.st_size);
501 if (ret < 0)
502 die("readlink(%s)", name);
503 prep_temp_blob(temp, buf, st.st_size,
504 (one->sha1_valid ?
505 one->sha1 : null_sha1),
506 (one->sha1_valid ?
507 one->mode : S_IFLNK));
508 }
509 else {
510 /* we can borrow from the file in the work tree */
511 temp->name = name;
512 if (!one->sha1_valid)
513 strcpy(temp->hex, sha1_to_hex(null_sha1));
514 else
515 strcpy(temp->hex, sha1_to_hex(one->sha1));
516 /* Even though we may sometimes borrow the
517 * contents from the work tree, we always want
518 * one->mode. mode is trustworthy even when
519 * !(one->sha1_valid), as long as
520 * DIFF_FILE_VALID(one).
521 */
522 sprintf(temp->mode, "%06o", one->mode);
523 }
524 return;
525 }
526 else {
527 if (diff_populate_filespec(one, 0))
528 die("cannot read data blob for %s", one->path);
529 prep_temp_blob(temp, one->data, one->size,
530 one->sha1, one->mode);
531 }
532 }
533
534 static void remove_tempfile(void)
535 {
536 int i;
537
538 for (i = 0; i < 2; i++)
539 if (diff_temp[i].name == diff_temp[i].tmp_path) {
540 unlink(diff_temp[i].name);
541 diff_temp[i].name = NULL;
542 }
543 }
544
/*
 * Signal handler: remove our temporary files, then restore the
 * default action for signo and raise it again so that the process
 * still terminates from the signal instead of swallowing it.
 */
static void remove_tempfile_on_signal(int signo)
{
	remove_tempfile();
	signal(signo, SIG_DFL);
	raise(signo);
}
549
550 /* An external diff command takes:
551 *
552 * diff-cmd name infile1 infile1-sha1 infile1-mode \
553 * infile2 infile2-sha1 infile2-mode [ rename-to ]
554 *
555 */
556 static void run_external_diff(const char *pgm,
557 const char *name,
558 const char *other,
559 struct diff_filespec *one,
560 struct diff_filespec *two,
561 const char *xfrm_msg,
562 int complete_rewrite)
563 {
564 struct diff_tempfile *temp = diff_temp;
565 pid_t pid;
566 int status;
567 static int atexit_asked = 0;
568 const char *othername;
569
570 othername = (other? other : name);
571 if (one && two) {
572 prepare_temp_file(name, &temp[0], one);
573 prepare_temp_file(othername, &temp[1], two);
574 if (! atexit_asked &&
575 (temp[0].name == temp[0].tmp_path ||
576 temp[1].name == temp[1].tmp_path)) {
577 atexit_asked = 1;
578 atexit(remove_tempfile);
579 }
580 signal(SIGINT, remove_tempfile_on_signal);
581 }
582
583 fflush(NULL);
584 pid = fork();
585 if (pid < 0)
586 die("unable to fork");
587 if (!pid) {
588 if (pgm) {
589 if (one && two) {
590 const char *exec_arg[10];
591 const char **arg = &exec_arg[0];
592 *arg++ = pgm;
593 *arg++ = name;
594 *arg++ = temp[0].name;
595 *arg++ = temp[0].hex;
596 *arg++ = temp[0].mode;
597 *arg++ = temp[1].name;
598 *arg++ = temp[1].hex;
599 *arg++ = temp[1].mode;
600 if (other) {
601 *arg++ = other;
602 *arg++ = xfrm_msg;
603 }
604 *arg = NULL;
605 execvp(pgm, (char *const*) exec_arg);
606 }
607 else
608 execlp(pgm, pgm, name, NULL);
609 }
610 /*
611 * otherwise we use the built-in one.
612 */
613 if (one && two)
614 builtin_diff(name, othername, temp, xfrm_msg,
615 complete_rewrite);
616 else
617 printf("* Unmerged path %s\n", name);
618 exit(0);
619 }
620 if (waitpid(pid, &status, 0) < 0 ||
621 !WIFEXITED(status) || WEXITSTATUS(status)) {
622 /* Earlier we did not check the exit status because
623 * diff exits non-zero if files are different, and
624 * we are not interested in knowing that. It was a
625 * mistake which made it harder to quit a diff-*
626 * session that uses the git-apply-patch-script as
627 * the GIT_EXTERNAL_DIFF. A custom GIT_EXTERNAL_DIFF
628 * should also exit non-zero only when it wants to
629 * abort the entire diff-* session.
630 */
631 remove_tempfile();
632 fprintf(stderr, "external diff died, stopping at %s.\n", name);
633 exit(1);
634 }
635 remove_tempfile();
636 }
637
638 static void diff_fill_sha1_info(struct diff_filespec *one)
639 {
640 if (DIFF_FILE_VALID(one)) {
641 if (!one->sha1_valid) {
642 struct stat st;
643 if (stat(one->path, &st) < 0)
644 die("stat %s", one->path);
645 if (index_path(one->sha1, one->path, &st, 0))
646 die("cannot hash %s\n", one->path);
647 }
648 }
649 else
650 memset(one->sha1, 0, 20);
651 }
652
/*
 * Produce the patch output for one file pair.
 *
 * Builds the extended header text (similarity / dissimilarity index,
 * copy or rename lines, and the "index" line) in msg and hands the
 * pair to run_external_diff().  Without an external diff program, a
 * pair whose two valid sides differ in the S_IFMT bits of their mode
 * is run as two separate comparisons against empty filespecs.
 */
static void run_diff(struct diff_filepair *p)
{
	const char *pgm = external_diff();
	char msg[PATH_MAX*2+300], *xfrm_msg;
	struct diff_filespec *one;
	struct diff_filespec *two;
	const char *name;
	const char *other;
	char *name_munged, *other_munged;
	int complete_rewrite = 0;
	int len;

	if (DIFF_PAIR_UNMERGED(p)) {
		/* unmerged */
		run_external_diff(pgm, p->one->path, NULL, NULL, NULL, NULL,
				  0);
		return;
	}

	name = p->one->path;
	/* other stays NULL when both sides have the same path */
	other = (strcmp(name, p->two->path) ? p->two->path : NULL);
	name_munged = quote_one(name);
	other_munged = quote_one(other);
	one = p->one; two = p->two;

	diff_fill_sha1_info(one);
	diff_fill_sha1_info(two);

	len = 0;
	switch (p->status) {
	case DIFF_STATUS_COPIED:
		len += snprintf(msg + len, sizeof(msg) - len,
				"similarity index %d%%\n"
				"copy from %s\n"
				"copy to %s\n",
				(int)(0.5 + p->score * 100.0/MAX_SCORE),
				name_munged, other_munged);
		break;
	case DIFF_STATUS_RENAMED:
		len += snprintf(msg + len, sizeof(msg) - len,
				"similarity index %d%%\n"
				"rename from %s\n"
				"rename to %s\n",
				(int)(0.5 + p->score * 100.0/MAX_SCORE),
				name_munged, other_munged);
		break;
	case DIFF_STATUS_MODIFIED:
		/* a modified pair that carries a score is shown as a
		 * complete rewrite
		 */
		if (p->score) {
			len += snprintf(msg + len, sizeof(msg) - len,
					"dissimilarity index %d%%\n",
					(int)(0.5 + p->score *
					      100.0/MAX_SCORE));
			complete_rewrite = 1;
			break;
		}
		/* fallthru */
	default:
		/* nothing */
		;
	}

	if (memcmp(one->sha1, two->sha1, 20)) {
		char one_sha1[41];
		/* copy the first hex string out before sha1_to_hex()
		 * is called again for the second one
		 */
		memcpy(one_sha1, sha1_to_hex(one->sha1), 41);

		len += snprintf(msg + len, sizeof(msg) - len,
				"index %.7s..%.7s", one_sha1,
				sha1_to_hex(two->sha1));
		if (one->mode == two->mode)
			len += snprintf(msg + len, sizeof(msg) - len,
					" %06o", one->mode);
		len += snprintf(msg + len, sizeof(msg) - len, "\n");
	}

	/* drop the final newline; the message is printed with puts() */
	if (len)
		msg[--len] = 0;
	xfrm_msg = len ? msg : NULL;

	if (!pgm &&
	    DIFF_FILE_VALID(one) && DIFF_FILE_VALID(two) &&
	    (S_IFMT & one->mode) != (S_IFMT & two->mode)) {
		/* a filepair that changes between file and symlink
		 * needs to be split into deletion and creation.
		 */
		struct diff_filespec *null = alloc_filespec(two->path);
		run_external_diff(NULL, name, other, one, null, xfrm_msg, 0);
		free(null);
		null = alloc_filespec(one->path);
		run_external_diff(NULL, name, other, null, two, xfrm_msg, 0);
		free(null);
	}
	else
		run_external_diff(pgm, name, other, one, two, xfrm_msg,
				  complete_rewrite);

	free(name_munged);
	free(other_munged);
}
751
752 void diff_setup(struct diff_options *options)
753 {
754 memset(options, 0, sizeof(*options));
755 options->output_format = DIFF_FORMAT_RAW;
756 options->line_termination = '\n';
757 options->break_opt = -1;
758 options->rename_limit = -1;
759
760 options->change = diff_change;
761 options->add_remove = diff_addremove;
762 }
763
764 int diff_setup_done(struct diff_options *options)
765 {
766 if ((options->find_copies_harder &&
767 options->detect_rename != DIFF_DETECT_COPY) ||
768 (0 <= options->rename_limit && !options->detect_rename))
769 return -1;
770 if (options->detect_rename && options->rename_limit < 0)
771 options->rename_limit = diff_rename_limit_default;
772 if (options->setup & DIFF_SETUP_USE_CACHE) {
773 if (!active_cache)
774 /* read-cache does not die even when it fails
775 * so it is safe for us to do this here. Also
776 * it does not smudge active_cache or active_nr
777 * when it fails, so we do not have to worry about
778 * cleaning it up oufselves either.
779 */
780 read_cache();
781 }
782 if (options->setup & DIFF_SETUP_USE_SIZE_CACHE)
783 use_size_cache = 1;
784
785 return 0;
786 }
787
788 int diff_opt_parse(struct diff_options *options, const char **av, int ac)
789 {
790 const char *arg = av[0];
791 if (!strcmp(arg, "-p") || !strcmp(arg, "-u"))
792 options->output_format = DIFF_FORMAT_PATCH;
793 else if (!strcmp(arg, "-z"))
794 options->line_termination = 0;
795 else if (!strncmp(arg, "-l", 2))
796 options->rename_limit = strtoul(arg+2, NULL, 10);
797 else if (!strcmp(arg, "--name-only"))
798 options->output_format = DIFF_FORMAT_NAME;
799 else if (!strcmp(arg, "--name-status"))
800 options->output_format = DIFF_FORMAT_NAME_STATUS;
801 else if (!strcmp(arg, "-R"))
802 options->reverse_diff = 1;
803 else if (!strncmp(arg, "-S", 2))
804 options->pickaxe = arg + 2;
805 else if (!strcmp(arg, "-s"))
806 options->output_format = DIFF_FORMAT_NO_OUTPUT;
807 else if (!strncmp(arg, "-O", 2))
808 options->orderfile = arg + 2;
809 else if (!strncmp(arg, "--diff-filter=", 14))
810 options->filter = arg + 14;
811 else if (!strcmp(arg, "--pickaxe-all"))
812 options->pickaxe_opts = DIFF_PICKAXE_ALL;
813 else if (!strncmp(arg, "-B", 2)) {
814 if ((options->break_opt =
815 diff_scoreopt_parse(arg)) == -1)
816 return -1;
817 }
818 else if (!strncmp(arg, "-M", 2)) {
819 if ((options->rename_score =
820 diff_scoreopt_parse(arg)) == -1)
821 return -1;
822 options->detect_rename = DIFF_DETECT_RENAME;
823 }
824 else if (!strncmp(arg, "-C", 2)) {
825 if ((options->rename_score =
826 diff_scoreopt_parse(arg)) == -1)
827 return -1;
828 options->detect_rename = DIFF_DETECT_COPY;
829 }
830 else if (!strcmp(arg, "--find-copies-harder"))
831 options->find_copies_harder = 1;
832 else
833 return 0;
834 return 1;
835 }
836
837 static int parse_num(const char **cp_p)
838 {
839 int num, scale, ch, cnt;
840 const char *cp = *cp_p;
841
842 cnt = num = 0;
843 scale = 1;
844 while ('0' <= (ch = *cp) && ch <= '9') {
845 if (cnt++ < 5) {
846 /* We simply ignore more than 5 digits precision. */
847 scale *= 10;
848 num = num * 10 + ch - '0';
849 }
850 cp++;
851 }
852 *cp_p = cp;
853
854 /* user says num divided by scale and we say internally that
855 * is MAX_SCORE * num / scale.
856 */
857 return (MAX_SCORE * num / scale);
858 }
859
/*
 * Parse a -M<num>, -C<num> or -B<num>[/<num>] option string.
 * Returns the first score in the low 16 bits combined with the
 * second (only available for -B, otherwise zero) shifted left by 16,
 * or -1 when the string is not one of those options or has trailing
 * garbage.
 */
int diff_scoreopt_parse(const char *opt)
{
	int first, second = 0, cmd;

	if (*opt++ != '-')
		return -1;
	cmd = *opt++;
	switch (cmd) {
	case 'M':
	case 'C':
	case 'B':
		break;
	default:
		return -1; /* that is not a -M, -C nor -B option */
	}

	first = parse_num(&opt);
	if (cmd == 'B' && *opt != 0) {
		if (*opt != '/')
			return -1; /* we expect -B80/99 or -B80 */
		opt++;
		second = parse_num(&opt);
	}
	if (*opt != 0)
		return -1;
	return first | (second << 16);
}
887
/* The queue of file pairs that the functions in this file operate on. */
struct diff_queue_struct diff_queued_diff;
889
890 void diff_q(struct diff_queue_struct *queue, struct diff_filepair *dp)
891 {
892 if (queue->alloc <= queue->nr) {
893 queue->alloc = alloc_nr(queue->alloc);
894 queue->queue = xrealloc(queue->queue,
895 sizeof(dp) * queue->alloc);
896 }
897 queue->queue[queue->nr++] = dp;
898 }
899
900 struct diff_filepair *diff_queue(struct diff_queue_struct *queue,
901 struct diff_filespec *one,
902 struct diff_filespec *two)
903 {
904 struct diff_filepair *dp = xmalloc(sizeof(*dp));
905 dp->one = one;
906 dp->two = two;
907 dp->score = 0;
908 dp->status = 0;
909 dp->source_stays = 0;
910 dp->broken_pair = 0;
911 if (queue)
912 diff_q(queue, dp);
913 return dp;
914 }
915
916 void diff_free_filepair(struct diff_filepair *p)
917 {
918 diff_free_filespec_data(p->one);
919 diff_free_filespec_data(p->two);
920 free(p->one);
921 free(p->two);
922 free(p);
923 }
924
925 static void diff_flush_raw(struct diff_filepair *p,
926 int line_termination,
927 int inter_name_termination,
928 int output_format)
929 {
930 int two_paths;
931 char status[10];
932 const char *path_one, *path_two;
933
934 path_one = p->one->path;
935 path_two = p->two->path;
936 if (line_termination) {
937 path_one = quote_one(path_one);
938 path_two = quote_one(path_two);
939 }
940
941 if (p->score)
942 sprintf(status, "%c%03d", p->status,
943 (int)(0.5 + p->score * 100.0/MAX_SCORE));
944 else {
945 status[0] = p->status;
946 status[1] = 0;
947 }
948 switch (p->status) {
949 case DIFF_STATUS_COPIED:
950 case DIFF_STATUS_RENAMED:
951 two_paths = 1;
952 break;
953 case DIFF_STATUS_ADDED:
954 case DIFF_STATUS_DELETED:
955 two_paths = 0;
956 break;
957 default:
958 two_paths = 0;
959 break;
960 }
961 if (output_format != DIFF_FORMAT_NAME_STATUS) {
962 printf(":%06o %06o %s ",
963 p->one->mode, p->two->mode, sha1_to_hex(p->one->sha1));
964 printf("%s ", sha1_to_hex(p->two->sha1));
965 }
966 printf("%s%c%s", status, inter_name_termination, path_one);
967 if (two_paths)
968 printf("%c%s", inter_name_termination, path_two);
969 putchar(line_termination);
970 if (path_one != p->one->path)
971 free((void*)path_one);
972 if (path_two != p->two->path)
973 free((void*)path_two);
974 }
975
976 static void diff_flush_name(struct diff_filepair *p,
977 int inter_name_termination,
978 int line_termination)
979 {
980 char *path = p->two->path;
981
982 if (line_termination)
983 path = quote_one(p->two->path);
984 else
985 path = p->two->path;
986 printf("%s%c", path, line_termination);
987 if (p->two->path != path)
988 free(path);
989 }
990
991 int diff_unmodified_pair(struct diff_filepair *p)
992 {
993 /* This function is written stricter than necessary to support
994 * the currently implemented transformers, but the idea is to
995 * let transformers to produce diff_filepairs any way they want,
996 * and filter and clean them up here before producing the output.
997 */
998 struct diff_filespec *one, *two;
999
1000 if (DIFF_PAIR_UNMERGED(p))
1001 return 0; /* unmerged is interesting */
1002
1003 one = p->one;
1004 two = p->two;
1005
1006 /* deletion, addition, mode or type change
1007 * and rename are all interesting.
1008 */
1009 if (DIFF_FILE_VALID(one) != DIFF_FILE_VALID(two) ||
1010 DIFF_PAIR_MODE_CHANGED(p) ||
1011 strcmp(one->path, two->path))
1012 return 0;
1013
1014 /* both are valid and point at the same path. that is, we are
1015 * dealing with a change.
1016 */
1017 if (one->sha1_valid && two->sha1_valid &&
1018 !memcmp(one->sha1, two->sha1, sizeof(one->sha1)))
1019 return 1; /* no change */
1020 if (!one->sha1_valid && !two->sha1_valid)
1021 return 1; /* both look at the same file on the filesystem. */
1022 return 0;
1023 }
1024
1025 static void diff_flush_patch(struct diff_filepair *p)
1026 {
1027 if (diff_unmodified_pair(p))
1028 return;
1029
1030 if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) ||
1031 (DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode)))
1032 return; /* no tree diffs in patch format */
1033
1034 run_diff(p);
1035 }
1036
1037 int diff_queue_is_empty(void)
1038 {
1039 struct diff_queue_struct *q = &diff_queued_diff;
1040 int i;
1041 for (i = 0; i < q->nr; i++)
1042 if (!diff_unmodified_pair(q->queue[i]))
1043 return 0;
1044 return 1;
1045 }
1046
1047 #if DIFF_DEBUG
1048 void diff_debug_filespec(struct diff_filespec *s, int x, const char *one)
1049 {
1050 fprintf(stderr, "queue[%d] %s (%s) %s %06o %s\n",
1051 x, one ? one : "",
1052 s->path,
1053 DIFF_FILE_VALID(s) ? "valid" : "invalid",
1054 s->mode,
1055 s->sha1_valid ? sha1_to_hex(s->sha1) : "");
1056 fprintf(stderr, "queue[%d] %s size %lu flags %d\n",
1057 x, one ? one : "",
1058 s->size, s->xfrm_flags);
1059 }
1060
1061 void diff_debug_filepair(const struct diff_filepair *p, int i)
1062 {
1063 diff_debug_filespec(p->one, i, "one");
1064 diff_debug_filespec(p->two, i, "two");
1065 fprintf(stderr, "score %d, status %c stays %d broken %d\n",
1066 p->score, p->status ? p->status : '?',
1067 p->source_stays, p->broken_pair);
1068 }
1069
1070 void diff_debug_queue(const char *msg, struct diff_queue_struct *q)
1071 {
1072 int i;
1073 if (msg)
1074 fprintf(stderr, "%s\n", msg);
1075 fprintf(stderr, "q->nr = %d\n", q->nr);
1076 for (i = 0; i < q->nr; i++) {
1077 struct diff_filepair *p = q->queue[i];
1078 diff_debug_filepair(p, i);
1079 }
1080 }
1081 #endif
1082
1083 static void diff_resolve_rename_copy(void)
1084 {
1085 int i, j;
1086 struct diff_filepair *p, *pp;
1087 struct diff_queue_struct *q = &diff_queued_diff;
1088
1089 diff_debug_queue("resolve-rename-copy", q);
1090
1091 for (i = 0; i < q->nr; i++) {
1092 p = q->queue[i];
1093 p->status = 0; /* undecided */
1094 if (DIFF_PAIR_UNMERGED(p))
1095 p->status = DIFF_STATUS_UNMERGED;
1096 else if (!DIFF_FILE_VALID(p->one))
1097 p->status = DIFF_STATUS_ADDED;
1098 else if (!DIFF_FILE_VALID(p->two))
1099 p->status = DIFF_STATUS_DELETED;
1100 else if (DIFF_PAIR_TYPE_CHANGED(p))
1101 p->status = DIFF_STATUS_TYPE_CHANGED;
1102
1103 /* from this point on, we are dealing with a pair
1104 * whose both sides are valid and of the same type, i.e.
1105 * either in-place edit or rename/copy edit.
1106 */
1107 else if (DIFF_PAIR_RENAME(p)) {
1108 if (p->source_stays) {
1109 p->status = DIFF_STATUS_COPIED;
1110 continue;
1111 }
1112 /* See if there is some other filepair that
1113 * copies from the same source as us. If so
1114 * we are a copy. Otherwise we are either a
1115 * copy if the path stays, or a rename if it
1116 * does not, but we already handled "stays" case.
1117 */
1118 for (j = i + 1; j < q->nr; j++) {
1119 pp = q->queue[j];
1120 if (strcmp(pp->one->path, p->one->path))
1121 continue; /* not us */
1122 if (!DIFF_PAIR_RENAME(pp))
1123 continue; /* not a rename/copy */
1124 /* pp is a rename/copy from the same source */
1125 p->status = DIFF_STATUS_COPIED;
1126 break;
1127 }
1128 if (!p->status)
1129 p->status = DIFF_STATUS_RENAMED;
1130 }
1131 else if (memcmp(p->one->sha1, p->two->sha1, 20) ||
1132 p->one->mode != p->two->mode)
1133 p->status = DIFF_STATUS_MODIFIED;
1134 else {
1135 /* This is a "no-change" entry and should not
1136 * happen anymore, but prepare for broken callers.
1137 */
1138 error("feeding unmodified %s to diffcore",
1139 p->one->path);
1140 p->status = DIFF_STATUS_UNKNOWN;
1141 }
1142 }
1143 diff_debug_queue("resolve-rename-copy done", q);
1144 }
1145
1146 void diff_flush(struct diff_options *options)
1147 {
1148 struct diff_queue_struct *q = &diff_queued_diff;
1149 int i;
1150 int inter_name_termination = '\t';
1151 int diff_output_format = options->output_format;
1152 int line_termination = options->line_termination;
1153
1154 if (!line_termination)
1155 inter_name_termination = 0;
1156
1157 for (i = 0; i < q->nr; i++) {
1158 struct diff_filepair *p = q->queue[i];
1159 if ((diff_output_format == DIFF_FORMAT_NO_OUTPUT) ||
1160 (p->status == DIFF_STATUS_UNKNOWN))
1161 continue;
1162 if (p->status == 0)
1163 die("internal error in diff-resolve-rename-copy");
1164 switch (diff_output_format) {
1165 case DIFF_FORMAT_PATCH:
1166 diff_flush_patch(p);
1167 break;
1168 case DIFF_FORMAT_RAW:
1169 case DIFF_FORMAT_NAME_STATUS:
1170 diff_flush_raw(p, line_termination,
1171 inter_name_termination,
1172 diff_output_format);
1173 break;
1174 case DIFF_FORMAT_NAME:
1175 diff_flush_name(p,
1176 inter_name_termination,
1177 line_termination);
1178 break;
1179 }
1180 diff_free_filepair(q->queue[i]);
1181 }
1182 free(q->queue);
1183 q->queue = NULL;
1184 q->nr = q->alloc = 0;
1185 }
1186
1187 static void diffcore_apply_filter(const char *filter)
1188 {
1189 int i;
1190 struct diff_queue_struct *q = &diff_queued_diff;
1191 struct diff_queue_struct outq;
1192 outq.queue = NULL;
1193 outq.nr = outq.alloc = 0;
1194
1195 if (!filter)
1196 return;
1197
1198 if (strchr(filter, DIFF_STATUS_FILTER_AON)) {
1199 int found;
1200 for (i = found = 0; !found && i < q->nr; i++) {
1201 struct diff_filepair *p = q->queue[i];
1202 if (((p->status == DIFF_STATUS_MODIFIED) &&
1203 ((p->score &&
1204 strchr(filter, DIFF_STATUS_FILTER_BROKEN)) ||
1205 (!p->score &&
1206 strchr(filter, DIFF_STATUS_MODIFIED)))) ||
1207 ((p->status != DIFF_STATUS_MODIFIED) &&
1208 strchr(filter, p->status)))
1209 found++;
1210 }
1211 if (found)
1212 return;
1213
1214 /* otherwise we will clear the whole queue
1215 * by copying the empty outq at the end of this
1216 * function, but first clear the current entries
1217 * in the queue.
1218 */
1219 for (i = 0; i < q->nr; i++)
1220 diff_free_filepair(q->queue[i]);
1221 }
1222 else {
1223 /* Only the matching ones */
1224 for (i = 0; i < q->nr; i++) {
1225 struct diff_filepair *p = q->queue[i];
1226
1227 if (((p->status == DIFF_STATUS_MODIFIED) &&
1228 ((p->score &&
1229 strchr(filter, DIFF_STATUS_FILTER_BROKEN)) ||
1230 (!p->score &&
1231 strchr(filter, DIFF_STATUS_MODIFIED)))) ||
1232 ((p->status != DIFF_STATUS_MODIFIED) &&
1233 strchr(filter, p->status)))
1234 diff_q(&outq, p);
1235 else
1236 diff_free_filepair(p);
1237 }
1238 }
1239 free(q->queue);
1240 *q = outq;
1241 }
1242
1243 void diffcore_std(struct diff_options *options)
1244 {
1245 if (options->paths && options->paths[0])
1246 diffcore_pathspec(options->paths);
1247 if (options->break_opt != -1)
1248 diffcore_break(options->break_opt);
1249 if (options->detect_rename)
1250 diffcore_rename(options);
1251 if (options->break_opt != -1)
1252 diffcore_merge_broken();
1253 if (options->pickaxe)
1254 diffcore_pickaxe(options->pickaxe, options->pickaxe_opts);
1255 if (options->orderfile)
1256 diffcore_order(options->orderfile);
1257 diff_resolve_rename_copy();
1258 diffcore_apply_filter(options->filter);
1259 }
1260
1261
1262 void diffcore_std_no_resolve(struct diff_options *options)
1263 {
1264 if (options->pickaxe)
1265 diffcore_pickaxe(options->pickaxe, options->pickaxe_opts);
1266 if (options->orderfile)
1267 diffcore_order(options->orderfile);
1268 diffcore_apply_filter(options->filter);
1269 }
1270
1271 void diff_addremove(struct diff_options *options,
1272 int addremove, unsigned mode,
1273 const unsigned char *sha1,
1274 const char *base, const char *path)
1275 {
1276 char concatpath[PATH_MAX];
1277 struct diff_filespec *one, *two;
1278
1279 /* This may look odd, but it is a preparation for
1280 * feeding "there are unchanged files which should
1281 * not produce diffs, but when you are doing copy
1282 * detection you would need them, so here they are"
1283 * entries to the diff-core. They will be prefixed
1284 * with something like '=' or '*' (I haven't decided
1285 * which but should not make any difference).
1286 * Feeding the same new and old to diff_change()
1287 * also has the same effect.
1288 * Before the final output happens, they are pruned after
1289 * merged into rename/copy pairs as appropriate.
1290 */
1291 if (options->reverse_diff)
1292 addremove = (addremove == '+' ? '-' :
1293 addremove == '-' ? '+' : addremove);
1294
1295 if (!path) path = "";
1296 sprintf(concatpath, "%s%s", base, path);
1297 one = alloc_filespec(concatpath);
1298 two = alloc_filespec(concatpath);
1299
1300 if (addremove != '+')
1301 fill_filespec(one, sha1, mode);
1302 if (addremove != '-')
1303 fill_filespec(two, sha1, mode);
1304
1305 diff_queue(&diff_queued_diff, one, two);
1306 }
1307
1308 void diff_change(struct diff_options *options,
1309 unsigned old_mode, unsigned new_mode,
1310 const unsigned char *old_sha1,
1311 const unsigned char *new_sha1,
1312 const char *base, const char *path)
1313 {
1314 char concatpath[PATH_MAX];
1315 struct diff_filespec *one, *two;
1316
1317 if (options->reverse_diff) {
1318 unsigned tmp;
1319 const unsigned char *tmp_c;
1320 tmp = old_mode; old_mode = new_mode; new_mode = tmp;
1321 tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c;
1322 }
1323 if (!path) path = "";
1324 sprintf(concatpath, "%s%s", base, path);
1325 one = alloc_filespec(concatpath);
1326 two = alloc_filespec(concatpath);
1327 fill_filespec(one, old_sha1, old_mode);
1328 fill_filespec(two, new_sha1, new_mode);
1329
1330 diff_queue(&diff_queued_diff, one, two);
1331 }
1332
1333 void diff_unmerge(struct diff_options *options,
1334 const char *path)
1335 {
1336 struct diff_filespec *one, *two;
1337 one = alloc_filespec(path);
1338 two = alloc_filespec(path);
1339 diff_queue(&diff_queued_diff, one, two);
1340 }