double free in builtin-update-index.c
[git/git.git] / convert.c
CommitLineData
6c510bee 1#include "cache.h"
35ebfd6a 2#include "attr.h"
3fed15f5 3#include "run-command.h"
35ebfd6a 4
6c510bee
LT
/*
 * convert.c - convert a file when checking it out and checking it in.
 *
 * This should use the pathname to decide on whether it wants to do some
 * more interesting conversions (automatic gzip/unzip, general format
 * conversions etc etc), but by default it just does automatic CRLF<->LF
 * translation when the "auto_crlf" option is set.
 */
13
163b9591
JH
/*
 * Per-path CRLF handling modes, as computed by git_path_check_crlf():
 * GUESS decides from the content, BINARY never converts, TEXT always
 * converts, INPUT converts only on the way into git.
 */
enum {
	CRLF_GUESS = -1,
	CRLF_BINARY = 0,
	CRLF_TEXT = 1,
	CRLF_INPUT = 2
};
18
6c510bee
LT
/* Byte-class counters filled in by gather_stats(). */
struct text_stat {
	/* CR, LF and CRLF counts */
	unsigned cr;
	unsigned lf;
	unsigned crlf;

	/* These are just approximations! */
	unsigned printable;
	unsigned nonprintable;
};
26
27static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
28{
29 unsigned long i;
30
31 memset(stats, 0, sizeof(*stats));
32
33 for (i = 0; i < size; i++) {
34 unsigned char c = buf[i];
35 if (c == '\r') {
36 stats->cr++;
37 if (i+1 < size && buf[i+1] == '\n')
38 stats->crlf++;
39 continue;
40 }
41 if (c == '\n') {
42 stats->lf++;
43 continue;
44 }
45 if (c == 127)
46 /* DEL */
47 stats->nonprintable++;
48 else if (c < 32) {
49 switch (c) {
50 /* BS, HT, ESC and FF */
51 case '\b': case '\t': case '\033': case '\014':
52 stats->printable++;
53 break;
54 default:
55 stats->nonprintable++;
56 }
57 }
58 else
59 stats->printable++;
60 }
61}
62
63/*
64 * The same heuristics as diff.c::mmfile_is_binary()
65 */
66static int is_binary(unsigned long size, struct text_stat *stats)
67{
68
69 if ((stats->printable >> 7) < stats->nonprintable)
70 return 1;
71 /*
72 * Other heuristics? Average line length might be relevant,
73 * as might LF vs CR vs CRLF counts..
74 *
75 * NOTE! It might be normal to have a low ratio of CRLF to LF
76 * (somebody starts with a LF-only file and edits it with an editor
77 * that adds CRLF only to lines that are added..). But do we
78 * want to support CR-only? Probably not.
79 */
80 return 0;
81}
82
5ecd293d
PH
83static int crlf_to_git(const char *path, const char *src, size_t len,
84 struct strbuf *buf, int action)
6c510bee 85{
6c510bee 86 struct text_stat stats;
5ecd293d 87 char *dst;
6c510bee 88
5ecd293d
PH
89 if ((action == CRLF_BINARY) || !auto_crlf || !len)
90 return 0;
6c510bee 91
5ecd293d 92 gather_stats(src, len, &stats);
6c510bee
LT
93 /* No CR? Nothing to convert, regardless. */
94 if (!stats.cr)
5ecd293d 95 return 0;
6c510bee 96
163b9591 97 if (action == CRLF_GUESS) {
201ac8ef
JH
98 /*
99 * We're currently not going to even try to convert stuff
100 * that has bare CR characters. Does anybody do that crazy
101 * stuff?
102 */
103 if (stats.cr != stats.crlf)
5ecd293d 104 return 0;
201ac8ef
JH
105
106 /*
107 * And add some heuristics for binary vs text, of course...
108 */
5ecd293d
PH
109 if (is_binary(len, &stats))
110 return 0;
201ac8ef 111 }
6c510bee 112
5ecd293d
PH
113 strbuf_grow(buf, len);
114 dst = buf->buf;
163b9591
JH
115 if (action == CRLF_GUESS) {
116 /*
117 * If we guessed, we already know we rejected a file with
118 * lone CR, and we can strip a CR without looking at what
119 * follow it.
120 */
201ac8ef 121 do {
ac78e548 122 unsigned char c = *src++;
201ac8ef 123 if (c != '\r')
ac78e548 124 *dst++ = c;
5ecd293d 125 } while (--len);
201ac8ef
JH
126 } else {
127 do {
ac78e548 128 unsigned char c = *src++;
5ecd293d 129 if (! (c == '\r' && (1 < len && *src == '\n')))
ac78e548 130 *dst++ = c;
5ecd293d 131 } while (--len);
201ac8ef 132 }
5ecd293d
PH
133 strbuf_setlen(buf, dst - buf->buf);
134 return 1;
6c510bee
LT
135}
136
5ecd293d
PH
137static int crlf_to_worktree(const char *path, const char *src, size_t len,
138 struct strbuf *buf, int action)
6c510bee 139{
5ecd293d 140 char *to_free = NULL;
6c510bee 141 struct text_stat stats;
6c510bee 142
163b9591 143 if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
760f0c62 144 auto_crlf <= 0)
5ecd293d 145 return 0;
6c510bee 146
5ecd293d
PH
147 if (!len)
148 return 0;
6c510bee 149
5ecd293d 150 gather_stats(src, len, &stats);
6c510bee
LT
151
152 /* No LF? Nothing to convert, regardless. */
153 if (!stats.lf)
5ecd293d 154 return 0;
6c510bee
LT
155
156 /* Was it already in CRLF format? */
157 if (stats.lf == stats.crlf)
5ecd293d 158 return 0;
6c510bee 159
163b9591 160 if (action == CRLF_GUESS) {
201ac8ef
JH
161 /* If we have any bare CR characters, we're not going to touch it */
162 if (stats.cr != stats.crlf)
5ecd293d 163 return 0;
6c510bee 164
5ecd293d
PH
165 if (is_binary(len, &stats))
166 return 0;
201ac8ef 167 }
6c510bee 168
5ecd293d
PH
169 /* are we "faking" in place editing ? */
170 if (src == buf->buf)
171 to_free = strbuf_detach(buf);
172
173 strbuf_grow(buf, len + stats.lf - stats.crlf);
174 for (;;) {
175 const char *nl = memchr(src, '\n', len);
176 if (!nl)
177 break;
178 if (nl > src && nl[-1] == '\r') {
179 strbuf_add(buf, src, nl + 1 - src);
180 } else {
181 strbuf_add(buf, src, nl - src);
182 strbuf_addstr(buf, "\r\n");
183 }
184 len -= nl + 1 - src;
185 src = nl + 1;
186 }
187 strbuf_add(buf, src, len);
188
189 free(to_free);
190 return 1;
6c510bee 191}
35ebfd6a 192
aa4ed402
JH
193static int filter_buffer(const char *path, const char *src,
194 unsigned long size, const char *cmd)
195{
196 /*
197 * Spawn cmd and feed the buffer contents through its stdin.
198 */
199 struct child_process child_process;
200 int pipe_feed[2];
201 int write_err, status;
202
203 memset(&child_process, 0, sizeof(child_process));
204
205 if (pipe(pipe_feed) < 0) {
206 error("cannot create pipe to run external filter %s", cmd);
207 return 1;
208 }
209
210 child_process.pid = fork();
211 if (child_process.pid < 0) {
212 error("cannot fork to run external filter %s", cmd);
213 close(pipe_feed[0]);
214 close(pipe_feed[1]);
215 return 1;
216 }
217 if (!child_process.pid) {
218 dup2(pipe_feed[0], 0);
219 close(pipe_feed[0]);
220 close(pipe_feed[1]);
221 execlp("sh", "sh", "-c", cmd, NULL);
222 return 1;
223 }
224 close(pipe_feed[0]);
225
226 write_err = (write_in_full(pipe_feed[1], src, size) < 0);
227 if (close(pipe_feed[1]))
228 write_err = 1;
229 if (write_err)
230 error("cannot feed the input to external filter %s", cmd);
231
232 status = finish_command(&child_process);
233 if (status)
234 error("external filter %s failed %d", cmd, -status);
235 return (write_err || status);
236}
237
5ecd293d
PH
238static int apply_filter(const char *path, const char *src, size_t len,
239 struct strbuf *dst, const char *cmd)
aa4ed402
JH
240{
241 /*
242 * Create a pipeline to have the command filter the buffer's
243 * contents.
244 *
245 * (child --> cmd) --> us
246 */
aa4ed402 247 int pipe_feed[2];
5ecd293d 248 int status, ret = 1;
aa4ed402 249 struct child_process child_process;
5ecd293d 250 struct strbuf nbuf;
aa4ed402
JH
251
252 if (!cmd)
5ecd293d 253 return 0;
aa4ed402
JH
254
255 memset(&child_process, 0, sizeof(child_process));
256
257 if (pipe(pipe_feed) < 0) {
258 error("cannot create pipe to run external filter %s", cmd);
5ecd293d 259 return 0;
aa4ed402
JH
260 }
261
262 fflush(NULL);
263 child_process.pid = fork();
264 if (child_process.pid < 0) {
265 error("cannot fork to run external filter %s", cmd);
266 close(pipe_feed[0]);
267 close(pipe_feed[1]);
5ecd293d 268 return 0;
aa4ed402
JH
269 }
270 if (!child_process.pid) {
271 dup2(pipe_feed[1], 1);
272 close(pipe_feed[0]);
273 close(pipe_feed[1]);
5ecd293d 274 exit(filter_buffer(path, src, len, cmd));
aa4ed402
JH
275 }
276 close(pipe_feed[1]);
277
5ecd293d
PH
278 strbuf_init(&nbuf, 0);
279 if (strbuf_read(&nbuf, pipe_feed[0], len) < 0) {
280 error("read from external filter %s failed", cmd);
281 ret = 0;
aa4ed402
JH
282 }
283 if (close(pipe_feed[0])) {
5ecd293d
PH
284 ret = error("read from external filter %s failed", cmd);
285 ret = 0;
aa4ed402 286 }
aa4ed402
JH
287 status = finish_command(&child_process);
288 if (status) {
5ecd293d
PH
289 ret = error("external filter %s failed %d", cmd, -status);
290 ret = 0;
aa4ed402
JH
291 }
292
5ecd293d
PH
293 if (ret) {
294 *dst = nbuf;
295 } else {
296 strbuf_release(&nbuf);
297 }
298 return ret;
aa4ed402
JH
299}
300
/*
 * One external conversion driver configured as "filter.<name>.*".
 * Drivers form a singly linked list in the order they are first seen.
 */
struct convert_driver {
	const char *name;
	struct convert_driver *next;
	char *smudge;
	char *clean;
};

/* Head of the driver list, and the link where the next driver is appended. */
static struct convert_driver *user_convert;
static struct convert_driver **user_convert_tail;
307
308static int read_convert_config(const char *var, const char *value)
309{
310 const char *ep, *name;
311 int namelen;
312 struct convert_driver *drv;
313
314 /*
315 * External conversion drivers are configured using
316 * "filter.<name>.variable".
317 */
318 if (prefixcmp(var, "filter.") || (ep = strrchr(var, '.')) == var + 6)
319 return 0;
320 name = var + 7;
321 namelen = ep - name;
322 for (drv = user_convert; drv; drv = drv->next)
323 if (!strncmp(drv->name, name, namelen) && !drv->name[namelen])
324 break;
325 if (!drv) {
aa4ed402 326 drv = xcalloc(1, sizeof(struct convert_driver));
182af834 327 drv->name = xmemdupz(name, namelen);
aa4ed402
JH
328 *user_convert_tail = drv;
329 user_convert_tail = &(drv->next);
330 }
331
332 ep++;
333
334 /*
335 * filter.<name>.smudge and filter.<name>.clean specifies
336 * the command line:
337 *
338 * command-line
339 *
340 * The command-line will not be interpolated in any way.
341 */
342
343 if (!strcmp("smudge", ep)) {
344 if (!value)
345 return error("%s: lacks value", var);
346 drv->smudge = strdup(value);
347 return 0;
348 }
349
350 if (!strcmp("clean", ep)) {
351 if (!value)
352 return error("%s: lacks value", var);
353 drv->clean = strdup(value);
354 return 0;
355 }
356 return 0;
357}
358
6073ee85 359static void setup_convert_check(struct git_attr_check *check)
35ebfd6a
JH
360{
361 static struct git_attr *attr_crlf;
3fed15f5 362 static struct git_attr *attr_ident;
aa4ed402 363 static struct git_attr *attr_filter;
35ebfd6a 364
3fed15f5 365 if (!attr_crlf) {
35ebfd6a 366 attr_crlf = git_attr("crlf", 4);
3fed15f5 367 attr_ident = git_attr("ident", 5);
aa4ed402
JH
368 attr_filter = git_attr("filter", 6);
369 user_convert_tail = &user_convert;
370 git_config(read_convert_config);
3fed15f5
JH
371 }
372 check[0].attr = attr_crlf;
373 check[1].attr = attr_ident;
aa4ed402 374 check[2].attr = attr_filter;
3fed15f5
JH
375}
376
/*
 * Count the ident keywords in cp[0..size).
 *
 * Both the collapsed form "$Id$" and the expanded form "$Id: ... $"
 * (anything up to the next dollar sign) are counted.  An expanded
 * keyword that is never closed by a '$' is not counted.
 */
static int count_ident(const char *cp, unsigned long size)
{
	const char *end = cp + size;
	int found = 0;

	while (cp < end) {
		char tag;

		if (*cp++ != '$')
			continue;
		/* fewer than three bytes left cannot hold "Id$" */
		if (end - cp < 3)
			break;
		if (memcmp("Id", cp, 2) != 0)
			continue;

		tag = cp[2];
		cp += 3;
		if (tag == '$') {
			found++;	/* $Id$ */
			continue;
		}
		if (tag != ':')
			continue;

		/*
		 * "$Id: ... "; scan up to the closing dollar sign and discard.
		 */
		while (cp < end) {
			if (*cp++ == '$') {
				found++;
				break;
			}
		}
	}
	return found;
}
416
5ecd293d
PH
417static int ident_to_git(const char *path, const char *src, size_t len,
418 struct strbuf *buf, int ident)
3fed15f5 419{
5ecd293d 420 char *dst, *dollar;
3fed15f5 421
5ecd293d
PH
422 if (!ident || !count_ident(src, len))
423 return 0;
424
425 strbuf_grow(buf, len);
426 dst = buf->buf;
427 for (;;) {
428 dollar = memchr(src, '$', len);
429 if (!dollar)
430 break;
431 memcpy(dst, src, dollar + 1 - src);
432 dst += dollar + 1 - src;
433 len -= dollar + 1 - src;
434 src = dollar + 1;
435
436 if (len > 3 && !memcmp(src, "Id:", 3)) {
437 dollar = memchr(src + 3, '$', len - 3);
438 if (!dollar)
439 break;
af9b54bb
AP
440 memcpy(dst, "Id$", 3);
441 dst += 3;
5ecd293d
PH
442 len -= dollar + 1 - src;
443 src = dollar + 1;
3fed15f5
JH
444 }
445 }
5ecd293d
PH
446 memcpy(dst, src, len);
447 strbuf_setlen(buf, dst + len - buf->buf);
448 return 1;
3fed15f5
JH
449}
450
5ecd293d
PH
451static int ident_to_worktree(const char *path, const char *src, size_t len,
452 struct strbuf *buf, int ident)
3fed15f5 453{
3fed15f5 454 unsigned char sha1[20];
5ecd293d
PH
455 char *to_free = NULL, *dollar;
456 int cnt;
3fed15f5
JH
457
458 if (!ident)
5ecd293d 459 return 0;
3fed15f5 460
5ecd293d 461 cnt = count_ident(src, len);
3fed15f5 462 if (!cnt)
5ecd293d 463 return 0;
3fed15f5 464
5ecd293d
PH
465 /* are we "faking" in place editing ? */
466 if (src == buf->buf)
467 to_free = strbuf_detach(buf);
468 hash_sha1_file(src, len, "blob", sha1);
3fed15f5 469
5ecd293d
PH
470 strbuf_grow(buf, len + cnt * 43);
471 for (;;) {
472 /* step 1: run to the next '$' */
473 dollar = memchr(src, '$', len);
474 if (!dollar)
475 break;
476 strbuf_add(buf, src, dollar + 1 - src);
477 len -= dollar + 1 - src;
478 src = dollar + 1;
c23290d5 479
5ecd293d
PH
480 /* step 2: does it looks like a bit like Id:xxx$ or Id$ ? */
481 if (len < 3 || memcmp("Id", src, 2))
3fed15f5
JH
482 continue;
483
5ecd293d
PH
484 /* step 3: skip over Id$ or Id:xxxxx$ */
485 if (src[2] == '$') {
486 src += 3;
487 len -= 3;
488 } else if (src[2] == ':') {
489 /*
490 * It's possible that an expanded Id has crept its way into the
491 * repository, we cope with that by stripping the expansion out
492 */
493 dollar = memchr(src + 3, '$', len - 3);
494 if (!dollar) {
495 /* incomplete keyword, no more '$', so just quit the loop */
496 break;
497 }
c23290d5 498
5ecd293d
PH
499 len -= dollar + 1 - src;
500 src = dollar + 1;
501 } else {
502 /* it wasn't a "Id$" or "Id:xxxx$" */
503 continue;
504 }
c23290d5 505
5ecd293d
PH
506 /* step 4: substitute */
507 strbuf_addstr(buf, "Id: ");
508 strbuf_add(buf, sha1_to_hex(sha1), 40);
509 strbuf_addstr(buf, " $");
3fed15f5 510 }
5ecd293d 511 strbuf_add(buf, src, len);
3fed15f5 512
5ecd293d
PH
513 free(to_free);
514 return 1;
35ebfd6a
JH
515}
516
6073ee85 517static int git_path_check_crlf(const char *path, struct git_attr_check *check)
35ebfd6a 518{
6073ee85
JH
519 const char *value = check->value;
520
521 if (ATTR_TRUE(value))
522 return CRLF_TEXT;
523 else if (ATTR_FALSE(value))
524 return CRLF_BINARY;
525 else if (ATTR_UNSET(value))
526 ;
527 else if (!strcmp(value, "input"))
528 return CRLF_INPUT;
163b9591 529 return CRLF_GUESS;
35ebfd6a
JH
530}
531
aa4ed402
JH
532static struct convert_driver *git_path_check_convert(const char *path,
533 struct git_attr_check *check)
534{
535 const char *value = check->value;
536 struct convert_driver *drv;
537
538 if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value))
539 return NULL;
540 for (drv = user_convert; drv; drv = drv->next)
541 if (!strcmp(value, drv->name))
542 return drv;
543 return NULL;
544}
545
3fed15f5
JH
546static int git_path_check_ident(const char *path, struct git_attr_check *check)
547{
548 const char *value = check->value;
549
550 return !!ATTR_TRUE(value);
551}
552
5ecd293d 553int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst)
35ebfd6a 554{
aa4ed402 555 struct git_attr_check check[3];
6073ee85 556 int crlf = CRLF_GUESS;
5ecd293d 557 int ident = 0, ret = 0;
aa4ed402 558 char *filter = NULL;
6073ee85
JH
559
560 setup_convert_check(check);
3fed15f5 561 if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
aa4ed402 562 struct convert_driver *drv;
3fed15f5
JH
563 crlf = git_path_check_crlf(path, check + 0);
564 ident = git_path_check_ident(path, check + 1);
aa4ed402
JH
565 drv = git_path_check_convert(path, check + 2);
566 if (drv && drv->clean)
567 filter = drv->clean;
3fed15f5
JH
568 }
569
5ecd293d
PH
570 ret |= apply_filter(path, src, len, dst, filter);
571 if (ret) {
572 src = dst->buf;
573 len = dst->len;
aa4ed402 574 }
5ecd293d
PH
575 ret |= crlf_to_git(path, src, len, dst, crlf);
576 if (ret) {
577 src = dst->buf;
578 len = dst->len;
6073ee85 579 }
5ecd293d 580 return ret | ident_to_git(path, src, len, dst, ident);
35ebfd6a
JH
581}
582
5ecd293d 583int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst)
35ebfd6a 584{
aa4ed402 585 struct git_attr_check check[3];
6073ee85 586 int crlf = CRLF_GUESS;
5ecd293d 587 int ident = 0, ret = 0;
aa4ed402 588 char *filter = NULL;
6073ee85
JH
589
590 setup_convert_check(check);
3fed15f5 591 if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
aa4ed402 592 struct convert_driver *drv;
3fed15f5
JH
593 crlf = git_path_check_crlf(path, check + 0);
594 ident = git_path_check_ident(path, check + 1);
aa4ed402
JH
595 drv = git_path_check_convert(path, check + 2);
596 if (drv && drv->smudge)
597 filter = drv->smudge;
6073ee85 598 }
3fed15f5 599
5ecd293d
PH
600 ret |= ident_to_worktree(path, src, len, dst, ident);
601 if (ret) {
602 src = dst->buf;
603 len = dst->len;
3fed15f5 604 }
5ecd293d
PH
605 ret |= crlf_to_worktree(path, src, len, dst, crlf);
606 if (ret) {
607 src = dst->buf;
608 len = dst->len;
aa4ed402 609 }
5ecd293d 610 return ret | apply_filter(path, src, len, dst, filter);
35ebfd6a 611}