Rewrite convert_to_{git,working_tree} to use strbuf's.
[git/git.git] / convert.c
CommitLineData
6c510bee 1#include "cache.h"
35ebfd6a 2#include "attr.h"
3fed15f5 3#include "run-command.h"
5ecd293d 4#include "strbuf.h"
35ebfd6a 5
6c510bee
LT
6/*
7 * convert.c - convert a file when checking it out and checking it in.
8 *
9 * This should use the pathname to decide on whether it wants to do some
10 * more interesting conversions (automatic gzip/unzip, general format
11 * conversions etc etc), but by default it just does automatic CRLF<->LF
12 * translation when the "auto_crlf" option is set.
13 */
14
163b9591
JH
/*
 * Line-ending treatment selected for a path; git_path_check_crlf()
 * derives one of these from the path's "crlf" attribute.
 */
#define CRLF_GUESS	(-1)	/* attribute unset or unrecognized: decide from the content */
#define CRLF_BINARY	0	/* attribute false: both CRLF converters leave the data alone */
#define CRLF_TEXT	1	/* attribute true: convert without the content heuristics */
#define CRLF_INPUT	2	/* attribute "input": crlf_to_worktree() does nothing */
19
6c510bee
LT
/* Byte-class tallies for one buffer, as filled in by gather_stats(). */
struct text_stat {
	/*
	 * Number of CR bytes, number of LF bytes, and number of CRs that
	 * are immediately followed by an LF.
	 */
	unsigned cr;
	unsigned lf;
	unsigned crlf;

	/* These are just approximations! */
	unsigned printable;
	unsigned nonprintable;
};
27
28static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
29{
30 unsigned long i;
31
32 memset(stats, 0, sizeof(*stats));
33
34 for (i = 0; i < size; i++) {
35 unsigned char c = buf[i];
36 if (c == '\r') {
37 stats->cr++;
38 if (i+1 < size && buf[i+1] == '\n')
39 stats->crlf++;
40 continue;
41 }
42 if (c == '\n') {
43 stats->lf++;
44 continue;
45 }
46 if (c == 127)
47 /* DEL */
48 stats->nonprintable++;
49 else if (c < 32) {
50 switch (c) {
51 /* BS, HT, ESC and FF */
52 case '\b': case '\t': case '\033': case '\014':
53 stats->printable++;
54 break;
55 default:
56 stats->nonprintable++;
57 }
58 }
59 else
60 stats->printable++;
61 }
62}
63
64/*
65 * The same heuristics as diff.c::mmfile_is_binary()
66 */
67static int is_binary(unsigned long size, struct text_stat *stats)
68{
69
70 if ((stats->printable >> 7) < stats->nonprintable)
71 return 1;
72 /*
73 * Other heuristics? Average line length might be relevant,
74 * as might LF vs CR vs CRLF counts..
75 *
76 * NOTE! It might be normal to have a low ratio of CRLF to LF
77 * (somebody starts with a LF-only file and edits it with an editor
78 * that adds CRLF only to lines that are added..). But do we
79 * want to support CR-only? Probably not.
80 */
81 return 0;
82}
83
5ecd293d
PH
84static int crlf_to_git(const char *path, const char *src, size_t len,
85 struct strbuf *buf, int action)
6c510bee 86{
6c510bee 87 struct text_stat stats;
5ecd293d 88 char *dst;
6c510bee 89
5ecd293d
PH
90 if ((action == CRLF_BINARY) || !auto_crlf || !len)
91 return 0;
6c510bee 92
5ecd293d 93 gather_stats(src, len, &stats);
6c510bee
LT
94 /* No CR? Nothing to convert, regardless. */
95 if (!stats.cr)
5ecd293d 96 return 0;
6c510bee 97
163b9591 98 if (action == CRLF_GUESS) {
201ac8ef
JH
99 /*
100 * We're currently not going to even try to convert stuff
101 * that has bare CR characters. Does anybody do that crazy
102 * stuff?
103 */
104 if (stats.cr != stats.crlf)
5ecd293d 105 return 0;
201ac8ef
JH
106
107 /*
108 * And add some heuristics for binary vs text, of course...
109 */
5ecd293d
PH
110 if (is_binary(len, &stats))
111 return 0;
201ac8ef 112 }
6c510bee 113
5ecd293d
PH
114 strbuf_grow(buf, len);
115 dst = buf->buf;
163b9591
JH
116 if (action == CRLF_GUESS) {
117 /*
118 * If we guessed, we already know we rejected a file with
119 * lone CR, and we can strip a CR without looking at what
120 * follow it.
121 */
201ac8ef 122 do {
ac78e548 123 unsigned char c = *src++;
201ac8ef 124 if (c != '\r')
ac78e548 125 *dst++ = c;
5ecd293d 126 } while (--len);
201ac8ef
JH
127 } else {
128 do {
ac78e548 129 unsigned char c = *src++;
5ecd293d 130 if (! (c == '\r' && (1 < len && *src == '\n')))
ac78e548 131 *dst++ = c;
5ecd293d 132 } while (--len);
201ac8ef 133 }
5ecd293d
PH
134 strbuf_setlen(buf, dst - buf->buf);
135 return 1;
6c510bee
LT
136}
137
5ecd293d
PH
138static int crlf_to_worktree(const char *path, const char *src, size_t len,
139 struct strbuf *buf, int action)
6c510bee 140{
5ecd293d 141 char *to_free = NULL;
6c510bee 142 struct text_stat stats;
6c510bee 143
163b9591 144 if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
760f0c62 145 auto_crlf <= 0)
5ecd293d 146 return 0;
6c510bee 147
5ecd293d
PH
148 if (!len)
149 return 0;
6c510bee 150
5ecd293d 151 gather_stats(src, len, &stats);
6c510bee
LT
152
153 /* No LF? Nothing to convert, regardless. */
154 if (!stats.lf)
5ecd293d 155 return 0;
6c510bee
LT
156
157 /* Was it already in CRLF format? */
158 if (stats.lf == stats.crlf)
5ecd293d 159 return 0;
6c510bee 160
163b9591 161 if (action == CRLF_GUESS) {
201ac8ef
JH
162 /* If we have any bare CR characters, we're not going to touch it */
163 if (stats.cr != stats.crlf)
5ecd293d 164 return 0;
6c510bee 165
5ecd293d
PH
166 if (is_binary(len, &stats))
167 return 0;
201ac8ef 168 }
6c510bee 169
5ecd293d
PH
170 /* are we "faking" in place editing ? */
171 if (src == buf->buf)
172 to_free = strbuf_detach(buf);
173
174 strbuf_grow(buf, len + stats.lf - stats.crlf);
175 for (;;) {
176 const char *nl = memchr(src, '\n', len);
177 if (!nl)
178 break;
179 if (nl > src && nl[-1] == '\r') {
180 strbuf_add(buf, src, nl + 1 - src);
181 } else {
182 strbuf_add(buf, src, nl - src);
183 strbuf_addstr(buf, "\r\n");
184 }
185 len -= nl + 1 - src;
186 src = nl + 1;
187 }
188 strbuf_add(buf, src, len);
189
190 free(to_free);
191 return 1;
6c510bee 192}
35ebfd6a 193
aa4ed402
JH
194static int filter_buffer(const char *path, const char *src,
195 unsigned long size, const char *cmd)
196{
197 /*
198 * Spawn cmd and feed the buffer contents through its stdin.
199 */
200 struct child_process child_process;
201 int pipe_feed[2];
202 int write_err, status;
203
204 memset(&child_process, 0, sizeof(child_process));
205
206 if (pipe(pipe_feed) < 0) {
207 error("cannot create pipe to run external filter %s", cmd);
208 return 1;
209 }
210
211 child_process.pid = fork();
212 if (child_process.pid < 0) {
213 error("cannot fork to run external filter %s", cmd);
214 close(pipe_feed[0]);
215 close(pipe_feed[1]);
216 return 1;
217 }
218 if (!child_process.pid) {
219 dup2(pipe_feed[0], 0);
220 close(pipe_feed[0]);
221 close(pipe_feed[1]);
222 execlp("sh", "sh", "-c", cmd, NULL);
223 return 1;
224 }
225 close(pipe_feed[0]);
226
227 write_err = (write_in_full(pipe_feed[1], src, size) < 0);
228 if (close(pipe_feed[1]))
229 write_err = 1;
230 if (write_err)
231 error("cannot feed the input to external filter %s", cmd);
232
233 status = finish_command(&child_process);
234 if (status)
235 error("external filter %s failed %d", cmd, -status);
236 return (write_err || status);
237}
238
5ecd293d
PH
239static int apply_filter(const char *path, const char *src, size_t len,
240 struct strbuf *dst, const char *cmd)
aa4ed402
JH
241{
242 /*
243 * Create a pipeline to have the command filter the buffer's
244 * contents.
245 *
246 * (child --> cmd) --> us
247 */
aa4ed402 248 int pipe_feed[2];
5ecd293d 249 int status, ret = 1;
aa4ed402 250 struct child_process child_process;
5ecd293d 251 struct strbuf nbuf;
aa4ed402
JH
252
253 if (!cmd)
5ecd293d 254 return 0;
aa4ed402
JH
255
256 memset(&child_process, 0, sizeof(child_process));
257
258 if (pipe(pipe_feed) < 0) {
259 error("cannot create pipe to run external filter %s", cmd);
5ecd293d 260 return 0;
aa4ed402
JH
261 }
262
263 fflush(NULL);
264 child_process.pid = fork();
265 if (child_process.pid < 0) {
266 error("cannot fork to run external filter %s", cmd);
267 close(pipe_feed[0]);
268 close(pipe_feed[1]);
5ecd293d 269 return 0;
aa4ed402
JH
270 }
271 if (!child_process.pid) {
272 dup2(pipe_feed[1], 1);
273 close(pipe_feed[0]);
274 close(pipe_feed[1]);
5ecd293d 275 exit(filter_buffer(path, src, len, cmd));
aa4ed402
JH
276 }
277 close(pipe_feed[1]);
278
5ecd293d
PH
279 strbuf_init(&nbuf, 0);
280 if (strbuf_read(&nbuf, pipe_feed[0], len) < 0) {
281 error("read from external filter %s failed", cmd);
282 ret = 0;
aa4ed402
JH
283 }
284 if (close(pipe_feed[0])) {
5ecd293d
PH
285 ret = error("read from external filter %s failed", cmd);
286 ret = 0;
aa4ed402 287 }
aa4ed402
JH
288 status = finish_command(&child_process);
289 if (status) {
5ecd293d
PH
290 ret = error("external filter %s failed %d", cmd, -status);
291 ret = 0;
aa4ed402
JH
292 }
293
5ecd293d
PH
294 if (ret) {
295 *dst = nbuf;
296 } else {
297 strbuf_release(&nbuf);
298 }
299 return ret;
aa4ed402
JH
300}
301
/*
 * One externally configured conversion driver, kept in a singly linked
 * list in the order the drivers were first seen in the configuration.
 */
struct convert_driver {
	const char *name;		/* the <name> part of "filter.<name>.*" */
	struct convert_driver *next;
	char *smudge;			/* value of filter.<name>.smudge, if set */
	char *clean;			/* value of filter.<name>.clean, if set */
};

/* Head of the driver list, and the link where the next driver is appended. */
static struct convert_driver *user_convert;
static struct convert_driver **user_convert_tail;
308
309static int read_convert_config(const char *var, const char *value)
310{
311 const char *ep, *name;
312 int namelen;
313 struct convert_driver *drv;
314
315 /*
316 * External conversion drivers are configured using
317 * "filter.<name>.variable".
318 */
319 if (prefixcmp(var, "filter.") || (ep = strrchr(var, '.')) == var + 6)
320 return 0;
321 name = var + 7;
322 namelen = ep - name;
323 for (drv = user_convert; drv; drv = drv->next)
324 if (!strncmp(drv->name, name, namelen) && !drv->name[namelen])
325 break;
326 if (!drv) {
327 char *namebuf;
328 drv = xcalloc(1, sizeof(struct convert_driver));
329 namebuf = xmalloc(namelen + 1);
330 memcpy(namebuf, name, namelen);
331 namebuf[namelen] = 0;
332 drv->name = namebuf;
333 drv->next = NULL;
334 *user_convert_tail = drv;
335 user_convert_tail = &(drv->next);
336 }
337
338 ep++;
339
340 /*
341 * filter.<name>.smudge and filter.<name>.clean specifies
342 * the command line:
343 *
344 * command-line
345 *
346 * The command-line will not be interpolated in any way.
347 */
348
349 if (!strcmp("smudge", ep)) {
350 if (!value)
351 return error("%s: lacks value", var);
352 drv->smudge = strdup(value);
353 return 0;
354 }
355
356 if (!strcmp("clean", ep)) {
357 if (!value)
358 return error("%s: lacks value", var);
359 drv->clean = strdup(value);
360 return 0;
361 }
362 return 0;
363}
364
6073ee85 365static void setup_convert_check(struct git_attr_check *check)
35ebfd6a
JH
366{
367 static struct git_attr *attr_crlf;
3fed15f5 368 static struct git_attr *attr_ident;
aa4ed402 369 static struct git_attr *attr_filter;
35ebfd6a 370
3fed15f5 371 if (!attr_crlf) {
35ebfd6a 372 attr_crlf = git_attr("crlf", 4);
3fed15f5 373 attr_ident = git_attr("ident", 5);
aa4ed402
JH
374 attr_filter = git_attr("filter", 6);
375 user_convert_tail = &user_convert;
376 git_config(read_convert_config);
3fed15f5
JH
377 }
378 check[0].attr = attr_crlf;
379 check[1].attr = attr_ident;
aa4ed402 380 check[2].attr = attr_filter;
3fed15f5
JH
381}
382
/*
 * Count the ident keywords in the "size" bytes at "cp".
 *
 * Both the collapsed form "$Id$" and the expanded form "$Id: ... $"
 * count:
 *
 * "$Id: 0000000000000000000000000000000000000000 $" <=> "$Id$"
 *
 * An "$Id:" without a closing dollar sign is not counted, and scanning
 * stops as soon as fewer than three bytes follow a dollar sign.
 */
static int count_ident(const char *cp, unsigned long size)
{
	const char *end = cp + size;
	int found = 0;

	while (cp < end) {
		const char *dollar = memchr(cp, '$', end - cp);

		if (!dollar)
			break;
		cp = dollar + 1;

		/* need room for "Id" plus the byte that follows it */
		if (end - cp < 3)
			break;
		if (cp[0] != 'I' || cp[1] != 'd')
			continue;

		switch (cp[2]) {
		case '$':
			/* $Id$ */
			found++;
			cp += 3;
			break;
		case ':':
			/* "$Id: ... "; scan up to the closing dollar sign and discard. */
			dollar = memchr(cp + 3, '$', end - (cp + 3));
			if (!dollar)
				return found;
			found++;
			cp = dollar + 1;
			break;
		default:
			/* "$Id" followed by something else: skip those three bytes */
			cp += 3;
			break;
		}
	}
	return found;
}
422
5ecd293d
PH
423static int ident_to_git(const char *path, const char *src, size_t len,
424 struct strbuf *buf, int ident)
3fed15f5 425{
5ecd293d 426 char *dst, *dollar;
3fed15f5 427
5ecd293d
PH
428 if (!ident || !count_ident(src, len))
429 return 0;
430
431 strbuf_grow(buf, len);
432 dst = buf->buf;
433 for (;;) {
434 dollar = memchr(src, '$', len);
435 if (!dollar)
436 break;
437 memcpy(dst, src, dollar + 1 - src);
438 dst += dollar + 1 - src;
439 len -= dollar + 1 - src;
440 src = dollar + 1;
441
442 if (len > 3 && !memcmp(src, "Id:", 3)) {
443 dollar = memchr(src + 3, '$', len - 3);
444 if (!dollar)
445 break;
af9b54bb
AP
446 memcpy(dst, "Id$", 3);
447 dst += 3;
5ecd293d
PH
448 len -= dollar + 1 - src;
449 src = dollar + 1;
3fed15f5
JH
450 }
451 }
5ecd293d
PH
452 memcpy(dst, src, len);
453 strbuf_setlen(buf, dst + len - buf->buf);
454 return 1;
3fed15f5
JH
455}
456
5ecd293d
PH
457static int ident_to_worktree(const char *path, const char *src, size_t len,
458 struct strbuf *buf, int ident)
3fed15f5 459{
3fed15f5 460 unsigned char sha1[20];
5ecd293d
PH
461 char *to_free = NULL, *dollar;
462 int cnt;
3fed15f5
JH
463
464 if (!ident)
5ecd293d 465 return 0;
3fed15f5 466
5ecd293d 467 cnt = count_ident(src, len);
3fed15f5 468 if (!cnt)
5ecd293d 469 return 0;
3fed15f5 470
5ecd293d
PH
471 /* are we "faking" in place editing ? */
472 if (src == buf->buf)
473 to_free = strbuf_detach(buf);
474 hash_sha1_file(src, len, "blob", sha1);
3fed15f5 475
5ecd293d
PH
476 strbuf_grow(buf, len + cnt * 43);
477 for (;;) {
478 /* step 1: run to the next '$' */
479 dollar = memchr(src, '$', len);
480 if (!dollar)
481 break;
482 strbuf_add(buf, src, dollar + 1 - src);
483 len -= dollar + 1 - src;
484 src = dollar + 1;
c23290d5 485
5ecd293d
PH
486 /* step 2: does it looks like a bit like Id:xxx$ or Id$ ? */
487 if (len < 3 || memcmp("Id", src, 2))
3fed15f5
JH
488 continue;
489
5ecd293d
PH
490 /* step 3: skip over Id$ or Id:xxxxx$ */
491 if (src[2] == '$') {
492 src += 3;
493 len -= 3;
494 } else if (src[2] == ':') {
495 /*
496 * It's possible that an expanded Id has crept its way into the
497 * repository, we cope with that by stripping the expansion out
498 */
499 dollar = memchr(src + 3, '$', len - 3);
500 if (!dollar) {
501 /* incomplete keyword, no more '$', so just quit the loop */
502 break;
503 }
c23290d5 504
5ecd293d
PH
505 len -= dollar + 1 - src;
506 src = dollar + 1;
507 } else {
508 /* it wasn't a "Id$" or "Id:xxxx$" */
509 continue;
510 }
c23290d5 511
5ecd293d
PH
512 /* step 4: substitute */
513 strbuf_addstr(buf, "Id: ");
514 strbuf_add(buf, sha1_to_hex(sha1), 40);
515 strbuf_addstr(buf, " $");
3fed15f5 516 }
5ecd293d 517 strbuf_add(buf, src, len);
3fed15f5 518
5ecd293d
PH
519 free(to_free);
520 return 1;
35ebfd6a
JH
521}
522
6073ee85 523static int git_path_check_crlf(const char *path, struct git_attr_check *check)
35ebfd6a 524{
6073ee85
JH
525 const char *value = check->value;
526
527 if (ATTR_TRUE(value))
528 return CRLF_TEXT;
529 else if (ATTR_FALSE(value))
530 return CRLF_BINARY;
531 else if (ATTR_UNSET(value))
532 ;
533 else if (!strcmp(value, "input"))
534 return CRLF_INPUT;
163b9591 535 return CRLF_GUESS;
35ebfd6a
JH
536}
537
aa4ed402
JH
538static struct convert_driver *git_path_check_convert(const char *path,
539 struct git_attr_check *check)
540{
541 const char *value = check->value;
542 struct convert_driver *drv;
543
544 if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value))
545 return NULL;
546 for (drv = user_convert; drv; drv = drv->next)
547 if (!strcmp(value, drv->name))
548 return drv;
549 return NULL;
550}
551
3fed15f5
JH
552static int git_path_check_ident(const char *path, struct git_attr_check *check)
553{
554 const char *value = check->value;
555
556 return !!ATTR_TRUE(value);
557}
558
5ecd293d 559int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst)
35ebfd6a 560{
aa4ed402 561 struct git_attr_check check[3];
6073ee85 562 int crlf = CRLF_GUESS;
5ecd293d 563 int ident = 0, ret = 0;
aa4ed402 564 char *filter = NULL;
6073ee85
JH
565
566 setup_convert_check(check);
3fed15f5 567 if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
aa4ed402 568 struct convert_driver *drv;
3fed15f5
JH
569 crlf = git_path_check_crlf(path, check + 0);
570 ident = git_path_check_ident(path, check + 1);
aa4ed402
JH
571 drv = git_path_check_convert(path, check + 2);
572 if (drv && drv->clean)
573 filter = drv->clean;
3fed15f5
JH
574 }
575
5ecd293d
PH
576 ret |= apply_filter(path, src, len, dst, filter);
577 if (ret) {
578 src = dst->buf;
579 len = dst->len;
aa4ed402 580 }
5ecd293d
PH
581 ret |= crlf_to_git(path, src, len, dst, crlf);
582 if (ret) {
583 src = dst->buf;
584 len = dst->len;
6073ee85 585 }
5ecd293d 586 return ret | ident_to_git(path, src, len, dst, ident);
35ebfd6a
JH
587}
588
5ecd293d 589int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst)
35ebfd6a 590{
aa4ed402 591 struct git_attr_check check[3];
6073ee85 592 int crlf = CRLF_GUESS;
5ecd293d 593 int ident = 0, ret = 0;
aa4ed402 594 char *filter = NULL;
6073ee85
JH
595
596 setup_convert_check(check);
3fed15f5 597 if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
aa4ed402 598 struct convert_driver *drv;
3fed15f5
JH
599 crlf = git_path_check_crlf(path, check + 0);
600 ident = git_path_check_ident(path, check + 1);
aa4ed402
JH
601 drv = git_path_check_convert(path, check + 2);
602 if (drv && drv->smudge)
603 filter = drv->smudge;
6073ee85 604 }
3fed15f5 605
5ecd293d
PH
606 ret |= ident_to_worktree(path, src, len, dst, ident);
607 if (ret) {
608 src = dst->buf;
609 len = dst->len;
3fed15f5 610 }
5ecd293d
PH
611 ret |= crlf_to_worktree(path, src, len, dst, crlf);
612 if (ret) {
613 src = dst->buf;
614 len = dst->len;
aa4ed402 615 }
5ecd293d 616 return ret | apply_filter(path, src, len, dst, filter);
35ebfd6a 617}