Lazy man's auto-CRLF
[git/git.git] / convert.c
CommitLineData
6c510bee
LT
1#include "cache.h"
/*
 * convert.c - convert a file when checking it out and checking it in.
 *
 * This should use the pathname to decide whether it wants to do some
 * more interesting conversions (automatic gzip/unzip, general format
 * conversions, etc.), but by default it just does automatic CRLF<->LF
 * translation when the "auto_crlf" option is set.
 */
10
/*
 * Byte-class tallies for a single buffer, as filled in by gather_stats().
 *
 * The counters are unsigned long -- the same type this file uses for
 * buffer sizes -- so they cannot wrap on a buffer whose length does not
 * fit in an unsigned int.  The converters derive allocation sizes from
 * these counts, so a wrapped counter would yield a wrong buffer size.
 */
struct text_stat {
	/* CR, LF and CRLF counts (the LF of a CRLF pair is counted in lf too) */
	unsigned long cr, lf, crlf;

	/* These are just approximations! */
	unsigned long printable, nonprintable;
};
18
19static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
20{
21 unsigned long i;
22
23 memset(stats, 0, sizeof(*stats));
24
25 for (i = 0; i < size; i++) {
26 unsigned char c = buf[i];
27 if (c == '\r') {
28 stats->cr++;
29 if (i+1 < size && buf[i+1] == '\n')
30 stats->crlf++;
31 continue;
32 }
33 if (c == '\n') {
34 stats->lf++;
35 continue;
36 }
37 if (c == 127)
38 /* DEL */
39 stats->nonprintable++;
40 else if (c < 32) {
41 switch (c) {
42 /* BS, HT, ESC and FF */
43 case '\b': case '\t': case '\033': case '\014':
44 stats->printable++;
45 break;
46 default:
47 stats->nonprintable++;
48 }
49 }
50 else
51 stats->printable++;
52 }
53}
54
55/*
56 * The same heuristics as diff.c::mmfile_is_binary()
57 */
58static int is_binary(unsigned long size, struct text_stat *stats)
59{
60
61 if ((stats->printable >> 7) < stats->nonprintable)
62 return 1;
63 /*
64 * Other heuristics? Average line length might be relevant,
65 * as might LF vs CR vs CRLF counts..
66 *
67 * NOTE! It might be normal to have a low ratio of CRLF to LF
68 * (somebody starts with a LF-only file and edits it with an editor
69 * that adds CRLF only to lines that are added..). But do we
70 * want to support CR-only? Probably not.
71 */
72 return 0;
73}
74
75int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
76{
77 char *buffer, *nbuf;
78 unsigned long size, nsize;
79 struct text_stat stats;
80
81 /*
82 * FIXME! Other pluggable conversions should go here,
83 * based on filename patterns. Right now we just do the
84 * stupid auto-CRLF one.
85 */
86 if (!auto_crlf)
87 return 0;
88
89 size = *sizep;
90 if (!size)
91 return 0;
92 buffer = *bufp;
93
94 gather_stats(buffer, size, &stats);
95
96 /* No CR? Nothing to convert, regardless. */
97 if (!stats.cr)
98 return 0;
99
100 /*
101 * We're currently not going to even try to convert stuff
102 * that has bare CR characters. Does anybody do that crazy
103 * stuff?
104 */
105 if (stats.cr != stats.crlf)
106 return 0;
107
108 /*
109 * And add some heuristics for binary vs text, of course...
110 */
111 if (is_binary(size, &stats))
112 return 0;
113
114 /*
115 * Ok, allocate a new buffer, fill it in, and return true
116 * to let the caller know that we switched buffers on it.
117 */
118 nsize = size - stats.crlf;
119 nbuf = xmalloc(nsize);
120 *bufp = nbuf;
121 *sizep = nsize;
122 do {
123 unsigned char c = *buffer++;
124 if (c != '\r')
125 *nbuf++ = c;
126 } while (--size);
127
128 return 1;
129}
130
131int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
132{
133 char *buffer, *nbuf;
134 unsigned long size, nsize;
135 struct text_stat stats;
136 unsigned char last;
137
138 /*
139 * FIXME! Other pluggable conversions should go here,
140 * based on filename patterns. Right now we just do the
141 * stupid auto-CRLF one.
142 */
143 if (!auto_crlf)
144 return 0;
145
146 size = *sizep;
147 if (!size)
148 return 0;
149 buffer = *bufp;
150
151 gather_stats(buffer, size, &stats);
152
153 /* No LF? Nothing to convert, regardless. */
154 if (!stats.lf)
155 return 0;
156
157 /* Was it already in CRLF format? */
158 if (stats.lf == stats.crlf)
159 return 0;
160
161 /* If we have any bare CR characters, we're not going to touch it */
162 if (stats.cr != stats.crlf)
163 return 0;
164
165 if (is_binary(size, &stats))
166 return 0;
167
168 /*
169 * Ok, allocate a new buffer, fill it in, and return true
170 * to let the caller know that we switched buffers on it.
171 */
172 nsize = size + stats.lf - stats.crlf;
173 nbuf = xmalloc(nsize);
174 *bufp = nbuf;
175 *sizep = nsize;
176 last = 0;
177 do {
178 unsigned char c = *buffer++;
179 if (c == '\n' && last != '\r')
180 *nbuf++ = '\r';
181 *nbuf++ = c;
182 last = c;
183 } while (--size);
184
185 return 1;
186}