Document gitattributes(5)
[git/git.git] / convert.c
CommitLineData
6c510bee 1#include "cache.h"
35ebfd6a
JH
2#include "attr.h"
3
6c510bee
LT
4/*
5 * convert.c - convert a file when checking it out and checking it in.
6 *
7 * This should use the pathname to decide on whether it wants to do some
8 * more interesting conversions (automatic gzip/unzip, general format
9 * conversions etc etc), but by default it just does automatic CRLF<->LF
10 * translation when the "auto_crlf" option is set.
11 */
12
163b9591
JH
/*
 * Per-path end-of-line conversion modes, as passed in the "action"
 * argument of crlf_to_git() and crlf_to_worktree().
 *
 * An anonymous enum is used so the values stay plain ints (the
 * conversion helpers take "int action") while remaining visible to
 * the debugger and immune to macro-expansion surprises.
 */
enum {
	/* No usable "crlf" attribute: fall back to auto_crlf and content heuristics. */
	CRLF_GUESS = -1,
	/* Never convert, in either direction. */
	CRLF_BINARY = 0,
	/* Always convert: CRLF->LF into git, LF->CRLF into the work tree. */
	CRLF_TEXT = 1,
	/* Convert CRLF->LF into git only; leave content alone on checkout. */
	CRLF_INPUT = 2
};
17
6c510bee
LT
/*
 * Byte-class counters filled in by gather_stats().
 *
 * The counters are "unsigned long" to match the type used for buffer
 * sizes in this file: each counter is bounded by the buffer size, so
 * it cannot wrap, and the new-size arithmetic in the conversion
 * routines (size - crlf, size + lf - crlf) stays exact for any input.
 */
struct text_stat {
	/* Number of CR bytes (including those that are part of a CRLF pair). */
	unsigned long cr;
	/* Number of LF bytes (including those that are part of a CRLF pair). */
	unsigned long lf;
	/* Number of CR bytes immediately followed by LF. */
	unsigned long crlf;

	/* These are just approximations! */
	unsigned long printable;
	unsigned long nonprintable;
};
25
26static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
27{
28 unsigned long i;
29
30 memset(stats, 0, sizeof(*stats));
31
32 for (i = 0; i < size; i++) {
33 unsigned char c = buf[i];
34 if (c == '\r') {
35 stats->cr++;
36 if (i+1 < size && buf[i+1] == '\n')
37 stats->crlf++;
38 continue;
39 }
40 if (c == '\n') {
41 stats->lf++;
42 continue;
43 }
44 if (c == 127)
45 /* DEL */
46 stats->nonprintable++;
47 else if (c < 32) {
48 switch (c) {
49 /* BS, HT, ESC and FF */
50 case '\b': case '\t': case '\033': case '\014':
51 stats->printable++;
52 break;
53 default:
54 stats->nonprintable++;
55 }
56 }
57 else
58 stats->printable++;
59 }
60}
61
62/*
63 * The same heuristics as diff.c::mmfile_is_binary()
64 */
65static int is_binary(unsigned long size, struct text_stat *stats)
66{
67
68 if ((stats->printable >> 7) < stats->nonprintable)
69 return 1;
70 /*
71 * Other heuristics? Average line length might be relevant,
72 * as might LF vs CR vs CRLF counts..
73 *
74 * NOTE! It might be normal to have a low ratio of CRLF to LF
75 * (somebody starts with a LF-only file and edits it with an editor
76 * that adds CRLF only to lines that are added..). But do we
77 * want to support CR-only? Probably not.
78 */
79 return 0;
80}
81
163b9591 82static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int action)
6c510bee
LT
83{
84 char *buffer, *nbuf;
85 unsigned long size, nsize;
86 struct text_stat stats;
87
163b9591 88 if ((action == CRLF_BINARY) || (action == CRLF_GUESS && !auto_crlf))
6c510bee
LT
89 return 0;
90
91 size = *sizep;
92 if (!size)
93 return 0;
94 buffer = *bufp;
95
96 gather_stats(buffer, size, &stats);
97
98 /* No CR? Nothing to convert, regardless. */
99 if (!stats.cr)
100 return 0;
101
163b9591 102 if (action == CRLF_GUESS) {
201ac8ef
JH
103 /*
104 * We're currently not going to even try to convert stuff
105 * that has bare CR characters. Does anybody do that crazy
106 * stuff?
107 */
108 if (stats.cr != stats.crlf)
109 return 0;
110
111 /*
112 * And add some heuristics for binary vs text, of course...
113 */
114 if (is_binary(size, &stats))
115 return 0;
116 }
6c510bee
LT
117
118 /*
119 * Ok, allocate a new buffer, fill it in, and return true
120 * to let the caller know that we switched buffers on it.
121 */
122 nsize = size - stats.crlf;
123 nbuf = xmalloc(nsize);
124 *bufp = nbuf;
125 *sizep = nsize;
201ac8ef 126
163b9591
JH
127 if (action == CRLF_GUESS) {
128 /*
129 * If we guessed, we already know we rejected a file with
130 * lone CR, and we can strip a CR without looking at what
131 * follow it.
132 */
201ac8ef
JH
133 do {
134 unsigned char c = *buffer++;
135 if (c != '\r')
136 *nbuf++ = c;
137 } while (--size);
138 } else {
139 do {
140 unsigned char c = *buffer++;
141 if (! (c == '\r' && (1 < size && *buffer == '\n')))
142 *nbuf++ = c;
143 } while (--size);
144 }
6c510bee
LT
145
146 return 1;
147}
148
163b9591 149static int crlf_to_worktree(const char *path, char **bufp, unsigned long *sizep, int action)
6c510bee
LT
150{
151 char *buffer, *nbuf;
152 unsigned long size, nsize;
153 struct text_stat stats;
154 unsigned char last;
155
163b9591
JH
156 if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
157 (action == CRLF_GUESS && auto_crlf <= 0))
6c510bee
LT
158 return 0;
159
160 size = *sizep;
161 if (!size)
162 return 0;
163 buffer = *bufp;
164
165 gather_stats(buffer, size, &stats);
166
167 /* No LF? Nothing to convert, regardless. */
168 if (!stats.lf)
169 return 0;
170
171 /* Was it already in CRLF format? */
172 if (stats.lf == stats.crlf)
173 return 0;
174
163b9591 175 if (action == CRLF_GUESS) {
201ac8ef
JH
176 /* If we have any bare CR characters, we're not going to touch it */
177 if (stats.cr != stats.crlf)
178 return 0;
6c510bee 179
201ac8ef
JH
180 if (is_binary(size, &stats))
181 return 0;
182 }
6c510bee
LT
183
184 /*
185 * Ok, allocate a new buffer, fill it in, and return true
186 * to let the caller know that we switched buffers on it.
187 */
188 nsize = size + stats.lf - stats.crlf;
189 nbuf = xmalloc(nsize);
190 *bufp = nbuf;
191 *sizep = nsize;
192 last = 0;
193 do {
194 unsigned char c = *buffer++;
195 if (c == '\n' && last != '\r')
196 *nbuf++ = '\r';
197 *nbuf++ = c;
198 last = c;
199 } while (--size);
200
201 return 1;
202}
35ebfd6a
JH
203
204static void setup_crlf_check(struct git_attr_check *check)
205{
206 static struct git_attr *attr_crlf;
207
208 if (!attr_crlf)
209 attr_crlf = git_attr("crlf", 4);
210 check->attr = attr_crlf;
211}
212
201ac8ef 213static int git_path_check_crlf(const char *path)
35ebfd6a
JH
214{
215 struct git_attr_check attr_crlf_check;
216
217 setup_crlf_check(&attr_crlf_check);
218
515106fa 219 if (!git_checkattr(path, 1, &attr_crlf_check)) {
a5e92abd 220 const char *value = attr_crlf_check.value;
515106fa 221 if (ATTR_TRUE(value))
163b9591 222 return CRLF_TEXT;
515106fa 223 else if (ATTR_FALSE(value))
163b9591 224 return CRLF_BINARY;
515106fa
JH
225 else if (ATTR_UNSET(value))
226 ;
163b9591
JH
227 else if (!strcmp(value, "input"))
228 return CRLF_INPUT;
229 /* fallthru */
515106fa 230 }
163b9591 231 return CRLF_GUESS;
35ebfd6a
JH
232}
233
/*
 * Convert content on its way into git.
 *
 * Looks up the CRLF action for "path" and applies crlf_to_git() with
 * it; the return value and the *bufp / *sizep updates are whatever
 * crlf_to_git() produces (nonzero when the buffer was replaced).
 */
int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
{
	int action = git_path_check_crlf(path);

	return crlf_to_git(path, bufp, sizep, action);
}
238
/*
 * Convert content on its way out to the work tree.
 *
 * Looks up the CRLF action for "path" and applies crlf_to_worktree()
 * with it; the return value and the *bufp / *sizep updates are
 * whatever crlf_to_worktree() produces (nonzero when the buffer was
 * replaced).
 */
int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
{
	int action = git_path_check_crlf(path);

	return crlf_to_worktree(path, bufp, sizep, action);
}