Merge branch 'kd/t0028-octal-del-is-377-not-777'
[git/git.git] / t / t0028-working-tree-encoding.sh
1 #!/bin/sh
2
3 test_description='working-tree-encoding conversion via gitattributes'
4
5 . ./test-lib.sh
6
7 GIT_TRACE_WORKING_TREE_ENCODING=1 && export GIT_TRACE_WORKING_TREE_ENCODING
8
# Prerequisite NO_UTF16_BOM: satisfied when iconv encodes the three
# characters "abc" to UTF-16 as exactly 3 * 2 = 6 bytes, i.e. without
# prepending a byte order mark.
test_lazy_prereq NO_UTF16_BOM '
	# The count is expanded unquoted on purpose so that any padding
	# printed by wc is dropped by word splitting.
	utf16_probe_len=$(printf abc | iconv -f UTF-8 -t UTF-16 | wc -c) &&
	test $utf16_probe_len = 6
'
12
# Prerequisite NO_UTF32_BOM: satisfied when iconv encodes the three
# characters "abc" to UTF-32 as exactly 3 * 4 = 12 bytes, i.e. without
# prepending a byte order mark.
test_lazy_prereq NO_UTF32_BOM '
	# The count is expanded unquoted on purpose so that any padding
	# printed by wc is dropped by word splitting.
	utf32_probe_len=$(printf abc | iconv -f UTF-8 -t UTF-32 | wc -c) &&
	test $utf32_probe_len = 12
'
16
# write_utf16: re-encode UTF-8 text read from stdin as UTF-16 on stdout.
#
# When the NO_UTF16_BOM prerequisite holds (iconv emits no byte order
# mark by itself), the bytes FE FF are written in front of the iconv
# output.
#
# The BOM is spelled with octal escapes: POSIX printf defines only octal
# escapes, and "\x" hex escapes are printed literally by some shells.
write_utf16 () {
	if test_have_prereq NO_UTF16_BOM
	then
		printf '\376\377'
	fi &&
	iconv -f UTF-8 -t UTF-16
}
24
# write_utf32: re-encode UTF-8 text read from stdin as UTF-32 on stdout.
#
# When the NO_UTF32_BOM prerequisite holds (iconv emits no byte order
# mark by itself), the bytes 00 00 FE FF are written in front of the
# iconv output.
#
# The BOM is spelled with octal escapes: POSIX printf defines only octal
# escapes, and "\x" hex escapes are printed literally by some shells.
write_utf32 () {
	if test_have_prereq NO_UTF32_BOM
	then
		printf '\0\0\376\377'
	fi &&
	iconv -f UTF-8 -t UTF-32
}
32
# Create every fixture used by the later tests and commit the UTF-16
# and UTF-16LE-BOM files together with their .gitattributes rules.
test_expect_success 'setup test files' '
	git config core.eol lf &&

	text="hallo there!\ncan you read me?" &&
	echo "*.utf16 text working-tree-encoding=utf-16" >.gitattributes &&
	echo "*.utf16lebom text working-tree-encoding=UTF-16LE-BOM" >>.gitattributes &&
	printf "$text" >test.utf8.raw &&
	printf "$text" | write_utf16 >test.utf16.raw &&
	printf "$text" | write_utf32 >test.utf32.raw &&

	# UTF-16LE-BOM reference: a little-endian BOM (FF FE) followed by
	# the text encoded as UTF-16LE, matching the declared encoding.
	printf "\377\376" >test.utf16lebom.raw &&
	printf "$text" | iconv -f UTF-8 -t UTF-16LE >>test.utf16lebom.raw &&

	# Line ending tests
	printf "one\ntwo\nthree\n" >lf.utf8.raw &&
	printf "one\r\ntwo\r\nthree\r\n" >crlf.utf8.raw &&

	# BOM tests: "abc" in each byte order, without and with a BOM
	printf "\0a\0b\0c" >nobom.utf16be.raw &&
	printf "a\0b\0c\0" >nobom.utf16le.raw &&
	printf "\376\377\0a\0b\0c" >bebom.utf16be.raw &&
	printf "\377\376a\0b\0c\0" >lebom.utf16le.raw &&
	printf "\0\0\0a\0\0\0b\0\0\0c" >nobom.utf32be.raw &&
	printf "a\0\0\0b\0\0\0c\0\0\0" >nobom.utf32le.raw &&
	printf "\0\0\376\377\0\0\0a\0\0\0b\0\0\0c" >bebom.utf32be.raw &&
	printf "\377\376\0\0a\0\0\0b\0\0\0c\0\0\0" >lebom.utf32le.raw &&

	# Add only the UTF-16 files now; the UTF-32 file is created here
	# but gets added by a later test.
	cp test.utf16.raw test.utf16 &&
	cp test.utf32.raw test.utf32 &&
	cp test.utf16lebom.raw test.utf16lebom &&
	git add .gitattributes test.utf16 test.utf16lebom &&
	git commit -m initial
'
66
# The content staged for test.utf16 must be byte for byte the UTF-8
# reference text, not the UTF-16 bytes found in the working tree.
test_expect_success 'ensure UTF-8 is stored in Git' '
	test_when_finished "rm -f test.utf16.git" &&

	# Dump the staged content and compare it with the UTF-8 reference.
	git cat-file -p :test.utf16 >test.utf16.git &&
	test_cmp_bin test.utf8.raw test.utf16.git
'
73
# Removing the working tree file and checking it out again must recreate
# it byte for byte equal to the UTF-16 reference file.
test_expect_success 're-encode to UTF-16 on checkout' '
	# The UTF-16 reference file is removed once this test is done.
	test_when_finished "rm -f test.utf16.raw" &&

	rm test.utf16 &&
	git checkout test.utf16 &&
	test_cmp_bin test.utf16.raw test.utf16
'
81
# Removing the working tree file and checking it out again must recreate
# it byte for byte equal to test.utf16lebom.raw.
test_expect_success 're-encode to UTF-16-LE-BOM on checkout' '
	rm test.utf16lebom &&
	git checkout test.utf16lebom &&
	test_cmp_bin test.utf16lebom.raw test.utf16lebom
'
87
# A working-tree-encoding rule placed in .git/info/attributes must be
# honoured: adding the UTF-32 file has to store the UTF-8 reference text.
#
# NOTE(review): .git/info/attributes is not removed afterwards, so the
# *.utf32 rule stays active for the tests that follow; presumably some
# of them rely on it. Confirm before adding cleanup.
test_expect_success 'check $GIT_DIR/info/attributes support' '
	test_when_finished "rm -f test.utf32.git" &&
	test_when_finished "git reset --hard HEAD" &&

	echo "*.utf32 text working-tree-encoding=utf-32" >.git/info/attributes &&
	git add test.utf32 &&

	# Dump the staged content and compare it with the UTF-8 reference.
	git cat-file -p :test.utf32 >test.utf32.git &&
	test_cmp_bin test.utf8.raw test.utf32.git
'
98
# Run the same three checks once for UTF-16 and once for UTF-32.
# ${i} is expanded when each single-quoted test body is evaluated.
for i in 16 32
do
	test_expect_success "check prohibited UTF-${i} BOM" '
		test_when_finished "git reset --hard HEAD" &&

		echo "*.utf${i}be text working-tree-encoding=utf-${i}be" >>.gitattributes &&
		echo "*.utf${i}le text working-tree-encoding=utf-${i}LE" >>.gitattributes &&

		# Here we add UTF-${i} files that carry a BOM (big/little-endian)
		# but we tell Git to treat them as UTF-${i}BE/UTF-${i}LE.
		# In these cases the BOM is prohibited.
		cp bebom.utf${i}be.raw bebom.utf${i}be &&
		test_must_fail git add bebom.utf${i}be 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&

		cp lebom.utf${i}le.raw lebom.utf${i}be &&
		test_must_fail git add lebom.utf${i}be 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&

		cp bebom.utf${i}be.raw bebom.utf${i}le &&
		test_must_fail git add bebom.utf${i}le 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&

		cp lebom.utf${i}le.raw lebom.utf${i}le &&
		test_must_fail git add lebom.utf${i}le 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out
	'

	test_expect_success "check required UTF-${i} BOM" '
		test_when_finished "git reset --hard HEAD" &&

		echo "*.utf${i} text working-tree-encoding=utf-${i}" >>.gitattributes &&

		# A file declared as plain UTF-${i} must start with a BOM;
		# both BOM-less byte orders have to be rejected.
		cp nobom.utf${i}be.raw nobom.utf${i} &&
		test_must_fail git add nobom.utf${i} 2>err.out &&
		test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
		test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out &&

		cp nobom.utf${i}le.raw nobom.utf${i} &&
		test_must_fail git add nobom.utf${i} 2>err.out &&
		test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
		test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out
	'

	test_expect_success "eol conversion for UTF-${i} encoded files on checkout" '
		test_when_finished "rm -f crlf.utf${i}.raw lf.utf${i}.raw" &&
		test_when_finished "git reset --hard HEAD^" &&

		write_utf${i} <lf.utf8.raw >lf.utf${i}.raw &&
		write_utf${i} <crlf.utf8.raw >crlf.utf${i}.raw &&
		cp crlf.utf${i}.raw eol.utf${i} &&

		# Expected "git ls-files --eol" line: LF in the index, non-text
		# content in the working tree, "text" attribute. It is built
		# with printf so that the column padding and the TAB in front
		# of the path are explicit instead of invisible whitespace.
		printf "i/%-5s w/%-5s attr/%-17s\t%s\n" \
			lf -text text eol.utf${i} >expectIndexLF &&

		git add eol.utf${i} &&
		git commit -m eol &&

		# UTF-${i} with CRLF (Windows line endings)
		rm eol.utf${i} &&
		git -c core.eol=crlf checkout eol.utf${i} &&
		test_cmp_bin crlf.utf${i}.raw eol.utf${i} &&

		# Although the file has CRLF in the working tree,
		# ensure LF in the index
		git ls-files --eol eol.utf${i} >actual &&
		test_cmp expectIndexLF actual &&

		# UTF-${i} with LF (Unix line endings)
		rm eol.utf${i} &&
		git -c core.eol=lf checkout eol.utf${i} &&
		test_cmp_bin lf.utf${i}.raw eol.utf${i} &&

		# The file has LF in the working tree, ensure LF in the index
		git ls-files --eol eol.utf${i} >actual &&
		test_cmp expectIndexLF actual
	'
done
182
# Exercise the four special forms of the working-tree-encoding attribute:
# set without a value, explicitly unset, empty value, unknown encoding.
test_expect_success 'check unsupported encodings' '
	test_when_finished "git reset --hard HEAD" &&

	# Set without a value: git add must fail with a dedicated message.
	echo "*.set text working-tree-encoding" >.gitattributes &&
	printf "set" >t.set &&
	test_must_fail git add t.set 2>err.out &&
	test_i18ngrep "true/false are no valid working-tree-encodings" err.out &&

	# Explicitly unset: git add must succeed.
	echo "*.unset text -working-tree-encoding" >.gitattributes &&
	printf "unset" >t.unset &&
	git add t.unset &&

	# Empty value: git add must succeed.
	echo "*.empty text working-tree-encoding=" >.gitattributes &&
	printf "empty" >t.empty &&
	git add t.empty &&

	# Unknown encoding name: git add must fail while encoding.
	echo "*.garbage text working-tree-encoding=garbage" >.gitattributes &&
	printf "garbage" >t.garbage &&
	test_must_fail git add t.garbage 2>err.out &&
	test_i18ngrep "failed to encode" err.out
'
204
# A blob committed without going through the working-tree-encoding filter
# must make a later checkout fail instead of being silently overwritten.
test_expect_success 'error if encoding round trip is not the same during refresh' '
	BEFORE_STATE=$(git rev-parse HEAD) &&
	test_when_finished "git reset --hard $BEFORE_STATE" &&

	# Add and commit a file declared as UTF-16LE but skip the
	# "working-tree-encoding" filter, so the blob holds the raw working
	# tree bytes instead of their UTF-8 conversion. This simulates a Git
	# version that has no working tree encoding support.
	echo "*.utf16le text working-tree-encoding=utf-16le" >.gitattributes &&
	echo "hallo" >nonsense.utf16le &&
	TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16le) &&
	git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16le &&

	# Each git command gets its own step of the && chain so that a
	# failure is not hidden inside a nested command substitution.
	# HEAD has not moved since BEFORE_STATE was recorded, so it is the
	# parent of the new commit.
	TEST_TREE=$(git write-tree) &&
	COMMIT=$(git commit-tree -p $BEFORE_STATE -m "plain commit" $TEST_TREE) &&
	git update-ref refs/heads/master $COMMIT &&

	test_must_fail git checkout HEAD^ 2>err.out &&
	test_i18ngrep "error: .* overwritten by checkout:" err.out
'
223
# A BOM-less UTF-16 blob that bypassed the filter must be reported by
# "git diff" with a "BOM is required" error on stderr.
test_expect_success 'error if encoding garbage is already in Git' '
	BEFORE_STATE=$(git rev-parse HEAD) &&
	test_when_finished "git reset --hard $BEFORE_STATE" &&

	# Skip the UTF-16 filter for the added file.
	# This simulates a Git version that has no working-tree-encoding
	# support.
	cp nobom.utf16be.raw nonsense.utf16 &&
	TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16) &&
	git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16 &&

	# Each git command gets its own step of the && chain so that a
	# failure is not hidden inside a nested command substitution.
	# HEAD has not moved since BEFORE_STATE was recorded, so it is the
	# parent of the new commit.
	TEST_TREE=$(git write-tree) &&
	COMMIT=$(git commit-tree -p $BEFORE_STATE -m "plain commit" $TEST_TREE) &&
	git update-ref refs/heads/master $COMMIT &&

	git diff 2>err.out &&
	test_i18ngrep "error: BOM is required" err.out
'
239
# Prerequisite ICONV_SHIFT_JIS: satisfied when iconv accepts a UTF-8 to
# SHIFT-JIS conversion; it is probed with empty input.
test_lazy_prereq ICONV_SHIFT_JIS '
	iconv -f UTF-8 -t SHIFT-JIS </dev/null
'
243
# Check, via the GIT_TRACE output of "git add", which encodings get the
# round-trip check by default and how core.checkRoundtripEncoding changes
# that selection.
test_expect_success ICONV_SHIFT_JIS 'check roundtrip encoding' '
	test_when_finished "rm -f roundtrip.shift roundtrip.utf16" &&
	test_when_finished "git reset --hard HEAD" &&

	text="hallo there!\nroundtrip test here!" &&
	printf "$text" | iconv -f UTF-8 -t SHIFT-JIS >roundtrip.shift &&
	printf "$text" | write_utf16 >roundtrip.utf16 &&
	echo "*.shift text working-tree-encoding=SHIFT-JIS" >>.gitattributes &&

	# SHIFT-JIS encoded files are round-trip checked by default...
	GIT_TRACE=1 git add .gitattributes roundtrip.shift 2>&1 |
		grep "Checking roundtrip encoding for SHIFT-JIS" &&
	git reset &&

	# ... unless we overwrite the Git config!
	! GIT_TRACE=1 git -c core.checkRoundtripEncoding=garbage \
		add .gitattributes roundtrip.shift 2>&1 |
		grep "Checking roundtrip encoding for SHIFT-JIS" &&
	git reset &&

	# UTF-16 encoded files should not be round-trip checked by default...
	! GIT_TRACE=1 git add roundtrip.utf16 2>&1 |
		grep "Checking roundtrip encoding for UTF-16" &&
	git reset &&

	# ... unless we tell Git to check it!
	GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-16, UTF-32" \
		add roundtrip.utf16 2>&1 |
		grep "Checking roundtrip encoding for utf-16" &&
	git reset &&

	# Same request again with the list reordered and in different case
	# (here we also check that the casing of the encoding is irrelevant)
	GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-32, utf-16" \
		add roundtrip.utf16 2>&1 |
		grep "Checking roundtrip encoding for utf-16" &&
	git reset
'
282
283 test_done