archive-zip: streaming for stored files
[git/git.git] / archive-zip.c
CommitLineData
e4fbbfe9
RS
1/*
2 * Copyright (c) 2006 Rene Scharfe
3 */
e4fbbfe9 4#include "cache.h"
ec06bff5 5#include "archive.h"
2158f883 6#include "streaming.h"
e4fbbfe9 7
e4fbbfe9
RS
/* DOS-encoded archive timestamp, filled in once by dos_time(). */
static int zip_date, zip_time;

/* Growable buffer collecting the directory records, and its allocated size. */
static unsigned char *zip_dir;
static unsigned int zip_dir_size;

/* Bytes written to the output so far. */
static unsigned int zip_offset;
/* Bytes of zip_dir currently in use. */
static unsigned int zip_dir_offset;
/* Number of directory records stored in zip_dir. */
static unsigned int zip_dir_entries;

/* Initial size of zip_dir and the step by which it grows. */
#define ZIP_DIRECTORY_MIN_SIZE	(1024 * 1024)
/* Header flag set for streamed entries, which are followed by a data descriptor. */
#define ZIP_STREAM	(1 << 3)
e4fbbfe9
RS
20
/*
 * Per-entry header written in front of each member's data.
 * Every field is a raw byte array filled with copy_le16()/copy_le32(),
 * so the layout does not depend on host endianness or alignment.
 */
struct zip_local_header {
	unsigned char magic[4];			/* 0x04034b50 */
	unsigned char version[2];
	unsigned char flags[2];			/* 0 or ZIP_STREAM */
	unsigned char compression_method[2];	/* 0 or 8 */
	unsigned char mtime[2];
	unsigned char mdate[2];
	unsigned char crc32[4];			/* zero when streaming */
	unsigned char compressed_size[4];	/* zero when streaming */
	unsigned char size[4];			/* zero when streaming */
	unsigned char filename_length[2];
	unsigned char extra_length[2];
	unsigned char _end[1];			/* marker only, never written */
};
35
2158f883
RS
/*
 * Record written after the data of a streamed entry; it carries the
 * CRC and sizes that were not yet known when the local header went out.
 */
struct zip_data_desc {
	unsigned char magic[4];			/* 0x08074b50 */
	unsigned char crc32[4];
	unsigned char compressed_size[4];
	unsigned char size[4];
	unsigned char _end[1];			/* marker only, never written */
};
43
e4fbbfe9
RS
/*
 * Directory record for one entry.  These are accumulated in zip_dir
 * (each followed by the entry's path) and written out by
 * write_zip_trailer().
 */
struct zip_dir_header {
	unsigned char magic[4];			/* 0x02014b50 */
	unsigned char creator_version[2];	/* 0x0317 for symlinks/executables, else 0 */
	unsigned char version[2];
	unsigned char flags[2];			/* 0 or ZIP_STREAM */
	unsigned char compression_method[2];	/* 0 or 8 */
	unsigned char mtime[2];
	unsigned char mdate[2];
	unsigned char crc32[4];
	unsigned char compressed_size[4];
	unsigned char size[4];
	unsigned char filename_length[2];
	unsigned char extra_length[2];
	unsigned char comment_length[2];
	unsigned char disk[2];
	unsigned char attr1[2];
	unsigned char attr2[4];			/* 16 for directories, or mode << 16 */
	unsigned char offset[4];		/* zip_offset at the entry's local header */
	unsigned char _end[1];			/* marker only, never written */
};
64
/*
 * Final record of the archive, written after the directory records.
 * It states how many entries there are and where the directory starts.
 */
struct zip_dir_trailer {
	unsigned char magic[4];			/* 0x06054b50 */
	unsigned char disk[2];
	unsigned char directory_start_disk[2];
	unsigned char entries_on_this_disk[2];
	unsigned char entries[2];
	unsigned char size[4];			/* bytes of directory records */
	unsigned char offset[4];		/* output offset of the directory */
	unsigned char comment_length[2];
	unsigned char _end[1];			/* marker only, never written */
};
76
0ea865ce
RS
/*
 * Payload sizes of the on-disk records.
 *
 * Do not use sizeof(struct ...) for these: on ARM the compiler pads
 * the end of the struct, making sizeof report two bytes more than the
 * payload.  Measuring up to the trailing _end marker member gives the
 * exact number of bytes to write.
 */
#define ZIP_LOCAL_HEADER_SIZE	offsetof(struct zip_local_header, _end)
#define ZIP_DATA_DESC_SIZE	offsetof(struct zip_data_desc, _end)
#define ZIP_DIR_HEADER_SIZE	offsetof(struct zip_dir_header, _end)
#define ZIP_DIR_TRAILER_SIZE	offsetof(struct zip_dir_trailer, _end)
86
e4fbbfe9
RS
/* Store the low 16 bits of n at dest, least significant byte first. */
static void copy_le16(unsigned char *dest, unsigned int n)
{
	dest[0] = (unsigned char)(n & 0xff);
	dest[1] = (unsigned char)((n >> 8) & 0xff);
}
92
/* Store the low 32 bits of n at dest, least significant byte first. */
static void copy_le32(unsigned char *dest, unsigned int n)
{
	int i;

	for (i = 0; i < 4; i++) {
		dest[i] = (unsigned char)(n & 0xff);
		n >>= 8;
	}
}
100
101static void *zlib_deflate(void *data, unsigned long size,
3a176c6c 102 int compression_level, unsigned long *compressed_size)
e4fbbfe9 103{
ef49a7a0 104 git_zstream stream;
e4fbbfe9
RS
105 unsigned long maxsize;
106 void *buffer;
107 int result;
108
109 memset(&stream, 0, sizeof(stream));
55bb5c91 110 git_deflate_init(&stream, compression_level);
225a6f10 111 maxsize = git_deflate_bound(&stream, size);
e4fbbfe9
RS
112 buffer = xmalloc(maxsize);
113
114 stream.next_in = data;
115 stream.avail_in = size;
116 stream.next_out = buffer;
117 stream.avail_out = maxsize;
118
119 do {
55bb5c91 120 result = git_deflate(&stream, Z_FINISH);
e4fbbfe9
RS
121 } while (result == Z_OK);
122
123 if (result != Z_STREAM_END) {
124 free(buffer);
125 return NULL;
126 }
127
55bb5c91 128 git_deflate_end(&stream);
e4fbbfe9
RS
129 *compressed_size = stream.total_out;
130
131 return buffer;
132}
133
2158f883
RS
134static void write_zip_data_desc(unsigned long size,
135 unsigned long compressed_size,
136 unsigned long crc)
137{
138 struct zip_data_desc trailer;
139
140 copy_le32(trailer.magic, 0x08074b50);
141 copy_le32(trailer.crc32, crc);
142 copy_le32(trailer.compressed_size, compressed_size);
143 copy_le32(trailer.size, size);
144 write_or_die(1, &trailer, ZIP_DATA_DESC_SIZE);
145}
146
ebf5374a
RS
147static void set_zip_dir_data_desc(struct zip_dir_header *header,
148 unsigned long size,
149 unsigned long compressed_size,
150 unsigned long crc)
151{
152 copy_le32(header->crc32, crc);
153 copy_le32(header->compressed_size, compressed_size);
154 copy_le32(header->size, size);
155}
156
157static void set_zip_header_data_desc(struct zip_local_header *header,
158 unsigned long size,
159 unsigned long compressed_size,
160 unsigned long crc)
161{
162 copy_le32(header->crc32, crc);
163 copy_le32(header->compressed_size, compressed_size);
164 copy_le32(header->size, size);
165}
166
2158f883
RS
167#define STREAM_BUFFER_SIZE (1024 * 16)
168
562e25ab 169static int write_zip_entry(struct archiver_args *args,
9cb513b7
NTND
170 const unsigned char *sha1,
171 const char *path, size_t pathlen,
172 unsigned int mode)
e4fbbfe9
RS
173{
174 struct zip_local_header header;
175 struct zip_dir_header dirent;
62cdce17 176 unsigned long attr2;
e4fbbfe9 177 unsigned long compressed_size;
e4fbbfe9
RS
178 unsigned long crc;
179 unsigned long direntsize;
e4fbbfe9 180 int method;
e4fbbfe9 181 unsigned char *out;
e4fbbfe9 182 void *deflated = NULL;
9cb513b7 183 void *buffer;
2158f883
RS
184 struct git_istream *stream = NULL;
185 unsigned long flags = 0;
9cb513b7 186 unsigned long size;
e4fbbfe9 187
38f4d138 188 crc = crc32(0, NULL, 0);
e4fbbfe9 189
e4fbbfe9 190 if (pathlen > 0xffff) {
562e25ab
RS
191 return error("path too long (%d chars, SHA1: %s): %s",
192 (int)pathlen, sha1_to_hex(sha1), path);
e4fbbfe9
RS
193 }
194
302b9282 195 if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
e4fbbfe9 196 method = 0;
62cdce17 197 attr2 = 16;
e4fbbfe9 198 out = NULL;
60df6bd1 199 size = 0;
e4fbbfe9 200 compressed_size = 0;
9cb513b7
NTND
201 buffer = NULL;
202 size = 0;
62cdce17 203 } else if (S_ISREG(mode) || S_ISLNK(mode)) {
2158f883 204 enum object_type type = sha1_object_info(sha1, &size);
9cb513b7 205
62cdce17 206 method = 0;
76bf8d0e
DP
207 attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) :
208 (mode & 0111) ? ((mode) << 16) : 0;
2158f883 209 if (S_ISREG(mode) && args->compression_level != 0 && size > 0)
62cdce17 210 method = 8;
e4fbbfe9 211 compressed_size = size;
2158f883
RS
212
213 if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert &&
214 size > big_file_threshold && method == 0) {
215 stream = open_istream(sha1, &type, &size, NULL);
216 if (!stream)
217 return error("cannot stream blob %s",
218 sha1_to_hex(sha1));
219 flags |= ZIP_STREAM;
220 out = buffer = NULL;
221 } else {
222 buffer = sha1_file_to_archive(args, path, sha1, mode,
223 &type, &size);
224 if (!buffer)
225 return error("cannot read %s",
226 sha1_to_hex(sha1));
227 crc = crc32(crc, buffer, size);
228 out = buffer;
229 }
e4fbbfe9 230 } else {
562e25ab
RS
231 return error("unsupported file mode: 0%o (SHA1: %s)", mode,
232 sha1_to_hex(sha1));
e4fbbfe9
RS
233 }
234
2158f883 235 if (buffer && method == 8) {
3a176c6c
RS
236 deflated = zlib_deflate(buffer, size, args->compression_level,
237 &compressed_size);
e4fbbfe9
RS
238 if (deflated && compressed_size - 6 < size) {
239 /* ZLIB --> raw compressed data (see RFC 1950) */
240 /* CMF and FLG ... */
241 out = (unsigned char *)deflated + 2;
242 compressed_size -= 6; /* ... and ADLER32 */
243 } else {
244 method = 0;
245 compressed_size = size;
246 }
247 }
248
249 /* make sure we have enough free space in the dictionary */
0ea865ce 250 direntsize = ZIP_DIR_HEADER_SIZE + pathlen;
e4fbbfe9
RS
251 while (zip_dir_size < zip_dir_offset + direntsize) {
252 zip_dir_size += ZIP_DIRECTORY_MIN_SIZE;
253 zip_dir = xrealloc(zip_dir, zip_dir_size);
254 }
255
256 copy_le32(dirent.magic, 0x02014b50);
76bf8d0e
DP
257 copy_le16(dirent.creator_version,
258 S_ISLNK(mode) || (S_ISREG(mode) && (mode & 0111)) ? 0x0317 : 0);
cf72fb07 259 copy_le16(dirent.version, 10);
2158f883 260 copy_le16(dirent.flags, flags);
e4fbbfe9
RS
261 copy_le16(dirent.compression_method, method);
262 copy_le16(dirent.mtime, zip_time);
263 copy_le16(dirent.mdate, zip_date);
ebf5374a 264 set_zip_dir_data_desc(&dirent, size, compressed_size, crc);
e4fbbfe9
RS
265 copy_le16(dirent.filename_length, pathlen);
266 copy_le16(dirent.extra_length, 0);
267 copy_le16(dirent.comment_length, 0);
268 copy_le16(dirent.disk, 0);
269 copy_le16(dirent.attr1, 0);
62cdce17 270 copy_le32(dirent.attr2, attr2);
e4fbbfe9 271 copy_le32(dirent.offset, zip_offset);
e4fbbfe9
RS
272
273 copy_le32(header.magic, 0x04034b50);
cf72fb07 274 copy_le16(header.version, 10);
2158f883 275 copy_le16(header.flags, flags);
e4fbbfe9
RS
276 copy_le16(header.compression_method, method);
277 copy_le16(header.mtime, zip_time);
278 copy_le16(header.mdate, zip_date);
2158f883
RS
279 if (flags & ZIP_STREAM)
280 set_zip_header_data_desc(&header, 0, 0, 0);
281 else
282 set_zip_header_data_desc(&header, size, compressed_size, crc);
e4fbbfe9
RS
283 copy_le16(header.filename_length, pathlen);
284 copy_le16(header.extra_length, 0);
0ea865ce
RS
285 write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE);
286 zip_offset += ZIP_LOCAL_HEADER_SIZE;
e4fbbfe9
RS
287 write_or_die(1, path, pathlen);
288 zip_offset += pathlen;
2158f883
RS
289 if (stream && method == 0) {
290 unsigned char buf[STREAM_BUFFER_SIZE];
291 ssize_t readlen;
292
293 for (;;) {
294 readlen = read_istream(stream, buf, sizeof(buf));
295 if (readlen <= 0)
296 break;
297 crc = crc32(crc, buf, readlen);
298 write_or_die(1, buf, readlen);
299 }
300 close_istream(stream);
301 if (readlen)
302 return readlen;
303
304 compressed_size = size;
305 zip_offset += compressed_size;
306
307 write_zip_data_desc(size, compressed_size, crc);
308 zip_offset += ZIP_DATA_DESC_SIZE;
309
310 set_zip_dir_data_desc(&dirent, size, compressed_size, crc);
311 } else if (compressed_size > 0) {
e4fbbfe9
RS
312 write_or_die(1, out, compressed_size);
313 zip_offset += compressed_size;
314 }
315
e4fbbfe9 316 free(deflated);
9cb513b7 317 free(buffer);
e4fbbfe9 318
ebf5374a
RS
319 memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE);
320 zip_dir_offset += ZIP_DIR_HEADER_SIZE;
321 memcpy(zip_dir + zip_dir_offset, path, pathlen);
322 zip_dir_offset += pathlen;
323 zip_dir_entries++;
324
562e25ab 325 return 0;
e4fbbfe9
RS
326}
327
328static void write_zip_trailer(const unsigned char *sha1)
329{
330 struct zip_dir_trailer trailer;
331
332 copy_le32(trailer.magic, 0x06054b50);
333 copy_le16(trailer.disk, 0);
334 copy_le16(trailer.directory_start_disk, 0);
335 copy_le16(trailer.entries_on_this_disk, zip_dir_entries);
336 copy_le16(trailer.entries, zip_dir_entries);
337 copy_le32(trailer.size, zip_dir_offset);
338 copy_le32(trailer.offset, zip_offset);
339 copy_le16(trailer.comment_length, sha1 ? 40 : 0);
340
341 write_or_die(1, zip_dir, zip_dir_offset);
0ea865ce 342 write_or_die(1, &trailer, ZIP_DIR_TRAILER_SIZE);
e4fbbfe9
RS
343 if (sha1)
344 write_or_die(1, sha1_to_hex(sha1), 40);
345}
346
/*
 * Convert *time to the packed DOS date and time words used in ZIP
 * headers, interpreting it in the local time zone.
 *
 *   date: day | month << 5 | (year - 1980) << 9
 *   time: seconds / 2 | minute << 5 | hour << 11
 *
 * The DOS format cannot express dates before 1980.  Such timestamps,
 * and any timestamp localtime() cannot convert, are clamped to
 * 1980-01-01 00:00:00 instead of producing a wrapped date or
 * dereferencing a NULL pointer.
 */
static void dos_time(time_t *time, int *dos_date, int *dos_time)
{
	struct tm *t = localtime(time);

	if (!t || t->tm_year < 80) {
		*dos_date = 1 + 1 * 32;	/* day 1, month 1, year offset 0 */
		*dos_time = 0;
		return;
	}

	*dos_date = t->tm_mday + (t->tm_mon + 1) * 32 +
	            (t->tm_year + 1900 - 1980) * 512;
	*dos_time = t->tm_sec / 2 + t->tm_min * 32 + t->tm_hour * 2048;
}
355
4d7c9898
JK
356static int write_zip_archive(const struct archiver *ar,
357 struct archiver_args *args)
ec06bff5 358{
562e25ab
RS
359 int err;
360
ec06bff5
FBH
361 dos_time(&args->time, &zip_date, &zip_time);
362
363 zip_dir = xmalloc(ZIP_DIRECTORY_MIN_SIZE);
364 zip_dir_size = ZIP_DIRECTORY_MIN_SIZE;
562e25ab
RS
365
366 err = write_archive_entries(args, write_zip_entry);
367 if (!err)
368 write_zip_trailer(args->commit_sha1);
ec06bff5
FBH
369
370 free(zip_dir);
371
562e25ab 372 return err;
ec06bff5 373}
13e0f88d
JK
374
375static struct archiver zip_archiver = {
376 "zip",
377 write_zip_archive,
7b97730b 378 ARCHIVER_WANT_COMPRESSION_LEVELS|ARCHIVER_REMOTE
13e0f88d
JK
379};
380
381void init_zip_archiver(void)
382{
383 register_archiver(&zip_archiver);
384}