repack: refactor pack deletion for future use
[git/git.git] / builtin / repack.c
1 #include "builtin.h"
2 #include "cache.h"
3 #include "config.h"
4 #include "dir.h"
5 #include "parse-options.h"
6 #include "run-command.h"
7 #include "sigchain.h"
8 #include "strbuf.h"
9 #include "string-list.h"
10 #include "argv-array.h"
11 #include "midx.h"
12 #include "packfile.h"
13 #include "object-store.h"
14
15 static int delta_base_offset = 1;
16 static int pack_kept_objects = -1;
17 static int write_bitmaps = -1;
18 static int use_delta_islands;
19 static char *packdir, *packtmp;
20
21 static const char *const git_repack_usage[] = {
22 N_("git repack [<options>]"),
23 NULL
24 };
25
26 static const char incremental_bitmap_conflict_error[] = N_(
27 "Incremental repacks are incompatible with bitmap indexes. Use\n"
28 "--no-write-bitmap-index or disable the pack.writebitmaps configuration."
29 );
30
31
32 static int repack_config(const char *var, const char *value, void *cb)
33 {
34 if (!strcmp(var, "repack.usedeltabaseoffset")) {
35 delta_base_offset = git_config_bool(var, value);
36 return 0;
37 }
38 if (!strcmp(var, "repack.packkeptobjects")) {
39 pack_kept_objects = git_config_bool(var, value);
40 return 0;
41 }
42 if (!strcmp(var, "repack.writebitmaps") ||
43 !strcmp(var, "pack.writebitmaps")) {
44 write_bitmaps = git_config_bool(var, value);
45 return 0;
46 }
47 if (!strcmp(var, "repack.usedeltaislands")) {
48 use_delta_islands = git_config_bool(var, value);
49 return 0;
50 }
51 return git_default_config(var, value, cb);
52 }
53
54 /*
55 * Remove temporary $GIT_OBJECT_DIRECTORY/pack/.tmp-$$-pack-* files.
56 */
57 static void remove_temporary_files(void)
58 {
59 struct strbuf buf = STRBUF_INIT;
60 size_t dirlen, prefixlen;
61 DIR *dir;
62 struct dirent *e;
63
64 dir = opendir(packdir);
65 if (!dir)
66 return;
67
68 /* Point at the slash at the end of ".../objects/pack/" */
69 dirlen = strlen(packdir) + 1;
70 strbuf_addstr(&buf, packtmp);
71 /* Hold the length of ".tmp-%d-pack-" */
72 prefixlen = buf.len - dirlen;
73
74 while ((e = readdir(dir))) {
75 if (strncmp(e->d_name, buf.buf + dirlen, prefixlen))
76 continue;
77 strbuf_setlen(&buf, dirlen);
78 strbuf_addstr(&buf, e->d_name);
79 unlink(buf.buf);
80 }
81 closedir(dir);
82 strbuf_release(&buf);
83 }
84
/*
 * Signal handler installed through sigchain_push_common(): clean up our
 * temporary pack files, then pop this handler and re-raise the signal so
 * the next handler in the chain (or the default action) runs.
 */
static void remove_pack_on_signal(int signo)
{
	remove_temporary_files();

	sigchain_pop(signo);
	raise(signo);
}
91
92 /*
93 * Adds all packs hex strings to the fname list, which do not
94 * have a corresponding .keep file. These packs are not to
95 * be kept if we are going to pack everything into one file.
96 */
97 static void get_non_kept_pack_filenames(struct string_list *fname_list,
98 const struct string_list *extra_keep)
99 {
100 DIR *dir;
101 struct dirent *e;
102 char *fname;
103
104 if (!(dir = opendir(packdir)))
105 return;
106
107 while ((e = readdir(dir)) != NULL) {
108 size_t len;
109 int i;
110
111 for (i = 0; i < extra_keep->nr; i++)
112 if (!fspathcmp(e->d_name, extra_keep->items[i].string))
113 break;
114 if (extra_keep->nr > 0 && i < extra_keep->nr)
115 continue;
116
117 if (!strip_suffix(e->d_name, ".pack", &len))
118 continue;
119
120 fname = xmemdupz(e->d_name, len);
121
122 if (!file_exists(mkpath("%s/%s.keep", packdir, fname)))
123 string_list_append_nodup(fname_list, fname);
124 else
125 free(fname);
126 }
127 closedir(dir);
128 }
129
130 static void remove_redundant_pack(const char *dir_name, const char *base_name)
131 {
132 struct strbuf buf = STRBUF_INIT;
133 strbuf_addf(&buf, "%s/%s.pack", dir_name, base_name);
134 unlink_pack_path(buf.buf, 1);
135 strbuf_release(&buf);
136 }
137
/*
 * Command-line settings that are forwarded to "git pack-objects".
 * String members are passed through verbatim when non-NULL; integer
 * members are flags that add the corresponding option when non-zero.
 */
struct pack_objects_args {
	const char *window;		/* --window=<value> */
	const char *window_memory;	/* --window-memory=<value> */
	const char *depth;		/* --depth=<value> */
	const char *threads;		/* --threads=<value> */
	const char *max_pack_size;	/* --max-pack-size=<value> */
	int no_reuse_delta;		/* --no-reuse-delta */
	int no_reuse_object;		/* --no-reuse-object */
	int quiet;			/* --quiet */
	int local;			/* --local */
};
149
150 static void prepare_pack_objects(struct child_process *cmd,
151 const struct pack_objects_args *args)
152 {
153 argv_array_push(&cmd->args, "pack-objects");
154 if (args->window)
155 argv_array_pushf(&cmd->args, "--window=%s", args->window);
156 if (args->window_memory)
157 argv_array_pushf(&cmd->args, "--window-memory=%s", args->window_memory);
158 if (args->depth)
159 argv_array_pushf(&cmd->args, "--depth=%s", args->depth);
160 if (args->threads)
161 argv_array_pushf(&cmd->args, "--threads=%s", args->threads);
162 if (args->max_pack_size)
163 argv_array_pushf(&cmd->args, "--max-pack-size=%s", args->max_pack_size);
164 if (args->no_reuse_delta)
165 argv_array_pushf(&cmd->args, "--no-reuse-delta");
166 if (args->no_reuse_object)
167 argv_array_pushf(&cmd->args, "--no-reuse-object");
168 if (args->local)
169 argv_array_push(&cmd->args, "--local");
170 if (args->quiet)
171 argv_array_push(&cmd->args, "--quiet");
172 if (delta_base_offset)
173 argv_array_push(&cmd->args, "--delta-base-offset");
174 argv_array_push(&cmd->args, packtmp);
175 cmd->git_cmd = 1;
176 cmd->out = -1;
177 }
178
179 /*
180 * Write oid to the given struct child_process's stdin, starting it first if
181 * necessary.
182 */
183 static int write_oid(const struct object_id *oid, struct packed_git *pack,
184 uint32_t pos, void *data)
185 {
186 struct child_process *cmd = data;
187
188 if (cmd->in == -1) {
189 if (start_command(cmd))
190 die(_("could not start pack-objects to repack promisor objects"));
191 }
192
193 xwrite(cmd->in, oid_to_hex(oid), GIT_SHA1_HEXSZ);
194 xwrite(cmd->in, "\n", 1);
195 return 0;
196 }
197
198 static void repack_promisor_objects(const struct pack_objects_args *args,
199 struct string_list *names)
200 {
201 struct child_process cmd = CHILD_PROCESS_INIT;
202 FILE *out;
203 struct strbuf line = STRBUF_INIT;
204
205 prepare_pack_objects(&cmd, args);
206 cmd.in = -1;
207
208 /*
209 * NEEDSWORK: Giving pack-objects only the OIDs without any ordering
210 * hints may result in suboptimal deltas in the resulting pack. See if
211 * the OIDs can be sent with fake paths such that pack-objects can use a
212 * {type -> existing pack order} ordering when computing deltas instead
213 * of a {type -> size} ordering, which may produce better deltas.
214 */
215 for_each_packed_object(write_oid, &cmd,
216 FOR_EACH_OBJECT_PROMISOR_ONLY);
217
218 if (cmd.in == -1)
219 /* No packed objects; cmd was never started */
220 return;
221
222 close(cmd.in);
223
224 out = xfdopen(cmd.out, "r");
225 while (strbuf_getline_lf(&line, out) != EOF) {
226 char *promisor_name;
227 int fd;
228 if (line.len != the_hash_algo->hexsz)
229 die(_("repack: Expecting full hex object ID lines only from pack-objects."));
230 string_list_append(names, line.buf);
231
232 /*
233 * pack-objects creates the .pack and .idx files, but not the
234 * .promisor file. Create the .promisor file, which is empty.
235 */
236 promisor_name = mkpathdup("%s-%s.promisor", packtmp,
237 line.buf);
238 fd = open(promisor_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
239 if (fd < 0)
240 die_errno(_("unable to create '%s'"), promisor_name);
241 close(fd);
242 free(promisor_name);
243 }
244 fclose(out);
245 if (finish_command(&cmd))
246 die(_("could not finish pack-objects to repack promisor objects"));
247 }
248
249 #define ALL_INTO_ONE 1
250 #define LOOSEN_UNREACHABLE 2
251
252 int cmd_repack(int argc, const char **argv, const char *prefix)
253 {
254 struct {
255 const char *name;
256 unsigned optional:1;
257 } exts[] = {
258 {".pack"},
259 {".idx"},
260 {".bitmap", 1},
261 {".promisor", 1},
262 };
263 struct child_process cmd = CHILD_PROCESS_INIT;
264 struct string_list_item *item;
265 struct string_list names = STRING_LIST_INIT_DUP;
266 struct string_list rollback = STRING_LIST_INIT_NODUP;
267 struct string_list existing_packs = STRING_LIST_INIT_DUP;
268 struct strbuf line = STRBUF_INIT;
269 int i, ext, ret, failed;
270 FILE *out;
271
272 /* variables to be filled by option parsing */
273 int pack_everything = 0;
274 int delete_redundant = 0;
275 const char *unpack_unreachable = NULL;
276 int keep_unreachable = 0;
277 struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
278 int no_update_server_info = 0;
279 int midx_cleared = 0;
280 struct pack_objects_args po_args = {NULL};
281
282 struct option builtin_repack_options[] = {
283 OPT_BIT('a', NULL, &pack_everything,
284 N_("pack everything in a single pack"), ALL_INTO_ONE),
285 OPT_BIT('A', NULL, &pack_everything,
286 N_("same as -a, and turn unreachable objects loose"),
287 LOOSEN_UNREACHABLE | ALL_INTO_ONE),
288 OPT_BOOL('d', NULL, &delete_redundant,
289 N_("remove redundant packs, and run git-prune-packed")),
290 OPT_BOOL('f', NULL, &po_args.no_reuse_delta,
291 N_("pass --no-reuse-delta to git-pack-objects")),
292 OPT_BOOL('F', NULL, &po_args.no_reuse_object,
293 N_("pass --no-reuse-object to git-pack-objects")),
294 OPT_BOOL('n', NULL, &no_update_server_info,
295 N_("do not run git-update-server-info")),
296 OPT__QUIET(&po_args.quiet, N_("be quiet")),
297 OPT_BOOL('l', "local", &po_args.local,
298 N_("pass --local to git-pack-objects")),
299 OPT_BOOL('b', "write-bitmap-index", &write_bitmaps,
300 N_("write bitmap index")),
301 OPT_BOOL('i', "delta-islands", &use_delta_islands,
302 N_("pass --delta-islands to git-pack-objects")),
303 OPT_STRING(0, "unpack-unreachable", &unpack_unreachable, N_("approxidate"),
304 N_("with -A, do not loosen objects older than this")),
305 OPT_BOOL('k', "keep-unreachable", &keep_unreachable,
306 N_("with -a, repack unreachable objects")),
307 OPT_STRING(0, "window", &po_args.window, N_("n"),
308 N_("size of the window used for delta compression")),
309 OPT_STRING(0, "window-memory", &po_args.window_memory, N_("bytes"),
310 N_("same as the above, but limit memory size instead of entries count")),
311 OPT_STRING(0, "depth", &po_args.depth, N_("n"),
312 N_("limits the maximum delta depth")),
313 OPT_STRING(0, "threads", &po_args.threads, N_("n"),
314 N_("limits the maximum number of threads")),
315 OPT_STRING(0, "max-pack-size", &po_args.max_pack_size, N_("bytes"),
316 N_("maximum size of each packfile")),
317 OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects,
318 N_("repack objects in packs marked with .keep")),
319 OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
320 N_("do not repack this pack")),
321 OPT_END()
322 };
323
324 git_config(repack_config, NULL);
325
326 argc = parse_options(argc, argv, prefix, builtin_repack_options,
327 git_repack_usage, 0);
328
329 if (delete_redundant && repository_format_precious_objects)
330 die(_("cannot delete packs in a precious-objects repo"));
331
332 if (keep_unreachable &&
333 (unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE)))
334 die(_("--keep-unreachable and -A are incompatible"));
335
336 if (write_bitmaps < 0)
337 write_bitmaps = (pack_everything & ALL_INTO_ONE) &&
338 is_bare_repository();
339 if (pack_kept_objects < 0)
340 pack_kept_objects = write_bitmaps;
341
342 if (write_bitmaps && !(pack_everything & ALL_INTO_ONE))
343 die(_(incremental_bitmap_conflict_error));
344
345 packdir = mkpathdup("%s/pack", get_object_directory());
346 packtmp = mkpathdup("%s/.tmp-%d-pack", packdir, (int)getpid());
347
348 sigchain_push_common(remove_pack_on_signal);
349
350 prepare_pack_objects(&cmd, &po_args);
351
352 argv_array_push(&cmd.args, "--keep-true-parents");
353 if (!pack_kept_objects)
354 argv_array_push(&cmd.args, "--honor-pack-keep");
355 for (i = 0; i < keep_pack_list.nr; i++)
356 argv_array_pushf(&cmd.args, "--keep-pack=%s",
357 keep_pack_list.items[i].string);
358 argv_array_push(&cmd.args, "--non-empty");
359 argv_array_push(&cmd.args, "--all");
360 argv_array_push(&cmd.args, "--reflog");
361 argv_array_push(&cmd.args, "--indexed-objects");
362 if (repository_format_partial_clone)
363 argv_array_push(&cmd.args, "--exclude-promisor-objects");
364 if (write_bitmaps)
365 argv_array_push(&cmd.args, "--write-bitmap-index");
366 if (use_delta_islands)
367 argv_array_push(&cmd.args, "--delta-islands");
368
369 if (pack_everything & ALL_INTO_ONE) {
370 get_non_kept_pack_filenames(&existing_packs, &keep_pack_list);
371
372 repack_promisor_objects(&po_args, &names);
373
374 if (existing_packs.nr && delete_redundant) {
375 if (unpack_unreachable) {
376 argv_array_pushf(&cmd.args,
377 "--unpack-unreachable=%s",
378 unpack_unreachable);
379 argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
380 } else if (pack_everything & LOOSEN_UNREACHABLE) {
381 argv_array_push(&cmd.args,
382 "--unpack-unreachable");
383 } else if (keep_unreachable) {
384 argv_array_push(&cmd.args, "--keep-unreachable");
385 argv_array_push(&cmd.args, "--pack-loose-unreachable");
386 } else {
387 argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
388 }
389 }
390 } else {
391 argv_array_push(&cmd.args, "--unpacked");
392 argv_array_push(&cmd.args, "--incremental");
393 }
394
395 cmd.no_stdin = 1;
396
397 ret = start_command(&cmd);
398 if (ret)
399 return ret;
400
401 out = xfdopen(cmd.out, "r");
402 while (strbuf_getline_lf(&line, out) != EOF) {
403 if (line.len != the_hash_algo->hexsz)
404 die(_("repack: Expecting full hex object ID lines only from pack-objects."));
405 string_list_append(&names, line.buf);
406 }
407 fclose(out);
408 ret = finish_command(&cmd);
409 if (ret)
410 return ret;
411
412 if (!names.nr && !po_args.quiet)
413 printf_ln(_("Nothing new to pack."));
414
415 close_all_packs(the_repository->objects);
416
417 /*
418 * Ok we have prepared all new packfiles.
419 * First see if there are packs of the same name and if so
420 * if we can move them out of the way (this can happen if we
421 * repacked immediately after packing fully.
422 */
423 failed = 0;
424 for_each_string_list_item(item, &names) {
425 for (ext = 0; ext < ARRAY_SIZE(exts); ext++) {
426 char *fname, *fname_old;
427
428 if (!midx_cleared) {
429 clear_midx_file(the_repository);
430 midx_cleared = 1;
431 }
432
433 fname = mkpathdup("%s/pack-%s%s", packdir,
434 item->string, exts[ext].name);
435 if (!file_exists(fname)) {
436 free(fname);
437 continue;
438 }
439
440 fname_old = mkpathdup("%s/old-%s%s", packdir,
441 item->string, exts[ext].name);
442 if (file_exists(fname_old))
443 if (unlink(fname_old))
444 failed = 1;
445
446 if (!failed && rename(fname, fname_old)) {
447 free(fname);
448 free(fname_old);
449 failed = 1;
450 break;
451 } else {
452 string_list_append(&rollback, fname);
453 free(fname_old);
454 }
455 }
456 if (failed)
457 break;
458 }
459 if (failed) {
460 struct string_list rollback_failure = STRING_LIST_INIT_DUP;
461 for_each_string_list_item(item, &rollback) {
462 char *fname, *fname_old;
463 fname = mkpathdup("%s/%s", packdir, item->string);
464 fname_old = mkpathdup("%s/old-%s", packdir, item->string);
465 if (rename(fname_old, fname))
466 string_list_append(&rollback_failure, fname);
467 free(fname);
468 free(fname_old);
469 }
470
471 if (rollback_failure.nr) {
472 int i;
473 fprintf(stderr,
474 _("WARNING: Some packs in use have been renamed by\n"
475 "WARNING: prefixing old- to their name, in order to\n"
476 "WARNING: replace them with the new version of the\n"
477 "WARNING: file. But the operation failed, and the\n"
478 "WARNING: attempt to rename them back to their\n"
479 "WARNING: original names also failed.\n"
480 "WARNING: Please rename them in %s manually:\n"), packdir);
481 for (i = 0; i < rollback_failure.nr; i++)
482 fprintf(stderr, "WARNING: old-%s -> %s\n",
483 rollback_failure.items[i].string,
484 rollback_failure.items[i].string);
485 }
486 exit(1);
487 }
488
489 /* Now the ones with the same name are out of the way... */
490 for_each_string_list_item(item, &names) {
491 for (ext = 0; ext < ARRAY_SIZE(exts); ext++) {
492 char *fname, *fname_old;
493 struct stat statbuffer;
494 int exists = 0;
495 fname = mkpathdup("%s/pack-%s%s",
496 packdir, item->string, exts[ext].name);
497 fname_old = mkpathdup("%s-%s%s",
498 packtmp, item->string, exts[ext].name);
499 if (!stat(fname_old, &statbuffer)) {
500 statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
501 chmod(fname_old, statbuffer.st_mode);
502 exists = 1;
503 }
504 if (exists || !exts[ext].optional) {
505 if (rename(fname_old, fname))
506 die_errno(_("renaming '%s' failed"), fname_old);
507 }
508 free(fname);
509 free(fname_old);
510 }
511 }
512
513 /* Remove the "old-" files */
514 for_each_string_list_item(item, &names) {
515 for (ext = 0; ext < ARRAY_SIZE(exts); ext++) {
516 char *fname;
517 fname = mkpathdup("%s/old-%s%s",
518 packdir,
519 item->string,
520 exts[ext].name);
521 if (remove_path(fname))
522 warning(_("failed to remove '%s'"), fname);
523 free(fname);
524 }
525 }
526
527 /* End of pack replacement. */
528
529 reprepare_packed_git(the_repository);
530
531 if (delete_redundant) {
532 const int hexsz = the_hash_algo->hexsz;
533 int opts = 0;
534 string_list_sort(&names);
535 for_each_string_list_item(item, &existing_packs) {
536 char *sha1;
537 size_t len = strlen(item->string);
538 if (len < hexsz)
539 continue;
540 sha1 = item->string + len - hexsz;
541 if (!string_list_has_string(&names, sha1))
542 remove_redundant_pack(packdir, item->string);
543 }
544 if (!po_args.quiet && isatty(2))
545 opts |= PRUNE_PACKED_VERBOSE;
546 prune_packed_objects(opts);
547
548 if (!keep_unreachable &&
549 (!(pack_everything & LOOSEN_UNREACHABLE) ||
550 unpack_unreachable) &&
551 is_repository_shallow(the_repository))
552 prune_shallow(PRUNE_QUICK);
553 }
554
555 if (!no_update_server_info)
556 update_server_info(0);
557 remove_temporary_files();
558
559 if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0))
560 write_midx_file(get_object_directory());
561
562 string_list_clear(&names, 0);
563 string_list_clear(&rollback, 0);
564 string_list_clear(&existing_packs, 0);
565 strbuf_release(&line);
566
567 return 0;
568 }