git-fsck-cache.c: check commit objects more carefully
[git/git.git] / fsck-cache.c
1 #include <sys/types.h>
2 #include <dirent.h>
3
4 #include "cache.h"
5 #include "commit.h"
6 #include "tree.h"
7 #include "blob.h"
8 #include "tag.h"
9 #include "refs.h"
10 #include "pack.h"
11
12 #define REACHABLE 0x0001
13
14 static int show_root = 0;
15 static int show_tags = 0;
16 static int show_unreachable = 0;
17 static int standalone = 0;
18 static int check_full = 0;
19 static int check_strict = 0;
20 static int keep_cache_objects = 0;
21 static unsigned char head_sha1[20];
22
23 static void check_connectivity(void)
24 {
25 int i;
26
27 /* Look up all the requirements, warn about missing objects.. */
28 for (i = 0; i < nr_objs; i++) {
29 struct object *obj = objs[i];
30 struct object_list *refs;
31
32 if (!obj->parsed) {
33 if (!standalone && has_sha1_file(obj->sha1))
34 ; /* it is in pack */
35 else
36 printf("missing %s %s\n",
37 obj->type, sha1_to_hex(obj->sha1));
38 continue;
39 }
40
41 for (refs = obj->refs; refs; refs = refs->next) {
42 if (refs->item->parsed ||
43 (!standalone && has_sha1_file(refs->item->sha1)))
44 continue;
45 printf("broken link from %7s %s\n",
46 obj->type, sha1_to_hex(obj->sha1));
47 printf(" to %7s %s\n",
48 refs->item->type, sha1_to_hex(refs->item->sha1));
49 }
50
51 if (show_unreachable && !(obj->flags & REACHABLE)) {
52 printf("unreachable %s %s\n",
53 obj->type, sha1_to_hex(obj->sha1));
54 continue;
55 }
56
57 if (!obj->used) {
58 printf("dangling %s %s\n", obj->type,
59 sha1_to_hex(obj->sha1));
60 }
61 }
62 }
63
64 /*
65 * The entries in a tree are ordered in the _path_ order,
66 * which means that a directory entry is ordered by adding
67 * a slash to the end of it.
68 *
69 * So a directory called "a" is ordered _after_ a file
70 * called "a.c", because "a/" sorts after "a.c".
71 */
72 #define TREE_UNORDERED (-1)
73 #define TREE_HAS_DUPS (-2)
74
75 static int verify_ordered(struct tree_entry_list *a, struct tree_entry_list *b)
76 {
77 int len1 = strlen(a->name);
78 int len2 = strlen(b->name);
79 int len = len1 < len2 ? len1 : len2;
80 unsigned char c1, c2;
81 int cmp;
82
83 cmp = memcmp(a->name, b->name, len);
84 if (cmp < 0)
85 return 0;
86 if (cmp > 0)
87 return TREE_UNORDERED;
88
89 /*
90 * Ok, the first <len> characters are the same.
91 * Now we need to order the next one, but turn
92 * a '\0' into a '/' for a directory entry.
93 */
94 c1 = a->name[len];
95 c2 = b->name[len];
96 if (!c1 && !c2)
97 /*
98 * git-write-tree used to write out a nonsense tree that has
99 * entries with the same name, one blob and one tree. Make
100 * sure we do not have duplicate entries.
101 */
102 return TREE_HAS_DUPS;
103 if (!c1 && a->directory)
104 c1 = '/';
105 if (!c2 && b->directory)
106 c2 = '/';
107 return c1 < c2 ? 0 : TREE_UNORDERED;
108 }
109
110 static int fsck_tree(struct tree *item)
111 {
112 int has_full_path = 0;
113 struct tree_entry_list *entry, *last;
114
115 last = NULL;
116 for (entry = item->entries; entry; entry = entry->next) {
117 if (strchr(entry->name, '/'))
118 has_full_path = 1;
119
120 switch (entry->mode) {
121 /*
122 * Standard modes..
123 */
124 case S_IFREG | 0755:
125 case S_IFREG | 0644:
126 case S_IFLNK:
127 case S_IFDIR:
128 break;
129 /*
130 * This is nonstandard, but we had a few of these
131 * early on when we honored the full set of mode
132 * bits..
133 */
134 case S_IFREG | 0664:
135 if (!check_strict)
136 break;
137 default:
138 printf("tree %s has entry %o %s\n",
139 sha1_to_hex(item->object.sha1),
140 entry->mode, entry->name);
141 }
142
143 if (last) {
144 switch (verify_ordered(last, entry)) {
145 case TREE_UNORDERED:
146 fprintf(stderr, "tree %s not ordered\n",
147 sha1_to_hex(item->object.sha1));
148 return -1;
149 case TREE_HAS_DUPS:
150 fprintf(stderr, "tree %s has duplicate entries for '%s'\n",
151 sha1_to_hex(item->object.sha1),
152 entry->name);
153 return -1;
154 default:
155 break;
156 }
157 }
158
159 last = entry;
160 }
161
162 if (has_full_path) {
163 fprintf(stderr, "warning: git-fsck-cache: tree %s "
164 "has full pathnames in it\n",
165 sha1_to_hex(item->object.sha1));
166 }
167
168 return 0;
169 }
170
171 static int fsck_commit(struct commit *commit)
172 {
173 char *buffer = commit->buffer;
174 unsigned char sha1[20];
175
176 if (memcmp(buffer, "tree ", 5))
177 return -1;
178 if (get_sha1_hex(buffer+5, sha1) || buffer[45] != '\n')
179 return -1;
180 buffer += 46;
181 while (!memcmp(buffer, "parent ", 7)) {
182 if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
183 return -1;
184 buffer += 48;
185 }
186 if (memcmp(buffer, "author ", 7))
187 return -1;
188 free(commit->buffer);
189 commit->buffer = NULL;
190 if (!commit->tree)
191 return -1;
192 if (!commit->parents && show_root)
193 printf("root %s\n", sha1_to_hex(commit->object.sha1));
194 if (!commit->date)
195 printf("bad commit date in %s\n",
196 sha1_to_hex(commit->object.sha1));
197 return 0;
198 }
199
200 static int fsck_tag(struct tag *tag)
201 {
202 struct object *tagged = tag->tagged;
203
204 if (!tagged) {
205 printf("bad object in tag %s\n", sha1_to_hex(tag->object.sha1));
206 return -1;
207 }
208 if (!show_tags)
209 return 0;
210
211 printf("tagged %s %s", tagged->type, sha1_to_hex(tagged->sha1));
212 printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1));
213 return 0;
214 }
215
216 static int fsck_sha1(unsigned char *sha1)
217 {
218 struct object *obj = parse_object(sha1);
219 if (!obj)
220 return -1;
221 if (obj->type == blob_type)
222 return 0;
223 if (obj->type == tree_type)
224 return fsck_tree((struct tree *) obj);
225 if (obj->type == commit_type)
226 return fsck_commit((struct commit *) obj);
227 if (obj->type == tag_type)
228 return fsck_tag((struct tag *) obj);
229 return -1;
230 }
231
/*
 * This is the sorting chunk size: make it reasonably
 * big so that we can sort well..
 */
#define MAX_SHA1_ENTRIES (1024)

/* One queued object: its name plus the inode number used as sort key. */
struct sha1_entry {
	unsigned long ino;
	unsigned char sha1[20];
};

/* Pending objects, held as an array of pointers to heap entries. */
static struct {
	unsigned long nr;
	struct sha1_entry *entry[MAX_SHA1_ENTRIES];
} sha1_list;

/*
 * qsort() comparator ordering sha1_list.entry[] by ascending inode number.
 *
 * The array being sorted holds 'struct sha1_entry *' elements, so qsort
 * hands us pointers to those pointers; they must be dereferenced once
 * before the entry itself can be inspected.  (Treating the arguments as
 * entries directly would compare the pointer values instead of the
 * inode numbers.)
 *
 * Returns a negative, zero or positive value as a's inode is less than,
 * equal to or greater than b's.
 */
static int ino_compare(const void *_a, const void *_b)
{
	const struct sha1_entry *a = *(const struct sha1_entry * const *)_a;
	const struct sha1_entry *b = *(const struct sha1_entry * const *)_b;
	unsigned long ino1 = a->ino, ino2 = b->ino;

	return ino1 < ino2 ? -1 : ino1 > ino2 ? 1 : 0;
}
254
255 static void fsck_sha1_list(void)
256 {
257 int i, nr = sha1_list.nr;
258
259 qsort(sha1_list.entry, nr, sizeof(struct sha1_entry *), ino_compare);
260 for (i = 0; i < nr; i++) {
261 struct sha1_entry *entry = sha1_list.entry[i];
262 unsigned char *sha1 = entry->sha1;
263
264 sha1_list.entry[i] = NULL;
265 if (fsck_sha1(sha1) < 0)
266 fprintf(stderr, "bad sha1 entry '%s'\n", sha1_to_hex(sha1));
267 free(entry);
268 }
269 sha1_list.nr = 0;
270 }
271
272 static void add_sha1_list(unsigned char *sha1, unsigned long ino)
273 {
274 struct sha1_entry *entry = xmalloc(sizeof(*entry));
275 int nr;
276
277 entry->ino = ino;
278 memcpy(entry->sha1, sha1, 20);
279 nr = sha1_list.nr;
280 if (nr == MAX_SHA1_ENTRIES) {
281 fsck_sha1_list();
282 nr = 0;
283 }
284 sha1_list.entry[nr] = entry;
285 sha1_list.nr = ++nr;
286 }
287
/*
 * Scan one fan-out directory of the object store.
 *
 * 'i' is the value of the two leading hex digits shared by every object
 * in the directory, 'path' is the directory itself.  Each 38-character
 * file name is prefixed with those two digits and, if the result is a
 * valid hex object name, queued via add_sha1_list() along with the
 * file's inode number.  "." and ".." are skipped; anything else is
 * reported on stderr as a bad sha1 file.
 *
 * Returns the value of error() when the directory cannot be opened,
 * 0 otherwise.
 */
static int fsck_dir(int i, char *path)
{
	struct dirent *de;
	DIR *dir = opendir(path);

	if (dir == NULL)
		return error("missing sha1 directory '%s'", path);

	while ((de = readdir(dir)) != NULL) {
		const char *fname = de->d_name;
		int len = strlen(fname);

		if (!strcmp(fname, ".") || !strcmp(fname, ".."))
			continue;

		if (len == 38) {
			char name[100];
			unsigned char sha1[20];

			/* Rebuild the full 40-digit name: "xx" + 38 digits + NUL. */
			sprintf(name, "%02x", i);
			memcpy(name + 2, fname, len + 1);
			if (get_sha1_hex(name, sha1) >= 0) {
				add_sha1_list(sha1, de->d_ino);
				continue;
			}
		}
		fprintf(stderr, "bad sha1 file: %s/%s\n", path, fname);
	}
	closedir(dir);
	return 0;
}
323
324 static int default_refs = 0;
325
326 static int fsck_handle_ref(const char *refname, const unsigned char *sha1)
327 {
328 struct object *obj;
329
330 obj = lookup_object(sha1);
331 if (!obj) {
332 if (!standalone && has_sha1_file(sha1)) {
333 default_refs++;
334 return 0; /* it is in a pack */
335 }
336 error("%s: invalid sha1 pointer %s", refname, sha1_to_hex(sha1));
337 /* We'll continue with the rest despite the error.. */
338 return 0;
339 }
340 default_refs++;
341 obj->used = 1;
342 mark_reachable(obj, REACHABLE);
343 return 0;
344 }
345
346 static void get_default_heads(void)
347 {
348 for_each_ref(fsck_handle_ref);
349 if (!default_refs)
350 die("No default references");
351 }
352
/*
 * Check every loose object below the object directory 'path'.
 *
 * The 256 fan-out subdirectories "00" .. "ff" are scanned with
 * fsck_dir(), then whatever is still queued is flushed through
 * fsck_sha1_list().
 *
 * The subdirectory name is built with a bounded snprintf(); a 'path'
 * too long for the buffer is reported through error() and the scan of
 * this object directory is abandoned instead of overrunning the buffer.
 */
static void fsck_object_dir(const char *path)
{
	int i;

	for (i = 0; i < 256; i++) {
		static char dir[4096];
		int len = snprintf(dir, sizeof(dir), "%s/%02x", path, i);

		if (len < 0 || (size_t)len >= sizeof(dir)) {
			/* Same length for every i, so no point in retrying. */
			error("object directory path too long: %s", path);
			break;
		}
		fsck_dir(i, dir);
	}
	fsck_sha1_list();
}
363
364 static int fsck_head_link(void)
365 {
366 int fd, count;
367 char hex[40];
368 unsigned char sha1[20];
369 static char path[PATH_MAX], link[PATH_MAX];
370 const char *git_dir = gitenv(GIT_DIR_ENVIRONMENT) ? : DEFAULT_GIT_DIR_ENVIRONMENT;
371
372 snprintf(path, sizeof(path), "%s/HEAD", git_dir);
373 if (readlink(path, link, sizeof(link)) < 0)
374 return error("HEAD is not a symlink");
375 if (strncmp("refs/heads/", link, 11))
376 return error("HEAD points to something strange (%s)", link);
377 fd = open(path, O_RDONLY);
378 if (fd < 0)
379 return error("HEAD: %s", strerror(errno));
380 count = read(fd, hex, sizeof(hex));
381 close(fd);
382 if (count < 0)
383 return error("HEAD: %s", strerror(errno));
384 if (count < 40 || get_sha1_hex(hex, sha1))
385 return error("HEAD: not a valid git pointer");
386 return 0;
387 }
388
389 int main(int argc, char **argv)
390 {
391 int i, heads;
392
393 for (i = 1; i < argc; i++) {
394 const char *arg = argv[i];
395
396 if (!strcmp(arg, "--unreachable")) {
397 show_unreachable = 1;
398 continue;
399 }
400 if (!strcmp(arg, "--tags")) {
401 show_tags = 1;
402 continue;
403 }
404 if (!strcmp(arg, "--root")) {
405 show_root = 1;
406 continue;
407 }
408 if (!strcmp(arg, "--cache")) {
409 keep_cache_objects = 1;
410 continue;
411 }
412 if (!strcmp(arg, "--standalone")) {
413 standalone = 1;
414 continue;
415 }
416 if (!strcmp(arg, "--full")) {
417 check_full = 1;
418 continue;
419 }
420 if (!strcmp(arg, "--strict")) {
421 check_strict = 1;
422 continue;
423 }
424 if (*arg == '-')
425 usage("git-fsck-cache [--tags] [[--unreachable] [--cache] [--standalone | --full] <head-sha1>*]");
426 }
427
428 if (standalone && check_full)
429 die("Only one of --standalone or --full can be used.");
430 if (standalone)
431 unsetenv("GIT_ALTERNATE_OBJECT_DIRECTORIES");
432
433 fsck_head_link();
434 fsck_object_dir(get_object_directory());
435 if (check_full) {
436 int j;
437 struct packed_git *p;
438 prepare_alt_odb();
439 for (j = 0; alt_odb[j].base; j++) {
440 char namebuf[PATH_MAX];
441 int namelen = alt_odb[j].name - alt_odb[j].base;
442 memcpy(namebuf, alt_odb[j].base, namelen);
443 namebuf[namelen - 1] = 0;
444 fsck_object_dir(namebuf);
445 }
446 prepare_packed_git();
447 for (p = packed_git; p; p = p->next)
448 /* verify gives error messages itself */
449 verify_pack(p, 0);
450
451 for (p = packed_git; p; p = p->next) {
452 int num = num_packed_objects(p);
453 for (i = 0; i < num; i++) {
454 unsigned char sha1[20];
455 nth_packed_object_sha1(p, i, sha1);
456 if (fsck_sha1(sha1) < 0)
457 fprintf(stderr, "bad sha1 entry '%s'\n", sha1_to_hex(sha1));
458
459 }
460 }
461 }
462
463 heads = 0;
464 for (i = 1; i < argc; i++) {
465 const char *arg = argv[i];
466
467 if (*arg == '-')
468 continue;
469
470 if (!get_sha1(arg, head_sha1)) {
471 struct object *obj = lookup_object(head_sha1);
472
473 /* Error is printed by lookup_object(). */
474 if (!obj)
475 continue;
476
477 obj->used = 1;
478 mark_reachable(obj, REACHABLE);
479 heads++;
480 continue;
481 }
482 error("expected sha1, got %s", arg);
483 }
484
485 /*
486 * If we've not been given any explicit head information, do the
487 * default ones from .git/refs. We also consider the index file
488 * in this case (ie this implies --cache).
489 */
490 if (!heads) {
491 get_default_heads();
492 keep_cache_objects = 1;
493 }
494
495 if (keep_cache_objects) {
496 int i;
497 read_cache();
498 for (i = 0; i < active_nr; i++) {
499 struct blob *blob = lookup_blob(active_cache[i]->sha1);
500 struct object *obj;
501 if (!blob)
502 continue;
503 obj = &blob->object;
504 obj->used = 1;
505 mark_reachable(obj, REACHABLE);
506 }
507 }
508
509 check_connectivity();
510 return 0;
511 }