Optimize directory listing with pathspec limiter.
[git/git.git] / dir.c
/*
 * This handles recursive filename detection with exclude
 * files, index knowledge, etc.
 *
 * Copyright (C) Linus Torvalds, 2005-2006
 *		 Junio Hamano, 2005-2006
 */
8 #include "cache.h"
9 #include "dir.h"
10
/*
 * One entry of the table used to prune directory traversal early.
 * "path" points at a pathspec string and "len" is the number of its
 * leading bytes that contain no glob metacharacter.  A table is
 * terminated by an entry whose "path" is NULL.
 */
struct path_simplify {
	int len;
	const char *path;
};
15
/*
 * Return the length of the longest leading directory (including its
 * trailing '/') that is shared by every string in the NULL-terminated
 * 'pathspec' array.  Returns 0 when there is no pathspec or when the
 * pathspecs have no directory in common.
 *
 * The candidate starts out as the directory part of the first pathspec
 * and may only shrink while the remaining pathspecs are examined.
 */
int common_prefix(const char **pathspec)
{
	const char *path, *slash, *next;
	int prefix;

	if (!pathspec || !*pathspec)
		return 0;

	path = *pathspec;
	slash = strrchr(path, '/');
	if (!slash)
		return 0;

	prefix = slash - path + 1;
	while ((next = *++pathspec) != NULL) {
		int len = strlen(next);

		/*
		 * Only the current prefix is relevant; comparing more
		 * bytes than that could read past the end of 'path'.
		 */
		if (len >= prefix && !memcmp(path, next, prefix))
			continue;

		/*
		 * Look for a shorter common directory.  Never scan at or
		 * beyond the current prefix, otherwise a later pathspec
		 * could make the prefix grow again.
		 */
		if (len > prefix - 1)
			len = prefix - 1;
		for (;;) {
			if (!len)
				return 0;
			if (next[--len] != '/')
				continue;
			if (memcmp(path, next, len + 1))
				continue;
			prefix = len + 1;
			break;
		}
	}
	return prefix;
}
47
48 /*
49 * Does 'match' matches the given name?
50 * A match is found if
51 *
52 * (1) the 'match' string is leading directory of 'name', or
53 * (2) the 'match' string is a wildcard and matches 'name', or
54 * (3) the 'match' string is exactly the same as 'name'.
55 *
56 * and the return value tells which case it was.
57 *
58 * It returns 0 when there is no match.
59 */
60 static int match_one(const char *match, const char *name, int namelen)
61 {
62 int matchlen;
63
64 /* If the match was just the prefix, we matched */
65 matchlen = strlen(match);
66 if (!matchlen)
67 return MATCHED_RECURSIVELY;
68
69 /*
70 * If we don't match the matchstring exactly,
71 * we need to match by fnmatch
72 */
73 if (strncmp(match, name, matchlen))
74 return !fnmatch(match, name, 0) ? MATCHED_FNMATCH : 0;
75
76 if (!name[matchlen])
77 return MATCHED_EXACTLY;
78 if (match[matchlen-1] == '/' || name[matchlen] == '/')
79 return MATCHED_RECURSIVELY;
80 return 0;
81 }
82
83 /*
84 * Given a name and a list of pathspecs, see if the name matches
85 * any of the pathspecs. The caller is also interested in seeing
86 * all pathspec matches some names it calls this function with
87 * (otherwise the user could have mistyped the unmatched pathspec),
88 * and a mark is left in seen[] array for pathspec element that
89 * actually matched anything.
90 */
91 int match_pathspec(const char **pathspec, const char *name, int namelen, int prefix, char *seen)
92 {
93 int retval;
94 const char *match;
95
96 name += prefix;
97 namelen -= prefix;
98
99 for (retval = 0; (match = *pathspec++) != NULL; seen++) {
100 int how;
101 if (retval && *seen == MATCHED_EXACTLY)
102 continue;
103 match += prefix;
104 how = match_one(match, name, namelen);
105 if (how) {
106 if (retval < how)
107 retval = how;
108 if (*seen < how)
109 *seen = how;
110 }
111 }
112 return retval;
113 }
114
115 void add_exclude(const char *string, const char *base,
116 int baselen, struct exclude_list *which)
117 {
118 struct exclude *x = xmalloc(sizeof (*x));
119
120 x->pattern = string;
121 x->base = base;
122 x->baselen = baselen;
123 if (which->nr == which->alloc) {
124 which->alloc = alloc_nr(which->alloc);
125 which->excludes = xrealloc(which->excludes,
126 which->alloc * sizeof(x));
127 }
128 which->excludes[which->nr++] = x;
129 }
130
/*
 * Read the exclude file 'fname' and add one exclude entry per line to
 * 'which', with 'base'/'baselen' recorded on every entry.
 *
 * Empty lines and lines starting with '#' are skipped; a trailing CR
 * before the newline is dropped.  Returns 0 on success (including an
 * empty file) and -1 when the file cannot be opened, stat'ed or read.
 *
 * On success the file contents are intentionally kept allocated: the
 * entries added to 'which' point into that buffer.
 */
static int add_excludes_from_file_1(const char *fname,
				    const char *base,
				    int baselen,
				    struct exclude_list *which)
{
	struct stat st;
	int fd;
	size_t size, i;
	char *buf = NULL, *entry;

	fd = open(fname, O_RDONLY);
	if (fd < 0 || fstat(fd, &st) < 0)
		goto err;
	size = xsize_t(st.st_size);
	if (size == 0) {
		close(fd);
		return 0;
	}
	buf = xmalloc(size+1);
	if (read_in_full(fd, buf, size) != size)
		goto err;
	close(fd);

	/* Terminate the last line so the loop below sees it. */
	buf[size++] = '\n';
	entry = buf;
	for (i = 0; i < size; i++) {
		if (buf[i] == '\n') {
			if (entry != buf + i && entry[0] != '#') {
				/* NUL-terminate, eating a CR of a CRLF ending. */
				buf[i - (i && buf[i-1] == '\r')] = 0;
				add_exclude(entry, base, baselen, which);
			}
			entry = buf + i + 1;
		}
	}
	return 0;

 err:
	/* No entry refers to the buffer on failure, so release it. */
	free(buf);
	if (0 <= fd)
		close(fd);
	return -1;
}
172
173 void add_excludes_from_file(struct dir_struct *dir, const char *fname)
174 {
175 if (add_excludes_from_file_1(fname, "", 0,
176 &dir->exclude_list[EXC_FILE]) < 0)
177 die("cannot use %s as an exclude file", fname);
178 }
179
180 int push_exclude_per_directory(struct dir_struct *dir, const char *base, int baselen)
181 {
182 char exclude_file[PATH_MAX];
183 struct exclude_list *el = &dir->exclude_list[EXC_DIRS];
184 int current_nr = el->nr;
185
186 if (dir->exclude_per_dir) {
187 memcpy(exclude_file, base, baselen);
188 strcpy(exclude_file + baselen, dir->exclude_per_dir);
189 add_excludes_from_file_1(exclude_file, base, baselen, el);
190 }
191 return current_nr;
192 }
193
194 void pop_exclude_per_directory(struct dir_struct *dir, int stk)
195 {
196 struct exclude_list *el = &dir->exclude_list[EXC_DIRS];
197
198 while (stk < el->nr)
199 free(el->excludes[--el->nr]);
200 }
201
202 /* Scan the list and let the last match determines the fate.
203 * Return 1 for exclude, 0 for include and -1 for undecided.
204 */
205 static int excluded_1(const char *pathname,
206 int pathlen,
207 struct exclude_list *el)
208 {
209 int i;
210
211 if (el->nr) {
212 for (i = el->nr - 1; 0 <= i; i--) {
213 struct exclude *x = el->excludes[i];
214 const char *exclude = x->pattern;
215 int to_exclude = 1;
216
217 if (*exclude == '!') {
218 to_exclude = 0;
219 exclude++;
220 }
221
222 if (!strchr(exclude, '/')) {
223 /* match basename */
224 const char *basename = strrchr(pathname, '/');
225 basename = (basename) ? basename+1 : pathname;
226 if (fnmatch(exclude, basename, 0) == 0)
227 return to_exclude;
228 }
229 else {
230 /* match with FNM_PATHNAME:
231 * exclude has base (baselen long) implicitly
232 * in front of it.
233 */
234 int baselen = x->baselen;
235 if (*exclude == '/')
236 exclude++;
237
238 if (pathlen < baselen ||
239 (baselen && pathname[baselen-1] != '/') ||
240 strncmp(pathname, x->base, baselen))
241 continue;
242
243 if (fnmatch(exclude, pathname+baselen,
244 FNM_PATHNAME) == 0)
245 return to_exclude;
246 }
247 }
248 }
249 return -1; /* undecided */
250 }
251
252 int excluded(struct dir_struct *dir, const char *pathname)
253 {
254 int pathlen = strlen(pathname);
255 int st;
256
257 for (st = EXC_CMDL; st <= EXC_FILE; st++) {
258 switch (excluded_1(pathname, pathlen, &dir->exclude_list[st])) {
259 case 0:
260 return 0;
261 case 1:
262 return 1;
263 }
264 }
265 return 0;
266 }
267
268 struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
269 {
270 struct dir_entry *ent;
271
272 if (cache_name_pos(pathname, len) >= 0)
273 return NULL;
274
275 if (dir->nr == dir->alloc) {
276 int alloc = alloc_nr(dir->alloc);
277 dir->alloc = alloc;
278 dir->entries = xrealloc(dir->entries, alloc*sizeof(ent));
279 }
280 ent = xmalloc(sizeof(*ent) + len + 1);
281 ent->ignored = ent->ignored_dir = 0;
282 ent->len = len;
283 memcpy(ent->name, pathname, len);
284 ent->name[len] = 0;
285 dir->entries[dir->nr++] = ent;
286 return ent;
287 }
288
289 static int dir_exists(const char *dirname, int len)
290 {
291 int pos = cache_name_pos(dirname, len);
292 if (pos >= 0)
293 return 1;
294 pos = -pos-1;
295 if (pos >= active_nr) /* can't */
296 return 0;
297 return !strncmp(active_cache[pos]->name, dirname, len);
298 }
299
300 /*
301 * This is an inexact early pruning of any recursive directory
302 * reading - if the path cannot possibly be in the pathspec,
303 * return true, and we'll skip it early.
304 */
305 static int simplify_away(const char *path, int pathlen, const struct path_simplify *simplify)
306 {
307 if (simplify) {
308 for (;;) {
309 const char *match = simplify->path;
310 int len = simplify->len;
311
312 if (!match)
313 break;
314 if (len > pathlen)
315 len = pathlen;
316 if (!memcmp(path, match, len))
317 return 0;
318 simplify++;
319 }
320 return 1;
321 }
322 return 0;
323 }
324
325 /*
326 * Read a directory tree. We currently ignore anything but
327 * directories, regular files and symlinks. That's because git
328 * doesn't handle them at all yet. Maybe that will change some
329 * day.
330 *
331 * Also, we ignore the name ".git" (even if it is not a directory).
332 * That likely will not change.
333 */
334 static int read_directory_recursive(struct dir_struct *dir, const char *path, const char *base, int baselen, int check_only, const struct path_simplify *simplify)
335 {
336 DIR *fdir = opendir(path);
337 int contents = 0;
338
339 if (fdir) {
340 int exclude_stk;
341 struct dirent *de;
342 char fullname[PATH_MAX + 1];
343 memcpy(fullname, base, baselen);
344
345 exclude_stk = push_exclude_per_directory(dir, base, baselen);
346
347 while ((de = readdir(fdir)) != NULL) {
348 int len;
349
350 if ((de->d_name[0] == '.') &&
351 (de->d_name[1] == 0 ||
352 !strcmp(de->d_name + 1, ".") ||
353 !strcmp(de->d_name + 1, "git")))
354 continue;
355 len = strlen(de->d_name);
356 memcpy(fullname + baselen, de->d_name, len+1);
357 if (simplify_away(fullname, baselen + len, simplify))
358 continue;
359 if (excluded(dir, fullname) != dir->show_ignored) {
360 if (!dir->show_ignored || DTYPE(de) != DT_DIR) {
361 continue;
362 }
363 }
364
365 switch (DTYPE(de)) {
366 struct stat st;
367 default:
368 continue;
369 case DT_UNKNOWN:
370 if (lstat(fullname, &st))
371 continue;
372 if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode))
373 break;
374 if (!S_ISDIR(st.st_mode))
375 continue;
376 /* fallthrough */
377 case DT_DIR:
378 memcpy(fullname + baselen + len, "/", 2);
379 len++;
380 if (dir->show_other_directories &&
381 !dir_exists(fullname, baselen + len)) {
382 if (dir->hide_empty_directories &&
383 !read_directory_recursive(dir,
384 fullname, fullname,
385 baselen + len, 1, simplify))
386 continue;
387 break;
388 }
389
390 contents += read_directory_recursive(dir,
391 fullname, fullname, baselen + len, 0, simplify);
392 continue;
393 case DT_REG:
394 case DT_LNK:
395 break;
396 }
397 contents++;
398 if (check_only)
399 goto exit_early;
400 else
401 dir_add_name(dir, fullname, baselen + len);
402 }
403 exit_early:
404 closedir(fdir);
405
406 pop_exclude_per_directory(dir, exclude_stk);
407 }
408
409 return contents;
410 }
411
412 static int cmp_name(const void *p1, const void *p2)
413 {
414 const struct dir_entry *e1 = *(const struct dir_entry **)p1;
415 const struct dir_entry *e2 = *(const struct dir_entry **)p2;
416
417 return cache_name_compare(e1->name, e1->len,
418 e2->name, e2->len);
419 }
420
/*
 * Return the length of the "simple" part of a path match limiter,
 * i.e. the number of leading bytes before the first '?', '\\', '*' or
 * '[' (or before the end of the string when there is none).
 */
static int simple_length(const char *match)
{
	return (int)strcspn(match, "?\\*[");
}
440
441 static struct path_simplify *create_simplify(const char **pathspec)
442 {
443 int nr, alloc = 0;
444 struct path_simplify *simplify = NULL;
445
446 if (!pathspec)
447 return NULL;
448
449 for (nr = 0 ; ; nr++) {
450 const char *match;
451 if (nr >= alloc) {
452 alloc = alloc_nr(alloc);
453 simplify = xrealloc(simplify, alloc * sizeof(*simplify));
454 }
455 match = *pathspec++;
456 if (!match)
457 break;
458 simplify[nr].path = match;
459 simplify[nr].len = simple_length(match);
460 }
461 simplify[nr].path = NULL;
462 simplify[nr].len = 0;
463 return simplify;
464 }
465
/*
 * Release a table obtained from create_simplify(); NULL is accepted.
 */
static void free_simplify(struct path_simplify *simplify)
{
	/* free(NULL) is a no-op, so no guard is needed. */
	free(simplify);
}
471
472 int read_directory(struct dir_struct *dir, const char *path, const char *base, int baselen, const char **pathspec)
473 {
474 struct path_simplify *simplify = create_simplify(pathspec);
475
476 /*
477 * Make sure to do the per-directory exclude for all the
478 * directories leading up to our base.
479 */
480 if (baselen) {
481 if (dir->exclude_per_dir) {
482 char *p, *pp = xmalloc(baselen+1);
483 memcpy(pp, base, baselen+1);
484 p = pp;
485 while (1) {
486 char save = *p;
487 *p = 0;
488 push_exclude_per_directory(dir, pp, p-pp);
489 *p++ = save;
490 if (!save)
491 break;
492 p = strchr(p, '/');
493 if (p)
494 p++;
495 else
496 p = pp + baselen;
497 }
498 free(pp);
499 }
500 }
501
502 read_directory_recursive(dir, path, base, baselen, 0, simplify);
503 free_simplify(simplify);
504 qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
505 return dir->nr;
506 }
507
/*
 * Return 1 when stat() succeeds on the path 'f', 0 otherwise.
 */
int file_exists(const char *f)
{
	struct stat info;

	if (stat(f, &info) != 0)
		return 0;
	return 1;
}