Commit | Line | Data |
---|---|---|
5010cb5f JH |
1 | /* |
2 | * Builtin "git grep" | |
3 | * | |
4 | * Copyright (c) 2006 Junio C Hamano | |
5 | */ | |
6 | #include "cache.h" | |
7 | #include "blob.h" | |
8 | #include "tree.h" | |
9 | #include "commit.h" | |
10 | #include "tag.h" | |
11 | #include "diff.h" | |
12 | #include "revision.h" | |
13 | #include "builtin.h" | |
14 | #include <regex.h> | |
e0eb889f | 15 | #include <fnmatch.h> |
5010cb5f | 16 | |
e0eb889f JH |
17 | /* |
18 | * git grep pathspecs are somewhat different from diff-tree pathspecs; | |
19 | * pathname wildcards are allowed. | |
20 | */ | |
5010cb5f JH |
21 | static int pathspec_matches(struct diff_options *opt, const char *name) |
22 | { | |
e0eb889f | 23 | int namelen, i; |
5010cb5f JH |
24 | if (!opt->nr_paths) |
25 | return 1; | |
26 | namelen = strlen(name); | |
27 | for (i = 0; i < opt->nr_paths; i++) { | |
28 | const char *match = opt->paths[i]; | |
29 | int matchlen = opt->pathlens[i]; | |
e0eb889f JH |
30 | const char *slash, *cp; |
31 | ||
32 | if ((matchlen <= namelen) && | |
33 | !strncmp(name, match, matchlen) && | |
34 | (match[matchlen-1] == '/' || | |
35 | name[matchlen] == '\0' || name[matchlen] == '/')) | |
36 | return 1; | |
37 | if (!fnmatch(match, name, 0)) | |
38 | return 1; | |
39 | if (name[namelen-1] != '/') | |
5010cb5f | 40 | continue; |
e0eb889f JH |
41 | |
42 | /* We are being asked if the name directory is worth | |
43 | * descending into. | |
44 | * | |
45 | * Find the longest leading directory name that does | |
46 | * not have metacharacter in the pathspec; the name | |
47 | * we are looking at must overlap with that directory. | |
5010cb5f | 48 | */ |
e0eb889f JH |
49 | for (cp = match, slash = NULL; cp - match < matchlen; cp++) { |
50 | char ch = *cp; | |
51 | if (ch == '/') | |
52 | slash = cp; | |
53 | if (ch == '*' || ch == '[') | |
5010cb5f | 54 | break; |
e0eb889f JH |
55 | } |
56 | if (!slash) | |
57 | slash = match; /* toplevel */ | |
58 | else | |
59 | slash++; | |
60 | if (namelen <= slash - match) { | |
61 | /* Looking at "Documentation/" and | |
62 | * the pattern says "Documentation/howto/", or | |
63 | * "Documentation/diff*.txt". | |
64 | */ | |
65 | if (!memcmp(match, name, namelen)) | |
66 | return 1; | |
67 | } | |
68 | else { | |
69 | /* Looking at "Documentation/howto/" and | |
70 | * the pattern says "Documentation/h*". | |
71 | */ | |
72 | if (!memcmp(match, name, slash - match)) | |
73 | return 1; | |
74 | } | |
5010cb5f JH |
75 | } |
76 | return 0; | |
77 | } | |
78 | ||
/* Settings for one "git grep" run, filled in by cmd_grep(). */
struct grep_opt {
	const char *pattern;		/* pattern text given with -e */
	regex_t regexp;			/* compiled form of pattern */
	unsigned int linenum : 1;	/* -n: prefix hits with line number */
	unsigned int invert : 1;	/* -v: report non-matching lines */
	unsigned int name_only : 1;	/* -l: print only the file name */
	int regflags;			/* flags handed to regcomp() */
	unsigned int pre_context;	/* -B / -C: lines shown before a hit */
	unsigned int post_context;	/* -A / -C: lines shown after a hit */
};
89 | ||
/*
 * Scan forward from cp for the end of the current line.
 *
 * *left is the number of bytes available starting at cp.  The scan stops
 * at the first '\n' or when those bytes are used up, whichever comes
 * first.  On return *left holds the bytes remaining from the returned
 * position (the newline itself, if found, is still counted).
 *
 * Returns a pointer to the '\n', or to one past the last available byte.
 */
static char *end_of_line(char *cp, unsigned long *left)
{
	unsigned long remaining;

	for (remaining = *left; remaining; remaining--, cp++)
		if (*cp == '\n')
			break;
	*left = remaining;
	return cp;
}
100 | ||
101 | static void show_line(struct grep_opt *opt, const char *bol, const char *eol, | |
102 | const char *name, unsigned lno, char sign) | |
103 | { | |
104 | printf("%s%c", name, sign); | |
105 | if (opt->linenum) | |
106 | printf("%d%c", lno, sign); | |
107 | printf("%.*s\n", eol-bol, bol); | |
108 | } | |
109 | ||
110 | static int grep_buffer(struct grep_opt *opt, const char *name, | |
111 | char *buf, unsigned long size) | |
112 | { | |
113 | char *bol = buf; | |
114 | unsigned long left = size; | |
115 | unsigned lno = 1; | |
116 | struct pre_context_line { | |
117 | char *bol; | |
118 | char *eol; | |
119 | } *prev = NULL, *pcl; | |
120 | unsigned last_hit = 0; | |
121 | unsigned last_shown = 0; | |
122 | const char *hunk_mark = ""; | |
123 | ||
124 | if (opt->pre_context) | |
125 | prev = xcalloc(opt->pre_context, sizeof(*prev)); | |
126 | if (opt->pre_context || opt->post_context) | |
127 | hunk_mark = "--\n"; | |
128 | ||
129 | while (left) { | |
130 | regmatch_t pmatch[10]; | |
131 | char *eol, ch; | |
132 | int hit; | |
133 | ||
134 | eol = end_of_line(bol, &left); | |
135 | ch = *eol; | |
136 | *eol = 0; | |
137 | ||
138 | hit = !regexec(&opt->regexp, bol, ARRAY_SIZE(pmatch), | |
139 | pmatch, 0); | |
140 | if (opt->invert) | |
141 | hit = !hit; | |
142 | if (hit) { | |
df0e7aa8 JH |
143 | if (opt->name_only) { |
144 | printf("%s\n", name); | |
145 | return 1; | |
146 | } | |
5010cb5f JH |
147 | /* Hit at this line. If we haven't shown the |
148 | * pre-context lines, we would need to show them. | |
149 | */ | |
150 | if (opt->pre_context) { | |
151 | unsigned from; | |
152 | if (opt->pre_context < lno) | |
153 | from = lno - opt->pre_context; | |
154 | else | |
155 | from = 1; | |
156 | if (from <= last_shown) | |
157 | from = last_shown + 1; | |
158 | if (last_shown && from != last_shown + 1) | |
159 | printf(hunk_mark); | |
160 | while (from < lno) { | |
161 | pcl = &prev[lno-from-1]; | |
162 | show_line(opt, pcl->bol, pcl->eol, | |
163 | name, from, '-'); | |
164 | from++; | |
165 | } | |
166 | last_shown = lno-1; | |
167 | } | |
168 | if (last_shown && lno != last_shown + 1) | |
169 | printf(hunk_mark); | |
170 | show_line(opt, bol, eol, name, lno, ':'); | |
171 | last_shown = last_hit = lno; | |
172 | } | |
173 | else if (last_hit && | |
174 | lno <= last_hit + opt->post_context) { | |
175 | /* If the last hit is within the post context, | |
176 | * we need to show this line. | |
177 | */ | |
178 | if (last_shown && lno != last_shown + 1) | |
179 | printf(hunk_mark); | |
180 | show_line(opt, bol, eol, name, lno, '-'); | |
181 | last_shown = lno; | |
182 | } | |
183 | if (opt->pre_context) { | |
184 | memmove(prev+1, prev, | |
185 | (opt->pre_context-1) * sizeof(*prev)); | |
186 | prev->bol = bol; | |
187 | prev->eol = eol; | |
188 | } | |
189 | *eol = ch; | |
190 | bol = eol + 1; | |
191 | left--; | |
192 | lno++; | |
193 | } | |
194 | return !!last_hit; | |
195 | } | |
196 | ||
/*
 * Grep the contents of the object named by sha1, labelling output lines
 * with name.
 *
 * Returns the result of grep_buffer(), or 0 (after reporting an error)
 * when the object cannot be read.
 */
static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name)
{
	char type[20];
	unsigned long size;
	char *contents = read_sha1_file(sha1, type, &size);
	int found;

	if (!contents) {
		error("'%s': unable to read %s", name, sha1_to_hex(sha1));
		return 0;
	}
	found = grep_buffer(opt, name, contents, size);
	free(contents);
	return found;
}
212 | ||
/*
 * Grep the working-tree file "filename".
 *
 * Empty files and anything that is not a regular file yield 0 without
 * being opened.  A failing lstat() or open() is reported unless errno
 * is ENOENT; a short read is always reported.  All of these return 0.
 * Otherwise the whole file is read into memory and the result of
 * grep_buffer() is returned.
 */
static int grep_file(struct grep_opt *opt, const char *filename)
{
	struct stat st;
	int fd;
	int found;
	char *contents;

	if (lstat(filename, &st) < 0)
		goto report_errno;
	if (!st.st_size || !S_ISREG(st.st_mode))
		return 0; /* empty or not a regular file -- no grep hit */

	fd = open(filename, O_RDONLY);
	if (fd < 0)
		goto report_errno;

	/* one spare byte: grep_buffer() may terminate the last line */
	contents = xmalloc(st.st_size + 1);
	if (st.st_size != xread(fd, contents, st.st_size)) {
		error("'%s': short read %s", filename, strerror(errno));
		close(fd);
		free(contents);
		return 0;
	}
	close(fd);

	found = grep_buffer(opt, filename, contents, st.st_size);
	free(contents);
	return found;

report_errno:
	if (errno != ENOENT)
		error("'%s': %s", filename, strerror(errno));
	return 0;
}
243 | ||
244 | static int grep_cache(struct grep_opt *opt, struct rev_info *revs, int cached) | |
245 | { | |
246 | int hit = 0; | |
247 | int nr; | |
248 | read_cache(); | |
249 | ||
250 | for (nr = 0; nr < active_nr; nr++) { | |
251 | struct cache_entry *ce = active_cache[nr]; | |
252 | if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode))) | |
253 | continue; | |
254 | if (!pathspec_matches(&revs->diffopt, ce->name)) | |
255 | continue; | |
256 | if (cached) | |
257 | hit |= grep_sha1(opt, ce->sha1, ce->name); | |
258 | else | |
259 | hit |= grep_file(opt, ce->name); | |
260 | } | |
261 | return hit; | |
262 | } | |
263 | ||
264 | static int grep_tree(struct grep_opt *opt, struct rev_info *revs, | |
265 | struct tree_desc *tree, | |
266 | const char *tree_name, const char *base) | |
267 | { | |
268 | unsigned mode; | |
269 | int len; | |
270 | int hit = 0; | |
271 | const char *path; | |
272 | const unsigned char *sha1; | |
e0eb889f | 273 | char *down; |
5010cb5f JH |
274 | char *path_buf = xmalloc(PATH_MAX + strlen(tree_name) + 100); |
275 | ||
276 | if (tree_name[0]) { | |
277 | int offset = sprintf(path_buf, "%s:", tree_name); | |
e0eb889f JH |
278 | down = path_buf + offset; |
279 | strcat(down, base); | |
5010cb5f JH |
280 | } |
281 | else { | |
e0eb889f JH |
282 | down = path_buf; |
283 | strcpy(down, base); | |
5010cb5f JH |
284 | } |
285 | len = strlen(path_buf); | |
286 | ||
287 | while (tree->size) { | |
288 | int pathlen; | |
289 | sha1 = tree_entry_extract(tree, &path, &mode); | |
290 | pathlen = strlen(path); | |
291 | strcpy(path_buf + len, path); | |
292 | ||
e0eb889f JH |
293 | if (S_ISDIR(mode)) |
294 | /* Match "abc/" against pathspec to | |
295 | * decide if we want to descend into "abc" | |
296 | * directory. | |
297 | */ | |
298 | strcpy(path_buf + len + pathlen, "/"); | |
299 | ||
300 | if (!pathspec_matches(&revs->diffopt, down)) | |
5010cb5f JH |
301 | ; |
302 | else if (S_ISREG(mode)) | |
303 | hit |= grep_sha1(opt, sha1, path_buf); | |
304 | else if (S_ISDIR(mode)) { | |
305 | char type[20]; | |
306 | struct tree_desc sub; | |
307 | void *data; | |
308 | data = read_sha1_file(sha1, type, &sub.size); | |
309 | if (!data) | |
310 | die("unable to read tree (%s)", | |
311 | sha1_to_hex(sha1)); | |
5010cb5f | 312 | sub.buf = data; |
e0eb889f | 313 | hit |= grep_tree(opt, revs, &sub, tree_name, down); |
5010cb5f JH |
314 | free(data); |
315 | } | |
316 | update_tree_entry(tree); | |
317 | } | |
318 | return hit; | |
319 | } | |
320 | ||
321 | static int grep_object(struct grep_opt *opt, struct rev_info *revs, | |
322 | struct object *obj, const char *name) | |
323 | { | |
324 | if (!strcmp(obj->type, blob_type)) | |
325 | return grep_sha1(opt, obj->sha1, name); | |
326 | if (!strcmp(obj->type, commit_type) || | |
327 | !strcmp(obj->type, tree_type)) { | |
328 | struct tree_desc tree; | |
329 | void *data; | |
330 | int hit; | |
331 | data = read_object_with_reference(obj->sha1, tree_type, | |
332 | &tree.size, NULL); | |
333 | if (!data) | |
334 | die("unable to read tree (%s)", sha1_to_hex(obj->sha1)); | |
335 | tree.buf = data; | |
336 | hit = grep_tree(opt, revs, &tree, name, ""); | |
337 | free(data); | |
338 | return hit; | |
339 | } | |
340 | die("unable to grep from object of type %s", obj->type); | |
341 | } | |
342 | ||
343 | static const char builtin_grep_usage[] = | |
344 | "git-grep <option>* <rev>* [-e] <pattern> [<path>...]"; | |
345 | ||
346 | int cmd_grep(int argc, const char **argv, char **envp) | |
347 | { | |
348 | struct rev_info rev; | |
349 | const char **dst, **src; | |
350 | int err; | |
351 | int hit = 0; | |
352 | int no_more_arg = 0; | |
353 | int seen_range = 0; | |
354 | int seen_noncommit = 0; | |
355 | int cached = 0; | |
356 | struct grep_opt opt; | |
357 | struct object_list *list; | |
358 | ||
359 | memset(&opt, 0, sizeof(opt)); | |
360 | opt.regflags = REG_NEWLINE; | |
361 | ||
362 | /* | |
363 | * Interpret and remove the grep options upfront. Sigh... | |
364 | */ | |
365 | for (dst = src = &argv[1]; src < argc + argv; ) { | |
366 | const char *arg = *src++; | |
367 | if (!no_more_arg) { | |
368 | if (!strcmp("--", arg)) { | |
369 | no_more_arg = 1; | |
370 | *dst++ = arg; | |
371 | continue; | |
372 | } | |
373 | if (!strcmp("--cached", arg)) { | |
374 | cached = 1; | |
375 | continue; | |
376 | } | |
377 | if (!strcmp("-i", arg) || | |
378 | !strcmp("--ignore-case", arg)) { | |
379 | opt.regflags |= REG_ICASE; | |
380 | continue; | |
381 | } | |
382 | if (!strcmp("-v", arg) || | |
383 | !strcmp("--invert-match", arg)) { | |
384 | opt.invert = 1; | |
385 | continue; | |
386 | } | |
387 | if (!strcmp("-E", arg) || | |
388 | !strcmp("--extended-regexp", arg)) { | |
389 | opt.regflags |= REG_EXTENDED; | |
390 | continue; | |
391 | } | |
392 | if (!strcmp("-G", arg) || | |
393 | !strcmp("--basic-regexp", arg)) { | |
394 | opt.regflags &= ~REG_EXTENDED; | |
395 | continue; | |
396 | } | |
397 | if (!strcmp("-e", arg)) { | |
398 | if (src < argc + argv) { | |
399 | opt.pattern = *src++; | |
400 | continue; | |
401 | } | |
402 | usage(builtin_grep_usage); | |
403 | } | |
404 | if (!strcmp("-n", arg)) { | |
405 | opt.linenum = 1; | |
406 | continue; | |
407 | } | |
408 | if (!strcmp("-H", arg)) { | |
409 | /* We always show the pathname, so this | |
410 | * is a noop. | |
411 | */ | |
412 | continue; | |
413 | } | |
df0e7aa8 JH |
414 | if (!strcmp("-l", arg) || |
415 | !strcmp("--files-with-matches", arg)) { | |
416 | opt.name_only = 1; | |
417 | continue; | |
418 | } | |
5010cb5f JH |
419 | if (!strcmp("-A", arg) || |
420 | !strcmp("-B", arg) || | |
421 | !strcmp("-C", arg)) { | |
422 | unsigned num; | |
423 | if ((argc + argv <= src) || | |
424 | sscanf(*src++, "%u", &num) != 1) | |
425 | usage(builtin_grep_usage); | |
426 | switch (arg[1]) { | |
427 | case 'A': | |
428 | opt.post_context = num; | |
429 | break; | |
430 | case 'C': | |
431 | opt.post_context = num; | |
432 | case 'B': | |
433 | opt.pre_context = num; | |
434 | break; | |
435 | } | |
436 | continue; | |
437 | } | |
438 | } | |
439 | *dst++ = arg; | |
440 | } | |
441 | if (!opt.pattern) | |
442 | die("no pattern given."); | |
443 | ||
444 | err = regcomp(&opt.regexp, opt.pattern, opt.regflags); | |
445 | if (err) { | |
446 | char errbuf[1024]; | |
447 | regerror(err, &opt.regexp, errbuf, 1024); | |
448 | regfree(&opt.regexp); | |
449 | die("'%s': %s", opt.pattern, errbuf); | |
450 | } | |
451 | ||
452 | init_revisions(&rev); | |
453 | *dst = NULL; | |
454 | argc = setup_revisions(dst - argv, argv, &rev, NULL); | |
455 | ||
456 | /* | |
457 | * Do not walk "grep -e foo master next pu -- Documentation/" | |
458 | * but do walk "grep -e foo master..next -- Documentation/". | |
459 | * Ranged request mixed with a blob or tree object, like | |
460 | * "grep -e foo v1.0.0:Documentation/ master..next" | |
461 | * so detect that and complain. | |
462 | */ | |
463 | for (list = rev.pending_objects; list; list = list->next) { | |
464 | struct object *real_obj; | |
465 | if (list->item->flags & UNINTERESTING) | |
466 | seen_range = 1; | |
467 | real_obj = deref_tag(list->item, NULL, 0); | |
468 | if (strcmp(real_obj->type, commit_type)) | |
469 | seen_noncommit = 1; | |
470 | } | |
471 | if (!rev.pending_objects) | |
472 | return !grep_cache(&opt, &rev, cached); | |
473 | if (cached) | |
474 | die("both --cached and revisions given."); | |
475 | ||
476 | if (seen_range && seen_noncommit) | |
477 | die("both A..B and non commit are given."); | |
478 | if (seen_range) { | |
479 | struct commit *commit; | |
480 | prepare_revision_walk(&rev); | |
481 | while ((commit = get_revision(&rev)) != NULL) { | |
482 | unsigned char *sha1 = commit->object.sha1; | |
483 | const char *n = find_unique_abbrev(sha1, rev.abbrev); | |
484 | char rev_name[41]; | |
485 | strcpy(rev_name, n); | |
486 | if (grep_object(&opt, &rev, &commit->object, rev_name)) | |
487 | hit = 1; | |
488 | commit->buffer = NULL; | |
489 | } | |
490 | return !hit; | |
491 | } | |
492 | ||
493 | /* all of them are non-commit; do not walk, and | |
494 | * do not lose their names. | |
495 | */ | |
496 | for (list = rev.pending_objects; list; list = list->next) { | |
497 | struct object *real_obj; | |
498 | real_obj = deref_tag(list->item, NULL, 0); | |
499 | if (grep_object(&opt, &rev, real_obj, list->name)) | |
500 | hit = 1; | |
501 | } | |
502 | return !hit; | |
503 | } |