list-objects-filter: teach tree:# how to handle >0
[git/git.git] / list-objects-filter.c
CommitLineData
25ec7bca
JH
1#include "cache.h"
2#include "dir.h"
3#include "tag.h"
4#include "commit.h"
5#include "tree.h"
6#include "blob.h"
7#include "diff.h"
8#include "tree-walk.h"
9#include "revision.h"
10#include "list-objects.h"
11#include "list-objects-filter.h"
12#include "list-objects-filter-options.h"
c813a7c3 13#include "oidmap.h"
25ec7bca 14#include "oidset.h"
cbd53a21 15#include "object-store.h"
25ec7bca
JH
16
17/* Remember to update object flag allocation in object.h */
18/*
19 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
20 * that have been shown, but should be revisited if they appear
21 * in the traversal (until we mark it SEEN). This is a way to
22 * let us silently de-dup calls to show() in the caller. This
23 * is subtly different from the "revision.h:SHOWN" and the
e5e5e088 24 * "sha1-name.c:ONELINE_SEEN" bits. And also different from
25ec7bca
JH
25 * the non-de-dup usage in pack-bitmap.c
26 */
27#define FILTER_SHOWN_BUT_REVISIT (1<<21)
28
/*
 * State for the filter that drops every blob from the traversal.
 *
 * omits: optional set (may be NULL) that receives the OID of each
 *        blob that was left out.
 */
struct filter_blobs_none_data {
	struct oidset *omits;
};
36
37static enum list_objects_filter_result filter_blobs_none(
01d40c84 38 struct repository *r,
25ec7bca
JH
39 enum list_objects_filter_situation filter_situation,
40 struct object *obj,
41 const char *pathname,
42 const char *filename,
43 void *filter_data_)
44{
45 struct filter_blobs_none_data *filter_data = filter_data_;
46
47 switch (filter_situation) {
48 default:
696aa739 49 BUG("unknown filter_situation: %d", filter_situation);
25ec7bca
JH
50
51 case LOFS_BEGIN_TREE:
52 assert(obj->type == OBJ_TREE);
53 /* always include all tree objects */
54 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
55
56 case LOFS_END_TREE:
57 assert(obj->type == OBJ_TREE);
58 return LOFR_ZERO;
59
60 case LOFS_BLOB:
61 assert(obj->type == OBJ_BLOB);
62 assert((obj->flags & SEEN) == 0);
63
64 if (filter_data->omits)
65 oidset_insert(filter_data->omits, &obj->oid);
66 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
67 }
68}
69
70static void *filter_blobs_none__init(
71 struct oidset *omitted,
72 struct list_objects_filter_options *filter_options,
73 filter_object_fn *filter_fn,
74 filter_free_fn *filter_free_fn)
75{
76 struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
77 d->omits = omitted;
78
79 *filter_fn = filter_blobs_none;
80 *filter_free_fn = free;
81 return d;
82}
83
bc5975d2
MD
84/*
85 * A filter for list-objects to omit ALL trees and blobs from the traversal.
86 * Can OPTIONALLY collect a list of the omitted OIDs.
87 */
c813a7c3 88struct filter_trees_depth_data {
bc5975d2 89 struct oidset *omits;
c813a7c3
MD
90
91 /*
92 * Maps trees to the minimum depth at which they were seen. It is not
93 * necessary to re-traverse a tree at deeper or equal depths than it has
94 * already been traversed.
95 *
96 * We can't use LOFR_MARK_SEEN for tree objects since this will prevent
97 * it from being traversed at shallower depths.
98 */
99 struct oidmap seen_at_depth;
100
101 unsigned long exclude_depth;
102 unsigned long current_depth;
bc5975d2
MD
103};
104
c813a7c3
MD
105struct seen_map_entry {
106 struct oidmap_entry base;
107 size_t depth;
108};
109
110static void filter_trees_update_omits(
111 struct object *obj,
112 struct filter_trees_depth_data *filter_data,
113 int include_it)
114{
115 if (!filter_data->omits)
116 return;
117
118 if (include_it)
119 oidset_remove(filter_data->omits, &obj->oid);
120 else
121 oidset_insert(filter_data->omits, &obj->oid);
122}
123
124static enum list_objects_filter_result filter_trees_depth(
01d40c84 125 struct repository *r,
bc5975d2
MD
126 enum list_objects_filter_situation filter_situation,
127 struct object *obj,
128 const char *pathname,
129 const char *filename,
130 void *filter_data_)
131{
c813a7c3
MD
132 struct filter_trees_depth_data *filter_data = filter_data_;
133 struct seen_map_entry *seen_info;
134 int include_it = filter_data->current_depth <
135 filter_data->exclude_depth;
136 int filter_res;
137 int already_seen;
138
139 /*
140 * Note that we do not use _MARK_SEEN in order to allow re-traversal in
141 * case we encounter a tree or blob again at a shallower depth.
142 */
bc5975d2
MD
143
144 switch (filter_situation) {
145 default:
146 BUG("unknown filter_situation: %d", filter_situation);
147
c813a7c3
MD
148 case LOFS_END_TREE:
149 assert(obj->type == OBJ_TREE);
150 filter_data->current_depth--;
151 return LOFR_ZERO;
152
bc5975d2 153 case LOFS_BLOB:
c813a7c3
MD
154 filter_trees_update_omits(obj, filter_data, include_it);
155 return include_it ? LOFR_MARK_SEEN | LOFR_DO_SHOW : LOFR_ZERO;
156
157 case LOFS_BEGIN_TREE:
158 seen_info = oidmap_get(
159 &filter_data->seen_at_depth, &obj->oid);
160 if (!seen_info) {
161 seen_info = xcalloc(1, sizeof(*seen_info));
162 oidcpy(&seen_info->base.oid, &obj->oid);
163 seen_info->depth = filter_data->current_depth;
164 oidmap_put(&filter_data->seen_at_depth, seen_info);
165 already_seen = 0;
8b10a206 166 } else {
c813a7c3
MD
167 already_seen =
168 filter_data->current_depth >= seen_info->depth;
8b10a206 169 }
bc5975d2 170
c813a7c3
MD
171 if (already_seen) {
172 filter_res = LOFR_SKIP_TREE;
173 } else {
174 seen_info->depth = filter_data->current_depth;
175 filter_trees_update_omits(obj, filter_data, include_it);
176
177 if (include_it)
178 filter_res = LOFR_DO_SHOW;
179 else if (filter_data->omits)
180 filter_res = LOFR_ZERO;
181 else
182 filter_res = LOFR_SKIP_TREE;
183 }
bc5975d2 184
c813a7c3
MD
185 filter_data->current_depth++;
186 return filter_res;
bc5975d2
MD
187 }
188}
189
c813a7c3
MD
190static void filter_trees_free(void *filter_data) {
191 struct filter_trees_depth_data *d = filter_data;
192 if (!d)
193 return;
194 oidmap_free(&d->seen_at_depth, 1);
195 free(d);
196}
197
198static void *filter_trees_depth__init(
bc5975d2
MD
199 struct oidset *omitted,
200 struct list_objects_filter_options *filter_options,
201 filter_object_fn *filter_fn,
202 filter_free_fn *filter_free_fn)
203{
c813a7c3 204 struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d));
bc5975d2 205 d->omits = omitted;
c813a7c3
MD
206 oidmap_init(&d->seen_at_depth, 0);
207 d->exclude_depth = filter_options->tree_exclude_depth;
208 d->current_depth = 0;
bc5975d2 209
c813a7c3
MD
210 *filter_fn = filter_trees_depth;
211 *filter_free_fn = filter_trees_free;
bc5975d2
MD
212 return d;
213}
214
25ec7bca
JH
/*
 * State for the filter that omits large blobs.
 *
 * omits:     optional set (may be NULL) collecting the omitted OIDs.
 * max_bytes: blobs whose size is below this value are kept.
 */
struct filter_blobs_limit_data {
	struct oidset *omits;
	unsigned long max_bytes;
};
223
224static enum list_objects_filter_result filter_blobs_limit(
01d40c84 225 struct repository *r,
25ec7bca
JH
226 enum list_objects_filter_situation filter_situation,
227 struct object *obj,
228 const char *pathname,
229 const char *filename,
230 void *filter_data_)
231{
232 struct filter_blobs_limit_data *filter_data = filter_data_;
233 unsigned long object_length;
234 enum object_type t;
235
236 switch (filter_situation) {
237 default:
696aa739 238 BUG("unknown filter_situation: %d", filter_situation);
25ec7bca
JH
239
240 case LOFS_BEGIN_TREE:
241 assert(obj->type == OBJ_TREE);
242 /* always include all tree objects */
243 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
244
245 case LOFS_END_TREE:
246 assert(obj->type == OBJ_TREE);
247 return LOFR_ZERO;
248
249 case LOFS_BLOB:
250 assert(obj->type == OBJ_BLOB);
251 assert((obj->flags & SEEN) == 0);
252
01d40c84 253 t = oid_object_info(r, &obj->oid, &object_length);
25ec7bca
JH
254 if (t != OBJ_BLOB) { /* probably OBJ_NONE */
255 /*
256 * We DO NOT have the blob locally, so we cannot
257 * apply the size filter criteria. Be conservative
258 * and force show it (and let the caller deal with
259 * the ambiguity).
260 */
261 goto include_it;
262 }
263
264 if (object_length < filter_data->max_bytes)
265 goto include_it;
266
267 if (filter_data->omits)
268 oidset_insert(filter_data->omits, &obj->oid);
269 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
270 }
271
272include_it:
273 if (filter_data->omits)
274 oidset_remove(filter_data->omits, &obj->oid);
275 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
276}
277
278static void *filter_blobs_limit__init(
279 struct oidset *omitted,
280 struct list_objects_filter_options *filter_options,
281 filter_object_fn *filter_fn,
282 filter_free_fn *filter_free_fn)
283{
284 struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
285 d->omits = omitted;
286 d->max_bytes = filter_options->blob_limit_value;
287
288 *filter_fn = filter_blobs_limit;
289 *filter_free_fn = free;
290 return d;
291}
292
293/*
294 * A filter driven by a sparse-checkout specification to only
295 * include blobs that a sparse checkout would populate.
296 *
297 * The sparse-checkout spec can be loaded from a blob with the
298 * given OID or from a local pathname. We allow an OID because
299 * the repo may be bare or we may be doing the filtering on the
300 * server.
301 */
/* Per-directory bookkeeping for the sparse filter's tree walk. */
struct frame {
	/*
	 * Include/exclude value inherited by entries of this directory
	 * when pattern matching gives no answer for them.  It comes
	 * from matching the directory itself, or from one of its
	 * containing directories.
	 */
	int defval;

	/*
	 * 1: something inside this directory (at any depth) was
	 *    provisionally omitted.
	 *
	 * 0: everything inside this directory (at any depth) was
	 *    explicitly included (SHOWN), so the directory may be
	 *    short-cut later in the traversal.
	 */
	unsigned child_prov_omit : 1;
};
321
322struct filter_sparse_data {
323 struct oidset *omits;
324 struct exclude_list el;
325
326 size_t nr, alloc;
327 struct frame *array_frame;
328};
329
330static enum list_objects_filter_result filter_sparse(
01d40c84 331 struct repository *r,
25ec7bca
JH
332 enum list_objects_filter_situation filter_situation,
333 struct object *obj,
334 const char *pathname,
335 const char *filename,
336 void *filter_data_)
337{
338 struct filter_sparse_data *filter_data = filter_data_;
339 int val, dtype;
340 struct frame *frame;
341
342 switch (filter_situation) {
343 default:
696aa739 344 BUG("unknown filter_situation: %d", filter_situation);
25ec7bca
JH
345
346 case LOFS_BEGIN_TREE:
347 assert(obj->type == OBJ_TREE);
348 dtype = DT_DIR;
349 val = is_excluded_from_list(pathname, strlen(pathname),
350 filename, &dtype, &filter_data->el,
01d40c84 351 r->index);
25ec7bca
JH
352 if (val < 0)
353 val = filter_data->array_frame[filter_data->nr].defval;
354
355 ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,
356 filter_data->alloc);
357 filter_data->nr++;
358 filter_data->array_frame[filter_data->nr].defval = val;
359 filter_data->array_frame[filter_data->nr].child_prov_omit = 0;
360
361 /*
362 * A directory with this tree OID may appear in multiple
363 * places in the tree. (Think of a directory move or copy,
364 * with no other changes, so the OID is the same, but the
365 * full pathnames of objects within this directory are new
366 * and may match is_excluded() patterns differently.)
367 * So we cannot mark this directory as SEEN (yet), since
368 * that will prevent process_tree() from revisiting this
369 * tree object with other pathname prefixes.
370 *
371 * Only _DO_SHOW the tree object the first time we visit
372 * this tree object.
373 *
374 * We always show all tree objects. A future optimization
375 * may want to attempt to narrow this.
376 */
377 if (obj->flags & FILTER_SHOWN_BUT_REVISIT)
378 return LOFR_ZERO;
379 obj->flags |= FILTER_SHOWN_BUT_REVISIT;
380 return LOFR_DO_SHOW;
381
382 case LOFS_END_TREE:
383 assert(obj->type == OBJ_TREE);
384 assert(filter_data->nr > 0);
385
386 frame = &filter_data->array_frame[filter_data->nr];
387 filter_data->nr--;
388
389 /*
390 * Tell our parent directory if any of our children were
391 * provisionally omitted.
392 */
393 filter_data->array_frame[filter_data->nr].child_prov_omit |=
394 frame->child_prov_omit;
395
396 /*
397 * If there are NO provisionally omitted child objects (ALL child
398 * objects in this folder were INCLUDED), then we can mark the
399 * folder as SEEN (so we will not have to revisit it again).
400 */
401 if (!frame->child_prov_omit)
402 return LOFR_MARK_SEEN;
403 return LOFR_ZERO;
404
405 case LOFS_BLOB:
406 assert(obj->type == OBJ_BLOB);
407 assert((obj->flags & SEEN) == 0);
408
409 frame = &filter_data->array_frame[filter_data->nr];
410
411 dtype = DT_REG;
412 val = is_excluded_from_list(pathname, strlen(pathname),
413 filename, &dtype, &filter_data->el,
01d40c84 414 r->index);
25ec7bca
JH
415 if (val < 0)
416 val = frame->defval;
417 if (val > 0) {
418 if (filter_data->omits)
419 oidset_remove(filter_data->omits, &obj->oid);
420 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
421 }
422
423 /*
424 * Provisionally omit it. We've already established that
425 * this pathname is not in the sparse-checkout specification
426 * with the CURRENT pathname, so we *WANT* to omit this blob.
427 *
428 * However, a pathname elsewhere in the tree may also
429 * reference this same blob, so we cannot reject it yet.
430 * Leave the LOFR_ bits unset so that if the blob appears
431 * again in the traversal, we will be asked again.
432 */
433 if (filter_data->omits)
434 oidset_insert(filter_data->omits, &obj->oid);
435
436 /*
437 * Remember that at least 1 blob in this tree was
438 * provisionally omitted. This prevents us from short
439 * cutting the tree in future iterations.
440 */
441 frame->child_prov_omit = 1;
442 return LOFR_ZERO;
443 }
444}
445
446
447static void filter_sparse_free(void *filter_data)
448{
449 struct filter_sparse_data *d = filter_data;
450 /* TODO free contents of 'd' */
451 free(d);
452}
453
454static void *filter_sparse_oid__init(
455 struct oidset *omitted,
456 struct list_objects_filter_options *filter_options,
457 filter_object_fn *filter_fn,
458 filter_free_fn *filter_free_fn)
459{
460 struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
461 d->omits = omitted;
462 if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
463 NULL, 0, &d->el) < 0)
464 die("could not load filter specification");
465
466 ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
467 d->array_frame[d->nr].defval = 0; /* default to include */
468 d->array_frame[d->nr].child_prov_omit = 0;
469
470 *filter_fn = filter_sparse;
471 *filter_free_fn = filter_sparse_free;
472 return d;
473}
474
475static void *filter_sparse_path__init(
476 struct oidset *omitted,
477 struct list_objects_filter_options *filter_options,
478 filter_object_fn *filter_fn,
479 filter_free_fn *filter_free_fn)
480{
481 struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
482 d->omits = omitted;
483 if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
484 NULL, 0, &d->el, NULL) < 0)
485 die("could not load filter specification");
486
487 ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
488 d->array_frame[d->nr].defval = 0; /* default to include */
489 d->array_frame[d->nr].child_prov_omit = 0;
490
491 *filter_fn = filter_sparse;
492 *filter_free_fn = filter_sparse_free;
493 return d;
494}
495
496typedef void *(*filter_init_fn)(
497 struct oidset *omitted,
498 struct list_objects_filter_options *filter_options,
499 filter_object_fn *filter_fn,
500 filter_free_fn *filter_free_fn);
501
502/*
503 * Must match "enum list_objects_filter_choice".
504 */
505static filter_init_fn s_filters[] = {
506 NULL,
507 filter_blobs_none__init,
508 filter_blobs_limit__init,
c813a7c3 509 filter_trees_depth__init,
25ec7bca
JH
510 filter_sparse_oid__init,
511 filter_sparse_path__init,
512};
513
514void *list_objects_filter__init(
515 struct oidset *omitted,
516 struct list_objects_filter_options *filter_options,
517 filter_object_fn *filter_fn,
518 filter_free_fn *filter_free_fn)
519{
520 filter_init_fn init_fn;
521
522 assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
523
524 if (filter_options->choice >= LOFC__COUNT)
696aa739 525 BUG("invalid list-objects filter choice: %d",
25ec7bca
JH
526 filter_options->choice);
527
528 init_fn = s_filters[filter_options->choice];
529 if (init_fn)
530 return init_fn(omitted, filter_options,
531 filter_fn, filter_free_fn);
532 *filter_fn = NULL;
533 *filter_free_fn = NULL;
534 return NULL;
535}