midx: write object id fanout chunk
authorDerrick Stolee <stolee@gmail.com>
Thu, 12 Jul 2018 19:39:31 +0000 (15:39 -0400)
committerJunio C Hamano <gitster@pobox.com>
Fri, 20 Jul 2018 18:27:28 +0000 (11:27 -0700)
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/technical/pack-format.txt
midx.c
midx.h
t/helper/test-read-midx.c
t/t5319-multi-pack-index.sh

index 78ee048..3215f7b 100644 (file)
@@ -302,6 +302,11 @@ CHUNK DATA:
            name. This is the only chunk not guaranteed to be a multiple of four
            bytes in length, so should be the last chunk for alignment reasons.
 
+       OID Fanout (ID: {'O', 'I', 'D', 'F'})
+           The ith entry, F[i], stores the number of OIDs with first
+           byte at most i. Thus F[255] stores the total
+           number of objects.
+
        OID Lookup (ID: {'O', 'I', 'D', 'L'})
            The OIDs for all objects in the MIDX are stored in lexicographic
            order in this chunk.
diff --git a/midx.c b/midx.c
index 3f113e1..7a954eb 100644 (file)
--- a/midx.c
+++ b/midx.c
 #define MIDX_HASH_LEN 20
 #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN)
 
-#define MIDX_MAX_CHUNKS 2
+#define MIDX_MAX_CHUNKS 3
 #define MIDX_CHUNK_ALIGNMENT 4
 #define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */
+#define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
 #define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
 #define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t))
+#define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256)
 
 static char *get_midx_filename(const char *object_dir)
 {
@@ -102,6 +104,10 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir)
                                m->chunk_pack_names = m->data + chunk_offset;
                                break;
 
+                       case MIDX_CHUNKID_OIDFANOUT:
+                               m->chunk_oid_fanout = (uint32_t *)(m->data + chunk_offset);
+                               break;
+
                        case MIDX_CHUNKID_OIDLOOKUP:
                                m->chunk_oid_lookup = m->data + chunk_offset;
                                break;
@@ -121,9 +127,13 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir)
 
        if (!m->chunk_pack_names)
                die(_("multi-pack-index missing required pack-name chunk"));
+       if (!m->chunk_oid_fanout)
+               die(_("multi-pack-index missing required OID fanout chunk"));
        if (!m->chunk_oid_lookup)
                die(_("multi-pack-index missing required OID lookup chunk"));
 
+       m->num_objects = ntohl(m->chunk_oid_fanout[255]);
+
        m->pack_names = xcalloc(m->num_packs, sizeof(*m->pack_names));
 
        cur_pack_name = (const char *)m->chunk_pack_names;
@@ -389,6 +399,35 @@ static size_t write_midx_pack_names(struct hashfile *f,
        return written;
 }
 
+static size_t write_midx_oid_fanout(struct hashfile *f,
+                                   struct pack_midx_entry *objects,
+                                   uint32_t nr_objects)
+{
+       struct pack_midx_entry *list = objects;
+       struct pack_midx_entry *last = objects + nr_objects; /* one past the final entry */
+       uint32_t count = 0; /* cumulative: never reset between buckets */
+       uint32_t i;
+
+       /*
+        * Emit the 256-entry fanout chunk to 'f': entry i is the running
+        * count of objects whose first hash byte is <= i. The list is
+        * sorted; the table spares lookups eight binary search iterations.
+        */
+       for (i = 0; i < 256; i++) {
+               struct pack_midx_entry *next = list;
+
+               while (next < last && next->oid.hash[0] == i) {
+                       count++;
+                       next++;
+               }
+
+               hashwrite_be32(f, count); /* the loader decodes entries with ntohl() */
+               list = next; /* the next bucket resumes where this one stopped */
+       }
+
+       return MIDX_CHUNK_FANOUT_SIZE; /* fixed size: 256 * sizeof(uint32_t) bytes */
+}
+
 static size_t write_midx_oid_lookup(struct hashfile *f, unsigned char hash_len,
                                    struct pack_midx_entry *objects,
                                    uint32_t nr_objects)
@@ -461,7 +500,7 @@ int write_midx_file(const char *object_dir)
        FREE_AND_NULL(midx_name);
 
        cur_chunk = 0;
-       num_chunks = 2;
+       num_chunks = 3;
 
        written = write_midx_header(f, num_chunks, packs.nr);
 
@@ -469,9 +508,13 @@ int write_midx_file(const char *object_dir)
        chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH;
 
        cur_chunk++;
-       chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP;
+       chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDFANOUT;
        chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + packs.pack_name_concat_len;
 
+       cur_chunk++;
+       chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP;
+       chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + MIDX_CHUNK_FANOUT_SIZE;
+
        cur_chunk++;
        chunk_ids[cur_chunk] = 0;
        chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_entries * MIDX_HASH_LEN;
@@ -505,6 +548,10 @@ int write_midx_file(const char *object_dir)
                                written += write_midx_pack_names(f, packs.names, packs.nr);
                                break;
 
+                       case MIDX_CHUNKID_OIDFANOUT:
+                               written += write_midx_oid_fanout(f, entries, nr_entries);
+                               break;
+
                        case MIDX_CHUNKID_OIDLOOKUP:
                                written += write_midx_oid_lookup(f, MIDX_HASH_LEN, entries, nr_entries);
                                break;
diff --git a/midx.h b/midx.h
index 4d3bcea..8572cf0 100644 (file)
--- a/midx.h
+++ b/midx.h
@@ -15,6 +15,7 @@ struct multi_pack_index {
        uint32_t num_objects;
 
        const unsigned char *chunk_pack_names;
+       const uint32_t *chunk_oid_fanout;
        const unsigned char *chunk_oid_lookup;
 
        const char **pack_names;
index de6d452..f7c17b0 100644 (file)
@@ -22,10 +22,12 @@ static int read_midx_file(const char *object_dir)
 
        if (m->chunk_pack_names)
                printf(" pack-names");
+       if (m->chunk_oid_fanout)
+               printf(" oid-fanout");
        if (m->chunk_oid_lookup)
                printf(" oid-lookup");
 
-       printf("\n");
+       printf("\nnum_objects: %d\n", m->num_objects);
 
        printf("packs:\n");
        for (i = 0; i < m->num_packs; i++)
index 4813610..95e731a 100755 (executable)
@@ -5,10 +5,12 @@ test_description='multi-pack-indexes'
 
 midx_read_expect () {
        NUM_PACKS=$1
+       NUM_OBJECTS=$2
        {
                cat <<-EOF &&
-               header: 4d494458 1 2 $NUM_PACKS
-               chunks: pack-names oid-lookup
+               header: 4d494458 1 3 $NUM_PACKS
+               chunks: pack-names oid-fanout oid-lookup
+               num_objects: $NUM_OBJECTS
                packs:
                EOF
                if test $NUM_PACKS -ge 1
@@ -24,7 +26,7 @@ midx_read_expect () {
 test_expect_success 'write midx with no packs' '
        test_when_finished rm -f pack/multi-pack-index &&
        git multi-pack-index --object-dir=. write &&
-       midx_read_expect 0
+       midx_read_expect 0 0
 '
 
 generate_objects () {
@@ -74,13 +76,13 @@ test_expect_success 'write midx with one v1 pack' '
        pack=$(git pack-objects --index-version=1 pack/test <obj-list) &&
        test_when_finished rm pack/test-$pack.pack pack/test-$pack.idx pack/multi-pack-index &&
        git multi-pack-index --object-dir=. write &&
-       midx_read_expect 1
+       midx_read_expect 1 18
 '
 
 test_expect_success 'write midx with one v2 pack' '
        git pack-objects --index-version=2,0x40 pack/test <obj-list &&
        git multi-pack-index --object-dir=. write &&
-       midx_read_expect 1
+       midx_read_expect 1 18
 '
 
 test_expect_success 'add more objects' '
@@ -94,7 +96,7 @@ test_expect_success 'add more objects' '
 test_expect_success 'write midx with two packs' '
        git pack-objects --index-version=1 pack/test-2 <obj-list &&
        git multi-pack-index --object-dir=. write &&
-       midx_read_expect 2
+       midx_read_expect 2 34
 '
 
 test_expect_success 'add more packs' '
@@ -108,7 +110,7 @@ test_expect_success 'add more packs' '
 
 test_expect_success 'write midx with twelve packs' '
        git multi-pack-index --object-dir=. write &&
-       midx_read_expect 12
+       midx_read_expect 12 74
 '
 
 test_done