Merge branch 'jt/fetch-negotiator-skipping'
authorJunio C Hamano <gitster@pobox.com>
Thu, 2 Aug 2018 22:30:46 +0000 (15:30 -0700)
committerJunio C Hamano <gitster@pobox.com>
Thu, 2 Aug 2018 22:30:46 +0000 (15:30 -0700)
Add a server-side knob to skip commits in exponential/fibbonacci
stride in an attempt to cover wider swath of history with a smaller
number of iterations, potentially accepting a larger packfile
transfer, instead of going back one commit a time during common
ancestor discovery during the "git fetch" transaction.

* jt/fetch-negotiator-skipping:
  negotiator/skipping: skip commits during fetch

Documentation/config.txt
Makefile
fetch-negotiator.c
fetch-negotiator.h
fetch-pack.c
negotiator/skipping.c [new file with mode: 0644]
negotiator/skipping.h [new file with mode: 0644]
t/t5552-skipping-fetch-negotiator.sh [new file with mode: 0755]

index 44df580..63365dc 100644 (file)
@@ -1531,6 +1531,15 @@ fetch.output::
        `full` and `compact`. Default value is `full`. See section
        OUTPUT in linkgit:git-fetch[1] for detail.
 
+fetch.negotiationAlgorithm::
+       Control how information about the commits in the local repository is
+       sent when negotiating the contents of the packfile to be sent by the
+       server. Set to "skipping" to use an algorithm that skips commits in an
+       effort to converge faster, but may result in a larger-than-necessary
+       packfile; any other value instructs Git to use the default algorithm
+       that never skips commits (unless the server has acknowledged it or one
+       of its descendants).
+
 format.attach::
        Enable multipart/mixed attachments as the default for
        'format-patch'.  The value can also be a double quoted string
index ab5f603..74588d3 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -893,6 +893,7 @@ LIB_OBJS += merge-recursive.o
 LIB_OBJS += mergesort.o
 LIB_OBJS += name-hash.o
 LIB_OBJS += negotiator/default.o
+LIB_OBJS += negotiator/skipping.o
 LIB_OBJS += notes.o
 LIB_OBJS += notes-cache.o
 LIB_OBJS += notes-merge.o
index 2675d12..5d28304 100644 (file)
@@ -1,8 +1,14 @@
 #include "git-compat-util.h"
 #include "fetch-negotiator.h"
 #include "negotiator/default.h"
+#include "negotiator/skipping.h"
 
-void fetch_negotiator_init(struct fetch_negotiator *negotiator)
+void fetch_negotiator_init(struct fetch_negotiator *negotiator,
+                          const char *algorithm)
 {
+       if (algorithm && !strcmp(algorithm, "skipping")) {
+               skipping_negotiator_init(negotiator);
+               return;
+       }
        default_negotiator_init(negotiator);
 }
index b1290aa..ddb44a2 100644 (file)
@@ -52,6 +52,7 @@ struct fetch_negotiator {
        void *data;
 };
 
-void fetch_negotiator_init(struct fetch_negotiator *negotiator);
+void fetch_negotiator_init(struct fetch_negotiator *negotiator,
+                          const char *algorithm);
 
 #endif
index 0a9d184..5714bcb 100644 (file)
@@ -35,6 +35,7 @@ static int agent_supported;
 static int server_supports_filtering;
 static struct lock_file shallow_lock;
 static const char *alternate_shallow_file;
+static char *negotiation_algorithm;
 
 /* Remember to update object flag allocation in object.h */
 #define COMPLETE       (1U << 0)
@@ -913,7 +914,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
        const char *agent_feature;
        int agent_len;
        struct fetch_negotiator negotiator;
-       fetch_negotiator_init(&negotiator);
+       fetch_negotiator_init(&negotiator, negotiation_algorithm);
 
        sort_ref_list(&ref, ref_compare_name);
        QSORT(sought, nr_sought, cmp_ref_by_name);
@@ -1324,7 +1325,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
        int in_vain = 0;
        int haves_to_send = INITIAL_FLUSH;
        struct fetch_negotiator negotiator;
-       fetch_negotiator_init(&negotiator);
+       fetch_negotiator_init(&negotiator, negotiation_algorithm);
        packet_reader_init(&reader, fd[0], NULL, 0,
                           PACKET_READ_CHOMP_NEWLINE);
 
@@ -1406,6 +1407,8 @@ static void fetch_pack_config(void)
        git_config_get_bool("repack.usedeltabaseoffset", &prefer_ofs_delta);
        git_config_get_bool("fetch.fsckobjects", &fetch_fsck_objects);
        git_config_get_bool("transfer.fsckobjects", &transfer_fsck_objects);
+       git_config_get_string("fetch.negotiationalgorithm",
+                             &negotiation_algorithm);
 
        git_config(git_default_config, NULL);
 }
diff --git a/negotiator/skipping.c b/negotiator/skipping.c
new file mode 100644 (file)
index 0000000..dffbc76
--- /dev/null
@@ -0,0 +1,250 @@
+#include "cache.h"
+#include "skipping.h"
+#include "../commit.h"
+#include "../fetch-negotiator.h"
+#include "../prio-queue.h"
+#include "../refs.h"
+#include "../tag.h"
+
+/* Remember to update object flag allocation in object.h */
+/*
+ * Both us and the server know that both parties have this object.
+ */
+#define COMMON         (1U << 2)
+/*
+ * The server has told us that it has this object. We still need to tell the
+ * server that we have this object (or one of its descendants), but since we are
+ * going to do that, we do not need to tell the server about its ancestors.
+ */
+#define ADVERTISED     (1U << 3)
+/*
+ * This commit has entered the priority queue.
+ */
+#define SEEN           (1U << 4)
+/*
+ * This commit has left the priority queue.
+ */
+#define POPPED         (1U << 5)
+
+static int marked;
+
+/*
+ * An entry in the priority queue.
+ */
+struct entry {
+       struct commit *commit;
+
+       /*
+        * Used only if commit is not COMMON.
+        */
+       uint16_t original_ttl;
+       uint16_t ttl;
+};
+
+struct data {
+       struct prio_queue rev_list;
+
+       /*
+        * The number of non-COMMON commits in rev_list.
+        */
+       int non_common_revs;
+};
+
+static int compare(const void *a_, const void *b_, void *unused)
+{
+       const struct entry *a = a_;
+       const struct entry *b = b_;
+       return compare_commits_by_commit_date(a->commit, b->commit, NULL);
+}
+
+static struct entry *rev_list_push(struct data *data, struct commit *commit, int mark)
+{
+       struct entry *entry;
+       commit->object.flags |= mark | SEEN;
+
+       entry = xcalloc(1, sizeof(*entry));
+       entry->commit = commit;
+       prio_queue_put(&data->rev_list, entry);
+
+       if (!(mark & COMMON))
+               data->non_common_revs++;
+       return entry;
+}
+
+static int clear_marks(const char *refname, const struct object_id *oid,
+                      int flag, void *cb_data)
+{
+       struct object *o = deref_tag(the_repository, parse_object(the_repository, oid), refname, 0);
+
+       if (o && o->type == OBJ_COMMIT)
+               clear_commit_marks((struct commit *)o,
+                                  COMMON | ADVERTISED | SEEN | POPPED);
+       return 0;
+}
+
+/*
+ * Mark this SEEN commit and all its SEEN ancestors as COMMON.
+ */
+static void mark_common(struct data *data, struct commit *c)
+{
+       struct commit_list *p;
+
+       if (c->object.flags & COMMON)
+               return;
+       c->object.flags |= COMMON;
+       if (!(c->object.flags & POPPED))
+               data->non_common_revs--;
+
+       if (!c->object.parsed)
+               return;
+       for (p = c->parents; p; p = p->next) {
+               if (p->item->object.flags & SEEN)
+                       mark_common(data, p->item);
+       }
+}
+
+/*
+ * Ensure that the priority queue has an entry for to_push, and ensure that the
+ * entry has the correct flags and ttl.
+ *
+ * This function returns 1 if an entry was found or created, and 0 otherwise
+ * (because the entry for this commit had already been popped).
+ */
+static int push_parent(struct data *data, struct entry *entry,
+                      struct commit *to_push)
+{
+       struct entry *parent_entry;
+
+       if (to_push->object.flags & SEEN) {
+               int i;
+               if (to_push->object.flags & POPPED)
+                       /*
+                        * The entry for this commit has already been popped,
+                        * due to clock skew. Pretend that this parent does not
+                        * exist.
+                        */
+                       return 0;
+               /*
+                * Find the existing entry and use it.
+                */
+               for (i = 0; i < data->rev_list.nr; i++) {
+                       parent_entry = data->rev_list.array[i].data;
+                       if (parent_entry->commit == to_push)
+                               goto parent_found;
+               }
+               BUG("missing parent in priority queue");
+parent_found:
+               ;
+       } else {
+               parent_entry = rev_list_push(data, to_push, 0);
+       }
+
+       if (entry->commit->object.flags & (COMMON | ADVERTISED)) {
+               mark_common(data, to_push);
+       } else {
+               uint16_t new_original_ttl = entry->ttl
+                       ? entry->original_ttl : entry->original_ttl * 3 / 2 + 1;
+               uint16_t new_ttl = entry->ttl
+                       ? entry->ttl - 1 : new_original_ttl;
+               if (parent_entry->original_ttl < new_original_ttl) {
+                       parent_entry->original_ttl = new_original_ttl;
+                       parent_entry->ttl = new_ttl;
+               }
+       }
+
+       return 1;
+}
+
+static const struct object_id *get_rev(struct data *data)
+{
+       struct commit *to_send = NULL;
+
+       while (to_send == NULL) {
+               struct entry *entry;
+               struct commit *commit;
+               struct commit_list *p;
+               int parent_pushed = 0;
+
+               if (data->rev_list.nr == 0 || data->non_common_revs == 0)
+                       return NULL;
+
+               entry = prio_queue_get(&data->rev_list);
+               commit = entry->commit;
+               commit->object.flags |= POPPED;
+               if (!(commit->object.flags & COMMON))
+                       data->non_common_revs--;
+
+               if (!(commit->object.flags & COMMON) && !entry->ttl)
+                       to_send = commit;
+
+               parse_commit(commit);
+               for (p = commit->parents; p; p = p->next)
+                       parent_pushed |= push_parent(data, entry, p->item);
+
+               if (!(commit->object.flags & COMMON) && !parent_pushed)
+                       /*
+                        * This commit has no parents, or all of its parents
+                        * have already been popped (due to clock skew), so send
+                        * it anyway.
+                        */
+                       to_send = commit;
+
+               free(entry);
+       }
+
+       return &to_send->object.oid;
+}
+
+static void known_common(struct fetch_negotiator *n, struct commit *c)
+{
+       if (c->object.flags & SEEN)
+               return;
+       rev_list_push(n->data, c, ADVERTISED);
+}
+
+static void add_tip(struct fetch_negotiator *n, struct commit *c)
+{
+       n->known_common = NULL;
+       if (c->object.flags & SEEN)
+               return;
+       rev_list_push(n->data, c, 0);
+}
+
+static const struct object_id *next(struct fetch_negotiator *n)
+{
+       n->known_common = NULL;
+       n->add_tip = NULL;
+       return get_rev(n->data);
+}
+
+static int ack(struct fetch_negotiator *n, struct commit *c)
+{
+       int known_to_be_common = !!(c->object.flags & COMMON);
+       if (!(c->object.flags & SEEN))
+               die("received ack for commit %s not sent as 'have'\n",
+                   oid_to_hex(&c->object.oid));
+       mark_common(n->data, c);
+       return known_to_be_common;
+}
+
+static void release(struct fetch_negotiator *n)
+{
+       clear_prio_queue(&((struct data *)n->data)->rev_list);
+       FREE_AND_NULL(n->data);
+}
+
+void skipping_negotiator_init(struct fetch_negotiator *negotiator)
+{
+       struct data *data;
+       negotiator->known_common = known_common;
+       negotiator->add_tip = add_tip;
+       negotiator->next = next;
+       negotiator->ack = ack;
+       negotiator->release = release;
+       negotiator->data = data = xcalloc(1, sizeof(*data));
+       data->rev_list.compare = compare;
+
+       if (marked)
+               for_each_ref(clear_marks, NULL);
+       marked = 1;
+}
diff --git a/negotiator/skipping.h b/negotiator/skipping.h
new file mode 100644 (file)
index 0000000..d7dfa6c
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef NEGOTIATOR_SKIPPING_H
+#define NEGOTIATOR_SKIPPING_H
+
+struct fetch_negotiator;
+
+void skipping_negotiator_init(struct fetch_negotiator *negotiator);
+
+#endif
diff --git a/t/t5552-skipping-fetch-negotiator.sh b/t/t5552-skipping-fetch-negotiator.sh
new file mode 100755 (executable)
index 0000000..0a8e0e4
--- /dev/null
@@ -0,0 +1,179 @@
+#!/bin/sh
+
+test_description='test skipping fetch negotiator'
+. ./test-lib.sh
+
+have_sent () {
+       while test "$#" -ne 0
+       do
+               grep "fetch> have $(git -C client rev-parse $1)" trace
+               if test $? -ne 0
+               then
+                       echo "No have $(git -C client rev-parse $1) ($1)"
+                       return 1
+               fi
+               shift
+       done
+}
+
+have_not_sent () {
+       while test "$#" -ne 0
+       do
+               grep "fetch> have $(git -C client rev-parse $1)" trace
+               if test $? -eq 0
+               then
+                       return 1
+               fi
+               shift
+       done
+}
+
+test_expect_success 'commits with no parents are sent regardless of skip distance' '
+       git init server &&
+       test_commit -C server to_fetch &&
+
+       git init client &&
+       for i in $(seq 7)
+       do
+               test_commit -C client c$i
+       done &&
+
+       # We send: "c7" (skip 1) "c5" (skip 2) "c2" (skip 4). After that, since
+       # "c1" has no parent, it is still sent as "have" even though it would
+       # normally be skipped.
+       test_config -C client fetch.negotiationalgorithm skipping &&
+       GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" &&
+       have_sent c7 c5 c2 c1 &&
+       have_not_sent c6 c4 c3
+'
+
+test_expect_success 'when two skips collide, favor the larger one' '
+       rm -rf server client trace &&
+       git init server &&
+       test_commit -C server to_fetch &&
+
+       git init client &&
+       for i in $(seq 11)
+       do
+               test_commit -C client c$i
+       done &&
+       git -C client checkout c5 &&
+       test_commit -C client c5side &&
+
+       # Before reaching c5, we send "c5side" (skip 1) and "c11" (skip 1) "c9"
+       # (skip 2) "c6" (skip 4). The larger skip (skip 4) takes precedence, so
+       # the next "have" sent will be "c1" (from "c6" skip 4) and not "c4"
+       # (from "c5side" skip 1).
+       test_config -C client fetch.negotiationalgorithm skipping &&
+       GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" &&
+       have_sent c5side c11 c9 c6 c1 &&
+       have_not_sent c10 c8 c7 c5 c4 c3 c2
+'
+
+test_expect_success 'use ref advertisement to filter out commits' '
+       rm -rf server client trace &&
+       git init server &&
+       test_commit -C server c1 &&
+       test_commit -C server c2 &&
+       test_commit -C server c3 &&
+       git -C server tag -d c1 c2 c3 &&
+
+       git clone server client &&
+       test_commit -C client c4 &&
+       test_commit -C client c5 &&
+       git -C client checkout c4^^ &&
+       test_commit -C client c2side &&
+
+       git -C server checkout --orphan anotherbranch &&
+       test_commit -C server to_fetch &&
+
+       # The server advertising "c3" (as "refs/heads/master") means that we do
+       # not need to send any ancestors of "c3", but we still need to send "c3"
+       # itself.
+       test_config -C client fetch.negotiationalgorithm skipping &&
+       GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch origin to_fetch &&
+       have_sent c5 c4^ c2side &&
+       have_not_sent c4 c4^^ c4^^^
+'
+
+test_expect_success 'handle clock skew' '
+       rm -rf server client trace &&
+       git init server &&
+       test_commit -C server to_fetch &&
+
+       git init client &&
+
+       # 2 regular commits
+       test_tick=2000000000 &&
+       test_commit -C client c1 &&
+       test_commit -C client c2 &&
+
+       # 4 old commits
+       test_tick=1000000000 &&
+       git -C client checkout c1 &&
+       test_commit -C client old1 &&
+       test_commit -C client old2 &&
+       test_commit -C client old3 &&
+       test_commit -C client old4 &&
+
+       # "c2" and "c1" are popped first, then "old4" to "old1". "old1" would
+       # normally be skipped, but is treated as a commit without a parent here
+       # and sent, because (due to clock skew) its only parent has already been
+       # popped off the priority queue.
+       test_config -C client fetch.negotiationalgorithm skipping &&
+       GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" &&
+       have_sent c2 c1 old4 old2 old1 &&
+       have_not_sent old3
+'
+
+test_expect_success 'do not send "have" with ancestors of commits that server ACKed' '
+       rm -rf server client trace &&
+       git init server &&
+       test_commit -C server to_fetch &&
+
+       git init client &&
+       for i in $(seq 8)
+       do
+               git -C client checkout --orphan b$i &&
+               test_commit -C client b$i.c0
+       done &&
+       for j in $(seq 19)
+       do
+               for i in $(seq 8)
+               do
+                       git -C client checkout b$i &&
+                       test_commit -C client b$i.c$j
+               done
+       done &&
+
+       # Copy this branch over to the server and add a commit on it so that it
+       # is reachable but not advertised.
+       git -C server fetch --no-tags "$(pwd)/client" b1:refs/heads/b1 &&
+       git -C server checkout b1 &&
+       test_commit -C server commit-on-b1 &&
+
+       test_config -C client fetch.negotiationalgorithm skipping &&
+       GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" to_fetch &&
+       grep "  fetch" trace &&
+
+       # fetch-pack sends 2 requests each containing 16 "have" lines before
+       # processing the first response. In these 2 requests, 4 commits from
+       # each branch are sent. Just check the first branch.
+       have_sent b1.c19 b1.c17 b1.c14 b1.c9 &&
+       have_not_sent b1.c18 b1.c16 b1.c15 b1.c13 b1.c12 b1.c11 b1.c10 &&
+
+       # While fetch-pack is processing the first response, it should read that
+       # the server ACKs b1.c19 and b1.c17.
+       grep "fetch< ACK $(git -C client rev-parse b1.c19) common" trace &&
+       grep "fetch< ACK $(git -C client rev-parse b1.c17) common" trace &&
+
+       # fetch-pack should thus not send any more commits in the b1 branch, but
+       # should still send the others (in this test, just check b2).
+       for i in $(seq 0 8)
+       do
+               have_not_sent b1.c$i
+       done &&
+       have_sent b2.c1 b2.c0
+'
+
+test_done