Merge branch 'jh/unpack-trees-micro-optim'
authorJunio C Hamano <gitster@pobox.com>
Mon, 24 Apr 2017 05:07:48 +0000 (22:07 -0700)
committerJunio C Hamano <gitster@pobox.com>
Mon, 24 Apr 2017 05:07:48 +0000 (22:07 -0700)
In a 2- and 3-way merge of trees, more than one source trees often
end up sharing an identical subtree; optimize by not reading the
same tree multiple times in such a case.

* jh/unpack-trees-micro-optim:
  unpack-trees: avoid duplicate ODB lookups during checkout

1  2 
unpack-trees.c

diff --combined unpack-trees.c
@@@ -10,8 -10,6 +10,8 @@@
  #include "attr.h"
  #include "split-index.h"
  #include "dir.h"
 +#include "submodule.h"
 +#include "submodule-config.h"
  
  /*
   * Error messages expected by scripts out of plumbing commands such as
@@@ -47,9 -45,6 +47,9 @@@ static const char *unpack_plumbing_erro
  
        /* ERROR_WOULD_LOSE_ORPHANED_REMOVED */
        "Working tree file '%s' would be removed by sparse checkout update.",
 +
 +      /* ERROR_WOULD_LOSE_SUBMODULE */
 +      "Submodule '%s' cannot checkout new HEAD.",
  };
  
  #define ERRORMSG(o,type) \
@@@ -166,8 -161,6 +166,8 @@@ void setup_unpack_trees_porcelain(struc
                _("The following working tree files would be overwritten by sparse checkout update:\n%s");
        msgs[ERROR_WOULD_LOSE_ORPHANED_REMOVED] =
                _("The following working tree files would be removed by sparse checkout update:\n%s");
 +      msgs[ERROR_WOULD_LOSE_SUBMODULE] =
 +              _("Cannot update submodule:\n%s");
  
        opts->show_all_errors = 1;
        /* rejected paths may not have a static buffer */
@@@ -247,75 -240,12 +247,75 @@@ static void display_error_msgs(struct u
                fprintf(stderr, _("Aborting\n"));
  }
  
 +static int check_submodule_move_head(const struct cache_entry *ce,
 +                                   const char *old_id,
 +                                   const char *new_id,
 +                                   struct unpack_trees_options *o)
 +{
 +      const struct submodule *sub = submodule_from_ce(ce);
 +      if (!sub)
 +              return 0;
 +
 +      switch (sub->update_strategy.type) {
 +      case SM_UPDATE_UNSPECIFIED:
 +      case SM_UPDATE_CHECKOUT:
 +              if (submodule_move_head(ce->name, old_id, new_id, SUBMODULE_MOVE_HEAD_DRY_RUN))
 +                      return o->gently ? -1 :
 +                              add_rejected_path(o, ERROR_WOULD_LOSE_SUBMODULE, ce->name);
 +              return 0;
 +      case SM_UPDATE_NONE:
 +              return 0;
 +      case SM_UPDATE_REBASE:
 +      case SM_UPDATE_MERGE:
 +      case SM_UPDATE_COMMAND:
 +      default:
 +              warning(_("submodule update strategy not supported for submodule '%s'"), ce->name);
 +              return -1;
 +      }
 +}
 +
 +static void reload_gitmodules_file(struct index_state *index,
 +                                 struct checkout *state)
 +{
 +      int i;
 +      for (i = 0; i < index->cache_nr; i++) {
 +              struct cache_entry *ce = index->cache[i];
 +              if (ce->ce_flags & CE_UPDATE) {
 +                      int r = strcmp(ce->name, ".gitmodules");
 +                      if (r < 0)
 +                              continue;
 +                      else if (r == 0) {
 +                              submodule_free();
 +                              checkout_entry(ce, state, NULL);
 +                              gitmodules_config();
 +                              git_config(submodule_config, NULL);
 +                      } else
 +                              break;
 +              }
 +      }
 +}
 +
  /*
   * Unlink the last component and schedule the leading directories for
   * removal, such that empty directories get removed.
   */
  static void unlink_entry(const struct cache_entry *ce)
  {
 +      const struct submodule *sub = submodule_from_ce(ce);
 +      if (sub) {
 +              switch (sub->update_strategy.type) {
 +              case SM_UPDATE_UNSPECIFIED:
 +              case SM_UPDATE_CHECKOUT:
 +              case SM_UPDATE_REBASE:
 +              case SM_UPDATE_MERGE:
 +                      submodule_move_head(ce->name, "HEAD", NULL,
 +                                          SUBMODULE_MOVE_HEAD_FORCE);
 +                      break;
 +              case SM_UPDATE_NONE:
 +              case SM_UPDATE_COMMAND:
 +                      return; /* Do not touch the submodule. */
 +              }
 +      }
        if (!check_leading_path(ce->name, ce_namelen(ce)))
                return;
        if (remove_or_warn(ce->ce_mode, ce->name))
@@@ -371,9 -301,6 +371,9 @@@ static int check_updates(struct unpack_
        remove_marked_cache_entries(index);
        remove_scheduled_dirs();
  
 +      if (should_update_submodules() && o->update && !o->dry_run)
 +              reload_gitmodules_file(index, &state);
 +
        for (i = 0; i < index->cache_nr; i++) {
                struct cache_entry *ce = index->cache[i];
  
@@@ -604,12 -531,18 +604,18 @@@ static int switch_cache_bottom(struct t
        return ret;
  }
  
+ static inline int are_same_oid(struct name_entry *name_j, struct name_entry *name_k)
+ {
+       return name_j->oid && name_k->oid && !oidcmp(name_j->oid, name_k->oid);
+ }
  static int traverse_trees_recursive(int n, unsigned long dirmask,
                                    unsigned long df_conflicts,
                                    struct name_entry *names,
                                    struct traverse_info *info)
  {
        int i, ret, bottom;
+       int nr_buf = 0;
        struct tree_desc t[MAX_UNPACK_TREES];
        void *buf[MAX_UNPACK_TREES];
        struct traverse_info newinfo;
        newinfo.pathlen += tree_entry_len(p) + 1;
        newinfo.df_conflicts |= df_conflicts;
  
+       /*
+        * Fetch the tree from the ODB for each peer directory in the
+        * n commits.
+        *
+        * For 2- and 3-way traversals, we try to avoid hitting the
+        * ODB twice for the same OID.  This should yield a nice speed
+        * up in checkouts and merges when the commits are similar.
+        *
+        * We don't bother doing the full O(n^2) search for larger n,
+        * because wider traversals don't happen that often and we
+        * avoid the search setup.
+        *
+        * When 2 peer OIDs are the same, we just copy the tree
+        * descriptor data.  This implicitly borrows the buffer
+        * data from the earlier cell.
+        */
        for (i = 0; i < n; i++, dirmask >>= 1) {
-               const unsigned char *sha1 = NULL;
-               if (dirmask & 1)
-                       sha1 = names[i].oid->hash;
-               buf[i] = fill_tree_descriptor(t+i, sha1);
+               if (i > 0 && are_same_oid(&names[i], &names[i - 1]))
+                       t[i] = t[i - 1];
+               else if (i > 1 && are_same_oid(&names[i], &names[i - 2]))
+                       t[i] = t[i - 2];
+               else {
+                       const unsigned char *sha1 = NULL;
+                       if (dirmask & 1)
+                               sha1 = names[i].oid->hash;
+                       buf[nr_buf++] = fill_tree_descriptor(t+i, sha1);
+               }
        }
  
        bottom = switch_cache_bottom(&newinfo);
        ret = traverse_trees(n, t, &newinfo);
        restore_cache_bottom(&newinfo, bottom);
  
-       for (i = 0; i < n; i++)
+       for (i = 0; i < nr_buf; i++)
                free(buf[i]);
  
        return ret;
@@@ -1431,26 -1386,17 +1459,26 @@@ static int verify_uptodate_1(const stru
        if (!lstat(ce->name, &st)) {
                int flags = CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE;
                unsigned changed = ie_match_stat(o->src_index, ce, &st, flags);
 +
 +              if (submodule_from_ce(ce)) {
 +                      int r = check_submodule_move_head(ce,
 +                              "HEAD", oid_to_hex(&ce->oid), o);
 +                      if (r)
 +                              return o->gently ? -1 :
 +                                      add_rejected_path(o, error_type, ce->name);
 +                      return 0;
 +              }
 +
                if (!changed)
                        return 0;
                /*
 -               * NEEDSWORK: the current default policy is to allow
 -               * submodule to be out of sync wrt the superproject
 -               * index.  This needs to be tightened later for
 -               * submodules that are marked to be automatically
 -               * checked out.
 +               * Historic default policy was to allow submodule to be out
 +               * of sync wrt the superproject index. If the submodule was
 +               * not considered interesting above, we don't care here.
                 */
                if (S_ISGITLINK(ce->ce_mode))
                        return 0;
 +
                errno = 0;
        }
        if (errno == ENOENT)
@@@ -1489,16 -1435,11 +1517,16 @@@ static void invalidate_ce_path(const st
   * Currently, git does not checkout subprojects during a superproject
   * checkout, so it is not going to overwrite anything.
   */
 -static int verify_clean_submodule(const struct cache_entry *ce,
 +static int verify_clean_submodule(const char *old_sha1,
 +                                const struct cache_entry *ce,
                                  enum unpack_trees_error_types error_type,
                                  struct unpack_trees_options *o)
  {
 -      return 0;
 +      if (!submodule_from_ce(ce))
 +              return 0;
 +
 +      return check_submodule_move_head(ce, old_sha1,
 +                                       oid_to_hex(&ce->oid), o);
  }
  
  static int verify_clean_subdirectory(const struct cache_entry *ce,
        struct dir_struct d;
        char *pathbuf;
        int cnt = 0;
 -      unsigned char sha1[20];
  
 -      if (S_ISGITLINK(ce->ce_mode) &&
 -          resolve_gitlink_ref(ce->name, "HEAD", sha1) == 0) {
 -              /* If we are not going to update the submodule, then
 +      if (S_ISGITLINK(ce->ce_mode)) {
 +              unsigned char sha1[20];
 +              int sub_head = resolve_gitlink_ref(ce->name, "HEAD", sha1);
 +              /*
 +               * If we are not going to update the submodule, then
                 * we don't care.
                 */
 -              if (!hashcmp(sha1, ce->oid.hash))
 +              if (!sub_head && !hashcmp(sha1, ce->oid.hash))
                        return 0;
 -              return verify_clean_submodule(ce, error_type, o);
 +              return verify_clean_submodule(sub_head ? NULL : sha1_to_hex(sha1),
 +                                            ce, error_type, o);
        }
  
        /*
@@@ -1664,15 -1603,9 +1692,15 @@@ static int verify_absent_1(const struc
                path = xmemdupz(ce->name, len);
                if (lstat(path, &st))
                        ret = error_errno("cannot stat '%s'", path);
 -              else
 -                      ret = check_ok_to_remove(path, len, DT_UNKNOWN, NULL,
 -                                               &st, error_type, o);
 +              else {
 +                      if (submodule_from_ce(ce))
 +                              ret = check_submodule_move_head(ce,
 +                                                              oid_to_hex(&ce->oid),
 +                                                              NULL, o);
 +                      else
 +                              ret = check_ok_to_remove(path, len, DT_UNKNOWN, NULL,
 +                                                       &st, error_type, o);
 +              }
                free(path);
                return ret;
        } else if (lstat(ce->name, &st)) {
                        return error_errno("cannot stat '%s'", ce->name);
                return 0;
        } else {
 +              if (submodule_from_ce(ce))
 +                      return check_submodule_move_head(ce, oid_to_hex(&ce->oid),
 +                                                       NULL, o);
 +
                return check_ok_to_remove(ce->name, ce_namelen(ce),
                                          ce_to_dtype(ce), ce, &st,
                                          error_type, o);
@@@ -1739,15 -1668,6 +1767,15 @@@ static int merged_entry(const struct ca
                        return -1;
                }
                invalidate_ce_path(merge, o);
 +
 +              if (submodule_from_ce(ce)) {
 +                      int ret = check_submodule_move_head(ce, NULL,
 +                                                          oid_to_hex(&ce->oid),
 +                                                          o);
 +                      if (ret)
 +                              return ret;
 +              }
 +
        } else if (!(old->ce_flags & CE_CONFLICTED)) {
                /*
                 * See if we can re-use the old CE directly?
                        update |= old->ce_flags & (CE_SKIP_WORKTREE | CE_NEW_SKIP_WORKTREE);
                        invalidate_ce_path(old, o);
                }
 +
 +              if (submodule_from_ce(ce)) {
 +                      int ret = check_submodule_move_head(ce, oid_to_hex(&old->oid),
 +                                                          oid_to_hex(&ce->oid),
 +                                                          o);
 +                      if (ret)
 +                              return ret;
 +              }
        } else {
                /*
                 * Previously unmerged entry left as an existence