| 1 | #ifndef PACK_OBJECTS_H |
| 2 | #define PACK_OBJECTS_H |
| 3 | |
| 4 | #include "object-store.h" |
| 5 | |
| 6 | #define OE_DFS_STATE_BITS 2 |
| 7 | #define OE_DEPTH_BITS 12 |
| 8 | #define OE_IN_PACK_BITS 10 |
| 9 | #define OE_Z_DELTA_BITS 20 |
| 10 | |
| 11 | /* |
| 12 | * State flags for depth-first search used for analyzing delta cycles. |
| 13 | * |
| 14 | * The depth is measured in delta-links to the base (so if A is a delta |
| 15 | * against B, then A has a depth of 1, and B a depth of 0). |
| 16 | */ |
| 17 | enum dfs_state { |
| 18 | DFS_NONE = 0, |
| 19 | DFS_ACTIVE, |
| 20 | DFS_DONE, |
| 21 | DFS_NUM_STATES |
| 22 | }; |
| 23 | |
| 24 | /* |
| 25 | * basic object info |
| 26 | * ----------------- |
| 27 | * idx.oid is filled up before delta searching starts. idx.crc32 is |
| 28 | * only valid after the object is written out and will be used for |
| 29 | * generating the index. idx.offset will be both gradually set and |
| 30 | * used in writing phase (base objects get offset first, then deltas |
| 31 | * refer to them) |
| 32 | * |
| 33 | * "size" is the uncompressed object size. Compressed size of the raw |
| 34 | * data for an object in a pack is not stored anywhere but is computed |
| 35 | * and made available when reverse .idx is made. Note that when a |
| 36 | * delta is reused, "size" is the uncompressed _delta_ size, not the |
| 37 | * canonical one after the delta has been applied. |
| 38 | * |
| 39 | * "hash" contains a path name hash which is used for sorting the |
| 40 | * delta list and also during delta searching. Once prepare_pack() |
| 41 | * returns it's no longer needed. |
| 42 | * |
| 43 | * source pack info |
| 44 | * ---------------- |
| 45 | * The (in_pack, in_pack_offset) tuple contains the location of the |
| 46 | * object in the source pack. in_pack_header_size allows quickly |
| 47 | * skipping the header and going straight to the zlib stream. |
| 48 | * |
| 49 | * "type" and "in_pack_type" both describe object type. in_pack_type |
| 50 | * may contain a delta type, while type is always the canonical type. |
| 51 | * |
| 52 | * deltas |
| 53 | * ------ |
| 54 | * Delta links (delta, delta_child and delta_sibling) are created to |
| 55 | * reflect that delta graph from the source pack then updated or added |
| 56 | * during delta searching phase when we find better deltas. |
| 57 | * |
| 58 | * delta_child and delta_sibling are last needed in |
| 59 | * compute_write_order(). "delta" and "delta_size" must remain valid |
| 60 | * at object writing phase in case the delta is not cached. |
| 61 | * |
| 62 | * If a delta is cached in memory and is compressed, delta_data points |
| 63 | * to the data and z_delta_size contains the compressed size. If it's |
| 64 | * uncompressed [1], z_delta_size must be zero. delta_size is always |
| 65 | * the uncompressed size and must be valid even if the delta is not |
| 66 | * cached. |
| 67 | * |
| 68 | * [1] during try_delta phase we don't bother with compressing because |
| 69 | * the delta could be quickly replaced with a better one. |
| 70 | */ |
| 71 | struct object_entry { |
| 72 | struct pack_idx_entry idx; |
| 73 | unsigned long size; /* uncompressed size */ |
| 74 | unsigned in_pack_idx:OE_IN_PACK_BITS; /* already in pack */ |
| 75 | off_t in_pack_offset; |
| 76 | uint32_t delta_idx; /* delta base object */ |
| 77 | uint32_t delta_child_idx; /* deltified objects who bases me */ |
| 78 | uint32_t delta_sibling_idx; /* other deltified objects who |
| 79 | * uses the same base as me |
| 80 | */ |
| 81 | void *delta_data; /* cached delta (uncompressed) */ |
| 82 | unsigned long delta_size; /* delta data size (uncompressed) */ |
| 83 | unsigned z_delta_size:OE_Z_DELTA_BITS; |
| 84 | unsigned type_:TYPE_BITS; |
| 85 | unsigned in_pack_type:TYPE_BITS; /* could be delta */ |
| 86 | unsigned type_valid:1; |
| 87 | uint32_t hash; /* name hint hash */ |
| 88 | unsigned char in_pack_header_size; |
| 89 | unsigned preferred_base:1; /* |
| 90 | * we do not pack this, but is available |
| 91 | * to be used as the base object to delta |
| 92 | * objects against. |
| 93 | */ |
| 94 | unsigned no_try_delta:1; |
| 95 | unsigned tagged:1; /* near the very tip of refs */ |
| 96 | unsigned filled:1; /* assigned write-order */ |
| 97 | unsigned dfs_state:OE_DFS_STATE_BITS; |
| 98 | unsigned depth:OE_DEPTH_BITS; |
| 99 | }; |
| 100 | |
| 101 | struct packing_data { |
| 102 | struct object_entry *objects; |
| 103 | uint32_t nr_objects, nr_alloc; |
| 104 | |
| 105 | int32_t *index; |
| 106 | uint32_t index_size; |
| 107 | |
| 108 | unsigned int *in_pack_pos; |
| 109 | |
| 110 | /* |
| 111 | * Only one of these can be non-NULL and they have different |
| 112 | * sizes. if in_pack_by_idx is allocated, oe_in_pack() returns |
| 113 | * the pack of an object using in_pack_idx field. If not, |
| 114 | * in_pack[] array is used the same way as in_pack_pos[] |
| 115 | */ |
| 116 | struct packed_git **in_pack_by_idx; |
| 117 | struct packed_git **in_pack; |
| 118 | }; |
| 119 | |
| 120 | void prepare_packing_data(struct packing_data *pdata); |
| 121 | struct object_entry *packlist_alloc(struct packing_data *pdata, |
| 122 | const unsigned char *sha1, |
| 123 | uint32_t index_pos); |
| 124 | |
| 125 | struct object_entry *packlist_find(struct packing_data *pdata, |
| 126 | const unsigned char *sha1, |
| 127 | uint32_t *index_pos); |
| 128 | |
| 129 | static inline uint32_t pack_name_hash(const char *name) |
| 130 | { |
| 131 | uint32_t c, hash = 0; |
| 132 | |
| 133 | if (!name) |
| 134 | return 0; |
| 135 | |
| 136 | /* |
| 137 | * This effectively just creates a sortable number from the |
| 138 | * last sixteen non-whitespace characters. Last characters |
| 139 | * count "most", so things that end in ".c" sort together. |
| 140 | */ |
| 141 | while ((c = *name++) != 0) { |
| 142 | if (isspace(c)) |
| 143 | continue; |
| 144 | hash = (hash >> 2) + (c << 24); |
| 145 | } |
| 146 | return hash; |
| 147 | } |
| 148 | |
| 149 | static inline enum object_type oe_type(const struct object_entry *e) |
| 150 | { |
| 151 | return e->type_valid ? e->type_ : OBJ_BAD; |
| 152 | } |
| 153 | |
| 154 | static inline void oe_set_type(struct object_entry *e, |
| 155 | enum object_type type) |
| 156 | { |
| 157 | if (type >= OBJ_ANY) |
| 158 | BUG("OBJ_ANY cannot be set in pack-objects code"); |
| 159 | |
| 160 | e->type_valid = type >= OBJ_NONE; |
| 161 | e->type_ = (unsigned)type; |
| 162 | } |
| 163 | |
| 164 | static inline unsigned int oe_in_pack_pos(const struct packing_data *pack, |
| 165 | const struct object_entry *e) |
| 166 | { |
| 167 | return pack->in_pack_pos[e - pack->objects]; |
| 168 | } |
| 169 | |
| 170 | static inline void oe_set_in_pack_pos(const struct packing_data *pack, |
| 171 | const struct object_entry *e, |
| 172 | unsigned int pos) |
| 173 | { |
| 174 | pack->in_pack_pos[e - pack->objects] = pos; |
| 175 | } |
| 176 | |
| 177 | static inline struct packed_git *oe_in_pack(const struct packing_data *pack, |
| 178 | const struct object_entry *e) |
| 179 | { |
| 180 | if (pack->in_pack_by_idx) |
| 181 | return pack->in_pack_by_idx[e->in_pack_idx]; |
| 182 | else |
| 183 | return pack->in_pack[e - pack->objects]; |
| 184 | } |
| 185 | |
| 186 | void oe_map_new_pack(struct packing_data *pack, |
| 187 | struct packed_git *p); |
| 188 | static inline void oe_set_in_pack(struct packing_data *pack, |
| 189 | struct object_entry *e, |
| 190 | struct packed_git *p) |
| 191 | { |
| 192 | if (!p->index) |
| 193 | oe_map_new_pack(pack, p); |
| 194 | if (pack->in_pack_by_idx) |
| 195 | e->in_pack_idx = p->index; |
| 196 | else |
| 197 | pack->in_pack[e - pack->objects] = p; |
| 198 | } |
| 199 | |
| 200 | static inline struct object_entry *oe_delta( |
| 201 | const struct packing_data *pack, |
| 202 | const struct object_entry *e) |
| 203 | { |
| 204 | if (e->delta_idx) |
| 205 | return &pack->objects[e->delta_idx - 1]; |
| 206 | return NULL; |
| 207 | } |
| 208 | |
| 209 | static inline void oe_set_delta(struct packing_data *pack, |
| 210 | struct object_entry *e, |
| 211 | struct object_entry *delta) |
| 212 | { |
| 213 | if (delta) |
| 214 | e->delta_idx = (delta - pack->objects) + 1; |
| 215 | else |
| 216 | e->delta_idx = 0; |
| 217 | } |
| 218 | |
| 219 | static inline struct object_entry *oe_delta_child( |
| 220 | const struct packing_data *pack, |
| 221 | const struct object_entry *e) |
| 222 | { |
| 223 | if (e->delta_child_idx) |
| 224 | return &pack->objects[e->delta_child_idx - 1]; |
| 225 | return NULL; |
| 226 | } |
| 227 | |
| 228 | static inline void oe_set_delta_child(struct packing_data *pack, |
| 229 | struct object_entry *e, |
| 230 | struct object_entry *delta) |
| 231 | { |
| 232 | if (delta) |
| 233 | e->delta_child_idx = (delta - pack->objects) + 1; |
| 234 | else |
| 235 | e->delta_child_idx = 0; |
| 236 | } |
| 237 | |
| 238 | static inline struct object_entry *oe_delta_sibling( |
| 239 | const struct packing_data *pack, |
| 240 | const struct object_entry *e) |
| 241 | { |
| 242 | if (e->delta_sibling_idx) |
| 243 | return &pack->objects[e->delta_sibling_idx - 1]; |
| 244 | return NULL; |
| 245 | } |
| 246 | |
| 247 | static inline void oe_set_delta_sibling(struct packing_data *pack, |
| 248 | struct object_entry *e, |
| 249 | struct object_entry *delta) |
| 250 | { |
| 251 | if (delta) |
| 252 | e->delta_sibling_idx = (delta - pack->objects) + 1; |
| 253 | else |
| 254 | e->delta_sibling_idx = 0; |
| 255 | } |
| 256 | |
| 257 | #endif |