Change check_ref_format() to take a flags argument
[git/git.git] / git_remote_helpers / git / git.py
CommitLineData
2fe40b63
SR
1#!/usr/bin/env python
2
3"""Functionality for interacting with Git repositories.
4
5This module provides classes for interfacing with a Git repository.
6"""
7
8import os
9import re
10import time
11from binascii import hexlify
12from cStringIO import StringIO
13import unittest
14
15from git_remote_helpers.util import debug, error, die, start_command, run_command
16
17
def get_git_dir ():
    """Return the GIT_DIR path reported by 'git rev-parse --git-dir'.

    The program is terminated through die() when the git command exits
    with a non-zero status.  The command is expected to produce no
    error output.

    """
    status, stdout_text, stderr_text = run_command(
        ("git", "rev-parse", "--git-dir"))
    if status:
        die("Failed to retrieve git dir")
    assert not stderr_text
    # The command output carries surrounding whitespace (trailing newline).
    git_dir = stdout_text.strip()
    return git_dir
26
27
def parse_git_config ():
    """Return a dict containing the parsed version of 'git config -l'.

    The configuration is read with 'git config -z -l', where entries
    are terminated by NUL and the key is separated from its value by a
    newline.  A key that was configured without any value is listed
    without the newline separator; such keys are mapped to the empty
    string (which git_config_bool() interprets as True) instead of
    making the dict construction fail.

    The program is terminated through die() when the git command exits
    with a non-zero status.

    """
    exit_code, output, errors = run_command(("git", "config", "-z", "-l"))
    if exit_code:
        die("Failed to retrieve git configuration")
    assert not errors
    config = {}
    for entry in output.split("\0"):
        if not entry:
            continue  # Skip the empty piece after the final NUL
        # partition() copes with entries that lack the "\n" separator.
        key, _, value = entry.partition("\n")
        config[key] = value
    return config
35
36
def git_config_bool (value):
    """Interpret a git config value as a boolean.

    The value is converted to a string, stripped of surrounding
    whitespace and compared case-insensitively.  "true", "1", "yes",
    "on" and the empty string yield True; "false", "0", "no", "off"
    and "none" yield False.

    Raise ValueError for anything else.

    """
    meanings = {
        "true": True, "1": True, "yes": True, "on": True, "": True,
        "false": False, "0": False, "no": False, "off": False,
        "none": False,
    }
    key = str(value).strip().lower()
    result = meanings.get(key)
    if result is None:
        raise ValueError("Failed to parse '%s' into a boolean value" % (value))
    return result
50
51
def valid_git_ref (ref_name):
    """Return True iff the given ref name is a valid git ref name.

    This is a reimplementation of the rules listed in the git
    check-ref-format(1) manual page, and should be replaced by a call
    to check_refname_format() in the git library, when such is
    available.  One-level names (without any '/') are accepted.

    A name is rejected when it:
     - is empty, or is the single character '@'
     - begins or ends with '/', or contains '//'
     - ends with '.'
     - contains '..' or '@{'
     - has a slash-separated component that begins with '.' or ends
       with '.lock'
     - contains an ASCII control character, DEL, space, or any of
       ~ ^ : ? * [ \\

    """
    if not ref_name or ref_name == "@":
        return False
    if ref_name.startswith("/") or ref_name.endswith(("/", ".")):
        return False
    if ".." in ref_name or "@{" in ref_name or "//" in ref_name:
        return False
    # The dot/.lock rules apply to every path component, not only to
    # the ref name as a whole.
    for component in ref_name.split("/"):
        if component.startswith(".") or component.endswith(".lock"):
            return False
    for c in ref_name:
        if ord(c) < 0x20 or ord(c) == 0x7f or c in " ~^:?*[\\":
            return False
    return True
68
69
class GitObjectFetcher(object):

    """Provide parsed access to 'git cat-file --batch'.

    This provides a read-only interface to the Git object database.

    Object names are queued with push(); at most one name at a time is
    submitted to the 'git cat-file --batch' process, and its reply is
    parsed and handed to the callback that was registered with it.

    """

    def __init__ (self):
        """Initiate a 'git cat-file --batch' session."""
        self.queue = []  # List of (object name, callback) to be submitted
        self.in_transit = None  # (object name, callback) awaiting a reply

        # 'git cat-file --batch' produces binary output which is likely
        # to be corrupted by the default "rU"-mode pipe opened by
        # start_command. (Mode == "rU" does universal new-line
        # conversion, which mangles carriage returns.) Therefore, we
        # open an explicitly binary-safe pipe for transferring the
        # output from 'git cat-file --batch'.
        pipe_r_fd, pipe_w_fd = os.pipe()
        pipe_r = os.fdopen(pipe_r_fd, "rb")
        pipe_w = os.fdopen(pipe_w_fd, "wb")
        self.proc = start_command(("git", "cat-file", "--batch"),
                                  stdout = pipe_w)
        self.f = pipe_r

    def __del__ (self):
        """Verify completed communication with 'git cat-file --batch'."""
        assert not self.queue
        assert self.in_transit is None
        # The shutdown itself must not live inside assert statements,
        # or it would be skipped entirely when running with -O.
        self.proc.stdin.close()
        exit_code = self.proc.wait()
        leftover = self.f.read()
        assert exit_code == 0  # Zero exit code
        assert leftover == ""  # No remaining output

    def _submit_next_object (self):
        """Submit queue items to the 'git cat-file --batch' process.

        If there are items in the queue, and there is currently no item
        in 'transit', then pop the first item off the queue, and submit
        its object name (followed by a newline) to the process.

        """
        if self.queue and self.in_transit is None:
            self.in_transit = self.queue.pop(0)
            self.proc.stdin.write("%s\n" % (self.in_transit[0],))

    def push (self, obj, callback):
        """Push the given object name onto the queue.

        The given callback function will at some point in the future
        be called exactly once with the following arguments:
        - self - this GitObjectFetcher instance
        - obj  - the object name provided to push()
        - sha1 - the SHA1 of the object, if 'None' obj is missing
        - t    - the type of the object (tag/commit/tree/blob)
        - size - the size of the object in bytes
        - data - the object contents

        """
        self.queue.append((obj, callback))
        self._submit_next_object()  # (Re)start queue processing

    def process_next_entry (self):
        """Read the reply for the object in transit and invoke its callback.

        A missing object is reported to the callback with sha1, t, size
        and data all set to None.  In either case the next queued
        object (if any) is submitted afterwards, so that one missing
        object does not stall the rest of the queue.

        """
        obj, cb = self.in_transit
        self.in_transit = None
        header = self.f.readline()
        if header == "%s missing\n" % (obj):
            cb(self, obj, None, None, None, None)
        else:
            sha1, t, size = header.split(" ")
            assert len(sha1) == 40
            assert t in ("tag", "commit", "tree", "blob")
            assert size.endswith("\n")
            size = int(size.strip())
            data = self.f.read(size)
            # The object contents are followed by a single newline.
            terminator = self.f.read(1)
            assert terminator == "\n"
            cb(self, obj, sha1, t, size, data)
        self._submit_next_object()

    def process (self):
        """Process the current queue until empty."""
        while self.in_transit is not None:
            self.process_next_entry()

    # High-level convenience methods:

    def get_sha1 (self, objspec):
        """Return the SHA1 of the object specified by 'objspec'.

        Return None if 'objspec' does not specify an existing object.

        """
        class _ObjHandler(object):
            """Helper class for getting the returned SHA1."""
            def __init__ (self, parser):
                self.parser = parser
                self.sha1 = None

            def __call__ (self, parser, obj, sha1, t, size, data):
                # FIXME: Many unused arguments. Could this be cheaper?
                assert parser == self.parser
                self.sha1 = sha1

        handler = _ObjHandler(self)
        self.push(objspec, handler)
        self.process()
        return handler.sha1

    def open_obj (self, objspec):
        """Return a file object wrapping the contents of a named object.

        The caller is responsible for calling .close() on the returned
        file object.

        Raise KeyError if 'objspec' does not exist in the repo.

        """
        class _ObjHandler(object):
            """Helper class for parsing the returned git object."""
            def __init__ (self, parser):
                """Set up helper."""
                self.parser = parser
                self.contents = StringIO()
                self.err = None

            def __call__ (self, parser, obj, sha1, t, size, data):
                """Git object callback (see GitObjectFetcher documentation)."""
                assert parser == self.parser
                if not sha1:  # Missing object
                    self.err = "Missing object '%s'" % obj
                else:
                    assert size == len(data)
                    self.contents.write(data)

        handler = _ObjHandler(self)
        self.push(objspec, handler)
        self.process()
        if handler.err:
            raise KeyError(handler.err)
        handler.contents.seek(0)
        return handler.contents

    def walk_tree (self, tree_objspec, callback, prefix = ""):
        """Recursively walk the given Git tree object.

        Recursively walk all subtrees of the given tree object, and
        invoke the given callback passing three arguments:
        (path, mode, data) with the path, permission bits, and contents
        of all the blobs found in the entire tree structure.

        """
        class _ObjHandler(object):
            """Helper class for walking a git tree structure."""
            def __init__ (self, parser, cb, path, mode = None):
                """Set up helper."""
                self.parser = parser
                self.cb = cb
                self.path = path
                self.mode = mode
                self.err = None

            def parse_tree (self, treedata):
                """Parse tree object data, yield tree entries.

                Each tree entry is a 3-tuple (mode, sha1, path)

                self.path is prepended to all paths yielded
                from this method.

                """
                while treedata:
                    # Entry layout: "<mode> <path>\0<20-byte SHA1>".
                    # The mode is not fixed-width (subtrees use the
                    # five-digit "40000"), so locate the separator
                    # instead of assuming it sits at offset 6.
                    sp = treedata.find(" ")
                    assert sp > 0
                    mode = int(treedata[:sp], 10)
                    # Turn 100xxx into xxx
                    if mode > 100000:
                        mode -= 100000
                    nul = treedata.find("\0", sp + 1)
                    assert nul > sp
                    path = treedata[sp + 1:nul]
                    sha1 = hexlify(treedata[nul + 1:nul + 21])
                    yield (mode, sha1, self.path + path)
                    treedata = treedata[nul + 21:]

            def __call__ (self, parser, obj, sha1, t, size, data):
                """Git object callback (see GitObjectFetcher documentation)."""
                assert parser == self.parser
                if not sha1:  # Missing object
                    self.err = "Missing object '%s'" % (obj)
                    return
                assert size == len(data)
                if t == "tree":
                    if self.path:
                        self.path += "/"
                    # Recurse into all blobs and subtrees
                    for m, s, p in self.parse_tree(data):
                        parser.push(s,
                            self.__class__(self.parser, self.cb, p, m))
                elif t == "blob":
                    self.cb(self.path, self.mode, data)
                else:
                    raise ValueError("Unknown object type '%s'" % (t))

        self.push(tree_objspec, _ObjHandler(self, callback, prefix))
        self.process()
275
276
class GitRefMap(object):

    """Dictionary-like view of Git ref names -> Git object names.

    Lookups are resolved through the given object fetcher and
    remembered, so each ref name is resolved at most once.

    """

    def __init__ (self, obj_fetcher):
        """Set up an empty ref map backed by the given object fetcher."""
        self._cache = {}  # dict: refname -> objname (None if unresolved)
        self.obj_fetcher = obj_fetcher

    def _load (self, ref):
        """Resolve the given ref, remember the result, and return it.

        The result is whatever obj_fetcher.get_sha1() returned for the
        ref the first time it was looked up.

        """
        known = self._cache
        if ref in known:
            return known[ref]
        objname = self.obj_fetcher.get_sha1(ref)
        known[ref] = objname
        return objname

    def __contains__ (self, refname):
        """Return True if the given refname resolves to an object."""
        if self._load(refname):
            return True
        return False

    def __getitem__ (self, refname):
        """Return the object name for refname; raise KeyError if unknown."""
        objname = self._load(refname)
        if objname is not None:
            return objname
        raise KeyError("Unknown ref '%s'" % (refname))

    def get (self, refname, default = None):
        """Return the object name for refname, or default if unknown."""
        objname = self._load(refname)
        if objname is not None:
            return objname
        return default
318
319
class GitFICommit(object):

    """Encapsulate the data in a Git fast-import commit command.

    The path operations of the commit are collected in self.pathops as
    tuples of strings; each tuple is one fast-import command whose
    fields are to be joined by single spaces.

    Invalid arguments are reported by raising AssertionError.  The
    checks are explicit 'raise' statements, so they stay active when
    Python runs with -O.

    """

    # \Z rather than $: '$' would also match before a trailing newline.
    SHA1RE = re.compile(r'^[0-9a-f]{40}\Z')

    _VALID_MODES = (644, 755, 100644, 100755, 120000)

    @staticmethod
    def _require (condition, message):
        """Raise AssertionError(message) unless condition is true."""
        if not condition:
            raise AssertionError(message)

    @classmethod
    def parse_mode (cls, mode):
        """Verify the given git file mode, and return it as a string.

        Raise AssertionError if mode is not one of the integers
        644, 755, 100644, 100755 or 120000.

        """
        cls._require(mode in cls._VALID_MODES,
                     "Invalid file mode: %r" % (mode,))
        return "%i" % (mode)

    @classmethod
    def parse_objname (cls, objname):
        """Return the given object name (or mark number) as a string.

        An integer is taken to be a mark number and rendered as ':N';
        it must be positive.  Anything else must be a 40-digit
        lowercase hexadecimal SHA1.  Raise AssertionError otherwise.

        """
        if isinstance(objname, int):  # Object name is a mark number
            cls._require(objname > 0,
                         "Invalid mark number: %r" % (objname,))
            return ":%i" % (objname)

        # No existence check is done, only checks for valid format
        cls._require(cls.SHA1RE.match(objname),
                     "Invalid object name: %r" % (objname,))
        return objname

    @classmethod
    def quote_path (cls, path):
        """Return a quoted version of the given path."""
        path = path.replace("\\", "\\\\")
        path = path.replace("\n", "\\n")
        path = path.replace('"', '\\"')
        return '"%s"' % (path)

    @classmethod
    def parse_path (cls, path):
        """Verify that the given path is valid, and quote it, if needed.

        Raise AssertionError if the path is an integer or violates the
        path rules on the fast-import man page.  The path is quoted if
        it contains a double quote, a newline or a backslash.

        """
        # Cannot be a mark number
        cls._require(not isinstance(path, int),
                     "Path cannot be a mark number: %r" % (path,))

        # These checks verify the rules on the fast-import man page
        message = "Invalid path: %r" % (path,)
        for forbidden in ("//", "/./", "/../"):
            cls._require(forbidden not in path, message)
        cls._require(not path.startswith(("/", "./", "../")), message)
        cls._require(not path.endswith(("/", "/.", "/..")), message)

        if path.count('"') + path.count('\n') + path.count('\\'):
            return cls.quote_path(path)
        return path

    @classmethod
    def _parse_source_path (cls, path):
        """Like parse_path(), but also quote paths that contain a space.

        The source path of a copy/rename is followed by a second path
        on the same line, so fast-import requires it to be quoted when
        it contains a space.

        """
        parsed = cls.parse_path(path)
        # An unquoted result never starts with '"', since any '"' in
        # the path makes parse_path() quote it.
        if " " in parsed and not parsed.startswith('"'):
            return cls.quote_path(path)
        return parsed

    def __init__ (self, name, email, timestamp, timezone, message):
        """Create a new Git fast-import commit, with the given metadata."""
        self.name = name
        self.email = email
        self.timestamp = timestamp
        self.timezone = timezone
        self.message = message
        self.pathops = []  # List of path operations in this commit

    def modify (self, mode, blobname, path):
        """Add a file modification to this Git fast-import commit."""
        self.pathops.append(("M",
                             self.parse_mode(mode),
                             self.parse_objname(blobname),
                             self.parse_path(path)))

    def delete (self, path):
        """Add a file deletion to this Git fast-import commit."""
        self.pathops.append(("D", self.parse_path(path)))

    def copy (self, path, newpath):
        """Add a file copy to this Git fast-import commit."""
        self.pathops.append(("C",
                             self._parse_source_path(path),
                             self.parse_path(newpath)))

    def rename (self, path, newpath):
        """Add a file rename to this Git fast-import commit."""
        self.pathops.append(("R",
                             self._parse_source_path(path),
                             self.parse_path(newpath)))

    def note (self, blobname, commit):
        """Add a note object to this Git fast-import commit."""
        self.pathops.append(("N",
                             self.parse_objname(blobname),
                             self.parse_objname(commit)))

    def deleteall (self):
        """Delete all files in this Git fast-import commit."""
        # Stored as a 1-tuple like every other operation: joining a
        # bare string with " " would space out its characters.
        self.pathops.append(("deleteall",))
412
413
class TestGitFICommit(unittest.TestCase):

    """Selftests for the argument checking done by GitFICommit."""

    def test_basic (self):
        """GitFICommit basic selftests."""

        def expect_fail (method, data):
            """Check that method(data) is rejected with an AssertionError."""
            rejected = False
            try:
                method(data)
            except AssertionError:
                rejected = True
            if not rejected:
                raise AssertionError("Failed test for invalid data '%s(%s)'" %
                                     (method.__name__, repr(data)))

    def test_parse_mode (self):
        """GitFICommit.parse_mode() selftests."""
        accepted = ((644, "644"), (755, "755"), (100644, "100644"),
                    (100755, "100755"), (120000, "120000"))
        for mode, expected in accepted:
            self.assertEqual(GitFICommit.parse_mode(mode), expected)
        for mode in (0, 123, 600, "644", "abc"):
            self.assertRaises(AssertionError, GitFICommit.parse_mode, mode)

    def test_parse_objname (self):
        """GitFICommit.parse_objname() selftests."""
        self.assertEqual(GitFICommit.parse_objname(1), ":1")
        for mark in (0, -1):
            self.assertRaises(AssertionError, GitFICommit.parse_objname, mark)
        for sha1 in ("0123456789" * 4, "2468abcdef" * 4):
            self.assertEqual(GitFICommit.parse_objname(sha1), sha1)
        self.assertRaises(AssertionError, GitFICommit.parse_objname,
                          "abcdefghij" * 4)

    def test_parse_path (self):
        """GitFICommit.parse_path() selftests."""
        self.assertEqual(GitFICommit.parse_path("foo/bar"), "foo/bar")
        self.assertEqual(GitFICommit.parse_path("path/with\n and \" in it"),
                         '"path/with\\n and \\" in it"')
        rejected = (1, 0, -1,
                    "foo//bar", "foo/bar/", "/foo/bar",
                    "foo/./bar", "foo/../bar",
                    "foo/bar/.", "foo/bar/..",
                    "./foo/bar", "../foo/bar")
        for path in rejected:
            self.assertRaises(AssertionError, GitFICommit.parse_path, path)
472
473
class GitFastImport(object):

    """Encapsulate communication with git fast-import."""

    def __init__ (self, f, obj_fetcher, last_mark = 0):
        """Set up self to communicate with a fast-import process through f."""
        self.f = f  # File object where fast-import stream is written
        self.obj_fetcher = obj_fetcher  # GitObjectFetcher instance
        self.next_mark = last_mark + 1  # Next mark number
        self.refs = set()  # Keep track of the refnames we've seen

    def comment (self, s):
        """Write the given comment in the fast-import stream.

        The comment must not contain a newline.

        """
        assert "\n" not in s, "Malformed comment: '%s'" % (s)
        self.f.write("# %s\n" % (s))

    def commit (self, ref, commitdata):
        """Make a commit on the given ref, with the given GitFICommit.

        The first time a ref is committed to through this object, the
        commit is based on the current tip of that ref, provided the
        object fetcher can resolve it.

        Each entry of commitdata.pathops is written on a line of its
        own: a tuple or list has its fields joined by single spaces,
        and a plain string is written as it is.

        Return the mark number identifying this commit.

        """
        self.f.write(
            "commit %(ref)s\n"
            "mark :%(mark)i\n"
            "committer %(name)s <%(email)s> %(timestamp)i %(timezone)s\n"
            "data %(msgLength)i\n"
            "%(msg)s\n" % {
                'ref': ref,
                'mark': self.next_mark,
                'name': commitdata.name,
                'email': commitdata.email,
                'timestamp': commitdata.timestamp,
                'timezone': commitdata.timezone,
                'msgLength': len(commitdata.message),
                'msg': commitdata.message,
            })

        if ref not in self.refs:
            self.refs.add(ref)
            parent = ref + "^0"
            if self.obj_fetcher.get_sha1(parent):
                self.f.write("from %s\n" % (parent))

        for op in commitdata.pathops:
            # Joining a plain string would put a space between each of
            # its characters, so only sequences of fields are joined.
            if isinstance(op, (tuple, list)):
                op = " ".join(op)
            self.f.write(op)
            self.f.write("\n")
        self.f.write("\n")
        retval = self.next_mark
        self.next_mark += 1
        return retval

    def blob (self, data):
        """Import the given blob.

        Return the mark number identifying this blob.

        """
        self.f.write("blob\nmark :%i\ndata %i\n%s\n" %
                     (self.next_mark, len(data), data))
        retval = self.next_mark
        self.next_mark += 1
        return retval

    def reset (self, ref, objname):
        """Reset the given ref to point at the given Git object."""
        self.f.write("reset %s\nfrom %s\n\n" %
                     (ref, GitFICommit.parse_objname(objname)))
        self.refs.add(ref)
546
class GitNotes(object):

    """Encapsulate access to Git notes.

    Simulates a dictionary of object name (SHA1) -> Git note mappings.

    """

    def __init__ (self, notes_ref, obj_fetcher):
        """Create a new Git notes interface, bound to the given notes ref."""
        self.notes_ref = notes_ref
        self.obj_fetcher = obj_fetcher  # Used to get objects from repo
        self.imports = []  # list: (objname, note data blob name) tuples

    def __del__ (self):
        """Verify that self.commit_notes() was called before destruction."""
        if self.imports:
            error("Missing call to self.commit_notes().")
            error("%i notes are not committed!", len(self.imports))

    def _load (self, objname):
        """Return the note data associated with the given git object.

        The note is looked up as '<notes_ref>:<objname>' through the
        object fetcher.  The note data is returned in string form.  If
        no note is found for the given object, None is returned.

        """
        try:
            f = self.obj_fetcher.open_obj("%s:%s" % (self.notes_ref, objname))
            ret = f.read()
            f.close()
        except KeyError:
            ret = None
        return ret

    def __getitem__ (self, objname):
        """Return the note contents associated with the given object.

        Raise KeyError if given object has no associated note.

        """
        blobdata = self._load(objname)
        if blobdata is None:
            raise KeyError("Object '%s' has no note" % (objname))
        return blobdata

    def get (self, objname, default = None):
        """Return the note contents associated with the given object.

        Return given default if given object has no associated note.

        """
        blobdata = self._load(objname)
        if blobdata is None:
            return default
        return blobdata

    def import_note (self, objname, data, gfi):
        """Tell git fast-import to store data as a note for objname.

        This method uses the given GitFastImport object to create a
        blob containing the given note data (a trailing newline is
        added if missing). Also an entry mapping the given object name
        to the created blob is stored until commit_notes() is called.

        Note that this method only works if it is later followed by a
        call to self.commit_notes() (which produces the note commit
        that refers to the blob produced here).

        """
        if not data.endswith("\n"):
            data += "\n"
        gfi.comment("Importing note for object %s" % (objname))
        mark = gfi.blob(data)
        self.imports.append((objname, mark))

    def commit_notes (self, gfi, author, message):
        """Produce a git fast-import note commit for the imported notes.

        This method uses the given GitFastImport object to create a
        commit on the notes ref, introducing the notes previously
        submitted to import_note().  'author' is a (name, email) pair.
        Both the annotated objects and the note blobs must be given as
        mark numbers (positive integers).

        Nothing is done if there are no pending notes.

        """
        if not self.imports:
            return
        # The raw date format of fast-import requires a signed UTC
        # offset, hence "+0000" rather than a bare "0000".
        commitdata = GitFICommit(author[0], author[1],
                                 time.time(), "+0000", message)
        for objname, blobname in self.imports:
            assert isinstance(objname, int) and objname > 0
            assert isinstance(blobname, int) and blobname > 0
            commitdata.note(blobname, objname)
        gfi.commit(self.notes_ref, commitdata)
        self.imports = []
641
642
class GitCachedNotes(GitNotes):

    """Git notes access with a local object name -> note data cache.

    Only use this class if no caching is done at a higher level.

    Simulates a dictionary of object name (SHA1) -> Git note mappings.

    """

    def __init__ (self, notes_ref, obj_fetcher):
        """Initialise the GitNotes base and start with an empty cache."""
        GitNotes.__init__(self, notes_ref, obj_fetcher)
        self._cache = {}  # Cache: object name -> note data

    def __del__ (self):
        """Delegate destruction to GitNotes' destructor."""
        GitNotes.__del__(self)

    def _load (self, objname):
        """Return the note for objname, consulting the cache first.

        The result of GitNotes._load() is remembered, so each object
        name is looked up in the repository at most once.

        """
        cache = self._cache
        if objname in cache:
            return cache[objname]
        note = GitNotes._load(self, objname)
        cache[objname] = note
        return note

    def import_note (self, objname, data, gfi):
        """Record the note in the cache, then import it via GitNotes.

        The note data is newline-terminated before it is cached.  The
        object name must not be present in the cache already.

        """
        note = data if data.endswith("\n") else data + "\n"
        assert objname not in self._cache
        self._cache[objname] = note
        GitNotes.import_note(self, objname, note, gfi)
675
676
# Run the unittest selftests defined in this module (TestGitFICommit)
# when the file is executed as a script rather than imported.
if __name__ == '__main__':
    unittest.main()