Merge branch 'rh/maint-gitweb-highlight-ext' into maint
[git/git.git] / gitweb / gitweb.perl
1 #!/usr/bin/perl
2
3 # gitweb - simple web interface to track changes in git repositories
4 #
5 # (C) 2005-2006, Kay Sievers <kay.sievers@vrfy.org>
6 # (C) 2005, Christian Gierke
7 #
8 # This program is licensed under the GPLv2
9
10 use 5.008;
11 use strict;
12 use warnings;
13 use CGI qw(:standard :escapeHTML -nosticky);
14 use CGI::Util qw(unescape);
15 use CGI::Carp qw(fatalsToBrowser set_message);
16 use Encode;
17 use Fcntl ':mode';
18 use File::Find qw();
19 use File::Basename qw(basename);
20 use Time::HiRes qw(gettimeofday tv_interval);
21 binmode STDOUT, ':utf8';
22
23 our $t0 = [ gettimeofday() ];
24 our $number_of_git_cmds = 0;
25
26 BEGIN {
27 CGI->compile() if $ENV{'MOD_PERL'};
28 }
29
30 our $version = "++GIT_VERSION++";
31
32 our ($my_url, $my_uri, $base_url, $path_info, $home_link);
# Compute the URLs gitweb uses to refer to itself and store them in the
# package globals $my_url, $my_uri, $base_url, $path_info and $home_link.
# Reads the request through the global $cgi object and %ENV.
sub evaluate_uri {
    our $cgi;

    our $my_url = $cgi->url();
    our $my_uri = $cgi->url(-absolute => 1);

    # $base_url is the base for relative URLs in gitweb ($logo, $favicon,
    # ...); it is needed and used only for URLs with nonempty PATH_INFO.
    our $base_url = $my_url;

    # If gitweb is the DirectoryIndex, the URL does not contain the script
    # name and $cgi->url() fails to strip PATH_INFO, so it is stripped here.
    # $path_info is global because it is used again later on.
    #
    # In that setup $my_url is not the full script URL. That is wanted for
    # generated links (they keep implying the script name when the request
    # did not spell it out), but it makes $my_url unusable as a base URL.
    # So whenever PATH_INFO had to be stripped, the base URL is rebuilt
    # from the server base and SCRIPT_NAME instead.
    our $path_info = decode_utf8($ENV{"PATH_INFO"});
    if ($path_info) {
        # The web server has already URL-decoded $path_info, but $my_url
        # and $my_uri are still encoded; decode them so the suffix can
        # actually match.
        $my_url = unescape($my_url);
        $my_uri = unescape($my_uri);

        # Strip $my_uri only if stripping $my_url succeeded.
        my $url_stripped  = ($my_url =~ s,\Q$path_info\E$,,);
        my $both_stripped = $url_stripped && ($my_uri =~ s,\Q$path_info\E$,,);

        if ($both_stripped && defined $ENV{'SCRIPT_NAME'}) {
            $base_url = $cgi->url(-base => 1) . $ENV{'SCRIPT_NAME'};
        }
    }

    # target of the home link on top of all pages
    our $home_link = $my_uri || "/";
}
72
73 # core git executable to use
74 # this can just be "git" if your webserver has a sensible PATH
75 our $GIT = "++GIT_BINDIR++/git";
76
77 # absolute fs-path which will be prepended to the project path
78 #our $projectroot = "/pub/scm";
79 our $projectroot = "++GITWEB_PROJECTROOT++";
80
81 # fs traversing limit for getting project list
82 # the number is relative to the projectroot
83 our $project_maxdepth = "++GITWEB_PROJECT_MAXDEPTH++";
84
85 # string of the home link on top of all pages
86 our $home_link_str = "++GITWEB_HOME_LINK_STR++";
87
88 # name of your site or organization to appear in page titles
89 # replace this with something more descriptive for clearer bookmarks
90 our $site_name = "++GITWEB_SITENAME++"
91 || ($ENV{'SERVER_NAME'} || "Untitled") . " Git";
92
93 # html snippet to include in the <head> section of each page
94 our $site_html_head_string = "++GITWEB_SITE_HTML_HEAD_STRING++";
95 # filename of html text to include at top of each page
96 our $site_header = "++GITWEB_SITE_HEADER++";
97 # html text to include at home page
98 our $home_text = "++GITWEB_HOMETEXT++";
99 # filename of html text to include at bottom of each page
100 our $site_footer = "++GITWEB_SITE_FOOTER++";
101
102 # URI of stylesheets
103 our @stylesheets = ("++GITWEB_CSS++");
104 # URI of a single stylesheet, which can be overridden in GITWEB_CONFIG.
105 our $stylesheet = undef;
106 # URI of GIT logo (72x27 size)
107 our $logo = "++GITWEB_LOGO++";
108 # URI of GIT favicon, assumed to be image/png type
109 our $favicon = "++GITWEB_FAVICON++";
110 # URI of gitweb.js (JavaScript code for gitweb)
111 our $javascript = "++GITWEB_JS++";
112
113 # URI and label (title) of GIT logo link
114 #our $logo_url = "http://www.kernel.org/pub/software/scm/git/docs/";
115 #our $logo_label = "git documentation";
116 our $logo_url = "http://git-scm.com/";
117 our $logo_label = "git homepage";
118
119 # source of projects list
120 our $projects_list = "++GITWEB_LIST++";
121
122 # the width (in characters) of the projects list "Description" column
123 our $projects_list_description_width = 25;
124
125 # group projects by category on the projects list
126 # (enabled if this variable evaluates to true)
127 our $projects_list_group_categories = 0;
128
129 # default category if none specified
130 # (leave the empty string for no category)
131 our $project_list_default_category = "";
132
133 # default order of projects list
134 # valid values are none, project, descr, owner, and age
135 our $default_projects_order = "project";
136
137 # show repository only if this file exists
138 # (only effective if this variable evaluates to true)
139 our $export_ok = "++GITWEB_EXPORT_OK++";
140
141 # don't generate age column on the projects list page
142 our $omit_age_column = 0;
143
144 # don't generate information about owners of repositories
145 our $omit_owner=0;
146
147 # show repository only if this subroutine returns true
148 # when given the path to the project, for example:
149 # sub { return -e "$_[0]/git-daemon-export-ok"; }
150 our $export_auth_hook = undef;
151
152 # only allow viewing of repositories also shown on the overview page
153 our $strict_export = "++GITWEB_STRICT_EXPORT++";
154
155 # list of git base URLs used for URL to where fetch project from,
156 # i.e. full URL is "$git_base_url/$project"
157 our @git_base_url_list = grep { $_ ne '' } ("++GITWEB_BASE_URL++");
158
159 # default blob_plain mimetype and default charset for text/plain blob
160 our $default_blob_plain_mimetype = 'text/plain';
161 our $default_text_plain_charset = undef;
162
163 # file to use for guessing MIME types before trying /etc/mime.types
164 # (relative to the current git repository)
165 our $mimetypes_file = undef;
166
167 # assume this charset if line contains non-UTF-8 characters;
168 # it should be a valid encoding (see Encode::Supported(3pm) for the list),
169 # for which encoding all byte sequences are valid, for example
170 # 'iso-8859-1' aka 'latin1' (it is decoded without checking, so it
171 # could be even 'utf-8' for the old behavior)
172 our $fallback_encoding = 'latin1';
173
174 # rename detection options for git-diff and git-diff-tree
175 # - default is '-M', with the cost proportional to
176 # (number of removed files) * (number of new files).
177 # - more costly is '-C' (which implies '-M'), with the cost proportional to
178 # (number of changed files + number of removed files) * (number of new files)
179 # - even more costly is '-C', '--find-copies-harder' with cost
180 # (number of files in the original tree) * (number of new files)
181 # - one might want to include '-B' option, e.g. '-B', '-M'
182 our @diff_opts = ('-M'); # taken from git_commit
183
184 # Disables features that would allow repository owners to inject script into
185 # the gitweb domain.
186 our $prevent_xss = 0;
187
188 # Path to the highlight executable to use (must be the one from
189 # http://www.andre-simon.de due to assumptions about parameters and output).
190 # Useful if highlight is not installed on your webserver's PATH.
191 # [Default: highlight]
192 our $highlight_bin = "++HIGHLIGHT_BIN++";
193
194 # information about snapshot formats that gitweb is capable of serving
195 our %known_snapshot_formats = (
196 # name => {
197 # 'display' => display name,
198 # 'type' => mime type,
199 # 'suffix' => filename suffix,
200 # 'format' => --format for git-archive,
201 # 'compressor' => [compressor command and arguments]
202 # (array reference, optional)
203 # 'disabled' => boolean (optional)}
204 #
205 'tgz' => {
206 'display' => 'tar.gz',
207 'type' => 'application/x-gzip',
208 'suffix' => '.tar.gz',
209 'format' => 'tar',
210 'compressor' => ['gzip', '-n']},
211
212 'tbz2' => {
213 'display' => 'tar.bz2',
214 'type' => 'application/x-bzip2',
215 'suffix' => '.tar.bz2',
216 'format' => 'tar',
217 'compressor' => ['bzip2']},
218
219 'txz' => {
220 'display' => 'tar.xz',
221 'type' => 'application/x-xz',
222 'suffix' => '.tar.xz',
223 'format' => 'tar',
224 'compressor' => ['xz'],
225 'disabled' => 1},
226
227 'zip' => {
228 'display' => 'zip',
229 'type' => 'application/x-zip',
230 'suffix' => '.zip',
231 'format' => 'zip'},
232 );
233
234 # Aliases so we understand old gitweb.snapshot values in repository
235 # configuration.
236 our %known_snapshot_format_aliases = (
237 'gzip' => 'tgz',
238 'bzip2' => 'tbz2',
239 'xz' => 'txz',
240
241 # backward compatibility: legacy gitweb config support
242 'x-gzip' => undef, 'gz' => undef,
243 'x-bzip2' => undef, 'bz2' => undef,
244 'x-zip' => undef, '' => undef,
245 );
246
247 # Pixel sizes for icons and avatars. If the default font sizes or lineheights
248 # are changed, it may be appropriate to change these values too via
249 # $GITWEB_CONFIG.
250 our %avatar_size = (
251 'default' => 16,
252 'double' => 32
253 );
254
255 # Used to set the maximum load that we will still respond to gitweb queries.
256 # If server load exceeds this value then return "503 server busy" error.
257 # If gitweb cannot determine server load, it is taken to be 0.
258 # Leave it undefined (or set to 'undef') to turn off load checking.
259 our $maxload = 300;
260
261 # configuration for 'highlight' (http://www.andre-simon.de/)
262 # match by basename
263 our %highlight_basename = (
264 #'Program' => 'py',
265 #'Library' => 'py',
266 'SConstruct' => 'py', # SCons equivalent of Makefile
267 'Makefile' => 'make',
268 );
269 # match by extension
270 our %highlight_ext = (
271 # main extensions, defining name of syntax;
272 # see files in /usr/share/highlight/langDefs/ directory
273 (map { $_ => $_ } qw(py rb java css js tex bib xml awk bat ini spec tcl sql)),
274 # alternate extensions, see /etc/highlight/filetypes.conf
275 (map { $_ => 'c' } qw(c h)),
276 (map { $_ => 'sh' } qw(sh bash zsh ksh)),
277 (map { $_ => 'cpp' } qw(cpp cxx c++ cc)),
278 (map { $_ => 'php' } qw(php php3 php4 php5 phps)),
279 (map { $_ => 'pl' } qw(pl perl pm)), # perhaps also 'cgi'
280 (map { $_ => 'make'} qw(make mak mk)),
281 (map { $_ => 'xml' } qw(xml xhtml html htm)),
282 );
283
284 # You define site-wide feature defaults here; override them with
285 # $GITWEB_CONFIG as necessary.
286 our %feature = (
287 # feature => {
288 # 'sub' => feature-sub (subroutine),
289 # 'override' => allow-override (boolean),
290 # 'default' => [ default options...] (array reference)}
291 #
292 # if feature is overridable (it means that allow-override has true value),
293 # then feature-sub will be called with default options as parameters;
294 # return value of feature-sub indicates if to enable specified feature
295 #
296 # if there is no 'sub' key (no feature-sub), then feature cannot be
297 # overridden
298 #
299 # use gitweb_get_feature(<feature>) to retrieve the <feature> value
300 # (an array) or gitweb_check_feature(<feature>) to check if <feature>
301 # is enabled
302
303 # Enable the 'blame' blob view, showing the last commit that modified
304 # each line in the file. This can be very CPU-intensive.
305
306 # To enable system wide have in $GITWEB_CONFIG
307 # $feature{'blame'}{'default'} = [1];
308 # To have project specific config enable override in $GITWEB_CONFIG
309 # $feature{'blame'}{'override'} = 1;
310 # and in project config gitweb.blame = 0|1;
311 'blame' => {
312 'sub' => sub { feature_bool('blame', @_) },
313 'override' => 0,
314 'default' => [0]},
315
316 # Enable the 'snapshot' link, providing a compressed archive of any
317 # tree. This can potentially generate high traffic if you have a large
318 # project.
319
320 # Value is a list of formats defined in %known_snapshot_formats that
321 # you wish to offer.
322 # To disable system wide have in $GITWEB_CONFIG
323 # $feature{'snapshot'}{'default'} = [];
324 # To have project specific config enable override in $GITWEB_CONFIG
325 # $feature{'snapshot'}{'override'} = 1;
326 # and in project config, a comma-separated list of formats or "none"
327 # to disable. Example: gitweb.snapshot = tbz2,zip;
328 'snapshot' => {
329 'sub' => \&feature_snapshot,
330 'override' => 0,
331 'default' => ['tgz']},
332
333 # Enable text search, which will list the commits which match author,
334 # committer or commit text to a given string. Enabled by default.
335 # Project specific override is not supported.
336 #
337 # Note that this controls all search features, which means that if
338 # it is disabled, then 'grep' and 'pickaxe' search would also be
339 # disabled.
340 'search' => {
341 'override' => 0,
342 'default' => [1]},
343
344 # Enable grep search, which will list the files in currently selected
345 # tree containing the given string. Enabled by default. This can be
346 # potentially CPU-intensive, of course.
347 # Note that you need to have 'search' feature enabled too.
348
349 # To enable system wide have in $GITWEB_CONFIG
350 # $feature{'grep'}{'default'} = [1];
351 # To have project specific config enable override in $GITWEB_CONFIG
352 # $feature{'grep'}{'override'} = 1;
353 # and in project config gitweb.grep = 0|1;
354 'grep' => {
355 'sub' => sub { feature_bool('grep', @_) },
356 'override' => 0,
357 'default' => [1]},
358
359 # Enable the pickaxe search, which will list the commits that modified
360 # a given string in a file. This can be practical and quite faster
361 # alternative to 'blame', but still potentially CPU-intensive.
362 # Note that you need to have 'search' feature enabled too.
363
364 # To enable system wide have in $GITWEB_CONFIG
365 # $feature{'pickaxe'}{'default'} = [1];
366 # To have project specific config enable override in $GITWEB_CONFIG
367 # $feature{'pickaxe'}{'override'} = 1;
368 # and in project config gitweb.pickaxe = 0|1;
369 'pickaxe' => {
370 'sub' => sub { feature_bool('pickaxe', @_) },
371 'override' => 0,
372 'default' => [1]},
373
374 # Enable showing size of blobs in a 'tree' view, in a separate
375 # column, similar to what 'ls -l' does. This costs a bit of IO.
376
377 # To disable system wide have in $GITWEB_CONFIG
378 # $feature{'show-sizes'}{'default'} = [0];
379 # To have project specific config enable override in $GITWEB_CONFIG
380 # $feature{'show-sizes'}{'override'} = 1;
381 # and in project config gitweb.showsizes = 0|1;
382 'show-sizes' => {
383 'sub' => sub { feature_bool('showsizes', @_) },
384 'override' => 0,
385 'default' => [1]},
386
387 # Make gitweb use an alternative format of the URLs which can be
388 # more readable and natural-looking: project name is embedded
389 # directly in the path and the query string contains other
390 # auxiliary information. All gitweb installations recognize
391 # URL in either format; this configures in which formats gitweb
392 # generates links.
393
394 # To enable system wide have in $GITWEB_CONFIG
395 # $feature{'pathinfo'}{'default'} = [1];
396 # Project specific override is not supported.
397
398 # Note that you will need to change the default location of CSS,
399 # favicon, logo and possibly other files to an absolute URL. Also,
400 # if gitweb.cgi serves as your indexfile, you will need to force
401 # $my_uri to contain the script name in your $GITWEB_CONFIG.
402 'pathinfo' => {
403 'override' => 0,
404 'default' => [0]},
405
406 # Make gitweb consider projects in project root subdirectories
407 # to be forks of existing projects. Given project $projname.git,
408 # projects matching $projname/*.git will not be shown in the main
409 # projects list, instead a '+' mark will be added to $projname
410 # there and a 'forks' view will be enabled for the project, listing
411 # all the forks. If project list is taken from a file, forks have
412 # to be listed after the main project.
413
414 # To enable system wide have in $GITWEB_CONFIG
415 # $feature{'forks'}{'default'} = [1];
416 # Project specific override is not supported.
417 'forks' => {
418 'override' => 0,
419 'default' => [0]},
420
421 # Insert custom links to the action bar of all project pages.
422 # This enables you mainly to link to third-party scripts integrating
423 # into gitweb; e.g. git-browser for graphical history representation
424 # or custom web-based repository administration interface.
425
426 # The 'default' value consists of a list of triplets in the form
427 # (label, link, position) where position is the label after which
428 # to insert the link and link is a format string where %n expands
429 # to the project name, %f to the project path within the filesystem,
430 # %h to the current hash (h gitweb parameter) and %b to the current
431 # hash base (hb gitweb parameter); %% expands to %.
432
433 # To enable system wide have in $GITWEB_CONFIG e.g.
434 # $feature{'actions'}{'default'} = [('graphiclog',
435 # '/git-browser/by-commit.html?r=%n', 'summary')];
436 # Project specific override is not supported.
437 'actions' => {
438 'override' => 0,
439 'default' => []},
440
441 # Allow gitweb to scan project content tags of project repository,
442 # and display the popular Web 2.0-ish "tag cloud" near the projects
443 # list. Note that this is something COMPLETELY different from the
444 # normal Git tags.
445
446 # gitweb by itself can show existing tags, but it does not handle
447 # tagging itself; you need to do it externally, outside gitweb.
448 # The format is described in git_get_project_ctags() subroutine.
449 # You may want to install the HTML::TagCloud Perl module to get
450 # a pretty tag cloud instead of just a list of tags.
451
452 # To enable system wide have in $GITWEB_CONFIG
453 # $feature{'ctags'}{'default'} = [1];
454 # Project specific override is not supported.
455
456 # In the future whether ctags editing is enabled might depend
457 # on the value, but using 1 should always mean no editing of ctags.
458 'ctags' => {
459 'override' => 0,
460 'default' => [0]},
461
462 # The maximum number of patches in a patchset generated in patch
463 # view. Set this to 0 or undef to disable patch view, or to a
464 # negative number to remove any limit.
465
466 # To disable system wide have in $GITWEB_CONFIG
467 # $feature{'patches'}{'default'} = [0];
468 # To have project specific config enable override in $GITWEB_CONFIG
469 # $feature{'patches'}{'override'} = 1;
470 # and in project config gitweb.patches = 0|n;
471 # where n is the maximum number of patches allowed in a patchset.
472 'patches' => {
473 'sub' => \&feature_patches,
474 'override' => 0,
475 'default' => [16]},
476
477 # Avatar support. When this feature is enabled, views such as
478 # shortlog or commit will display an avatar associated with
479 # the email of the committer(s) and/or author(s).
480
481 # Currently available providers are gravatar and picon.
482 # If an unknown provider is specified, the feature is disabled.
483
484 # Gravatar depends on Digest::MD5.
485 # Picon currently relies on the indiana.edu database.
486
487 # To enable system wide have in $GITWEB_CONFIG
488 # $feature{'avatar'}{'default'} = ['<provider>'];
489 # where <provider> is either gravatar or picon.
490 # To have project specific config enable override in $GITWEB_CONFIG
491 # $feature{'avatar'}{'override'} = 1;
492 # and in project config gitweb.avatar = <provider>;
493 'avatar' => {
494 'sub' => \&feature_avatar,
495 'override' => 0,
496 'default' => ['']},
497
498 # Enable displaying how much time and how many git commands
499 # it took to generate and display page. Disabled by default.
500 # Project specific override is not supported.
501 'timed' => {
502 'override' => 0,
503 'default' => [0]},
504
505 # Enable turning some links into links to actions which require
506 # JavaScript to run (like 'blame_incremental'). Not enabled by
507 # default. Project specific override is currently not supported.
508 'javascript-actions' => {
509 'override' => 0,
510 'default' => [0]},
511
512 # Enable and configure ability to change common timezone for dates
513 # in gitweb output via JavaScript. Enabled by default.
514 # Project specific override is not supported.
515 'javascript-timezone' => {
516 'override' => 0,
517 'default' => [
518 'local', # default timezone: 'utc', 'local', or '(-|+)HHMM' format,
519 # or undef to turn off this feature
520 'gitweb_tz', # name of cookie where to store selected timezone
521 'datetime', # CSS class used to mark up dates for manipulation
522 ]},
523
524 # Syntax highlighting support. This is based on Daniel Svensson's
525 # and Sham Chukoury's work in gitweb-xmms2.git.
526 # It requires the 'highlight' program present in $PATH,
527 # and therefore is disabled by default.
528
529 # To enable system wide have in $GITWEB_CONFIG
530 # $feature{'highlight'}{'default'} = [1];
531
532 'highlight' => {
533 'sub' => sub { feature_bool('highlight', @_) },
534 'override' => 0,
535 'default' => [0]},
536
537 # Enable displaying of remote heads in the heads list
538
539 # To enable system wide have in $GITWEB_CONFIG
540 # $feature{'remote_heads'}{'default'} = [1];
541 # To have project specific config enable override in $GITWEB_CONFIG
542 # $feature{'remote_heads'}{'override'} = 1;
543 # and in project config gitweb.remote_heads = 0|1;
544 'remote_heads' => {
545 'sub' => sub { feature_bool('remote_heads', @_) },
546 'override' => 0,
547 'default' => [0]},
548 );
549
# Return the list of values for feature $name from %feature.
#
# Returns nothing when the feature is unknown.  The site-wide defaults are
# returned unless the feature is marked overridable and a project is
# selected ($git_dir defined); in that case the feature's 'sub' is called
# with the defaults and its result is returned.
sub gitweb_get_feature {
    my ($name) = @_;
    return unless exists $feature{$name};

    my $entry    = $feature{$name};
    my $handler  = $entry->{'sub'};
    my @defaults = @{ $entry->{'default'} };

    # project specific override is possible only if we have project
    our $git_dir;    # global variable, declared later
    return @defaults
        unless $entry->{'override'} && defined $git_dir;

    unless (defined $handler) {
        warn "feature $name is not overridable";
        return @defaults;
    }

    return $handler->(@defaults);
}
568
# Convenience wrapper returning only the first value of a feature, which
# is what boolean features need.  It lets callers write
#
#     my $bool_feat = gitweb_check_feature('bool_feat');
#     gitweb_check_feature('bool_feat') or somecode;
#
# rather than
#
#     my ($bool_feat) = gitweb_get_feature('bool_feat');
#     (gitweb_get_feature('bool_feat'))[0] or somecode;
#
sub gitweb_check_feature {
    my @feature_args = @_;

    # A list slice (not an array slice) is used on purpose: for an empty
    # result it yields an empty list rather than (undef).
    return (gitweb_get_feature(@feature_args))[0];
}
583
584
# Feature-sub for boolean features.
#
# $key is the project configuration key; the remaining arguments are the
# feature defaults.  Asks git_get_project_config() for $key with '--bool':
#   - no value configured -> the first default is returned
#   - 'true'              -> (1)
#   - 'false'             -> (0)
# Any other string is not handled explicitly and falls off the end of the
# sub, which yields a false value.
sub feature_bool {
    my $key = shift;
    my ($configured) = git_get_project_config($key, '--bool');

    return ($_[0]) unless defined $configured;
    return (1) if $configured eq 'true';
    return (0) if $configured eq 'false';
}
597
# Feature-sub for 'snapshot'.
#
# Receives the default snapshot formats and returns the formats to offer.
# A true project-level 'snapshot' setting replaces the defaults: the
# literal value 'none' means no formats, anything else is split on commas
# and/or whitespace.
sub feature_snapshot {
    my @formats = @_;

    my ($configured) = git_get_project_config('snapshot');

    if ($configured) {
        @formats = ();
        @formats = split(/\s*[,\s]\s*/, $configured)
            unless $configured eq 'none';
    }

    return @formats;
}
609
# Feature-sub for 'patches'.
#
# Returns whatever git_get_project_config('patches', '--int') yields when
# that is non-empty, otherwise the first default passed in.
sub feature_patches {
    my @configured = (git_get_project_config('patches', '--int'));

    return @configured ? @configured : ($_[0]);
}
619
# Feature-sub for 'avatar'.
#
# Returns the project-level 'avatar' setting when git_get_project_config()
# yields one, otherwise the defaults passed in.
sub feature_avatar {
    my @configured = (git_get_project_config('avatar'));

    return @configured if @configured;
    return @_;
}
625
# Return true if $dir has a usable HEAD.
#
# A plain -e test on HEAD is not enough: a repository initialized long ago
# may have HEAD as a symlink, and after pack-refs and pruning the symlink
# target no longer exists on disk, so -e fails.  Such a dangling symlink is
# still accepted as long as it points into refs/heads/.
sub check_head_link {
    my ($dir) = @_;
    my $head_path = "$dir/HEAD";

    return 1 if -e $head_path;

    return (-l $head_path && readlink($head_path) =~ /^refs\/heads\//);
}
635
# Return true if the repository at $dir may be shown.  All of these hold:
#   - check_head_link($dir) succeeds,
#   - $export_ok is unset, or the file "$dir/$export_ok" exists,
#   - $export_auth_hook is unset, or the hook returns true for $dir.
# The checks short-circuit in that order.
sub check_export_ok {
    my ($dir) = @_;

    my $marker_ok = sub { !$export_ok || -e "$dir/$export_ok" };
    my $hook_ok   = sub { !$export_auth_hook || $export_auth_hook->($dir) };

    return (check_head_link($dir) && $marker_ok->() && $hook_ok->());
}
642
# Normalize a list of snapshot format names.
#
# Names found in %known_snapshot_format_aliases are replaced by the alias
# target (for backward compatibility); afterwards only names that are keys
# of %known_snapshot_formats and not marked 'disabled' are kept.
sub filter_snapshot_fmts {
    my @requested = @_;

    my @canonical = map {
        exists $known_snapshot_format_aliases{$_}
            ? $known_snapshot_format_aliases{$_}
            : $_
    } @requested;

    return grep {
        exists $known_snapshot_formats{$_}
            && !$known_snapshot_formats{$_}{'disabled'}
    } @canonical;
}
655
656 # If it is set to code reference, it is code that it is to be run once per
657 # request, allowing updating configurations that change with each request,
658 # while running other code in config file only once.
659 #
660 # Otherwise, if it is false then gitweb would process config file only once;
661 # if it is true then gitweb config would be run for each request.
662 our $per_request_config = 1;
663
# Read and evaluate the gitweb config file named by $filename.
#
# Returns 1 when the file exists and was evaluated, and nothing (false)
# when $filename is undefined or the file does not exist, so calls can be
# chained to use the first file that exists.  Dies with the config file's
# own error when evaluating it fails, since that is unrecoverable.
sub read_config_file {
    my $filename = shift;
    return unless defined $filename;
    return unless -e $filename;

    # -e resolves a relative name against the current directory, but
    # `do FILE` searches @INC for any path that does not start with "/",
    # "./" or "../".  On perls without "." in @INC a relative config file
    # would pass the -e test, never be executed, and still be reported as
    # read.  Make the path absolute so both refer to the same file.
    require File::Spec;
    my $path = File::Spec->rel2abs($filename);

    do $path;
    # die if there are errors parsing config file
    die $@ if $@;
    return 1;
}
679
680 our ($GITWEB_CONFIG, $GITWEB_CONFIG_SYSTEM, $GITWEB_CONFIG_COMMON);
# Locate and read the gitweb configuration files.
#
# Each of the three file names comes from the environment variable of the
# same name, falling back to the build-time value.  The common file is read
# first; then the per-instance file is read, and only if that one does not
# exist is the system-wide file read.
sub evaluate_gitweb_config {
    our $GITWEB_CONFIG        = $ENV{'GITWEB_CONFIG'}        || "++GITWEB_CONFIG++";
    our $GITWEB_CONFIG_SYSTEM = $ENV{'GITWEB_CONFIG_SYSTEM'} || "++GITWEB_CONFIG_SYSTEM++";
    our $GITWEB_CONFIG_COMMON = $ENV{'GITWEB_CONFIG_COMMON'} || "++GITWEB_CONFIG_COMMON++";

    # Protect against duplicated file names, so that no config is read
    # twice.  Only one of $GITWEB_CONFIG and $GITWEB_CONFIG_SYSTEM is ever
    # used, so those two naming the same file does not matter.
    for my $config_name ($GITWEB_CONFIG, $GITWEB_CONFIG_SYSTEM) {
        # $config_name aliases the global, so this blanks the global itself
        $config_name = "" if ($config_name eq $GITWEB_CONFIG_COMMON);
    }

    # Common system-wide settings for convenience.
    # Those settings can be overridden by GITWEB_CONFIG or GITWEB_CONFIG_SYSTEM.
    read_config_file($GITWEB_CONFIG_COMMON);

    # Use first config file that exists: the per-instance GITWEB_CONFIG if
    # it exists, otherwise GITWEB_CONFIG_SYSTEM.
    return if read_config_file($GITWEB_CONFIG);
    read_config_file($GITWEB_CONFIG_SYSTEM);
}
701
# Return the system load average for comparison against $maxload.
#
# Only '/proc/loadavg' is supported as a source.  When that file is absent
# or cannot be opened the result is 0, which effectively disables load
# checking.
sub get_loadavg {
    my $loadavg_file = '/proc/loadavg';

    if (-e $loadavg_file) {
        open my $fd, '<', $loadavg_file
            or return 0;
        my $first_line = <$fd>;
        close $fd;

        # Whitespace-separated columns; the first one is the load average
        # over the most recent (one minute) period, which is the value
        # compared against the limit.
        my @columns = split(/\s+/, $first_line);
        return $columns[0] || 0;
    }
    # additional checks for load average should go here for things that
    # don't export /proc/loadavg

    return 0;
}
723
724 # version of the core git binary
725 our $git_version;
# Run "$GIT --version" and store the reported version in the global
# $git_version ("unknown" when the output does not match), counting the
# invocation in $number_of_git_cmds.
sub evaluate_git_version {
    my $version_output = qx("$GIT" --version);

    our $git_version =
        ($version_output =~ m/git version (.*)$/) ? $1 : "unknown";
    $number_of_git_cmds++;
}
730
# Refuse the request with a 503 error when load checking is enabled
# ($maxload defined) and get_loadavg() reports a load above $maxload.
sub check_loadavg {
    # get_loadavg() is only consulted when a limit is configured
    my $overloaded = defined $maxload && get_loadavg() > $maxload;

    die_error(503, "The load average on the server is too high")
        if $overloaded;
}
736
737 # ======================================================================
738 # input validation and dispatch
739
740 # input parameters can be collected from a variety of sources (presently, CGI
741 # and PATH_INFO), so we define an %input_params hash that collects them all
742 # together during validation: this allows subsequent uses (e.g. href()) to be
743 # agnostic of the parameter origin
744
745 our %input_params = ();
746
747 # input parameters are stored with the long parameter name as key. This will
748 # also be used in the href subroutine to convert parameters to their CGI
749 # equivalent, and since the href() usage is the most frequent one, we store
750 # the name -> CGI key mapping here, instead of the reverse.
751 #
752 # XXX: Warning: If you touch this, check the search form for updating,
753 # too.
754
755 our @cgi_param_mapping = (
756 project => "p",
757 action => "a",
758 file_name => "f",
759 file_parent => "fp",
760 hash => "h",
761 hash_parent => "hp",
762 hash_base => "hb",
763 hash_parent_base => "hpb",
764 page => "pg",
765 order => "o",
766 searchtext => "s",
767 searchtype => "st",
768 snapshot_format => "sf",
769 extra_options => "opt",
770 search_use_regexp => "sr",
771 ctag => "by_tag",
772 diff_style => "ds",
773 project_filter => "pf",
774 # this must be last entry (for manipulation from JavaScript)
775 javascript => "js"
776 );
777 our %cgi_param_mapping = @cgi_param_mapping;
778
779 # we will also need to know the possible actions, for validation
780 our %actions = (
781 "blame" => \&git_blame,
782 "blame_incremental" => \&git_blame_incremental,
783 "blame_data" => \&git_blame_data,
784 "blobdiff" => \&git_blobdiff,
785 "blobdiff_plain" => \&git_blobdiff_plain,
786 "blob" => \&git_blob,
787 "blob_plain" => \&git_blob_plain,
788 "commitdiff" => \&git_commitdiff,
789 "commitdiff_plain" => \&git_commitdiff_plain,
790 "commit" => \&git_commit,
791 "forks" => \&git_forks,
792 "heads" => \&git_heads,
793 "history" => \&git_history,
794 "log" => \&git_log,
795 "patch" => \&git_patch,
796 "patches" => \&git_patches,
797 "remotes" => \&git_remotes,
798 "rss" => \&git_rss,
799 "atom" => \&git_atom,
800 "search" => \&git_search,
801 "search_help" => \&git_search_help,
802 "shortlog" => \&git_shortlog,
803 "summary" => \&git_summary,
804 "tag" => \&git_tag,
805 "tags" => \&git_tags,
806 "tree" => \&git_tree,
807 "snapshot" => \&git_snapshot,
808 "object" => \&git_object,
809 # those below don't need $project
810 "opml" => \&git_opml,
811 "project_list" => \&git_project_list,
812 "project_index" => \&git_project_index,
813 );
814
815 # finally, we have the hash of allowed extra_options for the commands that
816 # allow them
817 our %allowed_options = (
818 "--no-merges" => [ qw(rss atom log shortlog history) ],
819 );
820
# Populate %input_params from the CGI query parameters.
#
# Every long parameter name in %cgi_param_mapping is looked up under its
# short CGI name and stored UTF-8 decoded.  All parameters are stored as
# single values except 'opt', which may occur several times and is stored
# as an array reference.  It is the only multi-valued parameter for now,
# so it is singled out here instead of keeping a list of such parameters.
sub evaluate_query_params {
    our $cgi;

    for my $name (keys %cgi_param_mapping) {
        my $symbol = $cgi_param_mapping{$name};

        $input_params{$name} = ($symbol eq 'opt')
            ? [ map { decode_utf8($_) } $cgi->param($symbol) ]
            : decode_utf8($cgi->param($symbol));
    }
}
836
# Read PATH_INFO and use it to fill in parameters that are still missing
# from %input_params.
#
# Does nothing when the project was already given as a query parameter or
# when PATH_INFO is empty.  Otherwise PATH_INFO is consumed left to right:
#   - the longest leading part accepted by check_head_link() becomes
#     'project';
#   - if an action was given in the query string, nothing else is touched;
#   - an optional first component naming a key of %actions becomes 'action';
#   - the rest is parsed as
#       [$hash_parent_base[:$file_parent]..]$hash_parent[:$file_name]
#     and distributed over hash/hash_base/file_name and their parent
#     counterparts, never overriding values that are already set;
#   - for the snapshot action a trailing format suffix is split off the
#     hash and stored as 'snapshot_format'.
#
# Note that the global $path_info is modified (consumed) in the process.
sub evaluate_path_info {
	return if defined $input_params{'project'};
	return if !$path_info;
	$path_info =~ s,^/+,,;
	return if !$path_info;

	# find which part of PATH_INFO is project: drop trailing path
	# components until check_head_link() accepts what is left
	my $project = $path_info;
	$project =~ s,/+$,,;
	while ($project && !check_head_link("$projectroot/$project")) {
		$project =~ s,/*[^/]*$,,;
	}
	return unless $project;
	$input_params{'project'} = $project;

	# do not change any parameters if an action is given using the query string
	return if $input_params{'action'};
	$path_info =~ s,^\Q$project\E/*,,;

	# next, check if we have an action
	my $action = $path_info;
	$action =~ s,/.*$,,;
	if (exists $actions{$action}) {
		# quote the name so that it is always matched literally
		$path_info =~ s,^\Q$action\E/*,,;
		$input_params{'action'} = $action;
	}

	# actions that want hash_base instead of hash, but can have no
	# pathname (f) parameter
	my %wants_base = map { $_ => 1 } qw(tree history);
	# true if the currently selected action (if any) is one of those
	my $action_wants_base = sub {
		my $current = $input_params{'action'};
		return defined $current && exists $wants_base{$current};
	};

	# we want to catch, among others
	# [$hash_parent_base[:$file_parent]..]$hash_parent[:$file_name]
	my ($parentrefname, $parentpathname, $refname, $pathname) =
		($path_info =~ /^(?:(.+?)(?::(.+))?\.\.)?([^:]+?)?(?::(.+))?$/);

	# first, analyze the 'current' part
	if (defined $pathname) {
		# we got "branch:filename" or "branch:dir/"
		# we could use git_get_type(branch:pathname), but:
		# - it needs $git_dir
		# - it does a git() call
		# - the convention of terminating directories with a slash
		#   makes it superfluous
		# - embedding the action in the PATH_INFO would make it even
		#   more superfluous
		$pathname =~ s,^/+,,;
		if (!$pathname || substr($pathname, -1) eq "/") {
			$input_params{'action'} ||= "tree";
			$pathname =~ s,/$,,;
		} else {
			# the default action depends on whether we had parent info
			# or not
			if ($parentrefname) {
				$input_params{'action'} ||= "blobdiff_plain";
			} else {
				$input_params{'action'} ||= "blob_plain";
			}
		}
		$input_params{'hash_base'} ||= $refname;
		$input_params{'file_name'} ||= $pathname;
	} elsif (defined $refname) {
		# we got "branch". In this case we have to choose if we have to
		# set hash or hash_base.
		#
		# Most of the actions without a pathname only want hash to be
		# set, except for the ones listed in %wants_base that want
		# hash_base instead. It should also be noted that hand-crafted
		# links having 'history' as an action and no pathname or hash
		# set will fail, but that happens regardless of PATH_INFO.
		if (defined $parentrefname) {
			# if there is parent let the default be 'shortlog' action
			# (for http://git.example.com/repo.git/A..B links); if there
			# is no parent, dispatch will detect type of object and set
			# action appropriately if required (if action is not set)
			$input_params{'action'} ||= "shortlog";
		}
		if ($action_wants_base->()) {
			$input_params{'hash_base'} ||= $refname;
		} else {
			$input_params{'hash'} ||= $refname;
		}
	}

	# next, handle the 'parent' part, if present
	if (defined $parentrefname) {
		# a missing pathspec defaults to the 'current' filename, allowing e.g.
		# someproject/blobdiff/oldrev..newrev:/filename
		if ($parentpathname) {
			$parentpathname =~ s,^/+,,;
			$parentpathname =~ s,/$,,;
			$input_params{'file_parent'} ||= $parentpathname;
		} else {
			$input_params{'file_parent'} ||= $input_params{'file_name'};
		}
		# we assume that hash_parent_base is wanted if a path was specified,
		# or if the action wants hash_base instead of hash; the action may
		# still be unset here (e.g. for "A.."), which the helper tolerates
		if (defined $input_params{'file_parent'} || $action_wants_base->()) {
			$input_params{'hash_parent_base'} ||= $parentrefname;
		} else {
			$input_params{'hash_parent'} ||= $parentrefname;
		}
	}

	# for the snapshot action, we allow URLs in the form
	# $project/snapshot/$hash.ext
	# where .ext determines the snapshot and gets removed from the
	# passed $refname to provide the $hash.
	#
	# To be able to tell that $refname includes the format extension, we
	# require the following two conditions to be satisfied:
	# - the hash input parameter MUST have been set from the $refname part
	#   of the URL (i.e. they must be equal)
	# - the snapshot format MUST NOT have been defined already (e.g. from
	#   CGI parameter sf)
	# It's also useless to try any matching unless $refname has a dot,
	# so we check for that too
	if (defined $input_params{'action'} &&
	    $input_params{'action'} eq 'snapshot' &&
	    defined $refname && index($refname, '.') != -1 &&
	    defined $input_params{'hash'} &&
	    $refname eq $input_params{'hash'} &&
	    !defined $input_params{'snapshot_format'}) {
		# We loop over the known snapshot formats, checking for
		# extensions. Allowed extensions are both the defined suffix
		# (which includes the initial dot already) and the snapshot
		# format key itself, with a prepended dot.
		#
		# Iterate over keys() rather than each(): leaving an each()
		# loop early keeps the hash iterator in the middle of the
		# hash, so in a persistent process (FastCGI, mod_perl) the
		# next request would start scanning from there and could
		# miss formats.
		foreach my $fmt (keys %known_snapshot_formats) {
			my $opt = $known_snapshot_formats{$fmt};
			my $hash = $refname;
			next unless $hash =~ s/(\Q$opt->{'suffix'}\E|\Q.$fmt\E)$//;
			my $sfx = $1;
			# a valid suffix was found, so set the snapshot format
			# and reset the hash parameter
			$input_params{'snapshot_format'} = $fmt;
			$input_params{'hash'} = $hash;
			# we also set the format suffix to the one requested
			# in the URL: this way a request for e.g. .tgz returns
			# a .tgz instead of a .tar.gz
			$known_snapshot_formats{$fmt}{'suffix'} = $sfx;
			last;
		}
	}
}
987
our ($action, $project, $file_name, $file_parent, $hash, $hash_parent, $hash_base,
     $hash_parent_base, @extra_options, $page, $searchtype, $search_use_regexp,
     $searchtext, $search_regexp, $project_filter);

# Copy the request parameters from %input_params into the package
# variables declared above, validating each one on the way.
#
# Any invalid value is reported through die_error() with the HTTP status
# and message shown below.  Parameters that are not set are left
# undefined (@extra_options is left empty).  For the search text,
# $search_regexp is set either to the text itself (when regexp search was
# requested and the text compiles as a regexp) or to its quotemeta()ed
# form.
sub evaluate_and_validate_params {
	our $action = $input_params{'action'};
	if (defined $action && !validate_action($action)) {
		die_error(400, "Invalid action parameter");
	}

	# parameters which are pathnames
	our $project = $input_params{'project'};
	if (defined $project && !validate_project($project)) {
		# make sure the rejected name is not used by the error page
		undef $project;
		die_error(404, "No such project");
	}

	our $project_filter = $input_params{'project_filter'};
	if (defined $project_filter && !validate_pathname($project_filter)) {
		die_error(404, "Invalid project_filter parameter");
	}

	our $file_name = $input_params{'file_name'};
	if (defined $file_name && !validate_pathname($file_name)) {
		die_error(400, "Invalid file parameter");
	}

	our $file_parent = $input_params{'file_parent'};
	if (defined $file_parent && !validate_pathname($file_parent)) {
		die_error(400, "Invalid file parent parameter");
	}

	# parameters which are refnames
	our $hash = $input_params{'hash'};
	if (defined $hash && !validate_refname($hash)) {
		die_error(400, "Invalid hash parameter");
	}

	our $hash_parent = $input_params{'hash_parent'};
	if (defined $hash_parent && !validate_refname($hash_parent)) {
		die_error(400, "Invalid hash parent parameter");
	}

	our $hash_base = $input_params{'hash_base'};
	if (defined $hash_base && !validate_refname($hash_base)) {
		die_error(400, "Invalid hash base parameter");
	}

	# extra_options is normally an array reference (possibly empty) filled
	# in from the CGI parameters; tolerate it being unset altogether
	our @extra_options = @{ $input_params{'extra_options'} || [] };
	foreach my $opt (@extra_options) {
		if (not exists $allowed_options{$opt}) {
			die_error(400, "Invalid option parameter");
		}
		# compare as strings: the action may be undefined here, and it
		# should never be interpreted as a regular expression
		if (!defined $action ||
		    !grep { $_ eq $action } @{$allowed_options{$opt}}) {
			die_error(400, "Invalid option parameter for this action");
		}
	}

	our $hash_parent_base = $input_params{'hash_parent_base'};
	if (defined $hash_parent_base && !validate_refname($hash_parent_base)) {
		die_error(400, "Invalid hash parent base parameter");
	}

	# other parameters
	our $page = $input_params{'page'};
	if (defined $page && $page =~ m/[^0-9]/) {
		die_error(400, "Invalid page parameter");
	}

	our $searchtype = $input_params{'searchtype'};
	if (defined $searchtype && $searchtype =~ m/[^a-z]/) {
		die_error(400, "Invalid searchtype parameter");
	}

	our $search_use_regexp = $input_params{'search_use_regexp'};

	our $searchtext = $input_params{'searchtext'};
	our $search_regexp;
	if (defined $searchtext) {
		if (length($searchtext) < 2) {
			die_error(403, "At least two characters are required for search parameter");
		}
		if ($search_use_regexp) {
			$search_regexp = $searchtext;
			# try to compile the user-supplied pattern to catch
			# syntax errors early
			if (!eval { qr/$search_regexp/; 1; }) {
				# strip the " at FILE line N" location from the message
				(my $error = $@) =~ s/ at \S+ line \d+.*\n?//;
				die_error(400, "Invalid search regexp '$search_regexp'",
				          esc_html($error));
			}
		} else {
			$search_regexp = quotemeta $searchtext;
		}
	}
}
1106
# Path to the git repository of the current request; set by
# evaluate_git_dir() to "$projectroot/$project" when a project is selected.
our $git_dir;
sub evaluate_git_dir {
	if ($project) {
		our $git_dir = "$projectroot/$project";
	}
}
1112
our (@snapshot_fmts, $git_avatar);

# Evaluate the features whose value is needed globally: the list of
# snapshot formats and the avatar provider.
sub configure_gitweb_features {
	# snapshot formats taken from the 'snapshot' feature, then passed
	# through filter_snapshot_fmts()
	our @snapshot_fmts = gitweb_get_feature('snapshot');
	@snapshot_fmts = filter_snapshot_fmts(@snapshot_fmts);

	# The avatar feature must name a known provider whose dependencies
	# can be loaded; in any other case $git_avatar is reset to the empty
	# string.
	our ($git_avatar) = gitweb_get_feature('avatar');
	if ($git_avatar eq 'picon') {
		# picon has no dependencies
	}
	elsif ($git_avatar eq 'gravatar') {
		# gravatar needs Digest::MD5
		my $have_md5 = eval { require Digest::MD5; 1; };
		$git_avatar = '' unless ($have_md5);
	}
	else {
		# unknown provider
		$git_avatar = '';
	}
}
1132
# Error handler installed through CGI::Carp's set_message(): a plain
# 'die <message>' ends up here and is shown via die_error().
# The message passed in is already HTML escaped.
sub handle_errors_html {
	my ($escaped_msg) = @_;

	# Switch back to a plain-text handler first: should die_error itself
	# die, we must not be called again, or we would loop forever.
	set_message("Error occured when inside die_error:\n$escaped_msg");

	# The handler set via CGI::Carp::set_message runs _after_ the HTTP
	# headers have been written, so die_error must not print them again,
	# and it is told that it runs as an error handler because one cannot
	# jump out of it in that situation.
	die_error(undef, undef, $escaped_msg,
	          -error_handler => 1, -no_http_header => 1);
}
set_message(\&handle_errors_html);
1147
# Pick the action for the current request (when none was given explicitly)
# and run its handler from %actions.
#
# Without an explicit action the choice is:
#   - the type of the object named by $hash, if set;
#   - else the type of "$hash_base:$file_name", if both are set;
#   - else 'summary' when a project is selected, 'project_list' otherwise.
# Errors (unknown object, unknown action, missing project) are reported
# through die_error().
sub dispatch {
	unless (defined $action) {
		if (defined $hash) {
			$action = git_get_type($hash);
			die_error(404, "Object does not exist") unless $action;
		}
		elsif (defined $hash_base && defined $file_name) {
			$action = git_get_type("$hash_base:$file_name");
			die_error(404, "File or directory does not exist") unless $action;
		}
		else {
			$action = defined $project ? 'summary' : 'project_list';
		}
	}

	my $handler = $actions{$action};
	die_error(400, "Unknown action") unless defined($handler);

	# only these actions can work without a project
	my $project_optional = ($action =~ m/^(?:opml|project_list|project_index)$/);
	die_error(400, "Project needed") unless ($project_optional || $project);

	$handler->();
}
1172
# Restart the per-request statistics: take a fresh start timestamp (only
# if $t0 is defined at all) and zero the git command counter.
sub reset_timer {
	if (defined $t0) {
		our $t0 = [ gettimeofday() ];
	}
	our $number_of_git_cmds = 0;
}
1178
# true while the first request of this process is being served
our $first_request = 1;

# Serve a single request: (re)load configuration as needed, evaluate and
# validate the request parameters, then dispatch to the action handler.
sub run_request {
	reset_timer();

	evaluate_uri();

	# configuration and git version are read once per process ...
	if ($first_request) {
		evaluate_gitweb_config();
		evaluate_git_version();
	}
	# ... unless per-request configuration is enabled: a code reference
	# is called on every request, any other true value makes the config
	# file be re-read on every request after the first one
	if (ref($per_request_config) eq 'CODE') {
		$per_request_config->();
	}
	elsif ($per_request_config && !$first_request) {
		evaluate_gitweb_config();
	}
	check_loadavg();

	# $projectroot and $projects_list might be set in gitweb config file
	$projects_list ||= $projectroot;

	# the order of these steps matters: each one builds on the results
	# of the previous ones
	foreach my $step (\&evaluate_query_params,
	                  \&evaluate_path_info,
	                  \&evaluate_and_validate_params,
	                  \&evaluate_git_dir,
	                  \&configure_gitweb_features,
	                  \&dispatch) {
		$step->();
	}
}
1209
# Request loop configuration: by default plain CGI, where every request
# is the last one served by the process.
our $is_last_request = sub { 1 };
our ($pre_dispatch_hook, $post_dispatch_hook, $pre_listen_hook);
our $CGI = 'CGI';
our $cgi;

# Switch the request loop to FastCGI: requests are created through
# CGI::Fast, and the process stops once it has served 100 requests.
sub configure_as_fcgi {
	require CGI::Fast;
	our $CGI = 'CGI::Fast';

	my $served = 0;
	our $is_last_request = sub {
		$served += 1;
		return $served > 100;
	};
}
# Decide how to run based on the script name and the command line:
#   - a script name ending in ".fcgi" selects FastCGI mode;
#   - --fastcgi / --fcgi / -f selects FastCGI mode;
#   - --nproc=N / -n N sets up FCGI::ProcManager with N processes and
#     installs the listen/dispatch hooks (silently ignored when the
#     module cannot be loaded).
sub evaluate_argv {
	my $script_name = $ENV{'SCRIPT_NAME'} || $ENV{'SCRIPT_FILENAME'} || __FILE__;
	if ($script_name =~ /\.fcgi$/) {
		configure_as_fcgi();
	}

	return unless (@ARGV);

	# handler for the 'nproc' option
	my $use_proc_manager = sub {
		my ($arg, $val) = @_;
		return unless eval { require FCGI::ProcManager; 1; };
		my $proc_manager = FCGI::ProcManager->new({
			n_processes => $val,
		});
		our $pre_listen_hook    = sub { $proc_manager->pm_manage()        };
		our $pre_dispatch_hook  = sub { $proc_manager->pm_pre_dispatch()  };
		our $post_dispatch_hook = sub { $proc_manager->pm_post_dispatch() };
	};

	require Getopt::Long;
	Getopt::Long::GetOptions(
		'fastcgi|fcgi|f' => \&configure_as_fcgi,
		'nproc|n=i'      => $use_proc_manager,
	);
}
1244
# Main loop: create request objects through the configured CGI class and
# serve them until there are none left or $is_last_request says so.
# The optional hooks are called before listening and around every request.
sub run {
	evaluate_argv();

	$first_request = 1;
	if ($pre_listen_hook) {
		$pre_listen_hook->();
	}

 REQUEST:
	while ($cgi = $CGI->new()) {
		if ($pre_dispatch_hook) {
			$pre_dispatch_hook->();
		}

		run_request();

		if ($post_dispatch_hook) {
			$post_dispatch_hook->();
		}
		$first_request = 0;

		last REQUEST if ($is_last_request->());
	}

	# labelled end of request processing
 DONE_GITWEB:
	1;
}
1269
run();

if (!defined caller) {
	# pure CGI script, serving single request
	exit;
} else {
	# we were wrapped in a subroutine that processes requests,
	# e.g. mod_perl with ModPerl::Registry, or PSGI with Plack::App::WrapCGI
	return;
}
1280
1281 ## ======================================================================
1282 ## action links
1283
# Build a gitweb URL from named parameters.
#
# Parameters are the long names from @cgi_param_mapping (project, action,
# hash, ...).  'project' defaults to the current $project unless given
# explicitly (pass project => undef for project-less links).
#
# possible values of extra options
# -full => 0|1      - use absolute/full URL ($my_uri/$my_url as base)
# -replay => 1      - start from a current view (replay with modifications)
# -path_info => 0|1 - don't use/use path_info URL (if possible)
# -anchor => ANCHOR - add #ANCHOR to end of URL, implies -replay if used alone
#
# Returns the URL as a string.  When path_info URLs are in use, as many
# parameters as possible are encoded in the path; everything else ends up
# in the query string.
sub href {
	my %params = @_;
	# default is to use -absolute url() i.e. $my_uri
	my $href = $params{-full} ? $my_url : $my_uri;

	# implicit -replay, must be first of implicit params
	$params{-replay} = 1 if (keys %params == 1 && $params{-anchor});

	$params{'project'} = $project unless exists $params{'project'};

	if ($params{-replay}) {
		# take over every parameter of the current request that was
		# not explicitly overridden
		foreach my $name (keys %cgi_param_mapping) {
			if (!exists $params{$name}) {
				$params{$name} = $input_params{$name};
			}
		}
	}

	my $use_pathinfo = gitweb_check_feature('pathinfo');
	if (defined $params{'project'} &&
	    (exists $params{-path_info} ? $params{-path_info} : $use_pathinfo)) {
		# try to put as many parameters as possible in PATH_INFO:
		#   - project name
		#   - action
		#   - hash_parent or hash_parent_base:/file_parent
		#   - hash or hash_base:/filename
		#   - the snapshot_format as an appropriate suffix

		# When the script is the root DirectoryIndex for the domain,
		# $href here would be something like http://gitweb.example.com/
		# Thus, we strip any trailing / from $href, to spare us double
		# slashes in the final URL
		$href =~ s,/$,,;

		# Then add the project name, if present
		$href .= "/".esc_path_info($params{'project'});
		delete $params{'project'};

		# since we destructively absorb parameters, we keep this
		# boolean that remembers if we're handling a snapshot;
		# the action may legitimately be missing
		my $is_snapshot = defined $params{'action'} &&
		                  $params{'action'} eq 'snapshot';

		# Summary just uses the project path URL, any other action is
		# added to the URL
		if (defined $params{'action'}) {
			$href .= "/".esc_path_info($params{'action'})
				unless $params{'action'} eq 'summary';
			delete $params{'action'};
		}

		# Next, we put hash_parent_base:/file_parent..hash_base:/file_name,
		# stripping nonexistent or useless pieces
		$href .= "/" if ($params{'hash_base'} || $params{'hash_parent_base'}
		                 || $params{'hash_parent'} || $params{'hash'});
		if (defined $params{'hash_base'}) {
			if (defined $params{'hash_parent_base'}) {
				$href .= esc_path_info($params{'hash_parent_base'});
				# skip the file_parent if it's the same as the file_name
				if (defined $params{'file_parent'}) {
					if (defined $params{'file_name'} && $params{'file_parent'} eq $params{'file_name'}) {
						delete $params{'file_parent'};
					} elsif ($params{'file_parent'} !~ /\.\./) {
						# names containing '..' would be ambiguous in
						# the path and stay in the query string
						$href .= ":/".esc_path_info($params{'file_parent'});
						delete $params{'file_parent'};
					}
				}
				$href .= "..";
				delete $params{'hash_parent'};
				delete $params{'hash_parent_base'};
			} elsif (defined $params{'hash_parent'}) {
				$href .= esc_path_info($params{'hash_parent'}). "..";
				delete $params{'hash_parent'};
			}

			$href .= esc_path_info($params{'hash_base'});
			if (defined $params{'file_name'} && $params{'file_name'} !~ /\.\./) {
				$href .= ":/".esc_path_info($params{'file_name'});
				delete $params{'file_name'};
			}
			delete $params{'hash'};
			delete $params{'hash_base'};
		} elsif (defined $params{'hash'}) {
			$href .= esc_path_info($params{'hash'});
			delete $params{'hash'};
		}

		# If the action was a snapshot, we can absorb the
		# snapshot_format parameter too
		if ($is_snapshot) {
			my $fmt = $params{'snapshot_format'};
			# snapshot_format should always be defined when href()
			# is called, but just in case some code forgets, we
			# fall back to the default
			$fmt ||= $snapshot_fmts[0];
			$href .= $known_snapshot_formats{$fmt}{'suffix'};
			delete $params{'snapshot_format'};
		}
	}

	# now encode the parameters explicitly, in the order given by
	# @cgi_param_mapping (a flat list of name => symbol pairs)
	my @result = ();
	for (my $i = 0; $i < @cgi_param_mapping; $i += 2) {
		my ($name, $symbol) = ($cgi_param_mapping[$i], $cgi_param_mapping[$i+1]);
		if (defined $params{$name}) {
			if (ref($params{$name}) eq "ARRAY") {
				foreach my $par (@{$params{$name}}) {
					push @result, $symbol . "=" . esc_param($par);
				}
			} else {
				push @result, $symbol . "=" . esc_param($params{$name});
			}
		}
	}
	$href .= "?" . join(';', @result) if scalar @result;

	# final transformation: trailing spaces must be escaped (URI-encoded)
	$href =~ s/(\s+)$/CGI::escape($1)/e;

	if ($params{-anchor}) {
		$href .= "#".esc_param($params{-anchor});
	}

	return $href;
}
1413
1414
1415 ## ======================================================================
1416 ## validation, quoting/unquoting and escaping
1417
# Return the given action name if it is a key of %actions, undef otherwise
# (also for an empty or missing name).
sub validate_action {
	my ($input) = @_;
	return undef unless $input;
	return exists $actions{$input} ? $input : undef;
}
1423
# Return the given project name if it may be shown, undef otherwise.
# A project is accepted when its name is a valid pathname, it is a
# directory below $projectroot, check_export_ok() accepts it and, with
# $strict_export enabled, it is also present in the projects list.
sub validate_project {
	my ($input) = @_;
	return undef unless $input;

	return undef unless validate_pathname($input);
	return undef unless -d "$projectroot/$input";
	return undef unless check_export_ok("$projectroot/$input");
	return undef if ($strict_export && !project_in_list($input));

	return $input;
}
1435
# Return the given pathname if it is acceptable, undef otherwise
# (also for an empty or missing pathname).
sub validate_pathname {
	my ($input) = @_;
	$input or return undef;

	# A path component must not be empty, '.' or '..'; this rejects those
	# at the beginning, at the end and between slashes, and thereby
	# leading, trailing and doubled slashes as well.
	my $bad_component = ($input =~ m!(^|/)(|\.|\.\.)(/|$)!);
	# NUL characters are not allowed either
	my $has_nul = ($input =~ m!\0!);

	return ($bad_component || $has_nul) ? undef : $input;
}
1451
# Return the given ref name if it is acceptable, undef otherwise
# (also for an empty or missing name).
sub validate_refname {
	my ($input) = @_;
	return undef unless $input;

	# a full textual object name (40 hex digits) is always fine
	return $input if ($input =~ m/^[0-9a-fA-F]{40}$/);

	# anything else has to be a valid pathname first ...
	$input = validate_pathname($input);
	return undef unless $input;

	# ... and obey the restrictions from git-check-ref-format
	return undef if ($input =~ m!(/\.|\.\.|[\000-\040\177 ~^:?*\[]|/$)!);

	return $input;
}
1468
# Convert a string of octets into Perl's internal character form.
# A string that already carries the utf8 flag is returned as is; otherwise
# it is decoded as UTF-8 and, if that fails, decoded using
# $fallback_encoding.  Returns undef for undefined input.
# (gitweb writes its output as UTF-8 thanks to "binmode STDOUT, ':utf8'"
# at the beginning of the script.)
sub to_utf8 {
	my ($str) = @_;
	defined $str or return undef;

	return $str if utf8::is_utf8($str);
	# utf8::decode() converts in place and reports whether it succeeded
	return $str if utf8::decode($str);

	return decode($fallback_encoding, $str, Encode::FB_DEFAULT);
}
1482
# Escape a value for use in the query string of a URL.
# Unsafe characters are percent-encoded and spaces become '+'.  The slash
# is deliberately left alone: strictly speaking it should be quoted too,
# but quoted slashes look too horrible in bookmarks.
# Returns undef for undefined input.
sub esc_param {
	my ($str) = @_;
	defined $str or return undef;

	$str =~ s/([^A-Za-z0-9\-_.~()\/:@ ]+)/CGI::escape($1)/eg;
	$str =~ tr/ /+/;

	return $str;
}
1492
# Escape a value for use in the PATH_INFO part of a URL.
# The rules differ slightly from those for query parameters: '+' is not
# special (it does not stand for a space) and is kept, as are spaces,
# whereas '?' must be escaped.  Returns undef for undefined input.
sub esc_path_info {
	my ($str) = @_;
	defined $str or return undef;

	$str =~ s/([^A-Za-z0-9\-_.~();\/;:@&= +]+)/CGI::escape($1)/eg;

	return $str;
}
1503
# Escape unsafe characters in a complete URL.  Because the whole URL is
# processed, the characters that give it its structure cannot be quoted.
# Spaces become '+'.  Returns undef for undefined input.
sub esc_url {
	my ($str) = @_;
	defined $str or return undef;

	$str =~ s/([^A-Za-z0-9\-_.~();\/;?:@&= ]+)/CGI::escape($1)/eg;
	$str =~ tr/ /+/;

	return $str;
}
1512
# Escape a value for use inside an HTML attribute.
# Escaping only '"' as '&quot;' would not be enough for XHTML
# conformance, so this is the same escaping as esc_html().
sub esc_attr {
	my @args = @_;
	return esc_html(@args);
}
1519
# Escape a string for inclusion in HTML.
# The string is converted with to_utf8(), HTML-escaped, and all control
# characters except for tab are replaced by their quot_cec()
# representation.  With the option -nbsp => 1 every space becomes
# '&nbsp;'.  Returns undef for undefined input.
sub esc_html {
	my ($str, %opts) = @_;

	return undef unless defined $str;

	$str = to_utf8($str);
	$str = $cgi->escapeHTML($str);
	$str =~ s/ /&nbsp;/g if ($opts{'-nbsp'});
	$str =~ s|([[:cntrl:]])|(($1 ne "\t") ? quot_cec($1) : $1)|eg;

	return $str;
}
1535
# Escape a file name for inclusion in HTML.
# Works like esc_html(), except that every control character, tab
# included, is replaced by its quot_cec() representation.
# Returns undef for undefined input.
sub esc_path {
	my ($str, %opts) = @_;

	return undef unless defined $str;

	$str = to_utf8($str);
	$str = $cgi->escapeHTML($str);
	$str =~ s/ /&nbsp;/g if ($opts{'-nbsp'});
	$str =~ s|([[:cntrl:]])|quot_cec($1)|eg;

	return $str;
}
1551
# Sanitize a string for use in XHTML served as application/xhtml+xml
# (i.e. it has to be valid XML 1.0): control characters other than tab,
# line feed and carriage return are replaced by their quot_cec()
# representation.  No HTML escaping is done here.
# Returns undef for undefined input.
sub sanitize {
	my ($str) = @_;

	defined $str or return undef;

	$str = to_utf8($str);
	$str =~ s|([[:cntrl:]])|($1 =~ /[\t\n\r]/ ? $1 : quot_cec($1))|eg;

	return $str;
}
1562
# Make a control character "printable" using character escape codes (CEC).
# Characters with a well-known escape sequence are shown as such (e.g. a
# line feed as the two characters '\n'); all others as a backslash
# followed by their code in hexadecimal.  Unless the option -nohtml is
# given, the result is wrapped in <span class="cntrl">.
sub quot_cec {
	my ($cntrl, %opts) = @_;

	# character escape codes, aka escape sequences
	my %es = (
		"\t"   => '\t',   # tab             (HT)
		"\n"   => '\n',   # line feed       (LF)
		"\r"   => '\r',   # carriage return (CR)
		"\f"   => '\f',   # form feed       (FF)
		"\b"   => '\b',   # backspace       (BS)
		"\a"   => '\a',   # alarm (bell)    (BEL)
		"\e"   => '\e',   # escape          (ESC)
		"\013" => '\v',   # vertical tab    (VT)
		"\000" => '\0',   # nul character   (NUL)
	);

	my $chr;
	if (exists $es{$cntrl}) {
		$chr = $es{$cntrl};
	} else {
		$chr = sprintf('\%2x', ord($cntrl));
	}

	return $chr if ($opts{-nohtml});
	return "<span class=\"cntrl\">$chr</span>";
}
1587
# Alternative to quot_cec(): make a control character "printable" using
# the Unicode "Control Pictures" codepoints, i.e. the Unicode
# "printable representation" (PR).
#
# The result is a decimal numeric character reference to the picture of
# the given character.  Unless the option -nohtml is given, it is wrapped
# in <span class="cntrl">.
sub quot_upr {
	my $cntrl = shift;
	my %opts = @_;

	# The pictures for the control characters 0x00..0x1f start at U+2400
	# and follow the same order; DELETE (0x7f) is the exception, its
	# picture lives at U+2421 and not at U+2400 + 0x7f.
	my $code = ord($cntrl);
	my $picture = ($code == 0x7f) ? 0x2421 : 0x2400 + $code;

	my $chr = sprintf('&#%04d;', $picture);
	if ($opts{-nohtml}) {
		return $chr;
	} else {
		return "<span class=\"cntrl\">$chr</span>";
	}
}
1601
# Undo the quoting that git may apply to file names: a name enclosed in
# double quotes has the quotes removed and its backslash escapes (octal
# codes, C escape sequences, quoted ordinary characters) decoded.
# Any other string is returned unchanged.
sub unquote {
	my ($str) = @_;

	# decode what follows the backslash of a single escape
	sub unq {
		my ($seq) = @_;

		# octal char sequence
		return chr(oct($seq)) if ($seq =~ m/^[0-7]{1,3}$/);

		# character escape codes, aka escape sequences
		my %es = (
			't' => "\t",   # tab            (HT, TAB)
			'n' => "\n",   # newline        (NL)
			'r' => "\r",   # return         (CR)
			'f' => "\f",   # form feed      (FF)
			'b' => "\b",   # backspace      (BS)
			'a' => "\a",   # alarm (bell)   (BEL)
			'e' => "\e",   # escape         (ESC)
			'v' => "\013", # vertical tab   (VT)
		);

		# either a C escape sequence, or a quoted ordinary character
		return exists $es{$seq} ? $es{$seq} : $seq;
	}

	# only names enclosed in double quotes need unquoting
	return $str unless ($str =~ m/^"(.*)"$/);

	$str = $1;
	$str =~ s/\\([^0-7]|[0-7]{1,3})/unq($1)/eg;
	return $str;
}
1637
# Expand tabs: every tab is replaced by as many spaces as it takes to
# reach the next tab stop, with tab stops every 8 columns.
sub untabify {
	my ($line) = @_;

	my $pos;
	while (($pos = index($line, "\t")) != -1) {
		# distance to the next tab stop, always between 1 and 8
		my $width = 8 - ($pos % 8);
		substr($line, $pos, 1, ' ' x $width);
	}

	return $line;
}
1651
# Check whether the given project path is among the projects returned by
# git_get_projects_list(); the result is true (the number of matching
# entries) if it is, false otherwise.
sub project_in_list {
	my ($wanted) = @_;
	my @list = git_get_projects_list();
	return @list && scalar(grep { $_->{'path'} eq $wanted } @list);
}
1657
1658 ## ----------------------------------------------------------------------
1659 ## HTML aware string manipulation
1660
# Shorten a string to about $len characters, marking the removed part
# with an ellipsis.
#
# Arguments: the string, the target length $len, optionally $add_len
# (default 10) and where to chop: 'left', 'center' or 'right' (default).
#
# The string is chopped on a word boundary between position $len and
# $len+$add_len; if there is no word boundary there, it is chopped at
# $len+$add_len.  The string is returned unchanged if the chopped part
# plus the ellipsis marking it would not be shorter than what it
# replaces, or if there is not enough text left to chop.
sub chop_str {
	my $str = shift;
	my $len = shift;
	my $add_len = shift || 10;
	my $where = shift || 'right'; # 'left' | 'center' | 'right'

	# Make sure perl knows it is utf8 encoded so we don't
	# cut in the middle of a utf8 multibyte char.
	$str = to_utf8($str);

	# when chopping in the middle, distribute $len into left and right part
	# return early if chopping wouldn't make string shorter
	if ($where eq 'center') {
		return $str if ($len + 5 >= length($str)); # filler is length 5
		$len = int($len/2);
	} else {
		return $str if ($len + 4 >= length($str)); # filler is length 4
	}

	# regexps: ending and beginning with word part up to $add_len;
	# /s lets '.' match newlines too, so that multi-line strings are
	# chopped like any other string instead of failing to match
	my $endre = qr/.{$len}\w{0,$add_len}/s;
	my $begre = qr/\w{0,$add_len}.{$len}/s;

	# Every match below is checked: on failure the string is returned
	# unchopped rather than being assembled from stale capture variables.
	if ($where eq 'left') {
		my ($lead, $body) = ($str =~ m/^(.*?)($begre)\z/s)
			or return $str;
		if (length($lead) > 4) {
			$lead = " ...";
		}
		return "$lead$body";

	} elsif ($where eq 'center') {
		my ($left, $rest) = ($str =~ m/^($endre)(.*)\z/s)
			or return $str;
		# what is left may be too short for a right part; chopping
		# would then not make the string any shorter
		my ($mid, $right) = ($rest =~ m/^(.*?)($begre)\z/s)
			or return $str;
		if (length($mid) > 5) {
			$mid = " ... ";
		}
		return "$left$mid$right";

	} else {
		my ($body, $tail) = ($str =~ m/^($endre)(.*)\z/s)
			or return $str;
		if (length($tail) > 4) {
			$tail = "... ";
		}
		return "$body$tail";
	}
}
1720
# Like chop_str() (and taking the same arguments), but the result is
# HTML-escaped and, if the string did get chopped, wrapped in a <span>
# whose title attribute holds the full string (with control characters
# replaced by '?').
sub chop_and_escape_str {
	my ($str) = @_;

	my $chopped = chop_str(@_);
	$str = to_utf8($str);

	# nothing was chopped off: no need for the title
	return esc_html($chopped) if ($chopped eq $str);

	$str =~ s/[[:cntrl:]]/?/g;
	return $cgi->span({-title=>$str}, esc_html($chopped));
}
1736
# Escape HTML in a string, wrapping selected fragments in
# <span class="$css_class">.
#
# After the string and the CSS class, the arguments are the regions to
# highlight, given as array references [ $begin, $end ] (end exclusive)
# and assumed not to overlap; all other arguments are taken as options
# and passed on to esc_html().
#
# Example: esc_html_hl_regions("foobar", "mark", [ 0, 3 ]) returns
# '<span class="mark">foo</span>bar'
sub esc_html_hl_regions {
	my ($str, $css_class, @sel) = @_;

	# separate the regions from the esc_html() options
	my (@regions, @opt_list);
	foreach my $arg (@sel) {
		if (ref($arg) eq 'ARRAY') {
			push @regions, $arg;
		} else {
			push @opt_list, $arg;
		}
	}
	my %opts = @opt_list;
	return esc_html($str, %opts) unless @regions;

	my $out = '';
	my $pos = 0; # end of the part of $str that was already output

	foreach my $region (@regions) {
		my ($begin, $end) = @$region;

		# Don't create empty <span> elements.
		next if ($end <= $begin);

		# plain text between the previous region and this one
		if ($begin - $pos > 0) {
			$out .= esc_html(substr($str, $pos, $begin - $pos), %opts);
		}

		my $marked = esc_html(substr($str, $begin, $end - $begin), %opts);
		$out .= $cgi->span({-class => $css_class}, $marked);

		$pos = $end;
	}

	# plain text after the last region
	if ($pos < length($str)) {
		$out .= esc_html(substr($str, $pos), %opts);
	}

	return $out;
}
1772
# Find all matches of a regexp in a string and return their positions as
# a list of [ $begin, $end ] pairs (offsets of the start of the match and
# of the first character after it).  Returns nothing if either argument
# is undefined.
sub matchpos_list {
	my ($str, $regexp) = @_;
	return if (!defined $str || !defined $regexp);

	my @positions;
	push @positions, [ $-[0], $+[0] ]
		while ($str =~ /$regexp/g);

	return @positions;
}
1784
# Escape HTML in a string, highlighting (with CSS class 'match') every
# match of the given regexp; without a regexp, or without any match, the
# string is just escaped.
sub esc_html_match_hl {
	my ($str, $regexp) = @_;
	defined $regexp
		or return esc_html($str);

	my @matches = matchpos_list($str, $regexp);
	@matches
		or return esc_html($str);

	return esc_html_hl_regions($str, 'match', @matches);
}
1795
1796
# Escape HTML in a shortened string, highlighting the matches of a regexp
# in the full string.
#
# $str is the full string, $chopped its shortened form; the matches are
# looked up in $str and marked in $chopped.  A match that lies, fully or
# in part, in the chopped off text is shown by marking the trailing
# filler.  Without $chopped this is the same as esc_html_match_hl().
sub esc_html_match_hl_chopped {
	my ($str, $chopped, $regexp) = @_;
	defined $chopped
		or return esc_html_match_hl($str, $regexp);

	my @matches = matchpos_list($str, $regexp);
	@matches
		or return esc_html($chopped);

	# split the shortened string into the kept text and the filler
	my $tail = "... "; # see chop_str
	$tail = '' unless ($chopped =~ s/\Q$tail\E$//);
	my $chop_len = length($chopped);
	my $tail_len = length($tail);

	# filter matches so that we mark chopped string
	my @filtered;
 MATCH:
	foreach my $m (@matches) {
		my ($begin, $end) = @$m;
		if ($begin > $chop_len) {
			# match is entirely in the removed text: mark the filler
			push @filtered, [ $chop_len, $chop_len + $tail_len ]
				if ($tail_len > 0);
			last MATCH;
		}
		if ($end > $chop_len) {
			# match continues into the removed text: extend the
			# mark over the filler
			push @filtered, [ $begin, $chop_len + $tail_len ];
			last MATCH;
		}
		push @filtered, $m;
	}

	return esc_html_hl_regions($chopped . $tail, 'match', @filtered);
}
1827
1828 ## ----------------------------------------------------------------------
1829 ## functions returning short strings
1830
# Map an age (in seconds) to a CSS class: "noage" for an undefined age,
# "age0" for less than two hours, "age1" for less than two days,
# "age2" for anything older.
sub age_class {
	my ($age) = @_;

	return "noage" unless defined $age;
	return "age0"  if ($age < 60*60*2);
	return "age1"  if ($age < 60*60*24*2);
	return "age2";
}
1845
# Convert an age in seconds to a "nn units ago" string.
# The largest unit of which the age holds more than two is used (years,
# months, weeks, days, hours, minutes, seconds); an age of up to two
# seconds is shown as " right now".
sub age_string {
	my ($age) = @_;

	return int($age/60/60/24/365) . " years ago"
		if ($age > 60*60*24*365*2);
	return int($age/60/60/24/(365/12)) . " months ago"
		if ($age > 60*60*24*(365/12)*2);
	return int($age/60/60/24/7) . " weeks ago"
		if ($age > 60*60*24*7*2);
	return int($age/60/60/24) . " days ago"
		if ($age > 60*60*24*2);
	return int($age/60/60) . " hours ago"
		if ($age > 60*60*2);
	return int($age/60) . " min ago"
		if ($age > 60*2);
	return int($age) . " sec ago"
		if ($age > 2);

	return " right now";
}
1877
# file type bits used here in addition to those provided by Fcntl
use constant S_IFINVALID => 0030000;
use constant S_IFGITLINK => 0160000;

# Check whether the given file mode is that of a gitlink, i.e. a
# submodule/subproject entry, which is a commit object reference.
sub S_ISGITLINK {
	my ($mode) = @_;

	my $type_bits = $mode & S_IFMT;
	return ($type_bits == S_IFGITLINK);
}
1889
# Convert a file mode, given as a string of octal digits, to a symbolic
# file mode string in the style of "ls -l".  The permission part is fixed
# for each file type; only the executable bit of regular files is
# reflected, as that is all git cares about.
sub mode_str {
	my ($octal) = @_;
	my $mode = oct $octal;

	return 'm---------' if S_ISGITLINK($mode);
	return 'drwxr-xr-x' if S_ISDIR($mode & S_IFMT);
	return 'lrwxrwxrwx' if S_ISLNK($mode);
	if (S_ISREG($mode)) {
		return ($mode & S_IXUSR) ? '-rwxr-xr-x' : '-rw-r--r--';
	}
	return '----------';
}
1911
# Convert a file mode given as an octal string into a file type string:
# "submodule", "directory", "symlink", "file" or "unknown".
# A value that does not consist of octal digits only is returned unchanged.
sub file_type {
    my ($mode) = @_;

    return $mode if $mode !~ m/^[0-7]+$/;
    $mode = oct $mode;

    return "submodule" if S_ISGITLINK($mode);
    return "directory" if S_ISDIR($mode & S_IFMT);
    return "symlink"   if S_ISLNK($mode);
    return "file"      if S_ISREG($mode);
    return "unknown";
}
1934
# Convert a file mode given as an octal string into a file type description.
# Like file_type(), but a regular file with the owner-executable bit set is
# described as "executable" instead of "file".
# A value that does not consist of octal digits only is returned unchanged.
sub file_type_long {
    my ($mode) = @_;

    return $mode if $mode !~ m/^[0-7]+$/;
    $mode = oct $mode;

    return "submodule" if S_ISGITLINK($mode);
    return "directory" if S_ISDIR($mode & S_IFMT);
    return "symlink"   if S_ISLNK($mode);
    return "unknown"   unless S_ISREG($mode);

    return ($mode & S_IXUSR) ? "executable" : "file";
}
1961
1962
1963 ## ----------------------------------------------------------------------
1964 ## functions returning short HTML fragments, or transforming HTML fragments
1965 ## which don't belong to other sections
1966
# Format one line of a commit message as HTML.
# The line is passed through esc_html() (with -nbsp), then every run of
# 8 to 40 hex digits delimited by word boundaries is replaced by a link to
# the "object" action for that id.
sub format_log_line_html {
    my ($line) = @_;

    my $html = esc_html($line, -nbsp=>1);
    $html =~ s{\b([0-9a-fA-F]{8,40})\b}{
        my $id = $1;
        $cgi->a({-href => href(action=>"object", hash=>$id),
                 -class => "text"}, $id);
    }eg;

    return $html;
}
1979
# Format markers of the refs pointing to the given object.
#
# $refs maps object ids to lists of ref names (e.g. "tags/v2.6.11" or
# "heads/next"; a trailing "^{}" marks an indirect, i.e. annotated tag,
# entry), $id is the object to look up.
#
# The destination action is chosen based on object type and current context:
# - for annotated tags, we choose the tag view unless it's the current view
#   already, in which case we go to shortlog view
# - for other refs, we keep the current view if we're in history, shortlog or
#   log view, and select shortlog otherwise
#
# Returns ' <span class="refs">...</span>', or "" if no ref points to $id.
sub format_ref_marker {
    my ($refs, $id) = @_;
    my $markers = '';

    if (defined $refs->{$id}) {
        foreach my $listed_ref (@{$refs->{$id}}) {
            # Work on a copy: the loop variable aliases the caller's list,
            # and stripping "^{}" in place would make a later call with the
            # same $refs no longer see this ref as indirect.
            my $ref = $listed_ref;

            # this code exploits the fact that non-lightweight tags are the
            # only indirect objects, and that they are the only objects for which
            # we want to use tag instead of shortlog as action
            my ($type, $name);
            my $indirect = ($ref =~ s/\^\{\}$//);
            # e.g. tags/v2.6.11 or heads/next
            if ($ref =~ m!^(.*?)s?/(.*)$!) {
                $type = $1;
                $name = $2;
            } else {
                $type = "ref";
                $name = $ref;
            }

            my $class = $type;
            $class .= " indirect" if $indirect;

            my $dest_action = "shortlog";

            if ($indirect) {
                $dest_action = "tag" unless $action eq "tag";
            } elsif ($action =~ /^(history|(short)?log)$/) {
                $dest_action = $action;
            }

            my $dest = "";
            $dest .= "refs/" unless $ref =~ m!^refs/!;
            $dest .= $ref;

            # the ref name is arbitrary text and becomes element content,
            # so it has to be HTML-escaped like the attributes below
            my $link = $cgi->a({
                -href => href(
                    action=>$dest_action,
                    hash=>$dest
                )}, esc_html($name));

            $markers .= " <span class=\"".esc_attr($class)."\" title=\"".esc_attr($ref)."\">" .
                $link . "</span>";
        }
    }

    if ($markers) {
        return ' <span class="refs">'. $markers . '</span>';
    } else {
        return "";
    }
}
2039
# Format the title line as a link to $href, followed by $extra (e.g. ref
# markers; defaults to the empty string).
# If $short is shorter than $long, the link shows $short and carries $long
# (with control characters replaced by '?') as its title attribute;
# otherwise the link simply shows $long.
sub format_subject_html {
    my ($long, $short, $href, $extra) = @_;
    $extra = '' unless defined($extra);

    my %attr = (-href => $href, -class => "list subject");
    my $shown = $long;

    if (length($short) < length($long)) {
        $long =~ s/[[:cntrl:]]/?/g;
        $attr{-title} = to_utf8($long);
        $shown = $short;
    }

    return $cgi->a(\%attr, esc_html($shown)) . $extra;
}
2055
2056 # Rather than recomputing the url for an email multiple times, we cache it
2057 # after the first hit. This gives a visible benefit in views where the avatar
2058 # for the same email is used repeatedly (e.g. shortlog).
2059 # The cache is shared by all avatar engines (currently gravatar only), which
2060 # are free to use it as preferred. Since only one avatar engine is used for any
2061 # given page, there's no risk for cache conflicts.
our %avatar_cache = ();

# Return the picon URL for the given email (lowercased first), built for the
# picon search service at http://www.cs.indiana.edu/picons/search.html
# The complete URL is stored in %avatar_cache under the lowercased email and
# reused on later calls.
sub picon_url {
    my $email = lc shift;

    $avatar_cache{$email} ||= do {
        my ($user, $domain) = split('@', $email);
        "http://www.cs.indiana.edu/cgi-pub/kinzler/piconsearch.cgi/" .
        "$domain/$user/" .
        "users+domains+unknown/up/single";
    };

    return $avatar_cache{$email};
}
2077
# Return the gravatar URL for the given email (lowercased first) and size.
# Only the part of the URL before the size -- the one that needs the MD5
# digest -- is kept in %avatar_cache, so a cached entry can be reused for
# any size (for this particular engine).
sub gravatar_url {
    my ($email, $size) = @_;
    $email = lc $email;

    unless ($avatar_cache{$email}) {
        my $digest = Digest::MD5::md5_hex($email);
        $avatar_cache{$email} = "http://www.gravatar.com/avatar/" . $digest . "?s=";
    }

    return $avatar_cache{$email} . $size;
}
2090
# Return an <img> element with the avatar for $email, or "" if no avatar
# provider yields a URL (avatars disabled/unavailable).
# Options:
#   -size       => key into %avatar_size (falls back to 'default')
#   -pad_before => if true, put "&nbsp;" before the image
#   -pad_after  => if true, put "&nbsp;" after the image
sub git_get_avatar {
    my ($email, %opts) = @_;

    $opts{-size} ||= 'default';
    my $size = $avatar_size{$opts{-size}} || $avatar_size{'default'};

    # Other providers can be added by extending this chain, defining $url
    # as needed.  If no variant puts something in $url, we assume avatars
    # are completely disabled/unavailable.
    my $url = "";
    if ($git_avatar eq 'gravatar') {
        $url = gravatar_url($email, $size);
    } elsif ($git_avatar eq 'picon') {
        $url = picon_url($email);
    }
    return "" unless $url;

    my $pre_white  = $opts{-pad_before} ? "&nbsp;" : "";
    my $post_white = $opts{-pad_after}  ? "&nbsp;" : "";
    my $img = qq{<img width="$size" class="avatar" src="} .
        esc_url($url) .
        qq{" alt="" />};

    return $pre_white . $img . $post_white;
}
2119
# Wrap $displaytext in a link that searches for commits by $author, where
# $searchtype is 'author' or 'committer'.  If the 'search' feature is not
# enabled, $displaytext is returned as is.
sub format_search_author {
    my ($author, $searchtype, $displaytext) = @_;

    my $have_search = gitweb_check_feature('search');
    return $displaytext unless $have_search;

    # verb used in the link title
    my $performed = $searchtype eq 'author'    ? "authored"
                  : $searchtype eq 'committer' ? "committed"
                  :                              "";

    my $search_href = href(action=>"search", hash=>$hash,
                           searchtext=>$author,
                           searchtype=>$searchtype);

    return $cgi->a({-href => $search_href, class=>"list",
                    title=>"Search for commits $performed by $author"},
                   $displaytext);
}
2142
# Format the author name of commit $co inside an element named $tag with
# class "author".  The name is chopped and escaped according to the remaining
# optional parameters (see chop_str), preceded by the author's avatar, and
# wrapped in an author search link.
sub format_author_html {
    my ($tag, $co, @chop_args) = @_;

    my $author = chop_and_escape_str($co->{'author_name'}, @chop_args);
    my $avatar = git_get_avatar($co->{'author_email'}, -pad_after => 1);
    my $linked = format_search_author($co->{'author_name'}, "author",
                                      $avatar . $author);

    return "<$tag class=\"author\">" . $linked . "</$tag>";
}
2156
# Format the git diff header line, i.e. "diff --(git|combined|cc) ...".
# Everything after the "diff --<kind> " prefix is replaced by the file
# name(s) from $from and $to, each linked when its 'href' is set.
# A true $diffinfo->{'nparents'} selects the combined diff layout.
sub format_git_diff_header_line {
    my ($line, $diffinfo, $from, $to) = @_;

    # escaped path with the given prefix, as a link if the side has an href
    my $path_html = sub {
        my ($side, $prefix) = @_;
        my $label = $prefix . esc_path($side->{'file'});
        return $label unless $side->{'href'};   # file was added/deleted
        return $cgi->a({-href => $side->{'href'}, -class => "path"}, $label);
    };

    if ($diffinfo->{'nparents'}) {
        # combined diff
        $line =~ s!^(diff (.*?) )"?.*$!$1!;
        $line .= $path_html->($to, '');
    } else {
        # "ordinary" diff
        $line =~ s!^(diff (.*?) )"?a/.*$!$1!;
        $line .= $path_html->($from, 'a/');
        $line .= ' ';
        $line .= $path_html->($to, 'b/');
    }

    return "<div class=\"diff header\">$line</div>\n";
}
2192
# Format one extended diff header line (the lines between the "diff ..."
# header and the patch itself).  Depending on what the line looks like:
# - "copy|rename from/to <path>": the path is replaced by a link built
#   from $from/$to when the corresponding 'href' is set
# - a line ending in a 6-digit mode: the file type description is appended
# - "index <hash>,<hash>..<hash>" (combined diff) or "index <hash>..<hash>"
#   (ordinary diff): hashes are shortened to 7 characters and linked, or
#   shown as 0000000 where there is no href
# Returns the line followed by "<br/>\n".
sub format_extended_diff_header_line {
    my $line = shift;
    my $diffinfo = shift;
    my ($from, $to) = @_;

    # match <path>
    if ($line =~ s!^((copy|rename) from ).*$!$1! && $from->{'href'}) {
        $line .= $cgi->a({-href=>$from->{'href'}, -class=>"path"},
                         esc_path($from->{'file'}));
    }
    if ($line =~ s!^((copy|rename) to ).*$!$1! && $to->{'href'}) {
        $line .= $cgi->a({-href=>$to->{'href'}, -class=>"path"},
                         esc_path($to->{'file'}));
    }
    # match single <mode>
    if ($line =~ m/\s(\d{6})$/) {
        $line .= '<span class="info"> (' .
                 file_type_long($1) .
                 ')</span>';
    }
    # match <hash>
    if ($line =~ m/^index [0-9a-fA-F]{40},[0-9a-fA-F]{40}/) {
        # can match only for combined diff
        $line = 'index ';
        for (my $i = 0; $i < $diffinfo->{'nparents'}; $i++) {
            if ($from->{'href'}[$i]) {
                $line .= $cgi->a({-href=>$from->{'href'}[$i],
                                  -class=>"hash"},
                                 substr($diffinfo->{'from_id'}[$i],0,7));
            } else {
                $line .= '0' x 7;
            }
            # separator
            $line .= ',' if ($i < $diffinfo->{'nparents'} - 1);
        }
        $line .= '..';
        if ($to->{'href'}) {
            $line .= $cgi->a({-href=>$to->{'href'}, -class=>"hash"},
                             substr($diffinfo->{'to_id'},0,7));
        } else {
            $line .= '0' x 7;
        }

    } elsif ($line =~ m/^index [0-9a-fA-F]{40}\.\.[0-9a-fA-F]{40}/) {
        # can match only for ordinary diff; the dots are escaped so that
        # only a literal ".." separator is accepted, like in the
        # substitution below
        my ($from_link, $to_link);
        if ($from->{'href'}) {
            $from_link = $cgi->a({-href=>$from->{'href'}, -class=>"hash"},
                                 substr($diffinfo->{'from_id'},0,7));
        } else {
            $from_link = '0' x 7;
        }
        if ($to->{'href'}) {
            $to_link = $cgi->a({-href=>$to->{'href'}, -class=>"hash"},
                               substr($diffinfo->{'to_id'},0,7));
        } else {
            $to_link = '0' x 7;
        }
        my ($from_id, $to_id) = ($diffinfo->{'from_id'}, $diffinfo->{'to_id'});
        $line =~ s!$from_id\.\.$to_id!$from_link..$to_link!;
    }

    return $line . "<br/>\n";
}
2258
# Format the from-file/to-file diff header ("--- ..." and "+++ ..." lines).
# For an ordinary diff one from-line is produced; for a combined diff
# (true $diffinfo->{'nparents'}) one from-line per parent, each with a link
# to the blobdiff against that parent (@parents supplies the parent
# commits).  Lines naming /dev/null get no extra formatting.
# Returns the concatenated <div> elements.
sub format_diff_from_to_header {
    my ($from_line, $to_line, $diffinfo, $from, $to, @parents) = @_;
    my @rows;

    #assert($from_line =~ m/^---/) if DEBUG;
    # no extra formatting for "^--- /dev/null"
    if ($diffinfo->{'nparents'}) {
        # combined diff (merge commit)
        foreach my $i (0 .. $diffinfo->{'nparents'} - 1) {
            my $line = '--- /dev/null';
            if ($from->{'href'}[$i]) {
                my $diff_link =
                    $cgi->a({-href=>href(action=>"blobdiff",
                                         hash_parent=>$diffinfo->{'from_id'}[$i],
                                         hash_parent_base=>$parents[$i],
                                         file_parent=>$from->{'file'}[$i],
                                         hash=>$diffinfo->{'to_id'},
                                         hash_base=>$hash,
                                         file_name=>$to->{'file'}),
                             -class=>"path",
                             -title=>"diff" . ($i+1)},
                            $i+1);
                my $file_link =
                    $cgi->a({-href=>$from->{'href'}[$i], -class=>"path"},
                            esc_path($from->{'file'}[$i]));
                $line = '--- ' . $diff_link . '/' . $file_link;
            }
            push @rows, qq!<div class="diff from_file">$line</div>\n!;
        }
    } else {
        # ordinary (single parent) diff
        my $line = $from_line;
        if ($line =~ m!^--- "?a/!) {
            my $path = esc_path($from->{'file'});
            $path = $cgi->a({-href=>$from->{'href'}, -class=>"path"}, $path)
                if $from->{'href'};
            $line = '--- a/' . $path;
        }
        push @rows, qq!<div class="diff from_file">$line</div>\n!;
    }

    #assert($to_line =~ m/^\+\+\+/) if DEBUG;
    # no extra formatting for "^+++ /dev/null"
    my $line = $to_line;
    if ($line =~ m!^\+\+\+ "?b/!) {
        my $path = esc_path($to->{'file'});
        $path = $cgi->a({-href=>$to->{'href'}, -class=>"path"}, $path)
            if $to->{'href'};
        $line = '+++ b/' . $path;
    }
    push @rows, qq!<div class="diff to_file">$line</div>\n!;

    return join('', @rows);
}
2324
# Create the note shown for a patch simplified by combined diff: a
# "diff --cc <file>" header (the file is linked to its blob view unless it
# was deleted) followed by a "Simple merge" line.
# @parents is accepted but not used here.
sub format_diff_cc_simplified {
    my ($diffinfo, @parents) = @_;

    my $path = esc_path($diffinfo->{'to_file'});
    unless (is_deleted($diffinfo)) {
        $path = $cgi->a({-href => href(action=>"blob",
                                       hash_base=>$hash,
                                       hash=>$diffinfo->{'to_id'},
                                       file_name=>$diffinfo->{'to_file'}),
                         -class => "path"},
                        $path);
    }

    return "<div class=\"diff header\">" . "diff --cc " . $path . "</div>\n" .
           "<div class=\"diff nodifferences\">" . "Simple merge" . "</div>\n";
}
2349
# Return the CSS class for a patch line: "chunk_header", "incomplete",
# "ctx", "add", "rem", or "" when nothing matches.
# For a combined diff ($from and $to given and $from->{'href'} an array
# reference) the number of leading sign columns equals the number of
# entries in that array; otherwise it is 1.
sub diff_line_class {
    my ($line, $from, $to) = @_;

    my $is_combined = $from && $to && ref($from->{'href'}) eq "ARRAY";
    my $num_sign = $is_combined ? scalar @{$from->{'href'}} : 1;

    return "chunk_header" if $line =~ /^\@\@{$num_sign} /;
    return "incomplete"   if $line =~ /^\\/;
    # the check for context must come before the checks for add/rem,
    # or we would have to use more complicated regexp, for example
    # qr/(?= {0,$m}\+)[+ ]{$num_sign}/, where $m = $num_sign - 1;
    return "ctx"          if $line =~ /^ {$num_sign}/;
    return "add"          if $line =~ /^[+ ]{$num_sign}/;
    return "rem"          if $line =~ /^[- ]{$num_sign}/;

    # fallback
    return "";
}
2378
# Format a chunk header line of a unified diff ("@@ -a,b +c,d @@ section").
# Assumes that $from and $to are defined and correctly filled, and that
# $line holds such a chunk header.  The "-a,b" and "+c,d" ranges are linked
# to the starting line in the respective file when its 'href' is set; the
# trailing section text is escaped.
sub format_unidiff_chunk_header {
    my ($line, $from, $to) = @_;

    # the line counts (3rd and 6th capture) are not needed here
    my ($from_text, $from_start, undef, $to_text, $to_start, undef, $section) =
        $line =~ m/^\@{2} (-(\d+)(?:,(\d+))?) (\+(\d+)(?:,(\d+))?) \@{2}(.*)$/;

    # range text, linked to line $start of the given side if possible
    my $range_html = sub {
        my ($side, $text, $start) = @_;
        return $text unless $side->{'href'};
        return $cgi->a({-href=>"$side->{'href'}#l$start",
                        -class=>"list"}, $text);
    };
    $from_text = $range_html->($from, $from_text, $from_start);
    $to_text   = $range_html->($to, $to_text, $to_start);

    return "<span class=\"chunk_info\">@@ $from_text $to_text @@</span>" .
           "<span class=\"section\">" . esc_html($section, -nbsp=>1) . "</span>";
}
2402
# Format a chunk header line of a combined diff
# ("@@@ -a,b -c,d +e,f @@@ section").
# Assumes that $from and $to are defined and correctly filled, and that
# $line holds such a chunk header.  The last range belongs to the result
# ($to), the preceding ones to the parents ($from->{'href'}[$i]); each range
# is linked to its starting line when an href is available.
sub format_cc_diff_chunk_header {
    my ($line, $from, $to) = @_;

    my ($prefix, $ranges, $section) = $line =~ m/^(\@+) (.*?) \@+(.*)$/;

    my @range_text = split(' ', $ranges);
    # starting line of each range: text after the sign, up to the comma
    my @range_start = map {
        my ($start) = (split(',', substr($_, 1)), 0);
        $start;
    } @range_text;

    my $to_text  = pop @range_text;
    my $to_start = pop @range_start;

    my @parts;
    foreach my $i (0 .. $#range_text) {
        my $part = $range_text[$i];
        if ($from->{'href'}[$i]) {
            $part = $cgi->a({-href=>"$from->{'href'}[$i]#l$range_start[$i]",
                             -class=>"list"}, $range_text[$i]);
        }
        push @parts, $part;
    }

    my $to_part = $to_text;
    if ($to->{'href'}) {
        $to_part = $cgi->a({-href=>"$to->{'href'}#l$to_start",
                            -class=>"list"}, $to_text);
    }
    push @parts, $to_part;

    return "<span class=\"chunk_info\">$prefix " . join(" ", @parts) .
           " $prefix</span>" .
           "<span class=\"section\">" . esc_html($section, -nbsp=>1) . "</span>";
}
2441
# Process a patch (diff) line (not to be used for diff headers), returning
# it wrapped in a <div> whose class is "diff" plus $diff_class, if given.
# If the line is passed as a reference, it is treated as HTML and used as
# is.  Otherwise it is chomped and untabified, then formatted as a chunk
# header (when $from and $to are given and the line starts like one) or
# escaped with esc_html().
sub format_diff_line {
    my ($line, $diff_class, $from, $to) = @_;

    my $html;
    if (ref($line)) {
        $html = $$line;
    } else {
        chomp $line;
        my $text = untabify($line);
        my $have_sides = $from && $to;

        if ($have_sides && $text =~ m/^\@{2} /) {
            $html = format_unidiff_chunk_header($text, $from, $to);
        } elsif ($have_sides && $text =~ m/^\@{3}/) {
            $html = format_cc_diff_chunk_header($text, $from, $to);
        } else {
            $html = esc_html($text, -nbsp=>1);
        }
    }

    my $diff_classes = $diff_class ? "diff $diff_class" : "diff";

    return "<div class=\"$diff_classes\">$html</div>\n";
}
2470
# Generate snapshot links for the tree/commit with the given hash, based on
# the formats listed in @snapshot_fmts:
# - several formats: "snapshot (_tar.gz_ _zip_)", one link per format,
#   labelled with the format's display name
# - one format: a single "_snapshot_" link whose tooltip names the format
# - no format: undef
sub format_snapshot_links {
    my ($hash) = @_;

    return undef unless @snapshot_fmts;

    if (@snapshot_fmts == 1) {
        # A single "snapshot" link whose tooltip bears the format name.
        my $fmt = $snapshot_fmts[0];
        return
            $cgi->a({
                -href => href(
                    action=>"snapshot",
                    hash=>$hash,
                    snapshot_format=>$fmt
                ),
                -title => "in format: $known_snapshot_formats{$fmt}{'display'}"
            }, "snapshot");
    }

    # A parenthesized list of links bearing format names.
    my @links;
    foreach my $fmt (@snapshot_fmts) {
        push @links, $cgi->a({
            -href => href(
                action=>"snapshot",
                hash=>$hash,
                snapshot_format=>$fmt
            )
        }, $known_snapshot_formats{$fmt}{'display'});
    }
    return "snapshot (" . join(' ', @links) . ")";
}
2505
2506 ## ......................................................................
2507 ## functions returning values to be passed, perhaps after some
2508 ## transformation, to other functions; e.g. returning arguments to href()
2509
# Return a hash to be passed to href() to generate the gitweb feed URL for
# the current view; its -title key holds a description of the link.
# $format (default 'Atom') is lowercased to give the action.
# Returns nothing when there is no project, no action, or the action is one
# of tags, heads, forks, tag, search.
sub get_feed_info {
    my $format = shift || 'Atom';
    my %res = (action => lc($format));

    # feed links are possible only for project views
    return unless (defined $project);
    # some views should link to OPML, or to generic project feed,
    # or don't have specific feed yet (so they should use generic)
    return if (!$action || $action =~ /^(?:tags|heads|forks|tag|search)$/x);

    # branches refs uses 'refs/heads/' prefix (fullname) to differentiate
    # from tag links; this also makes possible to detect branch links
    my $branch;
    foreach my $candidate ($hash_base, $hash) {
        next unless defined $candidate;
        if ($candidate =~ m!^refs/heads/(.*)$!) {
            $branch = $1;
            last;
        }
    }

    # find log type for feed description (title)
    my $type;
    if (defined $file_name) {
        $type = "history of $file_name";
        $type .= "/" if ($action eq 'tree');
        $type .= " on '$branch'" if (defined $branch);
    } else {
        $type = defined $branch ? "log of $branch" : 'log';
    }

    $res{-title} = $type;
    $res{'hash'} = (defined $branch ? "refs/heads/$branch" : undef);
    $res{'file_name'} = $file_name;

    return %res;
}
2545
2546 ## ----------------------------------------------------------------------
2547 ## git utility subroutines, invoking git commands
2548
# Return, as a list, the path to the core git executable and the --git-dir
# parameter for the current $git_dir.  Each call also increments
# $number_of_git_cmds.
sub git_cmd {
    ++$number_of_git_cmds;
    return ($GIT, "--git-dir=$git_dir");
}
2554
# Quote the given arguments for passing them to the shell: each argument is
# wrapped in single quotes, and every ' or ! inside it is written as a
# closing quote, the backslash-escaped character, and a reopening quote.
# quote_command("command", "arg 1", "arg with ' and ! characters")
# => "'command' 'arg 1' 'arg with '\'' and '\!' characters'"
# Try to avoid using this function wherever possible.
sub quote_command {
    my @quoted;
    foreach my $word (@_) {
        (my $escaped = $word) =~ s/(['!])/'\\$1'/g;
        push @quoted, "'$escaped'";
    }
    return join(' ', @quoted);
}
2563
# Get the full hash of the HEAD ref of the given project.
sub git_get_head_hash {
    my ($project) = @_;
    return git_get_full_hash($project, 'HEAD');
}
2568
# Resolve a revision in a project to its hash; all arguments are handed to
# git_get_hash() unchanged.
sub git_get_full_hash {
    my @args = @_;
    return git_get_hash(@args);
}
2572
# Like git_get_full_hash(), but appends the '--short=7' option to the
# arguments handed to git_get_hash().
sub git_get_short_hash {
    my @args = @_;
    return git_get_hash(@args, '--short=7');
}
2576
# Run "git rev-parse --verify -q @options $hash" in "$projectroot/$project"
# and return the first line of its output without the line ending, or undef
# if the command could not be started or printed nothing.
# $git_dir is pointed at the project for the duration of the call and put
# back afterwards, provided it was defined before.
sub git_get_hash {
    my ($project, $hash, @options) = @_;

    my $saved_git_dir = $git_dir;
    $git_dir = "$projectroot/$project";

    # git_cmd() must be called after $git_dir has been switched
    my @rev_parse = (git_cmd(), 'rev-parse',
                     '--verify', '-q', @options, $hash);

    my $retval;
    if (open my $fd, '-|', @rev_parse) {
        $retval = <$fd>;
        chomp $retval if defined $retval;
        close $fd;
    }

    $git_dir = $saved_git_dir if defined $saved_git_dir;

    return $retval;
}
2593
# Get the type of the given object, as printed by "git cat-file -t".
# Returns nothing if the command cannot be started or closing the pipe
# reports failure.
sub git_get_type {
    my ($hash) = @_;

    my @cat_file = (git_cmd(), "cat-file", '-t', $hash);
    open my $fd, "-|", @cat_file
        or return;
    my $type = <$fd>;
    close $fd
        or return;

    chomp $type;
    return $type;
}
2604
# repository configuration: %config holds the parsed settings, $config_file
# the path of the config file they were loaded for ('' before any load)
our %config;
our $config_file = '';
2608
# Store $value under $key in the hash referenced by $hash, keeping multiple
# values for a single key as an anonymous array reference.
# A single value is stored directly in the hash, not as [ <value> ]; the
# second value turns the entry into an array, later ones are appended.
sub hash_set_multi {
    my ($hash, $key, $value) = @_;

    if (exists $hash->{$key}) {
        my $current = $hash->{$key};
        if (ref $current) {
            push @{$current}, $value;
        } else {
            $hash->{$key} = [ $current, $value ];
        }
    } else {
        $hash->{$key} = $value;
    }
}
2622
# Return a hash of the git project configuration, as listed by
# "git config -z -l" for the current $git_dir.
# $section_regexp optionally limits the result to keys of matching
# sections, e.g. 'gitweb'.  Keys given more than once get an array
# reference as value (see hash_set_multi).
# Returns nothing if the git command cannot be started.
sub git_parse_project_config {
    my $section_regexp = shift;
    my %config;

    # Compile the section filter once per call.  (Using the /o modifier on
    # the match instead would keep the pattern of the very first call for
    # the whole life of the process, ignoring a different $section_regexp
    # passed later.)
    my $section_re = defined $section_regexp
        ? qr/^(?:$section_regexp)\./
        : undef;

    # entries are NUL-terminated in 'git config -z' output
    local $/ = "\0";

    open my $fh, "-|", git_cmd(), "config", '-z', '-l'
        or return;

    while (my $keyval = <$fh>) {
        chomp $keyval;
        # key and value are separated by the first newline
        my ($key, $value) = split(/\n/, $keyval, 2);

        hash_set_multi(\%config, $key, $value)
            if (!defined $section_re || $key =~ $section_re);
    }
    close $fh;

    return %config;
}
2645
# Convert a config value to a boolean (a true or false Perl value).
# No value at all (undef, i.e. just "section.key"), a number > 0, and the
# words 'true' and 'yes' (in any case) are true; surrounding whitespace is
# ignored.  All other values are treated as false (never as error).
sub config_to_bool {
    my ($val) = @_;

    return 1 unless defined $val;   # section.key

    # strip leading and trailing whitespace
    for ($val) {
        s/^\s+//;
        s/\s+$//;
    }

    my $nonzero_number = ($val =~ /^\d+$/ && $val);   # section.key = 1
    return $nonzero_number ||
           ($val =~ /^(?:true|yes)$/i);               # section.key = true
}
2661
# Convert a config value to a simple decimal number.
# Surrounding whitespace is removed.  An optional value suffix of 'k', 'm',
# or 'g' (in any case) will cause the value to be multiplied by 1024,
# 1048576, or 1073741824.  A value without such a suffix is returned as is.
sub config_to_int {
    my ($val) = @_;

    # strip leading and trailing whitespace
    $val =~ s/^\s+//;
    $val =~ s/\s+$//;

    my %multiplier = (
        k => 1024,
        m => 1048576,
        g => 1073741824,
    );
    if ($val =~ /^([0-9]*)([kmg])$/i) {
        return $1 * $multiplier{lc $2};
    }

    return $val;
}
2681
# Convert a config value to an array reference, if needed: a reference is
# returned unchanged, a defined plain value becomes [ value ], and undef
# becomes an empty array reference.
sub config_to_multi {
    my ($val) = @_;

    return $val if ref($val);
    return [ $val ] if defined($val);
    return [];
}
2688
# Return the value of the gitweb.$key configuration variable of the current
# repository ($git_dir), or nothing if $git_dir is unset, the key is empty
# or malformed, or the variable does not exist.
# $key may be given with or without the "gitweb." prefix.  $type may be
# 'bool' (result is the string 'true' or 'false') or 'int' (see
# config_to_int), optionally written with a leading "--"; any other type is
# ignored and the raw value returned.
# The parsed configuration is kept in %config and reloaded only when
# $git_dir has changed since the last load.
sub git_get_project_config {
    my ($key, $type) = @_;

    return unless defined $git_dir;

    # key sanity check
    return unless ($key);
    # only subsection, if exists, is case sensitive,
    # and not lowercased by 'git config -z -l'
    if ($key =~ /^([^.]*)\.(.*)\.([^.]*)$/) {
        $key = join(".", lc($1), $2, lc($3));
    } else {
        $key = lc($key);
    }
    $key =~ s/^gitweb\.//;
    return if ($key =~ m/\W/);
    my $full_key = "gitweb.$key";

    # type sanity check
    if (defined $type) {
        $type =~ s/^--//;
        undef $type
            unless ($type eq 'bool' || $type eq 'int');
    }

    # get config, unless the cached one is for this repository already
    my $wanted_file = "$git_dir/config";
    unless (defined $config_file && $config_file eq $wanted_file) {
        %config = git_parse_project_config('gitweb');
        $config_file = $wanted_file;
    }

    # check if config variable (key) exists
    return unless exists $config{$full_key};
    my $value = $config{$full_key};

    # ensure given type
    return $value unless defined $type;
    # backward compatibility: 'git config --bool' returns true/false
    return config_to_bool($value) ? 'true' : 'false'
        if $type eq 'bool';
    # the only type left after the sanity check is 'int'
    return config_to_int($value);
}
2734
# Get the hash of the entry at the given path in the given ref ($base),
# using "git ls-tree".
# $type, if given, is the object type ('blob', 'tree', ...) the entry must
# have.
# Returns undef if no path is given, if closing the command pipe reports
# failure, if there is no such entry, if the ls-tree output cannot be
# parsed, or if the entry's type differs from $type.  Failure to start the
# command is reported through die_error().
sub git_get_hash_by_path {
    my $base = shift;
    my $path = shift || return undef;
    my $type = shift;

    # ls-tree is asked about the entry itself, so drop trailing slashes
    $path =~ s,/+$,,;

    open my $fd, "-|", git_cmd(), "ls-tree", $base, "--", $path
        or die_error(500, "Open git-ls-tree failed");
    my $line = <$fd>;
    close $fd or return undef;

    if (!defined $line) {
        # there is no tree or hash given by $path at $base
        return undef;
    }

    #'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa	panic.c'
    # Capture into lexicals and bail out on a failed match; relying on
    # $2/$3 afterwards would pick up stale values from an earlier
    # successful match.
    my ($obj_type, $obj_hash) =
        $line =~ m/^[0-9]+ (.+) ([0-9a-fA-F]{40})\t/
        or return undef;

    if (defined $type && $type ne $obj_type) {
        # type doesn't match
        return undef;
    }
    return $obj_hash;
}
2761
# Get the path of the entry with the given hash at the given tree-ish (ref),
# by scanning the output of "git ls-tree -r -t -z".
# Used to get the 'from' filename for combined diff (merge commit) for
# renames.
# Returns nothing when $base or $hash is missing, and undef when the command
# cannot be started or no entry has that hash.
sub git_get_path_by_hash {
    my $base = shift || return;
    my $hash = shift || return;

    # entries are NUL-terminated because of '-z'
    local $/ = "\0";

    # Built once, outside the loop; \Q...\E makes sure $hash is matched
    # literally even if it contains regexp metacharacters.
    #'040000 tree 595596a6a9117ddba9fe379b6b012b558bac8423	gitweb'
    #'100644 blob e02e90f0429be0d2a69b76571101f20b8f75530f	gitweb/README'
    my $entry_re = qr/(?:[0-9]+) (?:.+) \Q$hash\E\t(.+)$/;

    open my $fd, "-|", git_cmd(), "ls-tree", '-r', '-t', '-z', $base
        or return undef;
    while (my $line = <$fd>) {
        chomp $line;

        if ($line =~ $entry_re) {
            my $path = $1;
            close $fd;
            return $path;
        }
    }
    close $fd;
    return undef;
}
2785
2786 ## ......................................................................
2787 ## git utility functions, directly accessing git repository
2788
# Get the value of a config variable either from the file named as the
# variable itself in the repository ($GIT_DIR/$name file; its first line,
# without the line ending), or, if that file cannot be opened, from the
# gitweb.$name configuration variable in the repository config file.
# Sets $git_dir to "$projectroot/$path".
sub git_get_file_or_project_config {
    my ($path, $name) = @_;

    $git_dir = "$projectroot/$path";

    my $fd;
    unless (open $fd, '<', "$git_dir/$name") {
        return git_get_project_config($name);
    }
    my $conf = <$fd>;
    close $fd;

    chomp $conf if defined $conf;
    return $conf;
}
2805
# Get the 'description' of the project at $path (relative to $projectroot),
# via git_get_file_or_project_config().
sub git_get_project_description {
    my ($path) = @_;
    return git_get_file_or_project_config($path, 'description');
}
2810
# Get the 'category' of the project at $path (relative to $projectroot),
# via git_get_file_or_project_config().
sub git_get_project_category {
    my ($path) = @_;
    return git_get_file_or_project_config($path, 'category');
}
2815
2816
# Return a hash reference mapping content tags ('ctags') of the given
# project to their weights.  Sets $git_dir to "$projectroot/$project".
#
# supported formats (the first one found is used):
# * $GIT_DIR/ctags/<tagname> file (in 'ctags' subdirectory)
#   - if the first line of its contents is a number, use it as tag weight,
#   - otherwise add a tag with weight 1
# * $GIT_DIR/ctags file, each line is a tag (with weight 1)
#   the same value multiple times increases tag weight
# * `gitweb.ctag' multi-valued repo config variable
sub git_get_project_ctags {
    my $project = shift;
    my $ctags = {};

    $git_dir = "$projectroot/$project";
    if (opendir my $dh, "$git_dir/ctags") {
        # plain files only; this also drops '.' and '..'
        my @files = grep { -f $_ } map { "$git_dir/ctags/$_" } readdir($dh);
        foreach my $tagfile (@files) {
            open my $ct, '<', $tagfile
                or next;
            my $val = <$ct>;
            chomp $val if defined $val;
            close $ct;

            # tag name is the file name without the directory part
            (my $ctag = $tagfile) =~ s#.*/##;
            # an empty file yields undef; treat it like any non-number
            # instead of matching against an undefined value
            if (defined $val && $val =~ /^\d+$/) {
                $ctags->{$ctag} = $val;
            } else {
                $ctags->{$ctag} = 1;
            }
        }
        closedir $dh;

    } elsif (open my $fh, '<', "$git_dir/ctags") {
        while (my $line = <$fh>) {
            chomp $line;
            $ctags->{$line}++ if $line;
        }
        close $fh;

    } else {
        my $taglist = config_to_multi(git_get_project_config('ctag'));
        foreach my $tag (@$taglist) {
            $ctags->{$tag}++;
        }
    }

    return $ctags;
}
2863
# Return a hash reference where keys are content tags ('ctags'), and values
# are the sum of the weights of the given tag over every project in the
# list referenced by $projects (each project's tags are in its 'ctags' key).
sub git_gather_all_ctags {
    my ($projects) = @_;
    my %total;

    foreach my $proj (@{$projects}) {
        $total{$_} += $proj->{'ctags'}->{$_}
            for keys %{$proj->{'ctags'}};
    }

    return \%total;
}
2878
# Build the tag cloud data for the given ctags (tag => weight hash
# reference).  Tags differing only in case are merged: their weights are
# added up and the spelling with the highest weight is displayed.  The tag
# equal to the 'ctag' input parameter is wrapped in a "match" span.
# Returns an HTML::TagCloud object if that module can be loaded; otherwise
# a hash reference mapping each lowercased tag to { count => total weight,
# ctag => link HTML }.
sub git_populate_project_tagcloud {
    my ($ctags) = @_;

    # First, merge different-cased tags; tags vote on casing
    my %ctags_lc;
    foreach my $name (keys %$ctags) {
        my $weight = $ctags->{$name};
        my $merged = ($ctags_lc{lc $name} ||= {});

        $merged->{count} += $weight;
        if (!$merged->{topcount} || $merged->{topcount} < $weight) {
            $merged->{topcount} = $weight;
            $merged->{topname}  = $name;
        }
    }

    my $matched = $input_params{'ctag'};
    # mark the title of the currently selected tag
    my $highlight = sub {
        my ($ctag, $title) = @_;
        return $title unless defined $matched && $matched eq $ctag;
        return qq(<span class="match">$title</span>);
    };

    my $have_tagcloud = eval { require HTML::TagCloud; 1; };
    unless ($have_tagcloud) {
        my %plain;
        foreach my $ctag (keys %ctags_lc) {
            my $title = esc_html($ctags_lc{$ctag}->{topname}, -nbsp=>1);
            $title = $highlight->($ctag, $title);
            $plain{$ctag}{count} = $ctags_lc{$ctag}->{count};
            $plain{$ctag}{ctag} =
                $cgi->a({-href=>href(project=>undef, ctag=>$ctag)}, $title);
        }
        return \%plain;
    }

    my $cloud = HTML::TagCloud->new;
    foreach my $ctag (sort keys %ctags_lc) {
        # Pad the title with spaces so that the cloud looks
        # less crammed.
        my $title = esc_html($ctags_lc{$ctag}->{topname});
        $title =~ s/ /&nbsp;/g;
        $title =~ s/^/&nbsp;/g;
        $title =~ s/$/&nbsp;/g;
        $title = $highlight->($ctag, $title);
        $cloud->add($title, href(project=>undef, ctag=>$ctag),
                    $ctags_lc{$ctag}->{count});
    }
    return $cloud;
}
2924
# Return the HTML for a tag cloud built by git_populate_project_tagcloud(),
# limited to $count tags.
# An HTML::TagCloud object renders itself; the plain hash fallback is
# rendered as a comma-separated list of the tag links, ordered by
# increasing weight, inside <div id="htmltagcloud"> (centered when no
# project is selected).
sub git_show_project_tagcloud {
    my ($cloud, $count) = @_;
    our $project;   # global: set when a project is being viewed

    if (ref($cloud) eq 'HTML::TagCloud') {
        return $cloud->html_and_css($count);
    }

    my @tags = sort { $cloud->{$a}->{'count'} <=> $cloud->{$b}->{'count'} } keys %$cloud;

    # When there are more than $count tags, keep the heaviest ones: they
    # are at the end of the ascending list, so drop the excess from the
    # front.  (Taking the first $count entries would show the least used
    # tags and hide the most used ones.)
    my $excess = @tags - $count;
    splice(@tags, 0, $excess) if $excess > 0;

    return
        '<div id="htmltagcloud"'.($project ? '' : ' align="center"').'>' .
        join (', ', map {
            $cloud->{$_}->{'ctag'}
        } @tags) .
        '</div>';
}
2939
# Get the list of URLs of the project at $path (relative to $projectroot):
# the lines of its $GIT_DIR/cloneurl file or, if that file cannot be
# opened, the values of the gitweb.url config variable.
# Returns the list in list context and an array reference otherwise.
# Sets $git_dir to "$projectroot/$path".
sub git_get_project_url_list {
    my ($path) = @_;

    $git_dir = "$projectroot/$path";

    my $fd;
    unless (open $fd, '<', "$git_dir/cloneurl") {
        my $configured = config_to_multi(git_get_project_config('url'));
        return wantarray ? @{$configured} : $configured;
    }

    my @git_project_url_list = <$fd>;
    chomp @git_project_url_list;
    close $fd;

    return wantarray ? @git_project_url_list : \@git_project_url_list;
}
2953
# Return the list of projects, each as a hash reference with a 'path' key
# (and an 'owner' key when the projects file names an owner).
#
# $filter   - optional path prefix: only projects below "$filter/" are listed
# $paranoid - when true and $projects_list is a directory, the whole
#             directory is scanned and $filter is applied to the paths
#             found, instead of scanning only the "$filter" subdirectory
#
# $projects_list is either a directory to scan or a file with one
# url-encoded "<path> <owner>" entry per line.  Only projects accepted by
# check_export_ok() are returned.
sub git_get_projects_list {
	my $filter = shift || '';
	my $paranoid = shift;
	my @list;

	if (-d $projects_list) {
		# search in directory
		my $dir = $projects_list;
		# remove the trailing "/"
		$dir =~ s!/+$!!;
		my $pfxlen = length("$dir");
		my $pfxdepth = ($dir =~ tr!/!!);
		# when filtering, search only given subdirectory
		if ($filter && !$paranoid) {
			$dir .= "/$filter";
			$dir =~ s!/+$!!;
		}

		File::Find::find({
			follow_fast => 1, # follow symbolic links
			follow_skip => 2, # ignore duplicates
			dangling_symlinks => 0, # ignore dangling symlinks, silently
			wanted => sub {
				# global variables
				our $project_maxdepth;
				our $projectroot;
				# skip project-list toplevel, if we get it.
				return if (m!^[/.]$!);
				# only directories can be git repositories
				return unless (-d $_);
				# don't traverse too deep (Find is super slow on os x)
				# $project_maxdepth excludes depth of $projectroot
				if (($File::Find::name =~ tr!/!!) - $pfxdepth > $project_maxdepth) {
					$File::Find::prune = 1;
					return;
				}

				my $path = substr($File::Find::name, $pfxlen + 1);
				# paranoidly only filter here
				if ($paranoid && $filter && $path !~ m!^\Q$filter\E/!) {
					# this is a callback, not a loop body: leave it with
					# 'return' ('next' would exit the subroutine through
					# a loop control statement and trigger a warning)
					return;
				}
				# we check related file in $projectroot
				if (check_export_ok("$projectroot/$path")) {
					push @list, { path => $path };
					# a repository was found: do not look inside it
					$File::Find::prune = 1;
				}
			},
		}, "$dir");

	} elsif (-f $projects_list) {
		# read from file(url-encoded):
		# 'git%2Fgit.git Linus+Torvalds'
		# 'libs%2Fklibc%2Fklibc.git H.+Peter+Anvin'
		# 'linux%2Fhotplug%2Fudev.git Greg+Kroah-Hartman'
		open my $fd, '<', $projects_list or return;
	PROJECT:
		while (my $line = <$fd>) {
			chomp $line;
			my ($path, $owner) = split ' ', $line;
			$path = unescape($path);
			$owner = unescape($owner);
			# blank line: nothing to list
			if (!defined $path) {
				next PROJECT;
			}
			# if $filter is provided, check if $path begins with $filter
			if ($filter && $path !~ m!^\Q$filter\E/!) {
				next PROJECT;
			}
			if (check_export_ok("$projectroot/$path")) {
				my $pr = {
					path => $path
				};
				if ($owner) {
					$pr->{'owner'} = to_utf8($owner);
				}
				push @list, $pr;
			}
		}
		close $fd;
	}
	return @list;
}
3037
# Return the projects of @$projects that are not forks of another project.
#
# A project is a fork when its path lies inside the fork directory of
# another listed project (forks of 'repo.git' live under 'repo/').
# As a side effect, every project whose fork directory exists gets a
# 'forks' field holding the list of its forks.
#
# Written with help of Tree::Trie module (Perl Artistic License, GPL compatible).
sub filter_forks_from_projects_list {
	my $projects = shift;

	# Prefix tree keyed by path components.  The '' key of a node, when
	# present, is the end marker: it holds the project whose fork
	# directory ends at that node.
	my %trie;

	# first pass: enter the fork directory of each project into the trie
	foreach my $pr (@$projects) {
		(my $forkdir = $pr->{'path'}) =~ s/\.git$//; # 'repo.git' => 'repo'
		next if ($forkdir =~ m!/$!);            # non-bare repository, e.g. 'repo/.git'
		next if (!$forkdir);                    # '.git' repository: tests, git-instaweb
		next if (!-d "$projectroot/$forkdir");  # no directory that could hold forks
		$pr->{'forks'} = []; # there can be 0 or more forks of project

		# descend along the path components, adding missing nodes
		my $node = \%trie;
		foreach my $component (split('/', $forkdir)) {
			$node->{$component} = {}
				unless exists $node->{$component};
			$node = $node->{$component};
		}
		# the first project to claim a directory keeps it
		$node->{''} = $pr
			unless exists $node->{''};
	}

	# second pass: a project is a fork if walking its path through the
	# trie reaches an end marker (the shortest matching prefix wins)
	my @filtered;
 PROJECT:
	foreach my $pr (@$projects) {
		my $node = \%trie;
		foreach my $component (split('/', $pr->{'path'})) {
			if (exists $node->{''}) {
				# inside somebody's fork directory: record and skip
				push @{$node->{''}{'forks'}}, $pr;
				next PROJECT;
			}
			# path leaves the trie: no prefix can match, not a fork
			last unless exists $node->{$component};
			$node = $node->{$component};
		}
		push @filtered, $pr;
	}

	return @filtered;
}
3097
# Return the projects of @$projlist selected by the given options:
#
#   tagfilter     => keep projects having this content tag (compared
#                    case-insensitively with the keys of 'ctags')
#   search_regexp => keep projects whose 'path' or 'descr_long' matches
#
# Without either option the whole list is returned.  The fields needed
# for the comparison are filled in by fill_project_list_info() first.
sub search_projects_list {
	my ($projlist, %opts) = @_;
	my $tagfilter = $opts{'tagfilter'};
	my $search_re = $opts{'search_regexp'};

	# nothing to search for
	if (!$tagfilter && !$search_re) {
		return @$projlist;
	}

	# searching projects require filling to be run before it;
	my @needed;
	push @needed, 'ctags'
		if $tagfilter;
	push @needed, 'path', 'descr'
		if $search_re;
	fill_project_list_info($projlist, @needed);

	my @projects;
	foreach my $pr (@$projlist) {
		if ($tagfilter) {
			my $ctags = $pr->{'ctags'};
			next if (ref($ctags) ne 'HASH');

			my $tagged = 0;
			foreach my $ctag (keys %$ctags) {
				if (lc($ctag) eq lc($tagfilter)) {
					$tagged = 1;
					last;
				}
			}
			next if (!$tagged);
		}

		if ($search_re) {
			next if ($pr->{'path'} !~ /$search_re/ &&
			         $pr->{'descr_long'} !~ /$search_re/);
		}

		push @projects, $pr;
	}

	return @projects;
}
3133
# Owners read from the $projects_list file: undef until the file has been
# consulted, afterwards a hash reference mapping project path to owner.
our $gitweb_project_owner = undef;

# Fill $gitweb_project_owner from the $projects_list file.
#
# Does nothing when the owners were already loaded.  When $projects_list
# is not a plain file, or cannot be opened, the table is left empty.
sub git_get_project_list_from_file {

	return if (defined $gitweb_project_owner);

	$gitweb_project_owner = {};
	# read from file (url-encoded):
	# 'git%2Fgit.git Linus+Torvalds'
	# 'libs%2Fklibc%2Fklibc.git H.+Peter+Anvin'
	# 'linux%2Fhotplug%2Fudev.git Greg+Kroah-Hartman'
	return unless (-f $projects_list);
	open(my $fd, '<', $projects_list)
		or return;
	while (my $line = <$fd>) {
		chomp $line;
		my ($pr, $ow) = split ' ', $line;
		# blank line: there is no project to record an owner for
		next unless defined $pr;
		$pr = unescape($pr);
		$ow = unescape($ow);
		$gitweb_project_owner->{$pr} = to_utf8($ow);
	}
	close $fd;
}
3156
# Return the owner of $project, or undef when no project is given.
#
# The owner is looked up, in this order, in the owners loaded from the
# projects file ($gitweb_project_owner), in the project's 'owner'
# configuration variable, and finally via get_file_owner() on the
# project's directory.  Sets the global $git_dir.
sub git_get_project_owner {
	my $project = shift;

	return undef unless $project;
	$git_dir = "$projectroot/$project";

	# load the projects file on first use
	git_get_project_list_from_file()
		unless (defined $gitweb_project_owner);

	my $owner = exists $gitweb_project_owner->{$project}
		? $gitweb_project_owner->{$project}
		: undef;
	$owner = git_get_project_config('owner')
		unless (defined $owner);
	$owner = get_file_owner("$git_dir")
		unless (defined $owner);

	return $owner;
}
3180
# Return the age of the most recent commit on any branch of the project
# at $path, as a pair ($age_in_seconds, $age_as_string).
#
# The age is taken from the committer line of the newest ref below
# refs/heads.  Returns (undef, undef) when no timestamp could be found,
# and an empty list when git could not be run or failed.
# Sets the global $git_dir.
sub git_get_last_activity {
	my ($path) = @_;

	$git_dir = "$projectroot/$path";
	my @for_each_ref = (git_cmd(), 'for-each-ref',
		'--format=%(committer)',
		'--sort=-committerdate',
		'--count=1',
		'refs/heads');
	open(my $fd, "-|", @for_each_ref) or return;
	my $most_recent = <$fd>;
	close $fd or return;

	# "<name> <email> <timestamp> <timezone>": pick the timestamp
	return (undef, undef)
		unless (defined $most_recent &&
		        $most_recent =~ / (\d+) [-+][01]\d\d\d$/);
	my $age = time - $1;
	return ($age, age_string($age));
}
3201
# Return the remotes of the current repository as a hash (list context)
# or hash reference (otherwise) mapping each remote name to a hash with
# the URL for every key reported by git (e.g. 'fetch', 'push') and a
# 'heads' entry, left undefined here.  When $wanted is given, only that
# remote is returned.  Returns nothing when git cannot be run or fails.
#
# Implementation note: when a single remote is wanted, we cannot use 'git
# remote show -n' because that command always works (assuming it's a remote URL
# if it's not defined), and we cannot use 'git remote show' because that would
# try to make a network roundtrip. So the only way to find if that particular
# remote is defined is to walk the list provided by 'git remote -v' and stop if
# and when we find what we want.
sub git_get_remotes_list {
	my $wanted = shift;
	my %remotes = ();

	# test the result of open(): the handle variable is not a reliable
	# indicator of failure
	open my $fd, '-|' , git_cmd(), 'remote', '-v'
		or return;
	while (my $remote = <$fd>) {
		chomp $remote;
		# "<name>\t<url> (<key>)": cut off url and key, leaving the name;
		# lines of any other shape are skipped, as there would be no
		# fresh captures to take url and key from
		$remote =~ s!\t(.*?)\s+\((\w+)\)$!!
			or next;
		my ($url, $key) = ($1, $2);
		next if $wanted and not $remote eq $wanted;

		# 'heads' is only a placeholder here
		$remotes{$remote} ||= { 'heads' => undef };
		$remotes{$remote}{$key} = $url;
	}
	close $fd or return;
	return wantarray ? %remotes : \%remotes;
}
3226
# Takes a hash of remotes as first parameter and fills it by adding the
# available remote heads for each of the indicated remotes.
#
# The heads of all remotes are fetched with a single git_get_heads_list()
# call; each remote's 'heads' entry then receives the heads whose name
# starts with "<remote>/", with that prefix removed from the name.
sub fill_remote_heads {
	my $remotes = shift;
	my @heads = map { "remotes/$_" } keys %$remotes;
	my @remoteheads = git_get_heads_list(undef, @heads);
	foreach my $remote (keys %$remotes) {
		# the substitution both selects the heads of this remote and
		# strips the remote name; \Q...\E makes the name match literally
		# even if it contains regexp metacharacters such as '.'
		$remotes->{$remote}{'heads'} = [ grep {
			$_->{'name'} =~ s!^\Q$remote\E/!!
			} @remoteheads ];
	}
}
3239
# Return a hash reference mapping object ids to the list of refs (names
# given without the leading "refs/") that point at them, as reported by
# "git show-ref --dereference".
#
# $type optionally restricts the refs, e.g. to "heads" or "tags".
# Returns nothing when git cannot be run or fails.
sub git_get_references {
	my $type = shift || "";
	my %refs;

	# use -- <pattern> if $type
	my @pattern = $type ? ("--", "refs/$type") : ();
	open my $fd, "-|", git_cmd(), "show-ref", "--dereference", @pattern
		or return;

	# 5dc01c595e6c6ec9ccda4f6f69c131c0dd945f8c refs/tags/v2.6.11
	# c39ae07f393806ccf406ef966e9a15afc43cc36a refs/tags/v2.6.11^{}
	while (my $line = <$fd>) {
		chomp $line;
		next unless ($line =~ m!^([0-9a-fA-F]{40})\srefs/($type.*)$!);
		push @{$refs{$1}}, $2;
	}
	close $fd or return;
	return \%refs;
}
3262
# Return the name "git name-rev --tags" gives to $hash relative to a tag
# (the part after "tags/"), or undef when $hash is empty or git reports
# no such name.
sub git_get_rev_name_tags {
	my $hash = shift || return undef;

	open my $fd, "-|", git_cmd(), "name-rev", "--tags", $hash
		or return;
	my $name_rev = <$fd>;
	close $fd;

	# $name_rev is undefined when git printed nothing; $hash is matched
	# literally (\Q...\E), as revisions such as 'v1.0^0' contain regexp
	# metacharacters
	if (defined $name_rev && $name_rev =~ m|^\Q$hash\E tags/(.*)$|) {
		return $1;
	} else {
		# catches also '$hash undefined' output
		return undef;
	}
}
3278
3279 ## ----------------------------------------------------------------------
3280 ## parse to hash functions
3281
# Break the timestamp $epoch (seconds since the Unix epoch) into the
# pieces and ready-made strings used for displaying dates.
#
# $tz is the timezone as "+hhmm" or "-hhmm" and defaults to "-0000".
# The returned hash holds the UTC view ('hour', 'minute', 'mday', 'day',
# 'month', 'rfc2822', 'mday-time', 'iso-8601') and the view in timezone
# $tz ('hour_local', 'minute_local', 'tz_local', 'iso-tz').
sub parse_date {
	my $epoch = shift;
	my $tz = shift || "-0000";

	my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
	my @days = qw(Sun Mon Tue Wed Thu Fri Sat);

	# the timestamp in UTC
	my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($epoch);
	my ($day, $month) = ($days[$wday], $months[$mon]);
	my %date = (
		'hour'      => $hour,
		'minute'    => $min,
		'mday'      => $mday,
		'day'       => $day,
		'month'     => $month,
		'rfc2822'   => sprintf("%s, %d %s %4d %02d:%02d:%02d +0000",
			$day, $mday, $month, 1900+$year, $hour, $min, $sec),
		'mday-time' => sprintf("%d %s %02d:%02d",
			$mday, $month, $hour, $min),
		'iso-8601'  => sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
			1900+$year, 1+$mon, $mday, $hour, $min, $sec),
	);

	# the same instant on the clock of timezone $tz: shift the timestamp
	# by the offset and read it as if it were UTC
	my ($tz_sign, $tz_hour, $tz_min) =
		($tz =~ m/^([-+])(\d\d)(\d\d)$/);
	my $offset = (($tz_hour*60) + $tz_min)*60;
	$offset = -$offset
		if ($tz_sign eq '-');
	my ($lsec, $lmin, $lhour, $lmday, $lmon, $lyear) = gmtime($epoch + $offset);
	$date{'hour_local'} = $lhour;
	$date{'minute_local'} = $lmin;
	$date{'tz_local'} = $tz;
	$date{'iso-tz'} = sprintf("%04d-%02d-%02d %02d:%02d:%02d %s",
		1900+$lyear, $lmon+1, $lmday,
		$lhour, $lmin, $lsec, $tz);

	return %date;
}
3315
# Parse the tag object $tag_id ("git cat-file tag") into a hash with the
# keys 'id', 'object', 'type', 'name', the tagger as 'author',
# 'author_epoch', 'author_tz', 'author_name' and, when present,
# 'author_email', plus 'comment' (array reference of message lines).
#
# Returns nothing when git cannot be run or fails, or when the object
# carries no tag name.
sub parse_tag {
	my $tag_id = shift;

	open my $fd, "-|", git_cmd(), "cat-file", "tag", $tag_id or return;

	my %tag = ('id' => $tag_id);
	my @comment;
	# header lines that are stored as they are, and the key they go to
	my %key_for = ('type' => 'type', 'tag' => 'name');

	while (my $line = <$fd>) {
		chomp $line;
		# an empty line ends the header
		last if ($line eq "");

		if ($line =~ m/^object ([0-9a-fA-F]{40})$/) {
			$tag{'object'} = $1;
		} elsif ($line =~ m/^(type|tag) (.+)$/) {
			$tag{$key_for{$1}} = $2;
		} elsif ($line =~ m/^tagger (.*) ([0-9]+) (.*)$/) {
			@tag{'author', 'author_epoch', 'author_tz'} = ($1, $2, $3);
			if ($tag{'author'} =~ m/^([^<]+) <([^>]*)>/) {
				@tag{'author_name', 'author_email'} = ($1, $2);
			} else {
				$tag{'author_name'} = $tag{'author'};
			}
		} elsif ($line =~ m/--BEGIN/) {
			# the header may run straight into a "--BEGIN" block:
			# that line already belongs to the message
			push @comment, $line;
			last;
		}
	}
	# whatever is left is the message
	push @comment, <$fd>;
	$tag{'comment'} = \@comment;
	close $fd or return;

	return unless (defined $tag{'name'});
	return %tag;
}
3356
# Parse one commit record as printed by "git rev-list --header" into a hash.
#
# $commit_text - the record: a "<id> [<parent>...]" line, the commit
#                header lines, an empty line and the commit message,
#                followed by the NUL record terminator
# $withparents - when defined, the parents are taken from the first line
#                only; otherwise 'parent' header lines are collected too
#
# Returns nothing when the text holds no lines, does not start with a
# 40-digit hexadecimal id, or has no 'tree' header.  Otherwise the hash
# holds 'id', 'tree', 'parents' (array reference), 'parent' (the first
# parent), the author and committer ('author', 'author_epoch',
# 'author_tz', 'author_name', 'author_email' and likewise for
# 'committer'), 'title', 'title_short', 'comment' (array reference of the
# message lines) and the age fields 'age', 'age_string',
# 'age_string_date', 'age_string_age'.
sub parse_commit_text {
	my ($commit_text, $withparents) = @_;
	my @commit_lines = split '\n', $commit_text;
	my %co;

	pop @commit_lines; # Remove '\0'

	if (! @commit_lines) {
		return;
	}

	my $header = shift @commit_lines;
	if ($header !~ m/^[0-9a-fA-F]{40}/) {
		return;
	}
	($co{'id'}, my @parents) = split ' ', $header;

	# commit header: everything up to the empty line before the message
	while (my $line = shift @commit_lines) {
		if ($line =~ m/^tree ([0-9a-fA-F]{40})$/) {
			$co{'tree'} = $1;
		} elsif ((!defined $withparents) && ($line =~ m/^parent ([0-9a-fA-F]{40})$/)) {
			push @parents, $1;
		} elsif ($line =~ m/^(author|committer) (.*) ([0-9]+) (.*)$/) {
			# "<who> <name> <email> <epoch> <timezone>"
			my ($who, $ident, $epoch, $tz) = ($1, $2, $3, $4);
			$co{$who} = to_utf8($ident);
			$co{"${who}_epoch"} = $epoch;
			$co{"${who}_tz"} = $tz;
			if ($co{$who} =~ m/^([^<]+) <([^>]*)>/) {
				$co{"${who}_name"} = $1;
				$co{"${who}_email"} = $2;
			} else {
				$co{"${who}_name"} = $co{$who};
			}
		}
	}
	if (!defined $co{'tree'}) {
		return;
	};
	$co{'parents'} = \@parents;
	$co{'parent'} = $parents[0];

	# rev-list indents every message line by four spaces: remove
	# exactly that indentation, once, from every line
	foreach my $line (@commit_lines) {
		$line =~ s/^    //;
	}

	# the title is the first non-empty line of the message
	foreach my $title (@commit_lines) {
		next if ($title eq "");
		$co{'title'} = chop_str($title, 80, 5);
		# The heuristics below work on a copy, because $title is an
		# alias of the message line that ends up in 'comment'.
		my $short = $title;
		# remove leading stuff of merges to make the interesting part visible
		if (length($short) > 50) {
			$short =~ s/^Automatic //;
			$short =~ s/^merge (of|with) /Merge ... /i;
			if (length($short) > 50) {
				$short =~ s/(http|rsync):\/\///;
			}
			if (length($short) > 50) {
				$short =~ s/(master|www|rsync)\.//;
			}
			if (length($short) > 50) {
				$short =~ s/kernel.org:?//;
			}
			if (length($short) > 50) {
				$short =~ s/\/pub\/scm//;
			}
		}
		$co{'title_short'} = chop_str($short, 50, 5);
		last;
	}
	if (! defined $co{'title'} || $co{'title'} eq "") {
		$co{'title'} = $co{'title_short'} = '(no commit message)';
	}
	$co{'comment'} = \@commit_lines;

	my $age = time - $co{'committer_epoch'};
	$co{'age'} = $age;
	$co{'age_string'} = age_string($age);
	my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday) = gmtime($co{'committer_epoch'});
	my $commit_date = sprintf "%4i-%02u-%02i", 1900 + $year, $mon+1, $mday;
	# commits older than two weeks lead with the date, newer ones
	# with the relative age
	if ($age > 60*60*24*7*2) {
		$co{'age_string_date'} = $commit_date;
		$co{'age_string_age'} = $co{'age_string'};
	} else {
		$co{'age_string_date'} = $co{'age_string'};
		$co{'age_string_age'} = $commit_date;
	}
	return %co;
}
3454
# Return the commit $commit_id as a hash (see parse_commit_text), with
# its parents taken from "git rev-list --parents".  The hash is empty
# when git prints nothing for $commit_id; failure to start git is
# reported through die_error().
sub parse_commit {
	my ($commit_id) = @_;
	my %co;

	# rev-list --header terminates every record with a NUL
	local $/ = "\0";

	open my $fd, "-|", git_cmd(), "rev-list",
		"--parents",
		"--header",
		"--max-count=1",
		$commit_id,
		"--"
		or die_error(500, "Open git-rev-list failed");
	# Read the single record in scalar context: a list-context read
	# inside the argument list would vanish when there is no output and
	# let the flag below be taken for the commit text.
	my $commit_text = <$fd>;
	close $fd;

	%co = parse_commit_text($commit_text, 1)
		if defined $commit_text;

	return %co;
}
3473
# Return the commits listed by "git rev-list --header" starting at
# $commit_id, each parsed by parse_commit_text() into a hash reference.
#
# $maxcount - maximum number of commits (defaults to 1)
# $skip     - number of commits to skip first (defaults to 0)
# $filename - when given, limit the history to this path
# @args     - further options for rev-list
#
# Returns a list in list context and an array reference otherwise;
# failure to start git is reported through die_error().
sub parse_commits {
	my ($commit_id, $maxcount, $skip, $filename, @args) = @_;

	$maxcount ||= 1;
	$skip ||= 0;

	# rev-list --header terminates every record with a NUL
	local $/ = "\0";

	my @rev_list = (git_cmd(), "rev-list",
		"--header",
		@args,
		"--max-count=$maxcount",
		"--skip=$skip",
		@extra_options,
		$commit_id,
		"--");
	push @rev_list, $filename
		if $filename;

	open my $fd, "-|", @rev_list
		or die_error(500, "Open git-rev-list failed");
	my @cos;
	while (my $record = <$fd>) {
		push @cos, +{ parse_commit_text($record) };
	}
	close $fd;

	return wantarray ? @cos : \@cos;
}
3501
# Parse one line of git-diff-tree "raw" output.
#
# Three kinds of lines are recognized; anything else gives an empty result:
#  - an ordinary diff line, giving 'from_mode', 'to_mode', 'from_id',
#    'to_id', 'status', 'similarity' and the file names ('from_file' and
#    'to_file'; also 'file' unless the entry is a rename or copy)
#  - a combined diff line (for a merge commit), giving 'nparents',
#    'to_mode', 'to_id', 'to_file' and array references 'from_mode',
#    'from_id' and 'status' with one entry per parent
#  - a bare commit id, giving 'commit'
#
# Returns a hash in list context and a hash reference otherwise.
sub parse_difftree_raw_line {
	my $line = shift;
	my %res;

	# ':100644 100644 03b218260e99b78c6df0ed378e59ed9205ccc96d 3b93d5e7cc7f7dd4ebed13a5cc1a4ad976fc94d8 M	ls-files.c'
	# ':100644 100644 7f9281985086971d3877aca27704f2aaf9c448ce bc190ebc71bbd923f2b728e505408f5e54bd073a M	rev-tree.c'
	if ($line =~ m/^:([0-7]{6}) ([0-7]{6}) ([0-9a-fA-F]{40}) ([0-9a-fA-F]{40}) (.)([0-9]{0,3})\t(.*)$/) {
		@res{'from_mode', 'to_mode', 'from_id', 'to_id', 'status', 'similarity'} =
			($1, $2, $3, $4, $5, $6);
		my $names = $7;
		if ($res{'status'} eq 'R' || $res{'status'} eq 'C') {
			# renamed or copied: source and destination, tab separated
			($res{'from_file'}, $res{'to_file'}) =
				map { unquote($_) } split("\t", $names);
		} else {
			my $file = unquote($names);
			@res{'from_file', 'to_file', 'file'} = ($file, $file, $file);
		}
	}
	# '::100755 100755 100755 60e79ca1b01bc8b057abe17ddab484699a7f5fdb 94067cc5f73388f33722d52ae02f44692bc07490 94067cc5f73388f33722d52ae02f44692bc07490 MR	git-gui/git-gui.sh'
	# combined diff (for merge commit)
	elsif ($line =~ m/^(::+)((?:[0-7]{6} )+)((?:[0-9a-fA-F]{40} )+)([a-zA-Z]+)\t(.*)$/) {
		my ($colons, $modes, $ids, $statuses, $name) = ($1, $2, $3, $4, $5);
		# the last mode and the last id describe the result, the ones
		# before them the parents
		my @modes = split(' ', $modes);
		my @ids = split(' ', $ids);
		$res{'nparents'} = length($colons);
		$res{'to_mode'} = pop @modes;
		$res{'from_mode'} = \@modes;
		$res{'to_id'} = pop @ids;
		$res{'from_id'} = \@ids;
		$res{'status'} = [ split('', $statuses) ];
		$res{'to_file'} = unquote($name);
	}
	# 'c512b523472485aef4fff9e57b229d9d243c967f'
	elsif ($line =~ m/^([0-9a-fA-F]{40})$/) {
		$res{'commit'} = $1;
	}

	return wantarray ? %res : \%res;
}
3540
# Return the parsed form of a line of git-diff-tree "raw" output.
# The argument is either such a raw line, which gets parsed, or a hash
# reference holding already parsed (or hand-made) info, which is
# returned as it is.
sub parsed_difftree_line {
	my $line_or_ref = shift;

	# already parsed: nothing to do
	return $line_or_ref
		if (ref($line_or_ref) eq "HASH");

	return parse_difftree_raw_line($line_or_ref);
}
3553
3554 # parse line of git-ls-tree output
3555 sub parse_ls_tree_line {
3556 my $line = shift;
3557 my %opts = @_;
3558 my %res;
3559
3560 if ($opts{'-l'}) {
3561 #'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa