X-Git-Url: https://git.ladys.computer/Gitweb/blobdiff_plain/0a1276bd4b16b1fdf71b6b852fc9ba6c094d67edbec5ca8f06c9170b6445284d..95ffa50f893aed551b698a71db6cd403019dba0f1c49d2e9699d98e2995a0fea:/gitweb.perl diff --git a/gitweb.perl b/gitweb.perl index 04e1c56..cf34c5b 100755 --- a/gitweb.perl +++ b/gitweb.perl @@ -7,55 +7,62 @@ # # This program is licensed under the GPLv2 +use 5.008; use strict; use warnings; use CGI qw(:standard :escapeHTML -nosticky); use CGI::Util qw(unescape); -use CGI::Carp qw(fatalsToBrowser); +use CGI::Carp qw(fatalsToBrowser set_message); use Encode; use Fcntl ':mode'; use File::Find qw(); use File::Basename qw(basename); +use Time::HiRes qw(gettimeofday tv_interval); binmode STDOUT, ':utf8'; -our $t0; -if (eval { require Time::HiRes; 1; }) { - $t0 = [Time::HiRes::gettimeofday()]; -} +our $t0 = [ gettimeofday() ]; our $number_of_git_cmds = 0; BEGIN { CGI->compile() if $ENV{'MOD_PERL'}; } -our $cgi = new CGI; our $version = "++GIT_VERSION++"; -our $my_url = $cgi->url(); -our $my_uri = $cgi->url(-absolute => 1); -# Base URL for relative URLs in gitweb ($logo, $favicon, ...), -# needed and used only for URLs with nonempty PATH_INFO -our $base_url = $my_url; +our ($my_url, $my_uri, $base_url, $path_info, $home_link); +sub evaluate_uri { + our $cgi; -# When the script is used as DirectoryIndex, the URL does not contain the name -# of the script file itself, and $cgi->url() fails to strip PATH_INFO, so we -# have to do it ourselves. We make $path_info global because it's also used -# later on. -# -# Another issue with the script being the DirectoryIndex is that the resulting -# $my_url data is not the full script URL: this is good, because we want -# generated links to keep implying the script name if it wasn't explicitly -# indicated in the URL we're handling, but it means that $my_url cannot be used -# as base URL. -# Therefore, if we needed to strip PATH_INFO, then we know that we have -# to build the base URL ourselves: -our $path_info = $ENV{"PATH_INFO"}; -if ($path_info) { - if ($my_url =~ s,\Q$path_info\E$,, && - $my_uri =~ s,\Q$path_info\E$,, && - defined $ENV{'SCRIPT_NAME'}) { - $base_url = $cgi->url(-base => 1) . $ENV{'SCRIPT_NAME'}; + our $my_url = $cgi->url(); + our $my_uri = $cgi->url(-absolute => 1); + + # Base URL for relative URLs in gitweb ($logo, $favicon, ...), + # needed and used only for URLs with nonempty PATH_INFO + our $base_url = $my_url; + + # When the script is used as DirectoryIndex, the URL does not contain the name + # of the script file itself, and $cgi->url() fails to strip PATH_INFO, so we + # have to do it ourselves. We make $path_info global because it's also used + # later on. + # + # Another issue with the script being the DirectoryIndex is that the resulting + # $my_url data is not the full script URL: this is good, because we want + # generated links to keep implying the script name if it wasn't explicitly + # indicated in the URL we're handling, but it means that $my_url cannot be used + # as base URL. + # Therefore, if we needed to strip PATH_INFO, then we know that we have + # to build the base URL ourselves: + our $path_info = $ENV{"PATH_INFO"}; + if ($path_info) { + if ($my_url =~ s,\Q$path_info\E$,, && + $my_uri =~ s,\Q$path_info\E$,, && + defined $ENV{'SCRIPT_NAME'}) { + $base_url = $cgi->url(-base => 1) . $ENV{'SCRIPT_NAME'}; + } } + + # target of the home link on top of all pages + our $home_link = $my_uri || "/"; } # core git executable to use @@ -70,9 +77,6 @@ our $projectroot = "++GITWEB_PROJECTROOT++"; # the number is relative to the projectroot our $project_maxdepth = "++GITWEB_PROJECT_MAXDEPTH++"; -# target of the home link on top of all pages -our $home_link = $my_uri || "/"; - # string of the home link on top of all pages our $home_link_str = "++GITWEB_HOME_LINK_STR++"; @@ -112,6 +116,14 @@ our $projects_list = "++GITWEB_LIST++"; # the width (in characters) of the projects list "Description" column our $projects_list_description_width = 25; +# group projects by category on the projects list +# (enabled if this variable evaluates to true) +our $projects_list_group_categories = 0; + +# default category if none specified +# (leave the empty string for no category) +our $project_list_default_category = ""; + # default order of projects list # valid values are none, project, descr, owner, and age our $default_projects_order = "project"; @@ -161,6 +173,12 @@ our @diff_opts = ('-M'); # taken from git_commit # the gitweb domain. our $prevent_xss = 0; +# Path to the highlight executable to use (must be the one from +# http://www.andre-simon.de due to assumptions about parameters and output). +# Useful if highlight is not installed on your webserver's PATH. +# [Default: highlight] +our $highlight_bin = "++HIGHLIGHT_BIN++"; + # information about snapshot formats that gitweb is capable of serving our %known_snapshot_formats = ( # name => { @@ -177,7 +195,7 @@ our %known_snapshot_formats = ( 'type' => 'application/x-gzip', 'suffix' => '.tar.gz', 'format' => 'tar', - 'compressor' => ['gzip']}, + 'compressor' => ['gzip', '-n']}, 'tbz2' => { 'display' => 'tar.bz2', @@ -228,6 +246,30 @@ our %avatar_size = ( # Leave it undefined (or set to 'undef') to turn off load checking. our $maxload = 300; +# configuration for 'highlight' (http://www.andre-simon.de/) +# match by basename +our %highlight_basename = ( + #'Program' => 'py', + #'Library' => 'py', + 'SConstruct' => 'py', # SCons equivalent of Makefile + 'Makefile' => 'make', +); +# match by extension +our %highlight_ext = ( + # main extensions, defining name of syntax; + # see files in /usr/share/highlight/langDefs/ directory + map { $_ => $_ } + qw(py c cpp rb java css php sh pl js tex bib xml awk bat ini spec tcl sql make), + # alternate extensions, see /etc/highlight/filetypes.conf + 'h' => 'c', + map { $_ => 'sh' } qw(bash zsh ksh), + map { $_ => 'cpp' } qw(cxx c++ cc), + map { $_ => 'php' } qw(php3 php4 php5 phps), + map { $_ => 'pl' } qw(perl pm), # perhaps also 'cgi' + map { $_ => 'make'} qw(mak mk), + map { $_ => 'xml' } qw(xhtml html htm), +); + # You define site-wide feature defaults here; override them with # $GITWEB_CONFIG as necessary. our %feature = ( @@ -241,7 +283,7 @@ our %feature = ( # return value of feature-sub indicates if to enable specified feature # # if there is no 'sub' key (no feature-sub), then feature cannot be - # overriden + # overridden # # use gitweb_get_feature() to retrieve the value # (an array) or gitweb_check_feature() to check if @@ -379,20 +421,23 @@ our %feature = ( 'override' => 0, 'default' => []}, - # Allow gitweb scan project content tags described in ctags/ - # of project repository, and display the popular Web 2.0-ish - # "tag cloud" near the project list. Note that this is something - # COMPLETELY different from the normal Git tags. + # Allow gitweb scan project content tags of project repository, + # and display the popular Web 2.0-ish "tag cloud" near the projects + # list. Note that this is something COMPLETELY different from the + # normal Git tags. # gitweb by itself can show existing tags, but it does not handle - # tagging itself; you need an external application for that. - # For an example script, check Girocco's cgi/tagproj.cgi. + # tagging itself; you need to do it externally, outside gitweb. + # The format is described in git_get_project_ctags() subroutine. # You may want to install the HTML::TagCloud Perl module to get # a pretty tag cloud instead of just a list of tags. # To enable system wide have in $GITWEB_CONFIG - # $feature{'ctags'}{'default'} = ['path_to_tag_script']; + # $feature{'ctags'}{'default'} = [1]; # Project specific override is not supported. + + # In the future whether ctags editing is enabled might depend + # on the value, but using 1 should always mean no editing of ctags. 'ctags' => { 'override' => 0, 'default' => [0]}, @@ -446,6 +491,31 @@ our %feature = ( 'javascript-actions' => { 'override' => 0, 'default' => [0]}, + + # Syntax highlighting support. This is based on Daniel Svensson's + # and Sham Chukoury's work in gitweb-xmms2.git. + # It requires the 'highlight' program present in $PATH, + # and therefore is disabled by default. + + # To enable system wide have in $GITWEB_CONFIG + # $feature{'highlight'}{'default'} = [1]; + + 'highlight' => { + 'sub' => sub { feature_bool('highlight', @_) }, + 'override' => 0, + 'default' => [0]}, + + # Enable displaying of remote heads in the heads list + + # To enable system wide have in $GITWEB_CONFIG + # $feature{'remote_heads'}{'default'} = [1]; + # To have project specific config enable override in $GITWEB_CONFIG + # $feature{'remote_heads'}{'override'} = 1; + # and in project config gitweb.remote_heads = 0|1; + 'remote_heads' => { + 'sub' => sub { feature_bool('remote_heads', @_) }, + 'override' => 0, + 'default' => [0]}, ); sub gitweb_get_feature { @@ -455,7 +525,11 @@ sub gitweb_get_feature { $feature{$name}{'sub'}, $feature{$name}{'override'}, @{$feature{$name}{'default'}}); - if (!$override) { return @defaults; } + # project specific override is possible only if we have project + our $git_dir; # global variable, declared later + if (!$override || !defined $git_dir) { + return @defaults; + } if (!defined $sub) { warn "feature $name is not overridable"; return @defaults; @@ -550,12 +624,26 @@ sub filter_snapshot_fmts { !$known_snapshot_formats{$_}{'disabled'}} @fmts; } -our $GITWEB_CONFIG = $ENV{'GITWEB_CONFIG'} || "++GITWEB_CONFIG++"; -if (-e $GITWEB_CONFIG) { - do $GITWEB_CONFIG; -} else { +# If it is set to code reference, it is code that it is to be run once per +# request, allowing updating configurations that change with each request, +# while running other code in config file only once. +# +# Otherwise, if it is false then gitweb would process config file only once; +# if it is true then gitweb config would be run for each request. +our $per_request_config = 1; + +our ($GITWEB_CONFIG, $GITWEB_CONFIG_SYSTEM); +sub evaluate_gitweb_config { + our $GITWEB_CONFIG = $ENV{'GITWEB_CONFIG'} || "++GITWEB_CONFIG++"; our $GITWEB_CONFIG_SYSTEM = $ENV{'GITWEB_CONFIG_SYSTEM'} || "++GITWEB_CONFIG_SYSTEM++"; - do $GITWEB_CONFIG_SYSTEM if -e $GITWEB_CONFIG_SYSTEM; + # die if there are errors parsing config file + if (-e $GITWEB_CONFIG) { + do $GITWEB_CONFIG; + die $@ if $@; + } elsif (-e $GITWEB_CONFIG_SYSTEM) { + do $GITWEB_CONFIG_SYSTEM; + die $@ if $@; + } } # Get loadavg of system, to compare against $maxload. @@ -581,13 +669,16 @@ sub get_loadavg { } # version of the core git binary -our $git_version = qx("$GIT" --version) =~ m/git version (.*)$/ ? $1 : "unknown"; -$number_of_git_cmds++; - -$projects_list ||= $projectroot; +our $git_version; +sub evaluate_git_version { + our $git_version = qx("$GIT" --version) =~ m/git version (.*)$/ ? $1 : "unknown"; + $number_of_git_cmds++; +} -if (defined $maxload && get_loadavg() > $maxload) { - die_error(503, "The load average on the server is too high"); +sub check_loadavg { + if (defined $maxload && get_loadavg() > $maxload) { + die_error(503, "The load average on the server is too high"); + } } # ====================================================================== @@ -624,6 +715,7 @@ our @cgi_param_mapping = ( snapshot_format => "sf", extra_options => "opt", search_use_regexp => "sr", + ctag => "by_tag", # this must be last entry (for manipulation from JavaScript) javascript => "js" ); @@ -647,6 +739,7 @@ our %actions = ( "log" => \&git_log, "patch" => \&git_patch, "patches" => \&git_patches, + "remotes" => \&git_remotes, "rss" => \&git_rss, "atom" => \&git_atom, "search" => \&git_search, @@ -674,11 +767,15 @@ our %allowed_options = ( # should be single values, but opt can be an array. We should probably # build an array of parameters that can be multi-valued, but since for the time # being it's only this one, we just single it out -while (my ($name, $symbol) = each %cgi_param_mapping) { - if ($symbol eq 'opt') { - $input_params{$name} = [ $cgi->param($symbol) ]; - } else { - $input_params{$name} = $cgi->param($symbol); +sub evaluate_query_params { + our $cgi; + + while (my ($name, $symbol) = each %cgi_param_mapping) { + if ($symbol eq 'opt') { + $input_params{$name} = [ $cgi->param($symbol) ]; + } else { + $input_params{$name} = $cgi->param($symbol); + } } } @@ -717,10 +814,10 @@ sub evaluate_path_info { 'history', ); - # we want to catch + # we want to catch, among others # [$hash_parent_base[:$file_parent]..]$hash_parent[:$file_name] my ($parentrefname, $parentpathname, $refname, $pathname) = - ($path_info =~ /^(?:(.+?)(?::(.+))?\.\.)?(.+?)(?::(.+))?$/); + ($path_info =~ /^(?:(.+?)(?::(.+))?\.\.)?([^:]+?)?(?::(.+))?$/); # first, analyze the 'current' part if (defined $pathname) { @@ -756,8 +853,15 @@ sub evaluate_path_info { # hash_base instead. It should also be noted that hand-crafted # links having 'history' as an action and no pathname or hash # set will fail, but that happens regardless of PATH_INFO. - $input_params{'action'} ||= "shortlog"; - if (grep { $_ eq $input_params{'action'} } @wants_base) { + if (defined $parentrefname) { + # if there is parent let the default be 'shortlog' action + # (for http://git.example.com/repo.git/A..B links); if there + # is no parent, dispatch will detect type of object and set + # action appropriately if required (if action is not set) + $input_params{'action'} ||= "shortlog"; + } + if ($input_params{'action'} && + grep { $_ eq $input_params{'action'} } @wants_base) { $input_params{'hash_base'} ||= $refname; } else { $input_params{'hash'} ||= $refname; @@ -825,157 +929,298 @@ sub evaluate_path_info { } } } -evaluate_path_info(); -our $action = $input_params{'action'}; -if (defined $action) { - if (!validate_action($action)) { - die_error(400, "Invalid action parameter"); +our ($action, $project, $file_name, $file_parent, $hash, $hash_parent, $hash_base, + $hash_parent_base, @extra_options, $page, $searchtype, $search_use_regexp, + $searchtext, $search_regexp); +sub evaluate_and_validate_params { + our $action = $input_params{'action'}; + if (defined $action) { + if (!validate_action($action)) { + die_error(400, "Invalid action parameter"); + } } -} -# parameters which are pathnames -our $project = $input_params{'project'}; -if (defined $project) { - if (!validate_project($project)) { - undef $project; - die_error(404, "No such project"); + # parameters which are pathnames + our $project = $input_params{'project'}; + if (defined $project) { + if (!validate_project($project)) { + undef $project; + die_error(404, "No such project"); + } } -} -our $file_name = $input_params{'file_name'}; -if (defined $file_name) { - if (!validate_pathname($file_name)) { - die_error(400, "Invalid file parameter"); + our $file_name = $input_params{'file_name'}; + if (defined $file_name) { + if (!validate_pathname($file_name)) { + die_error(400, "Invalid file parameter"); + } } -} -our $file_parent = $input_params{'file_parent'}; -if (defined $file_parent) { - if (!validate_pathname($file_parent)) { - die_error(400, "Invalid file parent parameter"); + our $file_parent = $input_params{'file_parent'}; + if (defined $file_parent) { + if (!validate_pathname($file_parent)) { + die_error(400, "Invalid file parent parameter"); + } } -} -# parameters which are refnames -our $hash = $input_params{'hash'}; -if (defined $hash) { - if (!validate_refname($hash)) { - die_error(400, "Invalid hash parameter"); + # parameters which are refnames + our $hash = $input_params{'hash'}; + if (defined $hash) { + if (!validate_refname($hash)) { + die_error(400, "Invalid hash parameter"); + } } -} -our $hash_parent = $input_params{'hash_parent'}; -if (defined $hash_parent) { - if (!validate_refname($hash_parent)) { - die_error(400, "Invalid hash parent parameter"); + our $hash_parent = $input_params{'hash_parent'}; + if (defined $hash_parent) { + if (!validate_refname($hash_parent)) { + die_error(400, "Invalid hash parent parameter"); + } } -} -our $hash_base = $input_params{'hash_base'}; -if (defined $hash_base) { - if (!validate_refname($hash_base)) { - die_error(400, "Invalid hash base parameter"); + our $hash_base = $input_params{'hash_base'}; + if (defined $hash_base) { + if (!validate_refname($hash_base)) { + die_error(400, "Invalid hash base parameter"); + } } -} -our @extra_options = @{$input_params{'extra_options'}}; -# @extra_options is always defined, since it can only be (currently) set from -# CGI, and $cgi->param() returns the empty array in array context if the param -# is not set -foreach my $opt (@extra_options) { - if (not exists $allowed_options{$opt}) { - die_error(400, "Invalid option parameter"); - } - if (not grep(/^$action$/, @{$allowed_options{$opt}})) { - die_error(400, "Invalid option parameter for this action"); + our @extra_options = @{$input_params{'extra_options'}}; + # @extra_options is always defined, since it can only be (currently) set from + # CGI, and $cgi->param() returns the empty array in array context if the param + # is not set + foreach my $opt (@extra_options) { + if (not exists $allowed_options{$opt}) { + die_error(400, "Invalid option parameter"); + } + if (not grep(/^$action$/, @{$allowed_options{$opt}})) { + die_error(400, "Invalid option parameter for this action"); + } } -} -our $hash_parent_base = $input_params{'hash_parent_base'}; -if (defined $hash_parent_base) { - if (!validate_refname($hash_parent_base)) { - die_error(400, "Invalid hash parent base parameter"); + our $hash_parent_base = $input_params{'hash_parent_base'}; + if (defined $hash_parent_base) { + if (!validate_refname($hash_parent_base)) { + die_error(400, "Invalid hash parent base parameter"); + } } -} -# other parameters -our $page = $input_params{'page'}; -if (defined $page) { - if ($page =~ m/[^0-9]/) { - die_error(400, "Invalid page parameter"); + # other parameters + our $page = $input_params{'page'}; + if (defined $page) { + if ($page =~ m/[^0-9]/) { + die_error(400, "Invalid page parameter"); + } } -} -our $searchtype = $input_params{'searchtype'}; -if (defined $searchtype) { - if ($searchtype =~ m/[^a-z]/) { - die_error(400, "Invalid searchtype parameter"); + our $searchtype = $input_params{'searchtype'}; + if (defined $searchtype) { + if ($searchtype =~ m/[^a-z]/) { + die_error(400, "Invalid searchtype parameter"); + } } -} -our $search_use_regexp = $input_params{'search_use_regexp'}; + our $search_use_regexp = $input_params{'search_use_regexp'}; -our $searchtext = $input_params{'searchtext'}; -our $search_regexp; -if (defined $searchtext) { - if (length($searchtext) < 2) { - die_error(403, "At least two characters are required for search parameter"); + our $searchtext = $input_params{'searchtext'}; + our $search_regexp; + if (defined $searchtext) { + if (length($searchtext) < 2) { + die_error(403, "At least two characters are required for search parameter"); + } + $search_regexp = $search_use_regexp ? $searchtext : quotemeta $searchtext; } - $search_regexp = $search_use_regexp ? $searchtext : quotemeta $searchtext; } # path to the current git repository our $git_dir; -$git_dir = "$projectroot/$project" if $project; - -# list of supported snapshot formats -our @snapshot_fmts = gitweb_get_feature('snapshot'); -@snapshot_fmts = filter_snapshot_fmts(@snapshot_fmts); - -# check that the avatar feature is set to a known provider name, -# and for each provider check if the dependencies are satisfied. -# if the provider name is invalid or the dependencies are not met, -# reset $git_avatar to the empty string. -our ($git_avatar) = gitweb_get_feature('avatar'); -if ($git_avatar eq 'gravatar') { - $git_avatar = '' unless (eval { require Digest::MD5; 1; }); -} elsif ($git_avatar eq 'picon') { - # no dependencies -} else { - $git_avatar = ''; +sub evaluate_git_dir { + our $git_dir = "$projectroot/$project" if $project; } -# dispatch -if (!defined $action) { - if (defined $hash) { - $action = git_get_type($hash); - } elsif (defined $hash_base && defined $file_name) { - $action = git_get_type("$hash_base:$file_name"); - } elsif (defined $project) { - $action = 'summary'; +our (@snapshot_fmts, $git_avatar); +sub configure_gitweb_features { + # list of supported snapshot formats + our @snapshot_fmts = gitweb_get_feature('snapshot'); + @snapshot_fmts = filter_snapshot_fmts(@snapshot_fmts); + + # check that the avatar feature is set to a known provider name, + # and for each provider check if the dependencies are satisfied. + # if the provider name is invalid or the dependencies are not met, + # reset $git_avatar to the empty string. + our ($git_avatar) = gitweb_get_feature('avatar'); + if ($git_avatar eq 'gravatar') { + $git_avatar = '' unless (eval { require Digest::MD5; 1; }); + } elsif ($git_avatar eq 'picon') { + # no dependencies } else { - $action = 'project_list'; + $git_avatar = ''; + } +} + +# custom error handler: 'die ' is Internal Server Error +sub handle_errors_html { + my $msg = shift; # it is already HTML escaped + + # to avoid infinite loop where error occurs in die_error, + # change handler to default handler, disabling handle_errors_html + set_message("Error occured when inside die_error:\n$msg"); + + # you cannot jump out of die_error when called as error handler; + # the subroutine set via CGI::Carp::set_message is called _after_ + # HTTP headers are already written, so it cannot write them itself + die_error(undef, undef, $msg, -error_handler => 1, -no_http_header => 1); +} +set_message(\&handle_errors_html); + +# dispatch +sub dispatch { + if (!defined $action) { + if (defined $hash) { + $action = git_get_type($hash); + } elsif (defined $hash_base && defined $file_name) { + $action = git_get_type("$hash_base:$file_name"); + } elsif (defined $project) { + $action = 'summary'; + } else { + $action = 'project_list'; + } + } + if (!defined($actions{$action})) { + die_error(400, "Unknown action"); + } + if ($action !~ m/^(?:opml|project_list|project_index)$/ && + !$project) { + die_error(400, "Project needed"); + } + $actions{$action}->(); +} + +sub reset_timer { + our $t0 = [ gettimeofday() ] + if defined $t0; + our $number_of_git_cmds = 0; +} + +our $first_request = 1; +sub run_request { + reset_timer(); + + evaluate_uri(); + if ($first_request) { + evaluate_gitweb_config(); + evaluate_git_version(); + } + if ($per_request_config) { + if (ref($per_request_config) eq 'CODE') { + $per_request_config->(); + } elsif (!$first_request) { + evaluate_gitweb_config(); + } } + check_loadavg(); + + # $projectroot and $projects_list might be set in gitweb config file + $projects_list ||= $projectroot; + + evaluate_query_params(); + evaluate_path_info(); + evaluate_and_validate_params(); + evaluate_git_dir(); + + configure_gitweb_features(); + + dispatch(); +} + +our $is_last_request = sub { 1 }; +our ($pre_dispatch_hook, $post_dispatch_hook, $pre_listen_hook); +our $CGI = 'CGI'; +our $cgi; +sub configure_as_fcgi { + require CGI::Fast; + our $CGI = 'CGI::Fast'; + + my $request_number = 0; + # let each child service 100 requests + our $is_last_request = sub { ++$request_number > 100 }; +} +sub evaluate_argv { + my $script_name = $ENV{'SCRIPT_NAME'} || $ENV{'SCRIPT_FILENAME'} || __FILE__; + configure_as_fcgi() + if $script_name =~ /\.fcgi$/; + + return unless (@ARGV); + + require Getopt::Long; + Getopt::Long::GetOptions( + 'fastcgi|fcgi|f' => \&configure_as_fcgi, + 'nproc|n=i' => sub { + my ($arg, $val) = @_; + return unless eval { require FCGI::ProcManager; 1; }; + my $proc_manager = FCGI::ProcManager->new({ + n_processes => $val, + }); + our $pre_listen_hook = sub { $proc_manager->pm_manage() }; + our $pre_dispatch_hook = sub { $proc_manager->pm_pre_dispatch() }; + our $post_dispatch_hook = sub { $proc_manager->pm_post_dispatch() }; + }, + ); } -if (!defined($actions{$action})) { - die_error(400, "Unknown action"); + +sub run { + evaluate_argv(); + + $first_request = 1; + $pre_listen_hook->() + if $pre_listen_hook; + + REQUEST: + while ($cgi = $CGI->new()) { + $pre_dispatch_hook->() + if $pre_dispatch_hook; + + run_request(); + + $post_dispatch_hook->() + if $post_dispatch_hook; + $first_request = 0; + + last REQUEST if ($is_last_request->()); + } + + DONE_GITWEB: + 1; } -if ($action !~ m/^(?:opml|project_list|project_index)$/ && - !$project) { - die_error(400, "Project needed"); + +run(); + +if (defined caller) { + # wrapped in a subroutine processing requests, + # e.g. mod_perl with ModPerl::Registry, or PSGI with Plack::App::WrapCGI + return; +} else { + # pure CGI script, serving single request + exit; } -$actions{$action}->(); -exit; ## ====================================================================== ## action links +# possible values of extra options +# -full => 0|1 - use absolute/full URL ($my_uri/$my_url as base) +# -replay => 1 - start from a current view (replay with modifications) +# -path_info => 0|1 - don't use/use path_info URL (if possible) +# -anchor => ANCHOR - add #ANCHOR to end of URL, implies -replay if used alone sub href { my %params = @_; # default is to use -absolute url() i.e. $my_uri my $href = $params{-full} ? $my_url : $my_uri; + # implicit -replay, must be first of implicit params + $params{-replay} = 1 if (keys %params == 1 && $params{-anchor}); + $params{'project'} = $project unless exists $params{'project'}; if ($params{-replay}) { @@ -987,7 +1232,8 @@ sub href { } my $use_pathinfo = gitweb_check_feature('pathinfo'); - if ($use_pathinfo and defined $params{'project'}) { + if (defined $params{'project'} && + (exists $params{-path_info} ? $params{-path_info} : $use_pathinfo)) { # try to put as many parameters as possible in PATH_INFO: # - project name # - action @@ -1002,7 +1248,7 @@ sub href { $href =~ s,/$,,; # Then add the project name, if present - $href .= "/".esc_url($params{'project'}); + $href .= "/".esc_path_info($params{'project'}); delete $params{'project'}; # since we destructively absorb parameters, we keep this @@ -1012,7 +1258,8 @@ sub href { # Summary just uses the project path URL, any other action is # added to the URL if (defined $params{'action'}) { - $href .= "/".esc_url($params{'action'}) unless $params{'action'} eq 'summary'; + $href .= "/".esc_path_info($params{'action'}) + unless $params{'action'} eq 'summary'; delete $params{'action'}; } @@ -1022,13 +1269,13 @@ sub href { || $params{'hash_parent'} || $params{'hash'}); if (defined $params{'hash_base'}) { if (defined $params{'hash_parent_base'}) { - $href .= esc_url($params{'hash_parent_base'}); + $href .= esc_path_info($params{'hash_parent_base'}); # skip the file_parent if it's the same as the file_name if (defined $params{'file_parent'}) { if (defined $params{'file_name'} && $params{'file_parent'} eq $params{'file_name'}) { delete $params{'file_parent'}; } elsif ($params{'file_parent'} !~ /\.\./) { - $href .= ":/".esc_url($params{'file_parent'}); + $href .= ":/".esc_path_info($params{'file_parent'}); delete $params{'file_parent'}; } } @@ -1036,19 +1283,19 @@ sub href { delete $params{'hash_parent'}; delete $params{'hash_parent_base'}; } elsif (defined $params{'hash_parent'}) { - $href .= esc_url($params{'hash_parent'}). ".."; + $href .= esc_path_info($params{'hash_parent'}). ".."; delete $params{'hash_parent'}; } - $href .= esc_url($params{'hash_base'}); + $href .= esc_path_info($params{'hash_base'}); if (defined $params{'file_name'} && $params{'file_name'} !~ /\.\./) { - $href .= ":/".esc_url($params{'file_name'}); + $href .= ":/".esc_path_info($params{'file_name'}); delete $params{'file_name'}; } delete $params{'hash'}; delete $params{'hash_base'}; } elsif (defined $params{'hash'}) { - $href .= esc_url($params{'hash'}); + $href .= esc_path_info($params{'hash'}); delete $params{'hash'}; } @@ -1081,6 +1328,13 @@ sub href { } $href .= "?" . join(';', @result) if scalar @result; + # final transformation: trailing spaces must be escaped (URI-encoded) + $href =~ s/(\s+)$/CGI::escape($1)/e; + + if ($params{-anchor}) { + $href .= "#".esc_param($params{-anchor}); + } + return $href; } @@ -1144,6 +1398,7 @@ sub validate_refname { # in utf-8 thanks to "binmode STDOUT, ':utf8'" at beginning sub to_utf8 { my $str = shift; + return undef unless defined $str; if (utf8::valid($str)) { utf8::decode($str); return $str; @@ -1156,25 +1411,46 @@ sub to_utf8 { # correct, but quoted slashes look too horrible in bookmarks sub esc_param { my $str = shift; + return undef unless defined $str; $str =~ s/([^A-Za-z0-9\-_.~()\/:@ ]+)/CGI::escape($1)/eg; $str =~ s/ /\+/g; return $str; } -# quote unsafe chars in whole URL, so some charactrs cannot be quoted +# the quoting rules for path_info fragment are slightly different +sub esc_path_info { + my $str = shift; + return undef unless defined $str; + + # path_info doesn't treat '+' as space (specially), but '?' must be escaped + $str =~ s/([^A-Za-z0-9\-_.~();\/;:@&= +]+)/CGI::escape($1)/eg; + + return $str; +} + +# quote unsafe chars in whole URL, so some characters cannot be quoted sub esc_url { my $str = shift; - $str =~ s/([^A-Za-z0-9\-_.~();\/;?:@&=])/sprintf("%%%02X", ord($1))/eg; - $str =~ s/\+/%2B/g; + return undef unless defined $str; + $str =~ s/([^A-Za-z0-9\-_.~();\/;?:@&= ]+)/CGI::escape($1)/eg; $str =~ s/ /\+/g; return $str; } +# quote unsafe characters in HTML attributes +sub esc_attr { + + # for XHTML conformance escaping '"' to '"' is not enough + return esc_html(@_); +} + # replace invalid utf8 character with SUBSTITUTION sequence sub esc_html { my $str = shift; my %opts = @_; + return undef unless defined $str; + $str = to_utf8($str); $str = $cgi->escapeHTML($str); if ($opts{'-nbsp'}) { @@ -1189,6 +1465,8 @@ sub esc_path { my $str = shift; my %opts = @_; + return undef unless defined $str; + $str = to_utf8($str); $str = $cgi->escapeHTML($str); if ($opts{'-nbsp'}) { @@ -1331,7 +1609,6 @@ sub chop_str { $str =~ m/^(.*?)($begre)$/; my ($lead, $body) = ($1, $2); if (length($lead) > 4) { - $body =~ s/^[^;]*;// if ($lead =~ m/&[^;]*$/); $lead = " ..."; } return "$lead$body"; @@ -1342,8 +1619,6 @@ sub chop_str { $str =~ m/^(.*?)($begre)$/; my ($mid, $right) = ($1, $2); if (length($mid) > 5) { - $left =~ s/&[^;]*$//; - $right =~ s/^[^;]*;// if ($mid =~ m/&[^;]*$/); $mid = " ... "; } return "$left$mid$right"; @@ -1353,7 +1628,6 @@ sub chop_str { my $body = $1; my $tail = $2; if (length($tail) > 4) { - $body =~ s/&[^;]*$//; $tail = "... "; } return "$body$tail"; @@ -1575,7 +1849,7 @@ sub format_ref_marker { hash=>$dest )}, $name); - $markers .= " " . + $markers .= " " . $link . ""; } } @@ -1659,7 +1933,7 @@ sub git_get_avatar { return $pre_white . "" . $post_white; } else { @@ -2207,6 +2481,8 @@ sub config_to_multi { sub git_get_project_config { my ($key, $type) = @_; + return unless defined $git_dir; + # key sanity check return unless ($key); $key =~ s/^gitweb\.//; @@ -2295,37 +2571,94 @@ sub git_get_path_by_hash { ## ...................................................................... ## git utility functions, directly accessing git repository -sub git_get_project_description { - my $path = shift; +# get the value of config variable either from file named as the variable +# itself in the repository ($GIT_DIR/$name file), or from gitweb.$name +# configuration variable in the repository config file. +sub git_get_file_or_project_config { + my ($path, $name) = @_; $git_dir = "$projectroot/$path"; - open my $fd, '<', "$git_dir/description" - or return git_get_project_config('description'); - my $descr = <$fd>; + open my $fd, '<', "$git_dir/$name" + or return git_get_project_config($name); + my $conf = <$fd>; close $fd; - if (defined $descr) { - chomp $descr; + if (defined $conf) { + chomp $conf; } - return $descr; + return $conf; } -sub git_get_project_ctags { +sub git_get_project_description { my $path = shift; + return git_get_file_or_project_config($path, 'description'); +} + +sub git_get_project_category { + my $path = shift; + return git_get_file_or_project_config($path, 'category'); +} + + +# supported formats: +# * $GIT_DIR/ctags/ file (in 'ctags' subdirectory) +# - if its contents is a number, use it as tag weight, +# - otherwise add a tag with weight 1 +# * $GIT_DIR/ctags file, each line is a tag (with weight 1) +# the same value multiple times increases tag weight +# * `gitweb.ctag' multi-valued repo config variable +sub git_get_project_ctags { + my $project = shift; my $ctags = {}; - $git_dir = "$projectroot/$path"; - opendir my $dh, "$git_dir/ctags" - or return $ctags; - foreach (grep { -f $_ } map { "$git_dir/ctags/$_" } readdir($dh)) { - open my $ct, '<', $_ or next; - my $val = <$ct>; - chomp $val; - close $ct; - my $ctag = $_; $ctag =~ s#.*/##; - $ctags->{$ctag} = $val; + $git_dir = "$projectroot/$project"; + if (opendir my $dh, "$git_dir/ctags") { + my @files = grep { -f $_ } map { "$git_dir/ctags/$_" } readdir($dh); + foreach my $tagfile (@files) { + open my $ct, '<', $tagfile + or next; + my $val = <$ct>; + chomp $val if $val; + close $ct; + + (my $ctag = $tagfile) =~ s#.*/##; + if ($val =~ /\d+/) { + $ctags->{$ctag} = $val; + } else { + $ctags->{$ctag} = 1; + } + } + closedir $dh; + + } elsif (open my $fh, '<', "$git_dir/ctags") { + while (my $line = <$fh>) { + chomp $line; + $ctags->{$line}++ if $line; + } + close $fh; + + } else { + my $taglist = config_to_multi(git_get_project_config('ctag')); + foreach my $tag (@$taglist) { + $ctags->{$tag}++; + } + } + + return $ctags; +} + +# return hash, where keys are content tags ('ctags'), +# and values are sum of weights of given tag in every project +sub git_gather_all_ctags { + my $projects = shift; + my $ctags = {}; + + foreach my $p (@$projects) { + foreach my $ct (keys %{$p->{'ctags'}}) { + $ctags->{$ct} += $p->{'ctags'}->{$ct}; + } } - closedir $dh; - $ctags; + + return $ctags; } sub git_populate_project_tagcloud { @@ -2343,33 +2676,49 @@ sub git_populate_project_tagcloud { } my $cloud; + my $matched = $cgi->param('by_tag'); if (eval { require HTML::TagCloud; 1; }) { $cloud = HTML::TagCloud->new; - foreach (sort keys %ctags_lc) { + foreach my $ctag (sort keys %ctags_lc) { # Pad the title with spaces so that the cloud looks # less crammed. - my $title = $ctags_lc{$_}->{topname}; + my $title = esc_html($ctags_lc{$ctag}->{topname}); $title =~ s/ / /g; $title =~ s/^/ /g; $title =~ s/$/ /g; - $cloud->add($title, $home_link."?by_tag=".$_, $ctags_lc{$_}->{count}); + if (defined $matched && $matched eq $ctag) { + $title = qq($title); + } + $cloud->add($title, href(project=>undef, ctag=>$ctag), + $ctags_lc{$ctag}->{count}); } } else { - $cloud = \%ctags_lc; + $cloud = {}; + foreach my $ctag (keys %ctags_lc) { + my $title = esc_html($ctags_lc{$ctag}->{topname}, -nbsp=>1); + if (defined $matched && $matched eq $ctag) { + $title = qq($title); + } + $cloud->{$ctag}{count} = $ctags_lc{$ctag}->{count}; + $cloud->{$ctag}{ctag} = + $cgi->a({-href=>href(project=>undef, ctag=>$ctag)}, $title); + } } - $cloud; + return $cloud; } sub git_show_project_tagcloud { my ($cloud, $count) = @_; - print STDERR ref($cloud)."..\n"; if (ref $cloud eq 'HTML::TagCloud') { return $cloud->html_and_css($count); } else { - my @tags = sort { $cloud->{$a}->{count} <=> $cloud->{$b}->{count} } keys %$cloud; - return '

' . join (', ', map { - "$cloud->{$_}->{topname}" - } splice(@tags, 0, $count)) . '

'; + my @tags = sort { $cloud->{$a}->{'count'} <=> $cloud->{$b}->{'count'} } keys %$cloud; + return + '
' . + join (', ', map { + $cloud->{$_}->{'ctag'} + } splice(@tags, 0, $count)) . + '
'; } } @@ -2409,6 +2758,9 @@ sub git_get_projects_list { follow_skip => 2, # ignore duplicates dangling_symlinks => 0, # ignore dangling symlinks, silently wanted => sub { + # global variables + our $project_maxdepth; + our $projectroot; # skip project-list toplevel, if we get it. return if (m!^[/.]$!); # only directories can be git repositories @@ -2555,6 +2907,44 @@ sub git_get_last_activity { return (undef, undef); } +# Implementation note: when a single remote is wanted, we cannot use 'git +# remote show -n' because that command always work (assuming it's a remote URL +# if it's not defined), and we cannot use 'git remote show' because that would +# try to make a network roundtrip. So the only way to find if that particular +# remote is defined is to walk the list provided by 'git remote -v' and stop if +# and when we find what we want. +sub git_get_remotes_list { + my $wanted = shift; + my %remotes = (); + + open my $fd, '-|' , git_cmd(), 'remote', '-v'; + return unless $fd; + while (my $remote = <$fd>) { + chomp $remote; + $remote =~ s!\t(.*?)\s+\((\w+)\)$!!; + next if $wanted and not $remote eq $wanted; + my ($url, $key) = ($1, $2); + + $remotes{$remote} ||= { 'heads' => () }; + $remotes{$remote}{$key} = $url; + } + close $fd or return; + return wantarray ? %remotes : \%remotes; +} + +# Takes a hash of remotes as first parameter and fills it by adding the +# available remote heads for each of the indicated remotes. +sub fill_remote_heads { + my $remotes = shift; + my @heads = map { "remotes/$_" } keys %$remotes; + my @remoteheads = git_get_heads_list(undef, @heads); + foreach my $remote (keys %$remotes) { + $remotes->{$remote}{'heads'} = [ grep { + $_->{'name'} =~ s!^$remote/!! + } @remoteheads ]; + } +} + sub git_get_references { my $type = shift || ""; my %refs; @@ -2617,8 +3007,10 @@ sub parse_date { $date{'iso-8601'} = sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ", 1900+$year, 1+$mon, $mday, $hour ,$min, $sec; - $tz =~ m/^([+\-][0-9][0-9])([0-9][0-9])$/; - my $local = $epoch + ((int $1 + ($2/60)) * 3600); + my ($tz_sign, $tz_hour, $tz_min) = + ($tz =~ m/^([-+])(\d\d)(\d\d)$/); + $tz_sign = ($tz_sign eq '-' ? -1 : +1); + my $local = $epoch + $tz_sign*((($tz_hour*60) + $tz_min)*60); ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday) = gmtime($local); $date{'hour_local'} = $hour; $date{'minute_local'} = $min; @@ -2953,13 +3345,15 @@ sub parse_from_to_diffinfo { ## parse to array of hashes functions sub git_get_heads_list { - my $limit = shift; + my ($limit, @classes) = @_; + @classes = ('heads') unless @classes; + my @patterns = map { "refs/$_" } @classes; my @headslist; open my $fd, '-|', git_cmd(), 'for-each-ref', ($limit ? '--count='.($limit+1) : ()), '--sort=-committerdate', '--format=%(objectname) %(refname) %(subject)%00%(committer)', - 'refs/heads' + @patterns or return; while (my $line = <$fd>) { my %ref_item; @@ -2970,7 +3364,7 @@ sub git_get_heads_list { my ($committer, $epoch, $tz) = ($committerinfo =~ /^(.*) ([0-9]+) (.*)$/); $ref_item{'fullname'} = $name; - $name =~ s!^refs/heads/!!; + $name =~ s!^refs/(?:head|remote)s/!!; $ref_item{'name'} = $name; $ref_item{'id'} = $hash; @@ -3144,26 +3538,109 @@ sub blob_contenttype { return $type; } +# guess file syntax for syntax highlighting; return undef if no highlighting +# the name of syntax can (in the future) depend on syntax highlighter used +sub guess_file_syntax { + my ($highlight, $mimetype, $file_name) = @_; + return undef unless ($highlight && defined $file_name); + my $basename = basename($file_name, '.in'); + return $highlight_basename{$basename} + if exists $highlight_basename{$basename}; + + $basename =~ /\.([^.]*)$/; + my $ext = $1 or return undef; + return $highlight_ext{$ext} + if exists $highlight_ext{$ext}; + + return undef; +} + +# run highlighter and return FD of its output, +# or return original FD if no highlighting +sub run_highlighter { + my ($fd, $highlight, $syntax) = @_; + return $fd unless ($highlight && defined $syntax); + + close $fd; + open $fd, quote_command(git_cmd(), "cat-file", "blob", $hash)." | ". + quote_command($highlight_bin). + " --replace-tabs=8 --fragment --syntax $syntax |" + or die_error(500, "Couldn't open file or run syntax highlighter"); + return $fd; +} + ## ====================================================================== ## functions printing HTML: header, footer, error page -sub git_header_html { - my $status = shift || "200 OK"; - my $expires = shift; +sub get_page_title { + my $title = to_utf8($site_name); + + return $title unless (defined $project); + $title .= " - " . to_utf8($project); + + return $title unless (defined $action); + $title .= "/$action"; # $action is US-ASCII (7bit ASCII) + + return $title unless (defined $file_name); + $title .= " - " . esc_path($file_name); + if ($action eq "tree" && $file_name !~ m|/$|) { + $title .= "/"; + } + + return $title; +} - my $title = "$site_name"; +sub print_feed_meta { if (defined $project) { - $title .= " - " . to_utf8($project); - if (defined $action) { - $title .= "/$action"; - if (defined $file_name) { - $title .= " - " . esc_path($file_name); - if ($action eq "tree" && $file_name !~ m|/$|) { - $title .= "/"; - } - } + my %href_params = get_feed_info(); + if (!exists $href_params{'-title'}) { + $href_params{'-title'} = 'log'; } + + foreach my $format (qw(RSS Atom)) { + my $type = lc($format); + my %link_attr = ( + '-rel' => 'alternate', + '-title' => esc_attr("$project - $href_params{'-title'} - $format feed"), + '-type' => "application/$type+xml" + ); + + $href_params{'action'} = $type; + $link_attr{'-href'} = href(%href_params); + print "\n"; + + $href_params{'extra_options'} = '--no-merges'; + $link_attr{'-href'} = href(%href_params); + $link_attr{'-title'} .= ' (no merges)'; + print "\n"; + } + + } else { + printf(''."\n", + esc_attr($site_name), href(project=>undef, action=>"project_index")); + printf(''."\n", + esc_attr($site_name), href(project=>undef, action=>"opml")); } +} + +sub git_header_html { + my $status = shift || "200 OK"; + my $expires = shift; + my %opts = @_; + + my $title = get_page_title(); my $content_type; # require explicit support from the UA if we are to send the page as # 'application/xhtml+xml', otherwise send it as plain old 'text/html'. @@ -3177,7 +3654,8 @@ sub git_header_html { $content_type = 'text/html'; } print $cgi->header(-type=>$content_type, -charset => 'utf-8', - -status=> $status, -expires => $expires); + -status=> $status, -expires => $expires) + unless ($opts{'-no_http_header'}); my $mod_perl_version = $ENV{'MOD_PERL'} ? " $ENV{'MOD_PERL'}" : ''; print < @@ -3199,57 +3677,17 @@ EOF # print out each stylesheet that exist, providing backwards capability # for those people who defined $stylesheet in a config file if (defined $stylesheet) { - print ''."\n"; + print ''."\n"; } else { foreach my $stylesheet (@stylesheets) { next unless $stylesheet; - print ''."\n"; - } - } - if (defined $project) { - my %href_params = get_feed_info(); - if (!exists $href_params{'-title'}) { - $href_params{'-title'} = 'log'; - } - - foreach my $format qw(RSS Atom) { - my $type = lc($format); - my %link_attr = ( - '-rel' => 'alternate', - '-title' => "$project - $href_params{'-title'} - $format feed", - '-type' => "application/$type+xml" - ); - - $href_params{'action'} = $type; - $link_attr{'-href'} = href(%href_params); - print "\n"; - - $href_params{'extra_options'} = '--no-merges'; - $link_attr{'-href'} = href(%href_params); - $link_attr{'-title'} .= ' (no merges)'; - print "\n"; + print ''."\n"; } - - } else { - printf(''."\n", - $site_name, href(project=>undef, action=>"project_index")); - printf(''."\n", - $site_name, href(project=>undef, action=>"opml")); } + print_feed_meta() + if ($status eq '200 OK'); if (defined $favicon) { - print qq(\n); + print qq(\n); } print "\n" . @@ -3259,15 +3697,28 @@ EOF insert_file($site_header); } - print "
\n" . - $cgi->a({-href => esc_url($logo_url), - -title => $logo_label}, - qq()); + print "
\n"; + if (defined $logo) { + print $cgi->a({-href => esc_url($logo_url), + -title => $logo_label}, + $cgi->img({-src => esc_url($logo), + -width => 72, -height => 27, + -alt => "git", + -class => "logo"})); + } print $cgi->a({-href => esc_url($home_link)}, $home_link_str) . " / "; if (defined $project) { print $cgi->a({-href => href(action=>"summary")}, esc_html($project)); if (defined $action) { - print " / $action"; + my $action_print = $action ; + if (defined $opts{-action_extra}) { + $action_print = $cgi->a({-href => href(action=>$action)}, + $action); + } + print " / $action_print"; + } + if (defined $opts{-action_extra}) { + print " / $opts{-action_extra}"; } print "\n"; } @@ -3327,7 +3778,7 @@ sub git_footer_html { } $href_params{'-title'} ||= 'log'; - foreach my $format qw(RSS Atom) { + foreach my $format (qw(RSS Atom)) { $href_params{'action'} = lc($format); print $cgi->a({-href => href(%href_params), -title => "$href_params{'-title'} $format feed", @@ -3346,7 +3797,7 @@ sub git_footer_html { print "
\n"; print 'This page took '. ''. - Time::HiRes::tv_interval($t0, [Time::HiRes::gettimeofday()]). + tv_interval($t0, [ gettimeofday() ]). ' seconds '. ' and '. ''. @@ -3360,7 +3811,7 @@ sub git_footer_html { insert_file($site_footer); } - print qq!\n!; + print qq!\n!; if (defined $action && $action eq 'blame_incremental') { print qq!