Skip Menu |

This queue is for tickets about the MKDoc-Apache_Cache CPAN distribution.

Report information
The Basics
Id: 7899
Status: new
Priority: 0/
Queue: MKDoc-Apache_Cache

People
Owner: Nobody in particular
Requestors: BPOSTLE [...] cpan.org
Cc:
AdminCc:

Bug Information
Severity: (no value)
Broken in: (no value)
Fixed in: (no value)



Subject: GET requests shouldn't be cached
MKDoc::Apache_Cache passes-on HTTP requests and caches them. Currently both GET and HEAD requests are cached, however this assumes that the back-end application is capable of producing accurate HTTP headers for Content-Length and Etag when given a GET request. Since MKDoc::Apache_Cache generates these headers, it should translate all HEAD requests into GET and only ever cache the full content. When answering a HEAD request it should return a GET without the content.
This is an unfinished but broken patch that attempts to fix this bug.
diff -Nbaur -x CVS /home/bruno/src/cvs/MKDoc-Apache_Cache/lib/MKDoc/Apache_Cache.pm MKDoc-Apache_Cache/lib/MKDoc/Apache_Cache.pm --- /home/bruno/src/cvs/MKDoc-Apache_Cache/lib/MKDoc/Apache_Cache.pm 2004-09-23 13:02:34.000000000 +0100 +++ MKDoc-Apache_Cache/lib/MKDoc/Apache_Cache.pm 2004-10-06 13:10:05.000000000 +0100 @@ -49,28 +49,229 @@ use MKDoc::Control_List; use Cache::FileCache; use File::Spec; -use vars qw /$Request/; use CGI; use Compress::Zlib; use Digest::MD5; our $VERSION = '0.7'; +# all your variables are belong to our +our $Request; +our $Response; +our $Control; + sub handler ($$) { my ($class, $r) = (@_ >= 2) ? (shift, shift) : (__PACKAGE__, shift); - # Makes $MKDoc::Apache_Cache::Request available local $Request = $r; + local $Response = undef; + local $Control = undef; + + if ( is_request_cacheable() ) + { + get_cached_request_data(); + + $Response || do { + get_fresh_request_data(); + is_request_200() && set_cached_request_data(); + }; + } + else + { + get_fresh_request_data(); + } + + serve_request(); +} + + +sub is_request_cacheable +{ + @_ = control_list_process(); + + my $timeout = shift || return; + $timeout = do { + no warnings; + 0 + _expiration_time ($timeout); + }; + + return $timeout > 0; +} + - my @args = do { +sub is_request_200 +{ + my ($res) = $Response =~ /^Status:(.*)/; + return $res =~ / 200 /; +} + + +sub get_cached_request_data +{ + my $cache = _cache_object(); + my $id = object_identifier(); + $Response = $cache->get ($id); +} + + +sub set_cached_request_data +{ + my $cache = _cache_object(); + my $id = object_identifier(); + my $time = object_timeout(); + + return $cache->set ($id, $Response, $time); +} + + +sub get_fresh_request_data +{ + my $fake_r = MKDoc::Apache_Cache::Capture->new ($Request); + my $ret = Apache::RegistryNG::handler ($fake_r); + my $data = $fake_r->data(); + + warn "- $data -"; + warn "- $ret -"; + + $data =~ s/^Status:.*//; + return "Status: $ret\r\n$data"; +} + + +sub serve_request +{ + my $data = shift; + my ($ret) = $Response =~ /^Status: (.*)\r\n/; + $Response =~ s/^Status:.*//; + + $Request->print ($Response); + return $ret; +} + + +sub object_timeout +{ + @_ = control_list_process(); + my $timeout = shift || return; + return do { + no warnings; + 0 + _expiration_time ($timeout); + }; +} + + +sub object_identifier +{ + @_ = control_list_process(); + my $timeout = shift || return _default_identifier(); + my $identifier = shift || return _default_identifier(); + return $identifier; +} + + +sub control_list_process +{ + $Control ||= do { no warnings; no strict; - ( $class->_control_list_process() ); + my @args = ( _control_list_process() ); + \@args; }; - my ($ret, $data) = $class->_do_cached (@args); + @{$Control}; +} + + +sub _control_list_process +{ + my $key = 'MKDoc_Apache_Cache_CONFIG'; + my $file = Apache->request->dir_config ($key) || $ENV{$key} || '/etc/mkdoc-apache-cache.conf'; + -e $file && -f $file || do { + warn "Cannot stat $file - skipping"; + return (); + }; + + my $ctrl = new MKDoc::Control_List ( file => $file ); + return $ctrl->process(); +} + + +sub _default_identifier +{ + my $class = shift; + CGI->new()->self_url(); +} + + +sub _cache_object +{ + my %args = (); + + _cache_object_option ('namespace', \%args); + _cache_object_option ('default_expires_in', \%args); + _cache_object_option ('auto_purge_interval', \%args); + _cache_object_option ('auto_purge_on_set', \%args); + _cache_object_option ('auto_purge_on_get', \%args); + _cache_object_option ('cache_root', \%args); + _cache_object_option ('cache_depth', \%args); + _cache_object_option ('directory_umask', \%args); + + return new Cache::FileCache ( \%args ); +} + + +sub _cache_object_option +{ + my $opt = shift; + my $args = shift; + my $key = 'MKDoc_Apache_Cache_' . uc ($opt); + my $val = Apache->request->dir_config ($key) || $ENV{$key}; + $key eq 'MKDoc_Apache_Cache_CACHE_ROOT' and do { $val ||= File::Spec->tmpdir() }; + + defined $val and do { $args->{$opt} = $val }; +} + + +# borrowed / modded from Cache::BaseCache +# -------------------------------------------------------------------------------- +our $EXPIRES_NOW = 'now'; +our $EXPIRES_NEVER = 'never'; +our %Expiration_Units = ( map(($_, 1), qw(s second seconds sec)), + map(($_, 60), qw(m minute minutes min)), + map(($_, 60*60), qw(h hour hours)), + map(($_, 60*60*24), qw(d day days)), + map(($_, 60*60*24*7), qw(w week weeks)), + map(($_, 60*60*24*30), qw(M month months)), + map(($_, 60*60*24*365), qw(y year years)) ); +sub _expiration_time +{ + my ($p_expires_in) = @_; + uc ($p_expires_in) eq uc ($EXPIRES_NOW) and return 0; + uc ($p_expires_in) eq uc ($EXPIRES_NEVER) and return; + $p_expires_in =~ /^\s*([+-]?(?:\d+|\d*\.\d*))\s*$/ and return $p_expires_in; + $p_expires_in =~ /^\s*([+-]?(?:\d+|\d*\.\d*))\s*(\w*)\s*$/ and + exists $Expiration_Units{$2} and + return $Expiration_Units{$2} * $1; + + return 0; +} +# -------------------------------------------------------------------------------- + + +1; + + +__END__ + + + +sub old_handler +{ + # Makes $MKDoc::Apache_Cache::Request available + local $Request = $r; + # bug: # 200 at /usr/local/lib/perl5/site_perl/5.8.0/MKDoc/Apache_Cache.pm line 66. @@ -99,6 +302,9 @@ $ENV{REQUEST_METHOD} =~ /HEAD/i and do { $data =~ s/\r?\ncontent-length\:.*//i; + $data =~ s/\r?\ncontent-encoding\:.*//i; + $data =~ s/\r?\nvary\:.*//i; + $data =~ s/\r?\netag\:.*//i; }; $r->print ($data); @@ -147,58 +353,6 @@ } -sub _default_identifier -{ - my $class = shift; - return "$ENV{REQUEST_METHOD}:" . CGI->new()->self_url(); -} - - -sub _control_list_process -{ - my $class = shift; - my $key = 'MKDoc_Apache_Cache_CONFIG'; - my $file = Apache->request->dir_config ($key) || $ENV{$key} || '/etc/mkdoc-apache-cache.conf'; - -e $file && -f $file || do { - warn "Cannot stat $file - skipping"; - return (); - }; - - my $ctrl = new MKDoc::Control_List ( file => $file ); - return $ctrl->process(); -} - - -sub _cache_object -{ - my $class = shift; - my %args = (); - - $class->_cache_object_option ('namespace', \%args); - $class->_cache_object_option ('default_expires_in', \%args); - $class->_cache_object_option ('auto_purge_interval', \%args); - $class->_cache_object_option ('auto_purge_on_set', \%args); - $class->_cache_object_option ('auto_purge_on_get', \%args); - $class->_cache_object_option ('cache_root', \%args); - $class->_cache_object_option ('cache_depth', \%args); - $class->_cache_object_option ('directory_umask', \%args); - - return new Cache::FileCache ( \%args ); -} - - -sub _cache_object_option -{ - my $self = shift; - my $opt = shift; - my $args = shift; - my $key = 'MKDoc_Apache_Cache_' . uc ($opt); - my $val = Apache->request->dir_config ($key) || $ENV{$key}; - $key eq 'MKDoc_Apache_Cache_CACHE_ROOT' and do { $val ||= File::Spec->tmpdir() }; - - defined $val and do { $args->{$opt} = $val }; -} - # borrowed from http://www.mnot.net/cgi_buffer/ # -------------------------------------------------------------------------------- @@ -217,6 +371,13 @@ { my $buf = shift; my ($headers, $body) = split /\r?\n\r?\n/, $buf, 2; + + # we're going to re-write those... + $headers =~ s/\r?\ncontent-length\:.*//i; + $headers =~ s/\r?\ncontent-encoding\:.*//i; + $headers =~ s/\r?\nvary\:.*//i; + $headers =~ s/\r?\netag\:.*//i; + my @o = (); # Figure out some kind of content_type @@ -227,7 +388,6 @@ # Vary: Accept-Encoding is here to tell proxies to keep a separate # cache for every different Accept-Encoding that is being sent. - $_ = lc( $ENV{'HTTP_ACCEPT_ENCODING'} ); $content_type !~ /zip/ and $content_type =~ /(text|xml)/ and do { $body = Compress::Zlib::memGzip ($body); push @o, "Content-Encoding: gzip"; @@ -265,30 +425,6 @@ # -------------------------------------------------------------------------------- -# borrowed / modded from Cache::BaseCache -# -------------------------------------------------------------------------------- -our $EXPIRES_NOW = 'now'; -our $EXPIRES_NEVER = 'never'; -our %Expiration_Units = ( map(($_, 1), qw(s second seconds sec)), - map(($_, 60), qw(m minute minutes min)), - map(($_, 60*60), qw(h hour hours)), - map(($_, 60*60*24), qw(d day days)), - map(($_, 60*60*24*7), qw(w week weeks)), - map(($_, 60*60*24*30), qw(M month months)), - map(($_, 60*60*24*365), qw(y year years)) ); -sub _expiration_time -{ - my ($p_expires_in) = @_; - uc ($p_expires_in) eq uc ($EXPIRES_NOW) and return 0; - uc ($p_expires_in) eq uc ($EXPIRES_NEVER) and return; - $p_expires_in =~ /^\s*([+-]?(?:\d+|\d*\.\d*))\s*$/ and return $p_expires_in; - $p_expires_in =~ /^\s*([+-]?(?:\d+|\d*\.\d*))\s*(\w*)\s*$/ and - exists $Expiration_Units{$2} and - return $Expiration_Units{$2} * $1; - - return 0; -} -# -------------------------------------------------------------------------------- 1;