Subject: | directory hashing for CGI::Session::Driver::file (patch included) |
I've got a site that is generating hundreds of thousands of sessions a
day and due to customer requirements, the sessions have a long expiry
value. As a result, having our sessions using the file driver, causes
poor FS performance over time.
I'll leave discussion of the performance of various file systems to
other forums.
So, I modified the file driver, and created hashfile.pm, attached. It is
based on your original file driver, and implements all the same
functionality, while providing macros like %1, %2, %3, etc to allow for
hashing schemes like /tmp/1/12/123/cgisess_1234567890.
I've attached the module; if you find it beneficial, please feel free to
include it in the core distribution.
The only caveat that I can think of is that this module will have
problems if a directory name has % in it followed by digits.
Subject: | hashfile.pm |
package CGI::Session::Driver::hashfile;
use strict;
use Carp;
use File::Spec;
use Fcntl qw( :DEFAULT :flock :mode );
use CGI::Session::Driver;
use vars qw( $FileName $NoFlock $UMask $NO_FOLLOW );
BEGIN {
no strict 'refs';
}
@CGI::Session::Driver::hashfile::ISA = ( "CGI::Session::Driver" );
$CGI::Session::Driver::hashfile::VERSION = "0.1";
$FileName = "cgisess_%s";
$NoFlock = 0;
$UMask = 0660;
$NO_FOLLOW = eval { O_NOFOLLOW } || 0;
sub init {
my $self = shift;
$self->{Directory} ||= File::Spec->tmpdir();
$self->{NoFlock} = $NoFlock unless exists $self->{NoFlock};
$self->{UMask} = $UMask unless exists $self->{UMask};
return 1;
}
sub _file {
my ($self,$sid) = @_;
my $directory = $self->{Directory};
$directory =~ s/\%(\d+)/substr($sid,0,$1)/eg;
unless ( -d $directory ) {
require File::Path;
unless ( File::Path::mkpath($directory) ) {
return $self->set_error( "init(): couldn't create directory path: $!" );
}
}
return File::Spec->catfile($directory, sprintf( $FileName, $sid ));
}
sub retrieve {
my $self = shift;
my ($sid) = @_;
my $path = $self->_file($sid);
return 0 unless -e $path;
# make certain our filehandle goes away when we fall out of scope
local *FH;
if (-l $path) {
unlink($path) or
return $self->set_error("retrieve(): '$path' appears to be a symlink and I couldn't remove it: $!");
return 0; # we deleted this so we have no hope of getting back anything
}
sysopen(FH, $path, O_RDONLY | $NO_FOLLOW ) || return $self->set_error( "retrieve(): couldn't open '$path': $!" );
$self->{NoFlock} || flock(FH, LOCK_SH) or return $self->set_error( "retrieve(): couldn't lock '$path': $!" );
my $rv = "";
while ( <FH> ) {
$rv .= $_;
}
close(FH);
return $rv;
}
sub store {
my $self = shift;
my ($sid, $datastr) = @_;
my $path = $self->_file($sid);
# make certain our filehandle goes away when we fall out of scope
local *FH;
my $mode = O_WRONLY|$NO_FOLLOW;
# kill symlinks when we spot them
if (-l $path) {
unlink($path) or
return $self->set_error("store(): '$path' appears to be a symlink and I couldn't remove it: $!");
}
$mode = O_RDWR|O_CREAT|O_EXCL unless -e $path;
sysopen(FH, $path, $mode, $self->{UMask}) or return $self->set_error( "store(): couldn't open '$path': $!" );
# sanity check to make certain we're still ok
if (-l $path) {
return $self->set_error("store(): '$path' is a symlink, check for malicious processes");
}
# prevent race condition (RT#17949)
$self->{NoFlock} || flock(FH, LOCK_EX) or return $self->set_error( "store(): couldn't lock '$path': $!" );
truncate(FH, 0) or return $self->set_error( "store(): couldn't truncate '$path': $!" );
print FH $datastr;
close(FH) or return $self->set_error( "store(): couldn't close '$path': $!" );
return 1;
}
sub remove {
my $self = shift;
my ($sid) = @_;
my $directory = $self->{Directory};
$directory =~ s/\%(\d+)/substr($sid,0,$1)/eg;
my $file = sprintf( $FileName, $sid );
my $path = File::Spec->catfile($directory, $file);
unlink($path) or return $self->set_error( "remove(): couldn't unlink '$path': $!" );
return 1;
}
sub traverse {
# test script used strict; it seemed to override no strict refs in the BEGIN block, so I'm leaving this here
no strict 'refs';
my $self = shift;
my ($coderef, $dir, $dparts) = @_;
unless ( $coderef && ref($coderef) && (ref $coderef eq 'CODE') ) {
croak "traverse(): usage error";
}
if($dir)
{
if(!$dparts)
{
opendir( $$dir, $dir )
or return $self->set_error( "traverse(): couldn't open $dir, " . $! );
my $filename_pattern = $FileName;
$filename_pattern =~ s/\./\\./g;
$filename_pattern =~ s/\%s/(\.\+)/g;
while ( my $filename = readdir($$dir) )
{
next if $filename =~ m/^\.\.?$/;
my $full_path = File::Spec->catfile($dir, $filename);
my $mode = (stat($full_path))[2]
or return $self->set_error( "traverse(): stat failed for $full_path: " . $! );
next if S_ISDIR($mode);
if ( $filename =~ /^$filename_pattern$/ )
{
$coderef->($1);
}
}
closedir( $$dir );
}
else
{
opendir( $$dparts, $dir )
or return $self->set_error( "traverse(): couldn't open $dir, " . $! );
my $pattern = '';
my $next_dparts = '';
if($dparts =~ /^\%(\d+)\/(.+)$/)
{
$pattern = '.' x $1;
$next_dparts = $2;
}
elsif($dparts =~/^\%(\d+)\/?$/)
{
$pattern = '.' x $1;
}
while ( my $filename = readdir($$dparts) )
{
next if $filename =~ m/^\.\.?$/;
my $full_path = File::Spec->catfile($dir, $filename);
my $mode = (stat($full_path))[2]
or return $self->set_error( "traverse(): stat failed for $full_path: " . $! );
next if !S_ISDIR($mode);
if ( $filename =~ /^$pattern$/ )
{
$self->traverse($coderef, $full_path, $next_dparts);
}
}
closedir( $$dparts );
}
}
else
{
if( $self->{Directory} =~ /\%/ )
{
my $d = $self->{Directory};
my $d2 = $self->{Directory};
$d =~ s/^(.+?)\/\%.+$/$1/;
$d2 =~ s/^.+?\/(\%.+)$/$1/;
$self->traverse($coderef, $d, $d2);
}
else
{
$self->traverse($coderef, $self->{Directory}, '');
}
}
return 1;
}
sub DESTROY {
my $self = shift;
}
1;
__END__;
=pod
=head1 NAME
CGI::Session::Driver::hashfile - CGI::Session driver implementing file storage using hashed directories. Useful for sites with a large number of simultaneous sessions
=head1 SYNOPSIS
$s = new CGI::Session();
$s = new CGI::Session("driver:hashfile", $sid);
$s = new CGI::Session("driver:hashfile", $sid, {Directory=>'/tmp/%1/%2/%3'});
=head1 DESCRIPTION
I<hashfile> - driver will store session data in plain files, where each session will be stored in a separate
file, optionally stored in a hashed directory structure.
Naming conventions of session files are defined by C<$CGI::Session::Driver::file::FileName> global variable.
Default value of this variable is I<cgisess_%s>, where %s will be replaced with respective session ID. Should
you wish to set your own FileName template, do so before requesting for session object:
$CGI::Session::Driver::file::FileName = "%s.dat";
$s = new CGI::Session();
=head2 DRIVER ARGUMENTS
If you wish to specify a session directory, use the B<Directory> option, which denotes location of the directory
where session ids are to be kept. If B<Directory> is not set, defaults to whatever File::Spec->tmpdir() returns.
So all the three lines in the SYNOPSIS section of this manual produce the same result on a UNIX machine.
If specified B<Directory> does not exist, all necessary directory hierarchy will be created.
Special codes %1 through %nnn are recognized, and replaced with the specified number of digits from the beginning
of the session id. For example:
$session_id = '972gtf982u30f9';
{ Directory => '/tmp/%1/%2/%3' } # yields /tmp/9/97/972/cgisess_972gtf982u30f9
{ Directory => '/tmp/%1/%3/%5' } # yields /tmp/9/972/972gt/cgisess_972gtf982u30f9
By default, sessions are created with a umask of 0660. If you wish to change the umask for a session, pass
a B<UMask> option with an octal representation of the umask you would like for said session.
=head1 NOTES
If your OS doesn't support flock, you should understand the risks of going without locking the session files. Since
sessions tend to be used in environments where race conditions may occur due to concurrent access of files by
different processes, locking tends to be seen as a good and very necessary thing. If you still want to use this
driver but don't want flock, set C<$CGI::Session::Driver::file::NoFlock> to 1 or pass C<< NoFlock => 1 >> and this
driver will operate without locks.
=head1 LICENSING
For support and licensing see L<CGI::Session|CGI::Session>
=cut