[Pdx-pm] Instance hash ( keyed array )
Joshua Keroes
jkeroes at eli.net
Tue Oct 17 14:34:21 PDT 2006
On 10/17/06 1:32 PM, "Roderick A. Anderson" <raanders at acm.org> wrote:
> I need to build a, sometimes, quite large hash table ( 40,000 - 470,000
> records ) one record at a time. The method will return the key it
> used to the caller, so that the key can be used to key the rest of the
> data not passed in to the method.
>
> sub add_addr {
>
> my $self = shift;
> my @stuff = @_;
>
> my $recid = $self->inc_recid();
> my $addr = join( $FldDelim, $recid, @stuff );
Not sure why you're putting the recid in both the key and the value - that's
just one more thing that might break. Oh, and it takes up additional storage
space.
> $addr .= $RcdDelim;
>
> $self->{addr_data}->{$recid} = $addr;
>
> return $recid;
> }
>
> Is there a better way to do this?
A few questions first:
Q: Instead of serializing @stuff, why not create a HoA?
Q: Depending on what you're using as a recid, you might be able to use an
AoA instead for some space savings.
Q: Why not use a DB instead?
That said, here's some code that implements this in a way that makes sense
to me; that is, storing an HoA and stringifying the output of each addr on
demand.
__BEGIN__
package Hash::Super;

# Hash::Super - keeps address records as array refs in
# $self->{addr_data}, keyed by record id (recid), and stringifies a
# record only on demand.

# Using Spiffy for class/method generation because it makes for
# nice, short classes
use Spiffy qw/-base/;    # writes new()
use Carp qw/croak/;      # croak() is called below, so import it explicitly

field fld_delim => ":";  # creates get/set method w/ a default value
field rcd_delim => "\n"; # creates get/set method w/ a default value

# addr() - get/set method
#
# Usage:
#
#   $obj->addr( [qw/a b c/] );       # store aref under the next recid
#   $obj->addr( 5 => [qw/a b c/] );  # store aref under recid 5
#   $obj->addr( 5 );                 # get addr stored under recid 5
#   $obj->addr;                      # get the last auto-numbered addr
#
# Returns the array ref that was stored or found; undef when there is
# nothing stored under the requested recid (or nothing was added yet).
sub addr {
    my $self = shift;

    if ( ref $_[0] eq "ARRAY" ) {
        # User wants us to get next recid.
        # inc_recid() legitimately returns 0 for the first record, so test
        # definedness rather than truth.
        my $aref  = shift;
        my $recid = $self->inc_recid;
        croak "Couldn't get new recid" unless defined $recid;
        return $self->{addr_data}{$recid} = $aref;
    }
    elsif ( ref $_[1] eq "ARRAY" ) {
        # User passed a recid and aref
        my ( $recid, $aref ) = @_;
        return $self->{addr_data}{$recid} = $aref;
    }
    elsif ( defined $_[0] ) {
        # User passed only a recid. Return the addr stored there
        my $recid = shift;
        return $self->{addr_data}{$recid};
    }

    # No args. Return last addr.
    # recid() reports the id that will be handed out NEXT, so the most
    # recently auto-numbered record lives one slot below it.
    my $next_recid = $self->recid;
    return $next_recid > 0 ? $self->{addr_data}{ $next_recid - 1 } : undef;
}

# to_str() - join the fields of one addr into a delimited record string
#
# Usage:
#
#   my $str = $obj->to_str( ['a', 'b', 'c'] )
#
# Fields are joined with fld_delim and the result is terminated with
# rcd_delim. Croaks unless the argument is an array ref.
sub to_str {
    my $self = shift;
    my $aref = shift;

    croak "to_str needs an array ref arg" unless ref $aref eq "ARRAY";

    return join( $self->fld_delim, @{$aref} ) . $self->rcd_delim;
}

{    # Closure
    # One counter, private to the two subs below. It is a single lexical,
    # so every Hash::Super object draws its recids from the same sequence.
    my $singleton_recid = 0;

    # get only: returns the current counter value, then advances it
    # (the first call returns 0)
    sub inc_recid { return $singleton_recid++ }

    # get only: returns the counter without advancing it, i.e. the recid
    # that the next inc_recid() call will hand out
    sub recid { return $singleton_recid }
}

1;
__END__
# Example code:
# Stores two addrs (overwriting the first one by recid), then renders
# the second one as a delimited string.
use strict;
use warnings;

use Hash::Super;

my $superhash = Hash::Super->new;

$superhash->addr( [qw/a b whoops/] );    # Store incorrect first addr (recid 0)
$superhash->addr( 0 => [qw/a b c/] );    # Correct first addr (overwrite recid 0)
$superhash->addr( [qw/d e f/] );         # Store second addr (recid 1)

# $superhash = {
#     fld_delim => ":",
#     rcd_delim => "\n",
#     addr_data => {
#         0 => ['a', 'b', 'c'],
#         1 => ['d', 'e', 'f'],
#     },
# }
# NOTE(review): whether the two delimiter defaults are physically present
# as hash keys depends on how Spiffy's field() stores defaults - confirm.

# Fetch the addr stored under recid 1 and stringify it
my $str = $superhash->to_str( $superhash->addr( 1 ) );

# $str = "d:e:f\n"
__END__
-Joshua
More information about the Pdx-pm-list
mailing list