[Pdx-pm] Instance hash ( keyed array )

Joshua Keroes jkeroes at eli.net
Tue Oct 17 14:34:21 PDT 2006


On 10/17/06 1:32 PM, "Roderick A. Anderson" <raanders at acm.org> wrote:
> I need to build a hash table that is sometimes quite large ( 40,000 -
> 470,000 records ), one record at a time.  The method will return the key
> it used to the caller, so the caller can use it to key the rest of the
> data not passed in to the method.
> 
> sub add_addr {
> 
>      my $self = shift;
>      my @stuff = @_;
> 
>      my $recid = $self->inc_recid();
>      my $addr = join( $FldDelim, $recid, @stuff );

Not sure why you're putting the recid in both the key and the value - that's
just one more thing that might break. Oh, and it takes up additional storage
space.

>      $addr .= $RcdDelim;
> 
>      $self->{addr_data}->{$recid} = $addr;
> 
>      return $recid;
> }
> 
> Is there a better way to do this?

A few questions first:

Q: Instead of serializing @stuff, why not create a HoA?

Q: Depending on what you're using as a recid, you might be able to use an
AoA instead for some space savings.

Q: Why not use a DB instead?

That said, here's some code that implements this in a way that makes sense
to me; that is, storing an HoA and stringifying the output of each addr on
demand.


__BEGIN__

package Hash::Super;

# Enable strictures explicitly instead of relying on a helper module
# to switch them on.
use strict;
use warnings;

# addr() and to_str() report errors with croak(), so Carp must be
# imported into this package.
use Carp;

# Using Spiffy for class/method generation because it makes for
# nice, short classes
use Spiffy qw/-base/;       # writes new()

field fld_delim => ":";     # get/set method; default field separator
field rcd_delim => "\n";    # get/set method; default record terminator


# addr() - get/set method for address records
#
# Records are array refs kept in $self->{addr_data}, keyed by recid.
#
# Usage:
#
#   $obj->addr( [qw/a b c/] );       # store aref under the next recid
#   $obj->addr( 5 => [qw/a b c/] );  # store aref under recid 5
#   $obj->addr( 5 );                 # get addr stored under recid 5
#   $obj->addr;                      # get last auto-numbered addr
#
# Returns the array ref that was stored or fetched, or undef when
# nothing is stored under the requested recid. Croaks if no new recid
# could be obtained.
#
# NOTE(review): the recid counter lives in a file-level closure and is
# therefore shared by every object of this class, so "last" means the
# last recid handed out to any object - confirm that is intended.

sub addr {
    my $self = shift;

    if ( ref $_[0] eq "ARRAY" ) {

        # Only data was passed: allocate the next recid.
        # inc_recid() returns 0 for the very first record, so the
        # check must be for definedness, not truth.

        my $aref  = shift;
        my $recid = $self->inc_recid;
        if ( !defined $recid ) {
            require Carp;
            Carp::croak("Couldn't get new recid");
        }
        return $self->{addr_data}{$recid} = $aref;
    }

    if ( ref $_[1] eq "ARRAY" ) {

        # A recid and an aref were passed: store under that recid.

        my ( $recid, $aref ) = @_;
        return $self->{addr_data}{$recid} = $aref;
    }

    if ( defined $_[0] ) {

        # A bare recid was passed: fetch that record.

        my $recid = shift;
        return $self->{addr_data}{$recid};
    }

    # No args: return the last auto-numbered addr. recid() reports the
    # id that will be handed out *next* (inc_recid post-increments), so
    # the last one issued is one less. A counter of 0 means no record
    # has been auto-numbered yet.

    my $next_recid = $self->recid;
    my $last_addr  = $next_recid
                   ? $self->{addr_data}{ $next_recid - 1 }
                   : undef;
    return $last_addr;
}


# to_str() - serialize one record to a delimited string
#
# Usage:
#
#   my $str = $obj->to_str( ['a', 'b', 'c'] );
#
# Joins the elements of the given array ref with fld_delim and appends
# rcd_delim, e.g. "a:b:c\n" when fld_delim is ":" and rcd_delim is "\n".
# Croaks unless the argument is an array ref.

sub to_str {
    my $self = shift;
    my $aref = shift;

    if ( ref $aref ne "ARRAY" ) {
        require Carp;
        Carp::croak("to_str needs an array ref arg");
    }

    # Serialize the record that was passed in; nothing is looked up in
    # $self->{addr_data} here.
    return join( $self->fld_delim, @{$aref} ) . $self->rcd_delim;
}

{
    # The counter is a lexical private to this bare block: only the
    # two subs below can reach it, and all callers share the one value.

    my $next_recid = 0;

    # Return the current counter value, then advance the counter by one.
    sub inc_recid {
        my $issued = $next_recid;
        $next_recid += 1;
        return $issued;
    }

    # Return the current counter value without changing it.
    sub recid {
        return $next_recid;
    }
}

1;

__END__

# Example code:

use Hash::Super;

# Load two records. The first is entered with a mistake and is then
# overwritten by addressing it explicitly.
my $superhash = Hash::Super->new;
$superhash->addr( [ 'a', 'b', 'whoops' ] );  # Store incorrect first addr
$superhash->addr( 0, [ 'a', 'b', 'c' ] );    # Correct first addr
$superhash->addr( [ 'd', 'e', 'f' ] );       # Store second addr

# Intended state of $superhash at this point:
#
# {
#    fld_delim => ":",
#    rcd_delim => "\n",
#    addr_data => {
#       0 => ['a', 'b', 'c'],
#       1 => ['d', 'e', 'f'],
#    },
# }

my $second_addr = $superhash->addr(1);
my $str         = $superhash->to_str($second_addr);

# Intended result: $str eq "d:e:f\n"

__END__


-Joshua









More information about the Pdx-pm-list mailing list