[Omaha.pm] Notes dump: pack, unpack, vec, and 2562 long bit vectors

Jay Hannah jhannah at omnihotels.com
Wed Apr 20 15:08:15 PDT 2005


For the record...

Here's a dump of notes/code from a day a couple of weeks back that I spent wrestling with pack, unpack, vec, etc...

I lost. Badly. I also stumped people on EFnet #perl. -grin-

j

 
--------------

I have around 61M on/off switches (bits) I need to store in a database and do really fast lookups on.

There are 7 bits per date (per other stuff), so we were thinking about having 1 row in the database for every calendar year (for each other stuff). So, stringing a whole year together (max 366 days) gives a string of 7 x 366 = 2562 bits.

Storing those 2562 bits as a long string of ASCII Ys and Ns or 1s and 0s would be easy. As would shrinking the column a bit by storing each day in hex (pretend there were 8 bits per date, then store "FF" in the database instead of "11111111"). 

For some reason, I'm thinking that storing the 2562 bits in binary in the database should be faster still, but so far my various benchmarks using pack, unpack, and vec are slightly slower than storing hex...

------

Benchmark: timing 1 iterations of byte, byte_onevec, char2562, char732...
      byte: 30 wallclock secs (11.82 usr +  7.92 sys = 19.74 CPU) @  0.05/s (n=1)
            (warning: too few iterations for a reliable count)
byte_onevec: 26 wallclock secs ( 8.35 usr +  7.02 sys = 15.37 CPU) @  0.07/s (n=1)
            (warning: too few iterations for a reliable count)
  char2562: 34 wallclock secs (17.28 usr +  8.23 sys = 25.51 CPU) @  0.04/s (n=1)
            (warning: too few iterations for a reliable count)
   char732: 13 wallclock secs ( 8.21 usr +  2.03 sys = 10.24 CPU) @  0.10/s (n=1)
            (warning: too few iterations for a reliable count)


---------

./insert.pl
#!/usr/bin/perl

use Benchmark;

#use DBI qw(:sql_types);
#use DBD::Informix qw(:ix_types);
use Omni::DB;
use Bit::Vector;

# Connect to the test database through Omni::DB (already loaded at the top of the script).
my $dbh = Omni::DB::connect_test_with_pw();

# Candidate 1: one ASCII "0"/"1" character per bit, 7 bits per day, 366 days.
# Day index 300 gets a distinctive pattern so a lookup can be verified.
my $val1 = "1010101" x 366;
substr $val1, 7 * 300, 7, "0011100";

# Candidate 2: two hex digits per day.  0x38 is 00111000, whose first 7 bits
# (read high bit first, as unpack "B7" does) are "0011100".
my $val2 = "FF" x 366;
substr $val2, 2 * 300, 2, "38";

# Candidate 3: the same 2562 bits as candidate 1, packed 8 to a byte
# ("b" = ascending bit order within each byte).
my $val3 = pack("b2562", $val1);

#my $strsql = "insert into jay (uni, char2562, char732, byte) values (?, ?, ?, ?)";
#my $sth = $dbh->prepare($strsql);
#$sth->execute(1, $val1, $val2, $val3);
#$dbh->commit;


# Time fetching the row and extracting day 300 from each representation.
# Each snippet dies if the extracted bits are not the expected "0011100".
timethese(10, {
   'char2562' => '@row = getrow("char2562"); die unless ((substr $row[0], 7 * 300, 7)                           eq "0011100")',
   'char732'  => '@row = getrow("char732");  die unless ((unpack("B7",pack("H2",(substr $row[0], 2 * 300, 2)))) eq "0011100")',
   'byte'     => '@row = getrow("byte");     die unless ((substr((unpack("b2562", $row[0])),  7 * 300, 7))      eq "0011100")'
});

# Fetch the first row of "select $what from jay where uni = 1".
#
#   $what - column name (or column list); it is interpolated into the SQL text
#           as-is, because an identifier cannot be passed as a placeholder.
#
# Returns the fetched row as a list.
sub getrow {
   my ($what) = @_;
   my $strsql = "select $what from jay where uni = 1";
   # selectrow_array prepares, executes, fetches one row and finishes the
   # statement, so no half-read statement handle is left active per call.
   return $dbh->selectrow_array($strsql);
}

#print "@row\n";

# The benchmark above has finished by this point; release the database connection.
$dbh->disconnect;


./j.pl
use Test::More tests => 13;

# Round-trip tests for pack/unpack with the "b" template (ascending bit order
# within each byte), followed by experiments with vec() as an lvalue.
# The packed data deliberately does not live in a variable called $b, because
# $b is one of sort's special variables.

# One day: 7 Y/N flags become 7 bits.
my $day = "YNNYYNY";
$day =~ tr/YN/10/;
my $day_bits = pack "b7", $day;
is((unpack "b7", $day_bits), "1001101", "7 flags survive a pack/unpack b7 round trip");

# One year at 8 flags per day (366 * 8 = 2928 bits).  Days 199 and 201 are
# overwritten with distinctive patterns so neighbouring days can be told apart.
my $yn = "YNNYYNYY" x 366;
substr $yn, 199 * 8, 8, "00001000";
substr $yn, 201 * 8, 8, "00010100";
$yn =~ tr/YN/10/;
my $year_bits = pack "b2928", $yn;
my $unpacked  = unpack "b2928", $year_bits;
is(substr($unpacked, 198 * 8, 8), "10011011", "day 198 keeps the default pattern");
is(substr($unpacked, 199 * 8, 8), "00001000", "day 199 holds its custom pattern");
is(substr($unpacked, 200 * 8, 8), "10011011", "day 200 keeps the default pattern");
is(substr($unpacked, 201 * 8, 8), "00010100", "day 201 holds its custom pattern");
is(substr($unpacked, 202 * 8, 8), "10011011", "day 202 keeps the default pattern");
#is((unpack "b2928", (vec $year_bits, (8 * 8 * 199), 8)), "10011011",   "yay");
is($unpacked, $yn, "whole year survives the round trip");


# vec() as an lvalue on a single zeroed byte.  Element 0 always starts at the
# lowest bit of the byte, which is the leftmost character of unpack "b7".
my $byte = pack "b7", "0" x 8;
vec($byte, 0, 1) = 0;
is((unpack "b7", $byte), "0000000", "1-bit element 0 cleared");
vec($byte, 0, 1) = 1;
is((unpack "b7", $byte), "1000000", "1-bit element 0 set");

vec($byte, 0, 2) = 2;
is((unpack "b7", $byte), "0100000", "2-bit element 0 = 2 sets only bit 1");
vec($byte, 0, 2) = 3;
is((unpack "b7", $byte), "1100000", "2-bit element 0 = 3 sets bits 0 and 1");
# 5 does not fit in 2 bits; only its low two bits (binary 01) are stored.
vec($byte, 0, 2) = 5;
is((unpack "b7", $byte), "1000000", "2-bit element 0 = 5 is truncated to 1");
vec($byte, 0, 4) = 5;
is((unpack "b7", $byte), "1010000", "4-bit element 0 = 5 sets bits 0 and 2");


./load_jay.pl
#!/usr/bin/perl
use strict;
use warnings;
use Omni::DB;

# Load 10000 identical rows into each of the three candidate tables so the
# read benchmarks have something to chew on.

my $dbh = Omni::DB::connect_test_with_pw();

# One ASCII "0"/"1" per bit, 7 bits per day, with day index 300 made distinctive.
my $val1 = "1010101" x 366;
substr $val1, 7 * 300, 7, "0011100";

# Two hex digits per day, with day index 300 made distinctive.
my $val2 = "FF" x 366;
substr $val2, 2 * 300, 2, "38";

# The bits of $val1 packed 8 to a byte ("b" = ascending bit order).
my $val3 = pack("b2562", $val1);

# Each table is paired explicitly with the value it stores, instead of
# relying on a parallel index into a separate array.
my @loads = (
  [ jay_char2562 => $val1 ],
  [ jay_char732  => $val2 ],
  [ jay_byte     => $val3 ],
);

my $all = 0;   # running row count across all tables, used for progress output
foreach my $load (@loads) {
  my ($table, $value) = @$load;
  # The statement text only depends on the table, so prepare it once per
  # table rather than once per inserted row.
  my $sth = $dbh->prepare("insert into $table (uni,string) values (?,?)");
  for (1..10000) {
    $sth->execute(0, $value);
    $dbh->commit;
    print "$table $all\n" if ($all % 1000 == 0);
    $all++;
  }
}

$dbh->disconnect;
./j2.pl
use Test::More tests => 8;

# Same experiments as j.pl, except that the start of the year is built one bit
# at a time with vec() instead of with pack().
# The packed data deliberately does not live in a variable called $b, because
# $b is one of sort's special variables.

# One day: 7 Y/N flags become 7 bits.
my $day = "YNNYYNY";
$day =~ tr/YN/10/;
my $day_bits = pack "b7", $day;
is((unpack "b7", $day_bits), "1001101", "7 flags survive a pack/unpack b7 round trip");

# One year at 8 flags per day, as a string of "0"/"1" characters.
my $yn = "YNNYYNYY" x 366;
substr $yn, 199 * 8, 8, "00001000";
substr $yn, 201 * 8, 8, "00010100";
$yn =~ tr/YN/10/;

# Copy the first day's 8 flags into a bit vector, one bit per vec() call.
# (The original statement here was left unfinished; this completes it the
# obvious way: bit N of the vector comes from character N of $yn.)
my $bits = '';
for (0..7) {
   vec($bits, $_, 1) = substr $yn, $_, 1;
}
# Read the bits back the same way rather than printing them, so the result is
# actually checked.  Note that vec($bits, 0, 8) would NOT give this string: it
# returns the whole byte as the number 217.
is(join('', map { vec($bits, $_, 1) } 0..7), "10011011", "first day rebuilt bit by bit with vec");
#is((vec $bits, 198 * 8, 8), "10011011",   "yay");
#is((unpack "b2928", (vec $bits, (8 * 8 * 199), 8)), "10011011",   "yay");
#is((unpack "b2928", $bits), $yn,  "yay");


# vec() as an lvalue on a single zeroed byte.  Element 0 always starts at the
# lowest bit of the byte, which is the leftmost character of unpack "b7".
my $byte = pack "b7", "0" x 8;
vec($byte, 0, 1) = 0;
is((unpack "b7", $byte), "0000000", "1-bit element 0 cleared");
vec($byte, 0, 1) = 1;
is((unpack "b7", $byte), "1000000", "1-bit element 0 set");

vec($byte, 0, 2) = 2;
is((unpack "b7", $byte), "0100000", "2-bit element 0 = 2 sets only bit 1");
vec($byte, 0, 2) = 3;
is((unpack "b7", $byte), "1100000", "2-bit element 0 = 3 sets bits 0 and 1");
# 5 does not fit in 2 bits; only its low two bits (binary 01) are stored.
vec($byte, 0, 2) = 5;
is((unpack "b7", $byte), "1000000", "2-bit element 0 = 5 is truncated to 1");
vec($byte, 0, 4) = 5;
is((unpack "b7", $byte), "1010000", "4-bit element 0 = 5 sets bits 0 and 2");


./j3.pl

use strict;

# Every day of the year carries 7 on/off flags, and a year has at most
# 366 days, so a whole year is one string of 7 * 366 = 2562 flags:
my $str = "0001010" x 366;

# Packed 8 flags to a byte ("b" = ascending bit order within each byte):
my $bitstring = pack("b2562", $str);

# Unpacking with the same template recovers the original string:
print unpack("b2562", $bitstring), "\n";

# The 300th day (index 299) occupies 7 consecutive bits of the vector;
# read them one at a time with 1-bit-wide vec() calls.
my $first_bit = 7 * 299;
my $day300 = join "", map { vec($bitstring, $first_bit + $_, 1) } 0 .. 6;
print $day300, "\n";

./j4.pl
use Benchmark;

# Sample year: 366 days at 8 bits each, with day index 300 given a
# recognisable pattern, then packed 8 bits to a byte.
my $val = "10101010" x 366;
substr($val, 8 * 300, 8) = "01100001";
my $bs = pack("b2928", $val);

# Compare reading day 300 bit by bit against reading it with a single vec call.
timethese(1000, {
   onevec => 'onevec()',
   looper => 'looper()',
});


# Read day 300's 8 flags out of $bs one bit at a time (vec with a width of 1)
# and die with the bits that were read if they are not "01100001".
sub looper {
   my $first_bit = 300 * 8;
   my $ret = join '', map { vec($bs, $first_bit + $_, 1) } 0 .. 7;
   die $ret unless ($ret eq "01100001");
}

# Read day 300's 8 flags out of $bs with a single vec() call and die with the
# bits that were read if they are not "01100001".
sub onevec {
   # vec's OFFSET counts elements of the requested width, not bits: with a
   # width of 8 it is a byte index, so day 300 is simply element 300.
   my $byte = vec($bs, 300, 8);

   # vec returns the byte as a number.  Turn it back into a one-byte string
   # and unpack it with the same ascending-bit "b" template it was packed with.
   my $ret = unpack("b8", chr($byte));
   die $ret unless ($ret eq "01100001");
}



./read.pl
use Omni::DB;
use Benchmark;

my $dbh = Omni::DB::connect_test_with_pw;

# Reference values in the three storage formats, 8 bits per day with day
# index 300 made distinctive.  Nothing in this script reads them back; they
# document what the commented-out checks in the read_* subs expect.
my @vals;

# One ASCII "0"/"1" per bit.
my $val1 = "10101010" x 366;
substr $val1, 8 * 300, 8, "00100000";
push @vals,$val1;

# Two hex digits per day (0xAA = 10101010, 0x20 = 00100000).
my $val2 = "AA" x 366;
substr $val2, 2 * 300, 2, "20";
push @vals,$val2;

# All 366 * 8 = 2928 bits packed 8 to a byte; a "b2562" template would
# silently drop everything after bit 2561.
my $val3 = pack("b2928", $val1);
push @vals,$val3;


# Time one full pass over each table with each extraction strategy.
timethese(1, {
   'char2562'    => 'read_char2562',
   'char732'     => 'read_char732',
   'byte'        => 'read_byte',
   'byte_onevec' => 'read_byte_onevec',
});

# Release the connection once the benchmark has run, as the other scripts do.
$dbh->disconnect;

# Benchmark body: walk every row of jay_char2562 and pull days 299 and 300
# (8 characters each) out of the second selected column with substr.
sub read_char2562 {
   my $strsql = "select * from jay_char2562";
   my $sth = $dbh->prepare($strsql);
   $sth->execute;
   # The row is lexical so this sub does not share a package-global @row
   # with the other benchmark subs.
   while (my @row = $sth->fetchrow)  {
      my $val = substr $row[1], 8 * 299, 8;
      #die $val unless ($val eq "10101010");
      $val    = substr $row[1], 8 * 300, 8;
      #die $val unless ($val eq "00100000");
   }
   $sth->finish;
}

# Benchmark body: walk every row of jay_char732 and decode days 299 and 300
# (two hex digits each) from the second selected column into 8-character
# bit strings.
sub read_char732 {
   my $strsql = "select * from jay_char732";
   my $sth = $dbh->prepare($strsql);
   $sth->execute;
   # The row is lexical so this sub does not share a package-global @row
   # with the other benchmark subs.
   while (my @row = $sth->fetchrow)  {
      # pack "H2" turns the two hex digits into one byte; unpack "B8" spells
      # that byte out high bit first.
      my $val = unpack("B8",pack("H2",(substr $row[1], 2 * 299, 2)));
      #die $val unless ($val eq "10101010");
      # Reuse $val: declaring it a second time in this scope only masks the first.
      $val    = unpack("B8",pack("H2",(substr $row[1], 2 * 300, 2)));
      #die $val unless ($val eq "00100000");
   }
   $sth->finish;
}

# Benchmark body: walk every row of jay_byte and rebuild days 299 and 300 as
# 8-character bit strings, one bit at a time, from the second selected column.
sub read_byte {
   my $strsql = "select * from jay_byte";
   my $sth = $dbh->prepare($strsql);
   $sth->execute;
   # The row is lexical so this sub does not share a package-global @row
   # with the other benchmark subs.
   while (my @row = $sth->fetchrow)  {
      # Days are laid out on an 8-bit stride here, so all 8 bits (0..7) must
      # be read; stopping at 6 yields a 7-character string that can never
      # equal the 8-character patterns in the checks below.
      my $val = '';
      for (0..7) {
         $val .= vec $row[1], 299 * 8 + $_, 1;
      }
      #die $val unless ($val eq "10101010");
      $val = '';
      for (0..7) {
         $val .= vec $row[1], 300 * 8 + $_, 1;
      }
      #die $val unless ($val eq "00100000");
   }
   $sth->finish;
}

# Benchmark body: walk every row of jay_byte and decode days 299 and 300 from
# the second selected column using one 8-bit-wide vec() call per day.
sub read_byte_onevec {
   my $strsql = "select * from jay_byte";
   my $sth = $dbh->prepare($strsql);
   $sth->execute;
   while (my @row = $sth->fetchrow)  {
      # Read from the fetched row (not from a variable of another script).
      # With a width of 8, vec's OFFSET is a byte index, so day N is element N.
      # vec returns a number; chr + unpack "b8" spells the byte out in the
      # same ascending bit order that the bit-by-bit reader produces.
      my $ret = unpack("b8", chr(vec($row[1], 299, 8)));
      #die $ret unless ($ret eq "10101010");
      $ret    = unpack("b8", chr(vec($row[1], 300, 8)));
      #die $ret unless ($ret eq "00100000");
   }
   $sth->finish;
}


__END__

   'char2562' => '@row = getrow("char2562"); die unless ((substr $row[0], 7 * 300, 7)                           eq "0011100")',
   'char732'  => '@row = getrow("char732");  die unless ((unpack("B7",pack("H2",(substr $row[0], 2 * 300, 2)))) eq "0011100")',
   'byte'     => '@row = getrow("byte");     die unless ((substr((unpack("b2562", $row[0])),  7 * 300, 7))      eq "0011100")'





./j5.pl

# Demonstration (from the IRC discussion) of what vec's OFFSET means when the
# element width is 8: it is a byte index, not a bit index.

# Put a distinctive pattern in byte 0 and read element 0.
$z =  "11101100"x300;
substr( $z, 0, 8 ) = "01100000";
$z = pack "b2400", $z;
print vec $z, 0, 8;
# <buu> That prints '6'
# ("01100000" in ascending bit order has bits 1 and 2 set: 2 + 4 = 6.)

print "\n\n";

# Put the same pattern in byte 1 (characters 8..15) and read element 8.
$z =  "11101100"x300;
substr( $z, 8, 8 ) = "01100000";
$z = pack "b2400", $z;
print vec $z, 8, 8;
# <buu> That prints '55'
# (Element 8 is byte 8, which still holds the default "11101100" = 55;
#  the modified byte is element 1.)
print "\n\n";


./j6.pl

# A year of identical days, 8 flags per day, as "0"/"1" characters.
$bs = "00100000" x 366;
#substr $bs, 300 * 8, 8, "01000001";

# Pack every flag ("b*" = as many bits as the string holds, ascending order).
$bv = pack("b*", $bs);

# First byte as a number, printed as 8 binary digits.  %b writes the highest
# bit first, so the digits appear mirrored relative to the input string.
$subvec = vec $bv, 0, 8;
printf "%08b\n", $subvec;

./c.pl
use IO::File;

# Read the file "c" from the current directory and, for every line of the form
# "word:rest", print the word and the rest as separate newline-joined items.
# Lines that do not match contribute nothing.
# The mode is passed separately from the filename, and a failed open is
# reported instead of being silently read as an empty file.
my $fh = IO::File->new("c", "r") or die "Cannot open c: $!";
print join "\n", map /^(\w+):([^\n]+).*$/o, <$fh>;
$fh->close;



More information about the Omaha-pm mailing list