[Omaha.pm] Notes dump: pack, unpack, vec, and 2562 long bit vectors

Miller, Scott L (Omaha Networks) scott.l.miller at hp.com
Thu Apr 21 06:47:47 PDT 2005


Hmm, I can't help but wonder whether you're solving the wrong problem.  This seems awfully convoluted.  So exactly what are these 7 bits per date used for, and why do you have to keep many years of information?

Is the actual data being stored as repetitive as your example?  I'm hoping not...

-Scott

-----Original Message-----
From: omaha-pm-bounces at pm.org [mailto:omaha-pm-bounces at pm.org]On Behalf
Of Jay Hannah
Sent: Wednesday, April 20, 2005 5:08 PM
To: omaha-pm at pm.org
Subject: [Omaha.pm] Notes dump: pack, unpack, vec, and 2562 long bit
vectors



For the record...

Here's a dump of notes/code from a day a couple weeks back I spent wrestling pack, unpack, vec, etc...

I lost. Bad. I also stumped people on EFnet #perl. -grin-

j

 
--------------

I have around 61M on/off switches (bits) I need to store in a database and do really fast lookups on.

There's 7 bits per date (per other stuff), so we were thinking about having 1 row in the database for every calendar year (for each other stuff). So, stringing a whole year together (max 366 days) is a string of 2562 bits. 

Storing those 2562 bits as a long string of ASCII Ys and Ns or 1s and 0s would be easy. As would shrinking the column a bit by storing each day in hex (pretend there were 8 bits per date, then store "FF" in the database instead of "11111111"). 

For some reason, I'm thinking that storing the 2562 bits as binary in the database should be faster still, but so far my various benchmarks using pack, unpack, and vec come out slightly slower than storing hex...

------

Benchmark: timing 1 iterations of byte, byte_onevec, char2562, char732...
      byte: 30 wallclock secs (11.82 usr +  7.92 sys = 19.74 CPU) @  0.05/s (n=1)
            (warning: too few iterations for a reliable count)
byte_onevec: 26 wallclock secs ( 8.35 usr +  7.02 sys = 15.37 CPU) @  0.07/s (n=1)
            (warning: too few iterations for a reliable count)
  char2562: 34 wallclock secs (17.28 usr +  8.23 sys = 25.51 CPU) @  0.04/s (n=1)
            (warning: too few iterations for a reliable count)
   char732: 13 wallclock secs ( 8.21 usr +  2.03 sys = 10.24 CPU) @  0.10/s (n=1)
            (warning: too few iterations for a reliable count)


---------

./insert.pl
#!/usr/bin/perl
# Benchmark driver: fetches one row from table "jay" in each of three
# encodings (bit characters, hex characters, packed binary) and checks that
# the 7-bit slice at day index 300 decodes to "0011100".

use Benchmark;

#use DBI qw(:sql_types);
#use DBD::Informix qw(:ix_types);
use Omni::DB;
# NOTE(review): Bit::Vector is loaded but nothing in this script calls it.
use Bit::Vector;

my $dbh = Omni::DB::connect_test_with_pw();

# One character per bit: 366 days x 7 bits, with the slice at day index 300
# overwritten by a distinctive pattern.
my $val1 = "1010101" x 366;
substr $val1, 7 * 300, 7, "0011100";

# Two hex digits per day. "38" is 00111000 in binary, so its first seven
# bits ("B7") are the same "0011100" marker used above.
my $val2 = "FF" x 366;
substr $val2, 2 * 300, 2, "38";

# NOTE(review): second, redundant "use Omni::DB" (already loaded above).
use Omni::DB;
# Packed binary: the 2562 characters of $val1 become bits, in ascending bit
# order within each byte ("b" template).
my $val3 = pack("b2562", $val1);

# One-off insert of the three sample values; left disabled, presumably
# because the row already exists -- TODO confirm.
#my $strsql = "insert into jay (uni, char2562, char732, byte) values (?, ?, ?, ?)";
#my $sth = $dbh->prepare($strsql);
#$sth->execute(1, $val1, $val2, $val3);
#$dbh->commit;


# Each code string is run 10 times. It fetches one column via getrow(),
# decodes the day-300 slice for that encoding and dies if the marker pattern
# does not come back.
timethese(10, {
   'char2562' => '@row = getrow("char2562"); die unless ((substr $row[0], 7 * 300, 7)                           eq "0011100")',
   'char732'  => '@row = getrow("char732");  die unless ((unpack("B7",pack("H2",(substr $row[0], 2 * 300, 2)))) eq "0011100")',
   'byte'     => '@row = getrow("byte");     die unless ((substr((unpack("b2562", $row[0])),  7 * 300, 7))      eq "0011100")'
});

# Fetch the named column of the row with uni = 1 from table "jay".
#   $column - column name, interpolated directly into the SQL text
# Returns whatever the statement handle's fetchrow yields for that row.
sub getrow {
   my $column = shift;
   my $query = $dbh->prepare("select $column from jay where uni = 1");
   $query->execute();
   return $query->fetchrow;
}

# Debug aid, left disabled: dump the last row fetched by the benchmark.
#print "@row\n";

# Release the database connection once the benchmark has finished.
$dbh->disconnect;


./j.pl
# Exploratory tests for pack/unpack "b" templates and vec() on bit strings.
use strict;
use warnings;
# 1 single-day round trip + 6 whole-year checks + 6 vec() checks.
use Test::More tests => 13;

# Round-trip one day's seven Y/N flags through pack/unpack.
my $day_flags = "YNNYYNY";
$day_flags =~ tr/YN/10/;
my $packed_day = pack "b7", $day_flags;
is((unpack "b7", $packed_day), "1001101",  "yay");

# A whole year at 8 bits per day, with two days given distinctive patterns.
# The patterns are already digits, so the tr/// below leaves them untouched.
my $year_flags = "YNNYYNYY" x 366;
substr $year_flags, 199 * 8, 8, "00001000";
substr $year_flags, 201 * 8, 8, "00010100";
$year_flags =~ tr/YN/10/;
my $packed_year = pack "b2928", $year_flags;

# Unpack once and slice the resulting character string per day.
my $round_trip = unpack "b2928", $packed_year;
is(substr($round_trip, 198 * 8, 8), "10011011",   "yay");
is(substr($round_trip, 199 * 8, 8), "00001000",   "yay");
is(substr($round_trip, 200 * 8, 8), "10011011",   "yay");
is(substr($round_trip, 201 * 8, 8), "00010100",   "yay");
is(substr($round_trip, 202 * 8, 8), "10011011",   "yay");
# Abandoned attempt to read one day directly with vec():
#is((unpack "b2928", (vec $b, (8 * 8 * 199), 8)), "10011011",   "yay");
is($round_trip, $year_flags,  "yay");


# vec() on a single zeroed byte. Element 0 is the low-order end of the byte,
# which is also where the "b" template starts, so the bits show up on the left.
my $bits = pack "b7", "0" x 8;
vec($bits,0,1) = 0;
is((unpack "b7", $bits), "0000000",  "yay");
vec($bits,0,1) = 1;
is((unpack "b7", $bits), "1000000",  "yay");

vec($bits,0,2) = 2;
is((unpack "b7", $bits), "0100000",  "yay");
vec($bits,0,2) = 3;
is((unpack "b7", $bits), "1100000",  "yay");
# 5 does not fit in 2 bits; only the low two bits (01) are stored.
vec($bits,0,2) = 5;
is((unpack "b7", $bits), "1000000",  "yay");
vec($bits,0,4) = 5;
is((unpack "b7", $bits), "1010000",  "yay");


./load_jay.pl
#!/usr/bin/perl
# Bulk loader: inserts 10,000 copies of one sample value into each of the
# three candidate tables (bit characters, hex characters, packed binary).
use strict;
use warnings;
use Omni::DB;

my $dbh = Omni::DB::connect_test_with_pw();

# One sample value per table, in the same order as the table list below.
my @vals;

# One character per bit: 366 days x 7 bits, day index 300 made distinctive.
my $val1 = "1010101" x 366;
substr $val1, 7 * 300, 7, "0011100";
push @vals, $val1;

# Two hex digits per day, day index 300 made distinctive.
my $val2 = "FF" x 366;
substr $val2, 2 * 300, 2, "38";
push @vals, $val2;

# Packed binary form of $val1 (ascending bit order within each byte).
my $val3 = pack("b2562", $val1);
push @vals, $val3;

my $all = 0;   # rows inserted so far, across all tables
my $ctr = 0;   # index into @vals for the current table
foreach my $table (qw(jay_char2562 jay_char732 jay_byte)) {
  # The statement text only depends on the table, so prepare it once per table.
  my $sth = $dbh->prepare("insert into $table (uni,string) values (?,?)");
  for (1..10000) {
    $sth->execute(0, $vals[$ctr]);
    $dbh->commit;
    # Progress marker every 1000 rows.
    print "$table $all\n" if ($all % 1000 == 0);
    $all++;
  }
  $ctr++;
}

$dbh->disconnect;
./j2.pl
# Second round of pack/unpack/vec experiments: builds a bit vector one bit at
# a time with vec() instead of pack().
use strict;
use warnings;
# 1 single-day round trip + 6 vec() checks.
use Test::More tests => 7;

# Round-trip one day's seven Y/N flags through pack/unpack.
my $day_flags = "YNNYYNY";
$day_flags =~ tr/YN/10/;
my $packed_day = pack "b7", $day_flags;
is((unpack "b7", $packed_day), "1001101",  "yay");

# A whole year at 8 bits per day, with two days given distinctive patterns.
my $year_flags = "YNNYYNYY" x 366;
substr $year_flags, 199 * 8, 8, "00001000";
substr $year_flags, 201 * 8, 8, "00010100";
$year_flags =~ tr/YN/10/;

# Copy the first day's eight flags into a bit vector, one bit per vec() call.
# vec() must be called with parentheses to be usable as an lvalue here.
my $first_day = '';
for (0..7) {
   vec($first_day, $_, 1) = substr($year_flags, $_, 1);
}
# Read the same eight bits back and show them.
for (0..7) {
   print vec($first_day, $_, 1);
}
print "\n";
# Abandoned assertions. vec() returns an integer, not a string of 0s and 1s,
# so these comparisons would not hold as written.
#is((vec $b, 0, 8), "10011011",   "yay");
#is((vec $b, 198 * 8, 8), "10011011",   "yay");
#is((unpack "b2928", (vec $b, (8 * 8 * 199), 8)), "10011011",   "yay");
#is((unpack "b2928", $b), $yn,  "yay");


# vec() on a single zeroed byte. Element 0 is the low-order end of the byte,
# which is also where the "b" template starts, so the bits show up on the left.
my $bits = pack "b7", "0" x 8;
vec($bits,0,1) = 0;
is((unpack "b7", $bits), "0000000",  "yay");
vec($bits,0,1) = 1;
is((unpack "b7", $bits), "1000000",  "yay");

vec($bits,0,2) = 2;
is((unpack "b7", $bits), "0100000",  "yay");
vec($bits,0,2) = 3;
is((unpack "b7", $bits), "1100000",  "yay");
# 5 does not fit in 2 bits; only the low two bits (01) are stored.
vec($bits,0,2) = 5;
is((unpack "b7", $bits), "1000000",  "yay");
vec($bits,0,4) = 5;
is((unpack "b7", $bits), "1010000",  "yay");


./j3.pl

use strict;

# Seven flags are tracked per day and a year has at most 366 days, so one
# year is a 2562-character string of 0s and 1s.
my $year_flags = "0001010" x 366;

# The same year squeezed into a packed bit string.
my $packed_year = pack "b2562", $year_flags;

# Unpacking with the same template reproduces the character form.
print unpack("b2562", $packed_year), "\n";

# Pull the seven single-bit elements starting at bit 7 * 299 straight out of
# the packed string and glue them together.
my $day300 = join "", map { vec($packed_year, 7 * 299 + $_, 1) } 0 .. 6;
print "$day300\n";

./j4.pl
use Benchmark;

# 366 days at 8 bits each; the day at index 300 carries a distinctive pattern
# so the two readers below can verify what they extracted.
my $val = join "", map { $_ == 300 ? "01100001" : "10101010" } 0 .. 365;
my $bs  = pack "b2928", $val;

# Race the bit-by-bit reader against the single-vec() reader.
my %contenders = (
   looper => 'looper()',
   onevec => 'onevec()',
);
timethese(1000, \%contenders);


# Read the eight bits starting at bit 300 * 8 of $bs one at a time with
# vec() and die unless they spell the expected pattern.
sub looper {
   my $bits = join "", map { vec($bs, 300 * 8 + $_, 1) } 0 .. 7;
   die $bits unless ($bits eq "01100001");
}

# Read day 300's eight bits from $bs with a single vec() call and die unless
# they spell the expected pattern.
sub onevec {
   # With BITS = 8 the offset counts 8-bit elements, not bits, so the day at
   # character offset 8 * 300 of the unpacked string is simply element 300.
   my $byte = vec $bs, 300, 8;

   # vec() returns an integer. Turn it back into a one-byte string and unpack
   # it with "b" (ascending bit order), matching how $bs was packed.
   my $ret = unpack "b8", chr $byte;
   die $ret unless ($ret eq "01100001");
}



./read.pl
# Read benchmark: scans every row of each candidate table and extracts the
# values at day indexes 299 and 300 using the decoding that fits that table.
use Omni::DB;
use Benchmark;

my $dbh = Omni::DB::connect_test_with_pw;

# Sample values in the three encodings, 8 bits per day here.
# NOTE(review): @vals is filled below but none of the four read_* subs in
# this script reads it.
my $val1 = "10101010" x 366;
substr $val1, 8 * 300, 8, "00100000";
push @vals,$val1;

# Hex form: "AA" is 10101010 and "20" is 00100000.
my $val2 = "AA" x 366;
substr $val2, 2 * 300, 2, "20";
push @vals,$val2;

# NOTE(review): $val1 is 2928 characters long, but the template only packs
# the first 2562 of them.
my $val3 = pack("b2562", $val1);
push @vals,$val3;


# Each entry is a code string naming one reader sub; every reader runs once.
timethese(1, {
   'char2562'    => 'read_char2562',
   'char732'     => 'read_char732',
   'byte'        => 'read_byte',
   'byte_onevec' => 'read_byte_onevec',
});

# Scan jay_char2562 and, for every row, slice the 8-character values at day
# indexes 299 and 300 out of the second column.
sub read_char2562 {
   my $cursor = $dbh->prepare("select * from jay_char2562");
   $cursor->execute;
   while (@row = $cursor->fetchrow)  {
      # Disabled checks expected "10101010" for day 299 and "00100000" for
      # day 300.
      for my $day (299, 300) {
         my $slice = substr $row[1], 8 * $day, 8;
      }
   }
   $cursor->finish;
}

# Scan jay_char732 and, for every row, decode the two hex digits at day
# indexes 299 and 300 of the second column into 8-character bit strings.
sub read_char732 {
   my $strsql = "select * from jay_char732";
   my $sth = $dbh->prepare($strsql);
   $sth->execute;
   # The row is only needed inside the loop, so keep it lexical.
   while (my @row = $sth->fetchrow)  {
      # Two hex digits -> one byte -> eight bits, high bit first ("B8").
      my $val = unpack("B8",pack("H2",(substr $row[1], 2 * 299, 2)));
      #die $val unless ($val eq "10101010");
      # Reuse $val rather than declaring it a second time in the same scope.
      $val = unpack("B8",pack("H2",(substr $row[1], 2 * 300, 2)));
      #die $val unless ($val eq "00100000");
   }
   $sth->finish;
}

# Scan jay_byte and, for every row, rebuild the bit strings for day indexes
# 299 and 300 from the packed second column, one vec() call per bit.
sub read_byte {
   my $strsql = "select * from jay_byte";
   my $sth = $dbh->prepare($strsql);
   $sth->execute;
   # The row is only needed inside the loop, so keep it lexical.
   while (my @row = $sth->fetchrow)  {
      # Days are 8 bits wide here (offsets step by 8 and the expected values
      # have 8 characters), so collect bits 0..7 of each day.
      my $val = join "", map { vec($row[1], 299 * 8 + $_, 1) } 0 .. 7;
      #die $val unless ($val eq "10101010");
      $val = join "", map { vec($row[1], 300 * 8 + $_, 1) } 0 .. 7;
      #die $val unless ($val eq "00100000");
   }
   $sth->finish;
}

# Scan jay_byte and, for every row, read day indexes 299 and 300 from the
# packed second column with one 8-bit vec() call per day.
sub read_byte_onevec {
   my $strsql = "select * from jay_byte";
   my $sth = $dbh->prepare($strsql);
   $sth->execute;
   # The row is only needed inside the loop, so keep it lexical.
   while (my @row = $sth->fetchrow)  {
      # Read from the fetched row. With BITS = 8 the vec() offset is the day
      # index itself. vec() returns an integer, so chr() turns it back into
      # a byte before "b8" renders it in ascending bit order.
      my $ret = unpack "b8", chr(vec($row[1], 299, 8));
      #die $ret unless ($ret eq "10101010");
      $ret = unpack "b8", chr(vec($row[1], 300, 8));
      #die $ret unless ($ret eq "00100000");
   }
   # Release the statement handle, as the other readers do.
   $sth->finish;
}


__END__

   'char2562' => '@row = getrow("char2562"); die unless ((substr $row[0], 7 * 300, 7)                           eq "0011100")',
   'char732'  => '@row = getrow("char732");  die unless ((unpack("B7",pack("H2",(substr $row[0], 2 * 300, 2)))) eq "0011100")',
   'byte'     => '@row = getrow("byte");     die unless ((substr((unpack("b2562", $row[0])),  7 * 300, 7))      eq "0011100")'





./j5.pl

# vec() offset demo: with BITS = 8 the offset counts 8-bit elements (bytes),
# not bits.
my $z;

# Put a distinctive pattern in the first byte and read element 0.
$z =  "11101100"x300;
substr( $z, 0, 8 ) = "01100000";
$z = pack "b2400", $z;
print vec $z, 0, 8;
# <buu> That prints '6'
# ("01100000" in ascending bit order sets bits 1 and 2: 2 + 4 = 6.)

print "\n\n";

# Put the pattern in the second byte (characters 8..15), then ask for
# element 8.
$z =  "11101100"x300;
substr( $z, 8, 8 ) = "01100000";
$z = pack "b2400", $z;
print vec $z, 8, 8;
# <buu> That prints '55'
# (Element 8 is the ninth byte, an untouched "11101100": 1+2+4+16+32 = 55.
#  The modified byte is element 1.)
print "\n\n";


./j6.pl

# Pack a year of identical days and show the first byte, as vec() sees it,
# in binary.
$bs = join "", ("00100000") x 366;
#substr $bs, 300 * 8, 8, "01000001";
$bv = pack("b*", $bs);
$subvec = vec $bv, 0, 8;
print sprintf("%08b", $subvec), "\n";

./c.pl
use IO::File;
# Print the "name" and "value" parts of every "name:value" line in file "c",
# one item per output line. Lines that do not match are skipped.
# Open read-only with an explicit mode and stop if the file cannot be opened.
my $fh = IO::File->new("c", "r") or die "Cannot open c: $!";
print join "\n", map /^(\w+):([^\n]+).*$/, <$fh>;
$fh->close;

_______________________________________________
Omaha-pm mailing list
Omaha-pm at pm.org
http://mail.pm.org/mailman/listinfo/omaha-pm


More information about the Omaha-pm mailing list