package Blake3HasherPurePerl; use strict; use bytes; use constant { BLOCK_LEN => 64, # flag values CHUNK_START => 1 << 0, CHUNK_END => 1 << 1, PARENT => 1 << 2, ROOT => 1 << 3, # NOT SUPPORTING THESE MODES. NORMAL HASHING ONLY. # KEYED_HASH => 1 << 4, # DERIVE_KEY_CONTEXT => 1 << 5, # DERIVE_KEY_MATERIAL => 1 << 6, }; my @IV = ( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ); # my @MSG_PERMUTATION = (2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8); sub new { my $state = { chaining_value => [ @IV ], # h, 8 four-byte integers message_block => '', # m, and also b, from the length. Max 64 bytes total_bytes => 0, # this, mod 64, or 64, should be length of message_block CV_stack => [], # up to 54 more arrays of 8 four-byte integers }; bless $state; } sub freeze { my $state = shift; my $serialization = pack "V8", $state->{chaining_value}->@*; $serialization .= pack 'C', length $state->{message_block}; $serialization .= $state->{message_block}; $serialization .= pack "V2", $state->{total_bytes} & 0xFFFF_FFFF , ( $state->{total_bytes} >> 32 ) & 0xFFFF_FFFF; $serialization .= pack "V8", @$_ for $state->{CV_stack}->@*; $serialization; } sub thaw { @_ > 2 and die "only one serialization allowed in thaw method"; my $iceblock = pop; my %state; $state{chaining_value} = [ unpack 'V8', substr($iceblock,0,32,'') ]; my $message_len = unpack 'C', substr $iceblock,0,1,''; $state{message_block} = substr($iceblock,0,$message_len,''); my ($low_word, $high_word) = unpack 'V2', substr($iceblock,0,8,''); $state{total_bytes} = ($high_word << 32) + $low_word; $state{CV_stack} = []; push $state{CV_stack}->@*, [unpack 'V8', substr($iceblock,0,32,'') ] while length $iceblock; bless \%state; } sub clone { $_[0]->thaw($_[0]->freeze()) } sub add { defined(wantarray) and die "Usage: call hasher->add in void context\n"; my $state = shift; while (@_){ my $piece = shift; while (length $piece) { # "Processed one byte at a time" -- Johnny Crash my $octet = substr($piece,0,1,''); if ( length($state->{message_block}) == 64 ){ my $flags = 0x0000_0000; my $length_into_chunk = $state->{total_bytes} & 1023; if ($length_into_chunk == 0 ){ $flags |= CHUNK_END }elsif ( $length_into_chunk == 64 ){ $flags |= CHUNK_START }; $state->compress( $flags ); # consumes and clears message_block }; $state->{message_block} .= $octet; $state->{total_bytes}++; }; }; } sub _mask32 { $_[0] & 0xFFFF_FFFF } sub _add32 { ($_[0] + $_[1]) & 0xFFFF_FFFF } sub _rotr32 { my ($x, $n) = @_; $x &= 0xFFFF_FFFF; (($x >> $n) | (($x << (32 - $n)) & 0xFFFF_FFFF)) & 0xFFFF_FFFF; } sub _g { my ($v, $a, $b, $c, $d, $mx, $my) = @_; $v->[$a] = _add32($v->[$a], _add32($v->[$b], $mx)); $v->[$d] = _rotr32($v->[$d] ^ $v->[$a], 16); $v->[$c] = _add32($v->[$c], $v->[$d]); $v->[$b] = _rotr32($v->[$b] ^ $v->[$c], 12); $v->[$a] = _add32($v->[$a], _add32($v->[$b], $my)); $v->[$d] = _rotr32($v->[$d] ^ $v->[$a], 8); $v->[$c] = _add32($v->[$c], $v->[$d]); $v->[$b] = _rotr32($v->[$b] ^ $v->[$c], 7); } sub _round { my ($v, $m) = @_; # columns _g($v, 0, 4, 8, 12, $m->[0], $m->[1]); _g($v, 1, 5, 9, 13, $m->[2], $m->[3]); _g($v, 2, 6, 10, 14, $m->[4], $m->[5]); _g($v, 3, 7, 11, 15, $m->[6], $m->[7]); # diagonals _g($v, 0, 5, 10, 15, $m->[8], $m->[9]); _g($v, 1, 6, 11, 12, $m->[10], $m->[11]); _g($v, 2, 7, 8, 13, $m->[12], $m->[13]); _g($v, 3, 4, 9, 14, $m->[14], $m->[15]); } # note: elements of @_ are L-value arguments, @_ itself is not sub _permute { @_[0 .. 15] = @_[2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8] } sub compress { my ($state, $flags) = @_; my $block_len = length $state->{message_block}; $block_len == 64 or $state->{message_block} .= "\0" x ( 64 - $block_len); my @m = unpack 'V16', $state->{message_block}; my $chunk_counter = ($state->{total_bytes} - $block_len) >> 10; my @v = ( $state->{chaining_value}->@*, @IV[0..3], $chunk_counter & 0xFFFF_FFFF, ($chunk_counter >> 32) & 0xFFFF_FFFF, $block_len, $flags, ); # note: _permute operates directly on @_ _round(\@v, \@m); _permute(@m); _round(\@v, \@m); _permute(@m); _round(\@v, \@m); _permute(@m); _round(\@v, \@m); _permute(@m); _round(\@v, \@m); _permute(@m); _round(\@v, \@m); _permute(@m); _round(\@v, \@m); for my $i (0..7) { $v[$i] = _mask32($v[$i] ^ $v[$i + 8]); $v[$i + 8] = _mask32($v[$i + 8] ^ $state->{chaining_value}[$i]); } $state->{chaining_value} = [ @v[0..7] ]; $state->{message_block} = ''; # If that was the last block in a chunk, merge the completed chunk CV. if ( $flags & CHUNK_END ) { my $finished_cv = $state->{chaining_value}; if ($state->{total_bytes}) { $state->merge_cv($finished_cv); }else{ push $state->{CV_stack}->@*, $finished_cv; }; # start next chunk from IV $state->{chaining_value} = [ @IV ]; } } our $parent_flags = PARENT; sub _parent_cv { my ($left_cv, $right_cv) = @_; my @m = ($left_cv->@*, $right_cv->@*); my @v = ( @IV, @IV[0..3], 0, 0, BLOCK_LEN, # 64 $parent_flags, # includes ROOT at the very end of digesting ); _round(\@v, \@m); _permute(@m); _round(\@v, \@m); _permute(@m); _round(\@v, \@m); _permute(@m); _round(\@v, \@m); _permute(@m); _round(\@v, \@m); _permute(@m); _round(\@v, \@m); _permute(@m); _round(\@v, \@m); for my $i (0..7) { $v[$i] = _mask32($v[$i] ^ $v[$i + 8]); $v[$i + 8] = _mask32($v[$i + 8] ^ $IV[$i]); } [ @v[0..7] ]; } our $digesting = 0; sub merge_cv { my ($state, $new_cv) = @_; my $total_chunks = $state->{total_bytes} >> 10; # include the final short chunk when digesting $digesting and $state->{total_bytes} & 0x03FF and $total_chunks++; while ( ($total_chunks & 1) == 0 ) { my $left_cv = pop $state->{CV_stack}->@* or die "CV stack underflow"; $new_cv = _parent_cv($left_cv, $new_cv); $total_chunks >>= 1; } push $state->{CV_stack}->@*, $new_cv; } sub digest { my $state = shift; local $parent_flags = $parent_flags; local $digesting = 1; # because $state->{message_block} .= $octet is the # last thing that happens in add(), there will always # be at least one byte in the message_block except # immediately after hasher creation. my $flags = CHUNK_END; # always CHUNK_END, this is the final chunk, and may be short. $state->{total_bytes} <= 1024 and $flags |= ROOT; $state->{total_bytes} <= 2048 and $parent_flags |= ROOT; my $length_into_chunk = $state->{total_bytes} & 1023; $length_into_chunk <= 64 and $flags |= CHUNK_START ; $state->compress( $flags ); # consumes and clears message_block my $zip_up = pop $state->{CV_stack}->@*; if ( @{$state->{CV_stack}} ){ my $leftmost = shift $state->{CV_stack}->@*; for my $left ( reverse $state->{CV_stack}->@* ){ $zip_up = _parent_cv($left, $zip_up); }; $parent_flags |= ROOT; $zip_up = _parent_cv($leftmost, $zip_up); } %$state = %{ $state->new }; # reset the object pack 'V8', @$zip_up; # here's the digest } sub hexdigest { unpack 'H*', $_[0]->digest()} use Exporter 'import'; our @EXPORT = qw/b3pp/; sub b3pp { __PACKAGE__->new() }; 1; __END__ =head1 NAME Blake3HasherPurePerl - the BLAKE3 hash function in pure Perl =head1 SYNOPSIS use lib "."; # or wherever you put this file use Blake3HasherPurePerl; my $hasher = b3pp(); $hasher->add("hello"); $hello_hash = $hasher->clone->digest(); # use clone to not reset $hasher->add(" world\n"); $hash = $hasher->hexdigest(); # matches `echo hello world | ./b3sum --no-names` # which is dc5a4edb8240b018124052c330270696f96771a63b45250a5c17d3000e823355 =head1 DESCRIPTION A pure Perl implementation of the BLAKE3 hash function. The module provides a subset of the interface found in L, which you should obviously prefer for serious work. =head1 DEPENDENCIES A Perl recent enough to have postfix dereference syntax, since the code uses that. =head1 METHODS =over =item $class->new() =item $hasher->new() =item b3pp() the constructor. C is exported by default. =item $hasher->clone() Returns a new hasher with the same state, mode, and output size as the original. =item $hasher->add($bytes, ...) Updates the hasher state with each of the given byte strings. =item $hasher->digest() Returns the final hash value as a byte string and resets the hasher. =item $hasher->hexdigest() Returns the final hash value as a hexadecimal text string and resets the hasher. =item $hasher->freeze() serializes a b3pp into a binary string, in case you would like to, for instance, associate one of these with a file you are planning to append more data to, and get the current hash based on the frozen state and the string getting appended, without having to read the file from the beginning. Also used in the C method. =item $class->thaw(serialization) create a hasher from a frozen hasher =item $hasher->clone returns $hasher->thaw($hasher->freeze()) =back =head1 AUTHOR David Nicol =cut