#!/usr/local/bin/perl
#
# Benchmark three ways of selecting, from a tab-separated annotation file,
# the lines that belong to the names listed in a separate names file:
#
#   with_string_cmp - one alternation regex built from all names
#   with_hash       - hash lookup on the first tab-separated column
#   with_grep       - nested grep testing every name against every line
#
# Usage: perl <this script> [COUNT]
#   COUNT is handed to Benchmark::cmpthese unchanged (default -1; see the
#   Benchmark documentation for the meaning of negative counts).
#
# NOTE(review): the regex-based variants match a name anywhere in a line,
# while with_hash only matches the first column exactly, so the three
# variants are not guaranteed to select the same lines.
use strict;
use warnings;
use IO::File;
use Benchmark 'cmpthese';

my $count = shift || -1;

# Column labels; not referenced anywhere else in this script.
my @key_names = ('Gene number', 'Location begin', 'Location end', 'Orientation');

my $names_file = '/Users/kellert/Documents/Computing/in_progress/BatchBlast01/names01.txt';
my ($header, @names) = process_names($names_file);
#print join "\n", @names, "\n";

my $annot_file = '/Users/kellert/Documents/Computing/in_progress/BatchBlast01/STM_Genome_Annot_short.txt';

# One handle per variant; each run re-opens and closes its own handle.
my $fh1 = IO::File->new;
my $fh2 = IO::File->new;
my $fh3 = IO::File->new;

cmpthese( $count, {
    'with_string_cmp' => sub {
        # Names are quoted so they are matched literally, and the
        # alternation is compiled once per run instead of once per line.
        my $names_join = join '|', map { quotemeta } @names;
        my $names_re   = qr/(?:$names_join)/;
        my @goi;

        $fh1->open($annot_file, '<')
            or die "Could not open $annot_file: $!";
        while (my $line = <$fh1>) {
            chomp $line;
            push @goi, $line if $line =~ $names_re;
        }
        $fh1->close;
    },
    'with_hash' => sub {
        my %have_name = map { $_ => 1 } @names;

        $fh2->open($annot_file, '<')
            or die "Could not open $annot_file: $!";
        while (my $line = <$fh2>) {
            #print "Line2: $line\n";
            chomp $line;
            my ($name, $rest) = split /\t/, $line, 2;

            # Only keep annotation lines whose first column is a wanted
            # name. (The previous "= [...] or next" never skipped anything
            # because an array reference is always true.)
            next unless defined $name && exists $have_name{$name};
            $have_name{$name} = [ defined $rest ? split(/\t/, $rest) : () ];
        }
        $fh2->close;
    },
    'with_grep' => sub {
        # Pre-compile one literal pattern per name for this run.
        my @name_res = map { qr/\Q$_\E/ } @names;

        $fh3->open($annot_file, '<')
            or die "Could not open $annot_file: $!";
        my @lines = <$fh3>;
        $fh3->close;

        # Keep a line when at least one name pattern matches it.
        my @gene_of_interest = grep {
            my $line = $_;
            scalar grep { $line =~ $_ } @name_res;
        } @lines;
    },
});

## Subs

# process_names($names_file)
#
# Reads the names file: the first line is returned as the header (chomped),
# and the first tab-separated field of every following line is collected as
# a name. Lines whose first field is missing or empty are skipped.
#
# Returns the list ($header, @names); $header is undef for an empty file.
# Dies if the file cannot be opened.
sub process_names {
    my ($names_file) = @_;

    open my $names_fh, '<', $names_file
        or die "Cannot open $names_file: $!";

    my $header = <$names_fh>;    ## remove header line
    chomp $header if defined $header;

    my @names;
    while (my $line = <$names_fh>) {
        chomp $line;
        my ($name) = split /\t+/, $line;
        next unless defined $name && length $name;
        push @names, $name;
    }
    close $names_fh;

    return $header, @names;
}

__END__

# Not executed: alternate copy of the benchmark in which with_hash and
# with_grep read one line from the annotation file before processing.

cmpthese( $count, {
    'with_string_cmp' => sub {
        my $names_join = join '|', @names;
        my @goi;
        if ($fh1->open("< $annot_file")) {
            my @lines = <$fh1>;
            foreach (@lines) {
                chomp;
                push @goi, $_ if $_ =~ m/($names_join)/;
            }
        } else {
            die "Could not get the filehandle $fh1: $!."
        }
        $fh1->close;
    },
    'with_hash' => sub {
        my %have_name = map({$_ => 1} @names);
        if ($fh2->open("< $annot_file")) {
            my $header = <$fh2>;
            while(my $line = <$fh2>) {
                #print "Line2: $line\n";
                my ($name,$else) = split(/\t/, $line, 2);
                $have_name{$name} = [split(/\t/, $else)] or next;
            }
        } else {
            die "Could not get the filehandle $fh2: $!."
        }
        $fh2->close;
    },
    'with_grep' => sub {
        if ($fh3->open("< $annot_file")) {
            my $header = <$fh3>;
            my @lines =<$fh3>;
            my @index = grep {
                my $c = $_;
                if ($_ > $c or ( grep { $lines[$c] =~ m/$_/ } @names ) ) {
                    1; #yes, select it
                } else {
                    0; # no, skip it
                }
            } 0..$#lines;
            my @gene_of_interest = @lines[@index];
        } else {
            die "Could not get the filehandle $fh3: $!."
        }
        $fh3->close;
    }
});