#!/usr/local/bin/perl -w # Name: wr_parser_osyn, Tom Keller, Ph.D., Jul-2004 use strict; use File::Basename; use File::Spec; use Config::Simple; use IO::File; use Spreadsheet::Read; use Mac::AppleScript qw(RunAppleScript); ### MAIN CODE ### # A. get spreadsheet filename and starting run-id number from Mac::AppleScript dialog boxes my $as1 = < '', 'CYCLE' => 'LV40* 394-8', 'DMT' => 'OFF', 'END_PROC' => 'End CESS', 'L' => '', 'MODEL' => '394-08', 'MULTICYCLES' => '0', 'ROM_VERSION' => '2.01', 'SYNTH_JOB_FILE'=> '1', 'RUN_DATE' => "$run_date", 'RUN_ID' => '', 'SEQ_NAME' => '', 'SEQ_TEXT' => '', 'USER_FIELD1' => '', 'USER_FIELD2' => '', 'SYNTHESIZER' => 'Synthesizer-2', 'comment' => '', 'faid' => '', 'email' => '', 'phone' => '', 'scale' => '40 nmole', 'priority' => 'Standard', 'process' => 'NAP-10', ); # Use Spreadsheet::Read to parse spreadsheet #print "Source: $source\n"; #print "file: $filename\n"; my $ref = ReadData ("${source}/${filename}"); print "version: ", Spreadsheet::Read::Version (),"\n" or carp(); ## meta data from hardcoded cell locations: contact, customer, alias, faid, contact_phone and contact_email my @rows = Spreadsheet::Read::rows($ref->[1]); my $customer = $ref->[1]{A2}; ## PI last name $data{'CUSTOMER'} = $customer; $data{'RUN_DATE'} = $run_date; $data{'USER_FIELD1'} = $ref->[1]{B2}; ## alias $data{'USER_FIELD2'} = $ref->[1]{D2}; ## contact name $data{'faid'} = $ref->[1]{C2} || ''; $data{'phone'} = $ref->[1]{F2} || ''; $data{'email'} = $ref->[1]{E2} || ''; $data{'scale'} = $ref->[1]{A4} || '40 nmole'; $data{'priority'} = $ref->[1]{C4} || 'standard'; $data{'process'} = $ref->[1]{B4} || 'Nap-10'; $data{'comment'} = $ref->[1]{D4} || ''; my $num_rows = scalar @rows; my $start_row = 7; ## N.b. hardcoded first row of oligo order data ## Sanity checks ## print "Reading source ${source}/${filename}\n"; print "Output, beginning with $run_number, sent to $out_dir\n"; print "Last row is: $num_rows\n"; print "There are ", $num_rows - 6, " orders.\n"; ## Process data rows ## process_rows(\%data); ############ SUBROUTINES ############ #### LOCAL_CONFIG #### sub local_config { chomp(my $hostname = `hostname`); #print "hostname in local_config: $hostname\n"; ## sanity check my @block_id = split(/\./, $hostname); chomp(my $home = `env | grep '^HOME'`); $home =~ s/HOME=//; my $default_loc = 'bin/.config_core'; my $config_file = File::Spec->catfile($home,$default_loc); #print "config_file is $config_file\n"; ## sanity check my $cfg = new Config::Simple($config_file); my $source = $cfg->param("$block_id[0].osyn_source"); print "source is $source\n"; ## sanity check my $out_dir = $cfg->param("$block_id[0].osyn_output"); my $file_type = $cfg->param("$block_id[0].file_type"); ## Spreadsheet::Read handles this return ($source, $out_dir, $file_type); } #### END OF LOCAL_CONFIG #### #### PROCESS_ORDER #### ## process data rows containing oligo orders sub process_rows { my $data = shift; ## ref to %data, with defaults and metadata (from the first five rows) ## add row data and return data hashref for each row my $count = 0; foreach my $i ($start_row .. $num_rows) { my $name_cell = "A" . $i; next unless ($ref->[1]{$name_cell} =~ m/^\w/); ## skip lines without a SEQ_NAME in first column my $seq_cell = "B" . $i; $data->{'SEQ_NAME'} = $ref->[1]{$name_cell}; $data->{'RUN_ID'} = $run_number + $count ."-" . substr $customer, 0, 4; my $seq_text = munge_seq($ref->[1]{$seq_cell}); $data->{'SEQ_TEXT'} = $seq_text; ## Sanity check ## print "oligo name is $data->{'SEQ_NAME'}, run id is $data->{'RUN_ID'}, seq: $data->{'SEQ_TEXT'}\n"; $data->{'L'} = length $data->{'SEQ_TEXT'}; $count++; ## print output file to out_dir print_oligonet($data); } } ## end of sub PROCESS_ORDERS ## ## munge sequence text sub munge_seq { my $string = shift; # takes a single cell as input string $string =~ s/[^a-zA-Z]//g; # remove all non letters. $string = uc($string); return $string; } ##### end #### ###### OUTPUT ##### sub print_oligonet { my $order = shift; ## print an OligoNet file for each input row ## my $output = File::Spec->catfile($out_dir,$order->{RUN_ID} . '.txt'); print "output sent to $output\n"; # sanity check open(ORDER, ">$output") or die "Can't open $output in $out_dir: $!"; print ORDER join("\t", "SYNTH_JOB_FILE", $order->{SYNTH_JOB_FILE}, ## a constant: '1' "RUN_DATE", $order->{RUN_DATE}, "RUN_ID", $order->{RUN_ID}, "CUSTOMER", $order->{CUSTOMER}, ## the last name of the PI "USER_FIELD1", $order->{USER_FIELD1}, ## 8-digit alias to account "USER_FIELD2", $order->{USER_FIELD2}, ## contact info "MODEL", $order->{MODEL}, "ROM_VERSION", $order->{ROM_VERSION}, "SYNTHESIZER", $order->{SYNTHESIZER}, ## default to Syn-2 "SEQ_NAME", $order->{SEQ_NAME}, "SEQ_TEXT", $order->{SEQ_TEXT}, "COMMENTS", "email: $order->{email}, phone: $order->{phone}, faid: $order->{faid}, scale: $order->{scale}, priority: $order->{priority}, process: $order->{process}, length: $order->{'L'}, client_note: $order->{comment}, 394_note: ", "CYCLE", $order->{CYCLE}, "END_PROC", $order->{END_PROC}, "DMT", $order->{DMT}, "MULTICYCLES", $order->{MULTICYCLES}, "\cM"); ## set newline to Mac style close ORDER; } #### End of Subs ####### =pod =head1 PROGRAM wr_xls2oligonet wr_xls2oligonet.pls - is used to parse the OSYN Core's oligo_batchorder.xls. This spreadsheet can be downloaded from http://www.ohsu.edu/research/core/workrequests/wr_osyn.html. The parser will generate an OligoNet compatible file from each record in the spreadsheet. These are then used for automated DNA synthesis on an ABI 394. =head1 SYNOPSIS This program uses a specificly formatted spreadsheet as input. It extracts billing info from row 2, and then sample name and sequence from subsequent rows. =head1 DESCRIPTION Saved excel spreadsheet batch workrequests are parsed to generate separate OligoNet synthesis order files. The spreadsheet filename and the run_id number (in that order) are the two required arguments to the program. The first 5 rows contain ordering data. Rows 6 and following contain the specific oligo order data: cells B1, B2, B3, B4 are expected to contain the Contact person, PI name, Account alias, and FAID. Oligo names are listed in column A, from A6 down; Oligo sequences are listed in column B, from B6 down. =over 4 =item Config::Simple is used with an "ini" formatted configuration file on the machine running the program. =item OligoNet Synthesis Order form fields are generated as individual output files at the location specified in the configuration file. =item Spreadsheet::Read is used to parse. the spreadsheet =head2 REQUIREMENTS =over 4 =item Specifc format for the input, defined by the spreadsheet. The output path is read from the "ini" formatted configuration file at $HOME/bin/.core_config =item Input is expected to be a spreadsheet in the format provided at the Core workrequest website. =item See below for required PERL MODULES =back =head1 AUTHOR Thomas J. Keller, Ph.D. =head1 VERSION 1.00 (18 October, 2007) =head1 SEE ALSO L L L =cut