Friday, August 22, 2014

Extract sequences from multi fasta file in a particular order


 #!/usr/bin/perl
 use strict;
 use warnings;

 # extractInOrder.pl - print sequences from a multi-FASTA file in the order
 # given by a list file.
 #
 # Usage: perl extractInOrder.pl test.fasta test.list
 #
 #   test.fasta  multi-FASTA input; each record starts with a ">" header line.
 #   test.list   one header per line (a leading ">" is optional), in the
 #               order the records should be written.
 #
 # Records are written to STDOUT as ">header\nsequence\n" with the sequence
 # joined onto a single line. If a listed header is not present in the FASTA
 # file, a message is written to STDERR and the script exits with status 1.

 die "Usage: perl extractInOrder.pl <fasta> <list>\n" unless @ARGV >= 2;
 my ($fasta_path, $list_path) = @ARGV;

 my %seqs;             # header (without the leading ">") => sequence string
 my $header;           # header of the record being read; undef before the first one
 my $sequence = "";    # sequence text accumulated for the current record

 # Three-arg open with a lexical handle: the file name can never be
 # interpreted as an open mode, and the handle is scoped to this script.
 open my $fasta_fh, '<', $fasta_path
     or die "Cannot open FASTA file '$fasta_path': $!\n";
 while (my $line = <$fasta_fh>) {
     if ($line =~ /^>/) {
         # A new header closes the previous record, if there was one.
         $seqs{$header} = $sequence if defined $header;

         # Strip the line ending and any trailing whitespace (including the
         # "\r" of CRLF files) so the key matches the list file entries.
         $line =~ s/\s+\z//;
         ($header = $line) =~ s/>//;
         $sequence = "";
     }
     else {
         # Drop whitespace, digits and underscores (line numbers and layout
         # characters) so only the sequence residues are kept.
         $line =~ s/[\s_0-9]//g;
         $sequence .= $line;
     }
 }
 # Store the last record, which has no following header to close it.
 $seqs{$header} = $sequence if defined $header;
 close $fasta_fh;

 open my $list_fh, '<', $list_path
     or die "Cannot open list file '$list_path': $!\n";
 while (my $line = <$list_fh>) {
     # Same normalisation as for the FASTA headers.
     $line =~ s/\s+\z//;

     # Ignore blank lines (e.g. a trailing empty line) instead of aborting.
     next if $line eq "";

     $line =~ s/>//;
     if (exists $seqs{$line}) {
         print ">$line\n$seqs{$line}\n";
     }
     else {
         # Report on STDERR so the message does not end up inside the FASTA
         # output, and signal the failure through the exit status.
         print STDERR "Sequence header $line does not exist in fasta\n";
         exit 1;
     }
 }
 close $list_fh;

No comments: