Monday, January 20, 2014

Perl script to print codon usage per sequence in multi-fasta file

 #!/usr/bin/perl  
 open GENES, $ARGV[0] or die $!;  
 my @bases = ("A", "T", "C", "G");  
 my %ctable=();  
 my %genes=();  
 while($line = <GENES>){  
 if($line=~ /^>/){  
           if($header){  
           $genes{$header}=$seqst_temp;  
           }  
      chomp $line;  
      $line =~ s/\>//g;  
      $header="";  
      $header=$line;  
      $seqst_temp="";  
      }  
      else{  
      $line =~ s/[\n\t\f\r_0-9\s]//g;  
      $seqst_temp .= $line;  
      }  
 }#end of while loop  
 if($header){  
 $genes{$header}=$seqst_temp;  
 }  
      foreach $base1 (@bases){  
      foreach $base2 (@bases){  
      foreach $base3 (@bases){  
      #print "$base1\t$base2\t$base3\n";  
      my $codon="$base1" . "$base2" . "$base3";  
      #populate all possible codons  
      $ctable{$codon}=0;  
      }}}  
 #header  
 print "Genename\t";foreach $key (sort keys %ctable) {print "$key\t";}print "\n";  
 foreach $genekey (sort keys %genes) {  
 print "$genekey\t";  
 @gcodons = ( $genes{$genekey} =~ m/.../g );  
      foreach $base1 (@bases){  
      foreach $base2 (@bases){  
      foreach $base3 (@bases){  
      #print "$base1\t$base2\t$base3\n";  
      my $codon="$base1" . "$base2" . "$base3";  
      #populate all possible codons  
      $ctable{$codon}=0;  
      }}}  
      foreach $gcodon (@gcodons) {  
 #print "$gcodon\n";  
      if(exists $ctable{$gcodon}){$ctable{$gcodon}++;}  
      }  
      foreach $key (sort keys %ctable) {  
         print "$ctable{$key}\t";  
      }  
      print "\n";  
 }#end genes foreach  
 close GENES;  

No comments: