#!/usr/bin/perl
open GENES, $ARGV[0] or die $!;
my @bases = ("A", "T", "C", "G");
my %ctable=();
my %genes=();
while($line = <GENES>){
if($line=~ /^>/){
if($header){
$genes{$header}=$seqst_temp;
}
chomp $line;
$line =~ s/\>//g;
$header="";
$header=$line;
$seqst_temp="";
}
else{
$line =~ s/[\n\t\f\r_0-9\s]//g;
$seqst_temp .= $line;
}
}#end of while loop
if($header){
$genes{$header}=$seqst_temp;
}
foreach $base1 (@bases){
foreach $base2 (@bases){
foreach $base3 (@bases){
#print "$base1\t$base2\t$base3\n";
my $codon="$base1" . "$base2" . "$base3";
#populate all possible codons
$ctable{$codon}=0;
}}}
#header
print "Genename\t";foreach $key (sort keys %ctable) {print "$key\t";}print "\n";
foreach $genekey (sort keys %genes) {
print "$genekey\t";
@gcodons = ( $genes{$genekey} =~ m/.../g );
foreach $base1 (@bases){
foreach $base2 (@bases){
foreach $base3 (@bases){
#print "$base1\t$base2\t$base3\n";
my $codon="$base1" . "$base2" . "$base3";
#populate all possible codons
$ctable{$codon}=0;
}}}
foreach $gcodon (@gcodons) {
#print "$gcodon\n";
if(exists $ctable{$gcodon}){$ctable{$gcodon}++;}
}
foreach $key (sort keys %ctable) {
print "$ctable{$key}\t";
}
print "\n";
}#end genes foreach
close GENES;
Monday, January 20, 2014
Perl script to print codon usage per sequence in multi-fasta file
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment