use DBI;
$dbh = DBI->connect('DBI:mysql:meta', 'root', 'password'
) || die "Could not connect to database: $DBI::errstr";
my ($infile) = @ARGV;
open(IN, "$infile");
my $outfile ="multifasta";
open(OUT,">>$outfile");
my $jcvi_read,$strand,$start,$stop,@seq,$ofset,$leng,$wrtseq;
while(my $line =
chomp($line);
my @a = split(/ /, $line);
my @c = split(/\s+/, $line);
my @b = split(/\_/, $a[0]);
$jcvi_read=$b[2];
$start = $c[3];
$stop = $c[4];
$strand = $c[6];
$pep = $c[8];
$sth = $dbh->prepare("SELECT sequence FROM metadata WHERE jcvi_read='$jcvi_read'");
$sth->execute();
@seq = $sth->fetchrow_array();
$ofset=$start - 1;
$leng=$stop-$start;
$wrtseq=substr $seq[0], $ofset, $leng;
$sth->finish();
#function to translate DNA to Amino acid based on standard genetic code
sub codon2aa{
my($codon)=@_;
$codon= uc $codon;
my(%genetic_code) = (
'TCA'=>'S', #Serine
'TCC'=>'S', #Serine
'TCG'=>'S', #Serine
'TCT'=>'S', #Serine
'TTC'=>'F', #Phenylalanine
'TTT'=>'F', #Phenylalanine
'TTA'=>'L', #Leucine
'TTG'=>'L', #Leucine
'TAC'=>'Y', #Tyrosine
'TAT'=>'Y', #Tyrosine
'TAA'=>'_', #Stop
'TAG'=>'_', #Stop
'TGC'=>'C', #Cysteine
'TGT'=>'C', #Cysteine
'TGA'=>'_', #Stop
'TGG'=>'W', #Tryptophan
'CTA'=>'L', #Leucine
'CTC'=>'L', #Leucine
'CTG'=>'L', #Leucine
'CTT'=>'L', #Leucine
'CCA'=>'P', #Proline
'CAT'=>'H', #Histidine
'CAA'=>'Q', #Glutamine
'CAG'=>'Q', #Glutamine
'CGA'=>'R', #Arginine
'CGC'=>'R', #Arginine
'CGG'=>'R', #Arginine
'CGT'=>'R', #Arginine
'ATA'=>'I', #Isoleucine
'ATC'=>'I', #Isoleucine
'ATT'=>'I', #Isoleucine
'ATG'=>'M', #Methionine
'ACA'=>'T', #Threonine
'ACC'=>'T', #Threonine
'ACG'=>'T', #Threonine
'ACT'=>'T', #Threonine
'AAC'=>'N', #Asparagine
'AAT'=>'N', #Asparagine
'AAA'=>'K', #Lysine
'AAG'=>'K', #Lysine
'AGC'=>'S', #Serine#Valine
'AGT'=>'S', #Serine
'AGA'=>'R', #Arginine
'AGG'=>'R', #Arginine
'CCC'=>'P', #Proline
'CCG'=>'P', #Proline
'CCT'=>'P', #Proline
'CAC'=>'H', #Histidine
'GTA'=>'V', #Valine
'GTC'=>'V', #Valine
'GTG'=>'V', #Valine
'GTT'=>'V', #Valine
'GCA'=>'A', #Alanine
'GCC'=>'A', #Alanine
'GCG'=>'A', #Alanine
'GCT'=>'A', #Alanine
'GAC'=>'D', #Aspartic Acid
'GAT'=>'D', #Aspartic Acid
'GAA'=>'E', #Glutamic Acid
'GAG'=>'E', #Glutamic Acid
'GGA'=>'G', #Glycine
'GGC'=>'G', #Glycine
'GGG'=>'G', #Glycine
'GGT'=>'G', #Glycine
);
if(exists $genetic_code{$codon}){
return $genetic_code{$codon};
}
else{
return 'X';
}
}
if($strand == '-')
#reverse complementing -ve strands
{$wrtseq=reverse $wrtseq;
$wrtseq=~ tr/ATGC/TACG/;}
my $dna=$wrtseq;
#my $dna="TCATTCTCATTC";
my $protein='';
my $codon3;
for(my $i=0; $i<(length($dna)-2); $i+=3){ $codon3=substr($dna,$i,3); $protein.= codon2aa($codon3); } print $pep."\n"; $dbh->do("INSERT into iddata (jcvi_read,strand,start,stop,protein,orf_id) VALUES('$jcvi_read','$strand','$start','$stop','$protein','$pep')");
print OUT ">".$jcvi_read."|".$pep."\n";
print OUT $protein."\n";
}
$dbh->disconnect();
6 comments:
Correctly your article helped me very much in my college assignment. Hats incorrect to you send, will look forward in behalf of more interdependent articles soon as its one of my choice question to read.
Isoleucine is "I" not "T"
thanks for the correction. Changed it
hey can u please post simplified version of this script ...i'm not able to understand the last part n the place where u have not specified anything like return x and if($strand==-)
hey can you please send simplified version of this script ...i'm not able to understand the last part ...
can u please send simplified version of this script
Post a Comment