Sunday, May 1, 2016

Ensembl Perl API to get all intron lengths in the Human genome

The script below fetches all gene stable IDs from Ensembl and prints the intron lengths for each transcript of every gene. Along with each intron length, the stable IDs of the flanking exons are also printed. Using this output, one can obtain the upstream and downstream intron length for each exon.

The output would look like this:
Gene Id                     Transcript Id            Previous Exon          Next Exon               Intron length
ENSG00000084674 ENST00000233242 ENSE00000932268 ENSE00000932269 717
ENSG00000084674 ENST00000233242 ENSE00000932269 ENSE00000932270 2338
ENSG00000084674 ENST00000233242 ENSE00000932270 ENSE00000932271 112
ENSG00000084674 ENST00000233242 ENSE00000932271 ENSE00000719046 1100
ENSG00000084674 ENST00000233242 ENSE00000719046 ENSE00000718984 261
ENSG00000084674 ENST00000233242 ENSE00000718984 ENSE00000932272 863
ENSG00000084674 ENST00000233242 ENSE00000932272 ENSE00000932273 1663
ENSG00000084674 ENST00000233242 ENSE00000932273 ENSE00000718481 1240
ENSG00000084674 ENST00000233242 ENSE00000718481 ENSE00000542194 482

 #!/usr/bin/perl  
 use strict;  
 use warnings;  
 use Bio::EnsEMBL::Registry;  
 use Bio::SeqIO;  
 use Getopt::Long;  
 # Registry methods are invoked on the class name itself, so the name is kept
 # in a scalar and used as the invocant below.
 my $registry = 'Bio::EnsEMBL::Registry';

 ## Load the databases into the registry
 $registry->load_registry_from_db(
     -host => 'ensembldb.ensembl.org',
     -user => 'anonymous',
 );

 ## Get the gene adaptor for human
 my $gene_adaptor = $registry->get_adaptor( 'Human', 'Core', 'Gene' );
 die "Could not obtain the Human Core Gene adaptor from the registry\n"
     unless defined $gene_adaptor;

 # Fetch the stable IDs of all genes known to the adaptor
 my @gene_ids = @{ $gene_adaptor->list_stable_ids() };

 # For every gene, walk each transcript and print one tab-separated line per
 # intron: gene ID, transcript ID, previous exon ID, next exon ID, intron length.
 GENE:
 foreach my $gene_id (@gene_ids) {
     my $gene = $gene_adaptor->fetch_by_stable_id($gene_id);

     # NOTE(review): the Ensembl API documents that fetch_by_stable_id can
     # return undef for an ID it cannot resolve; skip such IDs instead of
     # aborting the whole run on an undefined-value method call.
     if ( !defined $gene ) {
         warn "Skipping $gene_id: no gene could be fetched for this stable ID\n";
         next GENE;
     }

     foreach my $transcript ( @{ $gene->get_all_Transcripts } ) {
         foreach my $intron ( @{ $transcript->get_all_Introns } ) {
             print join( "\t",
                 $gene->stable_id,
                 $transcript->stable_id,
                 $intron->prev_Exon->stable_id,
                 $intron->next_Exon->stable_id,
                 $intron->length ),
                 "\n";
         }
     }
 }

No comments: