#!/usr/bin/perl
# Split every sequence of a multi-FASTA file into overlapping windows.
#
# Usage:  perl <this script> input.fasta > windows.fasta
#
# Records are processed in sorted header order.  For each window two lines
# are printed:
#     <header>:<start>-<end>
#     <window sequence>
# <start> is a 0-based offset into the cleaned sequence and <end> is
# exclusive.  Consecutive windows start $WINDOW_STEP residues apart and are
# $WINDOW_LENGTH residues long, except the last window of a sequence, which
# runs to the end of the sequence.
use strict;
use warnings;

my $WINDOW_LENGTH = 1000;    # nominal window size, in residues
my $WINDOW_STEP   = 500;     # distance between consecutive window starts

# Run only when executed as a script, so the subs below can be loaded
# (require/do) without side effects.
main(@ARGV) unless caller;

# Entry point: reads the FASTA file named by the first argument and prints
# the overlapping windows of every record to STDOUT.
# Dies with a usage message when no file name is given.
sub main {
    my @args = @_;
    die "Usage: $0 <multi-fasta file>\n" unless @args;

    my $seq_for = read_fasta( $args[0] );

    for my $header ( sort keys %{$seq_for} ) {
        my $sequence = $seq_for->{$header};
        for my $window ( overlapping_windows( length($sequence) ) ) {
            my ( $start, $len ) = @{$window};
            print $header . ":" . $start . "-" . ( $start + $len ) . "\n";
            print substr( $sequence, $start, $len ) . "\n";
        }
    }
    return 0;
}

# Parse a FASTA file into a hash reference mapping each header line
# (including the leading '>') to its concatenated sequence.
#   $path - file to read
# Whitespace, underscores and digits are removed from sequence lines.
# Lines that appear before the first header are ignored.  When a header
# occurs more than once, the last record wins.
# Dies if the file cannot be opened.
sub read_fasta {
    my ($path) = @_;

    # Three-argument open: the file name can never be parsed as a mode.
    open my $fh, '<', $path or die "Cannot open '$path': $!\n";

    my %seq_for;
    my $header;
    while ( my $line = <$fh> ) {
        if ( $line =~ /^>/ ) {
            # Strip LF and CRLF line endings so a stray "\r" cannot end up
            # in the middle of the printed "header:start-end" line.
            $line =~ s/[\r\n]+\z//;
            $header = $line;
            $seq_for{$header} = '';
        }
        elsif ( defined $header ) {
            $line =~ s/[\s_0-9]+//g;
            $seq_for{$header} .= $line;
        }
    }
    close $fh;

    return \%seq_for;
}

# Compute the windows covering a sequence of $seq_len residues.
#   $seq_len - length of the sequence
#   $window  - nominal window length (default $WINDOW_LENGTH)
#   $step    - offset between window starts (default $WINDOW_STEP)
# Returns a list of [start, length] pairs with 0-based starts.
# A new full-size window is started only while another full window still
# fits after the next step; otherwise the current window is extended to the
# end of the sequence.  Hence the last window is between $window and
# $window + $step - 1 residues long, a sequence shorter than $window yields
# a single window spanning the whole sequence, and an empty sequence yields
# no windows.
# Dies if $window or $step is not positive.
sub overlapping_windows {
    my ( $seq_len, $window, $step ) = @_;
    $window = $WINDOW_LENGTH unless defined $window;
    $step   = $WINDOW_STEP   unless defined $step;
    die "Window length and step must be positive\n"
        if $window <= 0 || $step <= 0;

    return () if !defined $seq_len || $seq_len <= 0;

    my @windows;
    my $start = 0;

    # '>=' (not '>'): a following window that fits exactly must still be
    # emitted, otherwise the residues it covers are lost from the output.
    while ( $seq_len - ( $start + $step ) >= $window ) {
        push @windows, [ $start, $window ];
        $start += $step;
    }

    # Final window absorbs whatever remains.
    push @windows, [ $start, $seq_len - $start ];

    return @windows;
}
# Monday, August 11, 2014
# Get overlapping sequences from a multi-FASTA file
# Subscribe to:
# Post Comments (Atom)
# No comments:
# Post a Comment