#!/usr/bin/perl
# Merge two kmer count lists into one.
#
# Usage: <this script> <kmer_counts_1> <kmer_counts_2> > merged.txt
#
# Each input line holds a kmer and its count separated by whitespace.
# Counts belonging to the same kmer are summed, and the merged table is
# written to STDOUT as "kmer<TAB>count", sorted by kmer (string order).
use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

@ARGV >= 2 or die "Usage: $0 <kmer_counts_1> <kmer_counts_2>\n";

my %count_of;
add_counts($_, \%count_of) for @ARGV[0, 1];

for my $kmer (sort keys %count_of) {
    print "$kmer\t$count_of{$kmer}\n";
}

# Read "kmer count" lines from the file $path and add every count into the
# hash referenced by $count_of (kmer => running total).
# Dies if the file cannot be opened. Blank lines are ignored; lines without
# a numeric count are skipped with a warning on STDERR.
sub add_counts {
    my ($path, $count_of) = @_;

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode or a pipe command.
    open my $fh, '<', $path or die "Cannot open '$path': $!\n";

    while (my $line = <$fh>) {
        # split ' ' skips leading whitespace and drops the trailing newline,
        # so no chomp is needed and indented lines do not yield an empty key.
        my ($kmer, $count) = split ' ', $line;
        next unless defined $kmer;    # blank line

        unless (defined $count && looks_like_number($count)) {
            warn "$path:$.: skipping line without a numeric count\n";
            next;
        }

        # Always accumulate, so a kmer repeated within one file is summed
        # the same way as a kmer shared between the two files.
        $count_of->{$kmer} += $count;
    }

    close $fh;
    return;
}
This script takes two lists of kmer counts and merges them into one. A kmer count list consists of two columns: the first column is the kmer itself and the second is its count.
This might be useful in merging the output of a program like jellyfish after running it on each chromosome separately. While jellyfish has a merge function, it requires the hashes to be of equal size.
Maybe this could be rewritten as a Perl one-liner...
No comments:
Post a Comment