#!/usr/bin/perl -w

#Usage:
#	mutation-detector.pl {seq1 fasta file} {seq2 fasta file}

# Description:
#	reads two DNA sequences of equal lengths from the input fasta files,
#		then compares them against one another for single nucleotide polymorphisms

use strict;


if(!exists($ARGV[1])) {
	print "Usage: mutation-detector.pl {seq1 fasta file} {seq2 fasta file\n";
	exit;
}

my $seq1 = "";
my $seq2 = "";

$seq1 = loadSequence($ARGV[0]);
$seq2 = loadSequence($ARGV[1]);

compare($seq1,$seq2);

exit;


# define a new function to load and return the first sequence in a fasta file
sub loadSequence {

	my $filename = shift;  

	my $seq_name = "";

	my $seq = "";
	open(FP,$filename);
	while(<FP>) {
		chomp;

		if(/^>/) {
			$seq_name = $_;
		}
		else {
			my $uc_seq = uc($_);
			$seq = $seq . "$uc_seq";
		}
	} # end of while loop
	close(FP);
	print "Sequence $seq_name	$seq\n";
	return $seq;
} #end sub loadSequence


# define a new function to "compare" two sequences and report the locations of SNPs

sub compare {

	my $s1 = shift;
	my $s2 = shift;

	if(length($s1)!=length($s2)) {
		print "Error: Supplied strings should be of equal length!\n";
		exit;
	}

	my $i;
	my $n_snps=0;
	for($i=0;$i<length($s1);$i++) {
		my $base1 = substr($s1,$i,1);
		my $base2 = substr($s2,$i,1);
		if($base1 ne $base2) {	# notice I didn't use the != operator here
			print "Mutation at position $i : $base1 ==> $base2\n";
			$n_snps++;
		}
	}
	print "Number of SNPs/mutations = $n_snps\n";
}#end sub compare

