#!/usr/bin/perl -w

#Usage:
#	translateToProtein.pl {input fasta format file containing the DNA sequence}

# Description:
#	reads a DNA sequence from a fasta file,
#		then converts it into an RNA,
#		then converts the RNA into a corresponding amino acid sequence using the genetic code
#		and then outputs the translated amino acid product
#Assumption:
#	only one sequence present in the input fasta file

use strict;


# Main program: read the single FASTA record named on the command line,
# echo its header and DNA sequence, then print the RNA and protein forms.
if(!exists($ARGV[0])) {
	print "Usage: translateToProtein.pl {input fasta format file containing the sequence}\n";
	exit;
}

my $sequence = "";

# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode, and a missing/unreadable file is reported
# instead of silently yielding an empty sequence.
open(my $fasta_fh, '<', $ARGV[0])
	or die "Error: cannot open input file '$ARGV[0]': $!\n";

while(my $line = <$fasta_fh>) {
	# remove the line terminator, including the CR of a CRLF-terminated file
	$line =~ s/\r?\n\z//;

	if($line =~ /^>/) {
		# header line of the FASTA record
		print "Sequence name = $line\n";
	}
	else {
		# Drop any whitespace inside the sequence line so stray blanks or
		# carriage returns cannot end up inside a codon, and convert
		# everything into uppercase for consistency.
		$line =~ s/\s+//g;
		$sequence .= uc($line);
	}
} # end of while loop
close($fasta_fh);

print "The input DNA sequence is:	5' $sequence 3'\n";

my $rna = makeRNA($sequence);

print "The RNA sequence is:	5' $rna 3'\n";

translate($rna);

exit;


# "Convert" a DNA sequence into the corresponding RNA sequence.
#
# Argument:
#	the DNA sequence as a string
# Returns:
#	a copy of the sequence with every thymine replaced by uracil
#	(T -> U and t -> u); all other characters are returned unchanged.
#	The caller's variable is not modified.
sub makeRNA {

	my $s = shift;

	# Handle both cases so the function is correct on its own and does not
	# depend on the caller having upper-cased the sequence beforehand.
	$s =~ tr/Tt/Uu/;

	return $s;

} #end sub makeRNA


# Translate an RNA sequence into protein and print the result to STDOUT.
#
# Argument:
#	the RNA sequence as a string; codons are read from offset 0 in
#	consecutive groups of three characters
# Output:
#	the line "Protein: " followed by the amino acids, each followed by a
#	single space, ten per line
# Behaviour on bad input:
#	- length not a multiple of 3: prints an error message and returns
#	- codon missing from the table: prints an error message and exits
#	- genetic_code.txt (looked up in the current directory) cannot be
#	  opened: dies with the system error message
sub translate {

	my $s = shift;

	# Validate the length first so a malformed sequence is rejected
	# without touching the genetic code file at all.
	if(length($s)%3!=0) {
		# report error if the RNA sequence's length is not a multiple of 3
		print "Error: RNA sequence should be a multiple of length 3\n";
		return;
	}

	# "hash table" mapping each codon to its translation
	my %code = _loadGeneticCode("genetic_code.txt");

	# Now we are ready to translate the RNA sequence in $s
	my $codons = length($s)/3;

	print "Protein: \n";
	my $n=0;	# amino acids printed on the current output line
	for my $i (0 .. $codons-1) {
		my $next_codon = substr($s,$i*3, 3);

		# translate the next codon by looking up the hash table
		if(!defined ($code{$next_codon}) ) {
			# reached when the sequence contains a triplet that is not
			# listed in the genetic code file
			print "Error: $next_codon codon's translation not defined\n";
			exit;
		}

		print "$code{$next_codon} ";

		# start a new output line after every 10 amino acids
		$n++;
		if($n==10) {
			print "\n";
			$n=0;
		}
	} #end of for loop
	print "\n";

	return;
}

# Load the codon table from the file given as the only argument.
# Each line holds a codon and its translation separated by whitespace;
# lines with fewer than two fields (e.g. blank lines) are skipped.
# Returns the table as a codon => translation hash; dies if the file
# cannot be opened.
sub _loadGeneticCode {

	my $path = shift;

	my %code = ();

	open(my $code_fh, '<', $path)
		or die "Error: cannot open genetic code file '$path': $!\n";

	while(my $line = <$code_fh>) {
		# split ' ' discards leading whitespace and the line terminator
		my ($codon, $amino_acid) = split ' ', $line;

		next unless defined $amino_acid;

		$code{$codon} = $amino_acid;
	}
	close($code_fh);

	return %code;
}

