#!/usr/bin/perl -w

#Usage:
#	convertDNA-rcomp.pl {input fasta format file containing the sequence}

# Description:
#	reads a DNA sequence from an external file that stores the sequence in the fasta format
#		and then outputs the reverse complement of the input sequence
#Assumption:
#	only one sequence present in the input fasta file

use strict;


# Main script: read the FASTA file named by the first command-line argument,
# echo its header line(s), concatenate the sequence lines (upper-cased) and
# print the sequence followed by its reverse complement.

# The input file name is mandatory; without it just show the usage line.
if(!exists($ARGV[0])) {
	print "Usage: convertDNA-rcomp.pl {input fasta format file containing the sequence}\n";
	exit;
}

my $sequence = "";

print "Opening file " . $ARGV[0] . "\n";

# Three-argument open with a lexical handle: the file name is never interpreted
# as a mode/pipe specification, and a missing or unreadable file stops the
# script with a clear message instead of silently yielding an empty sequence.
open(my $fp, '<', $ARGV[0]) or die "Cannot open file $ARGV[0]: $!\n";

while(my $line = <$fp>) {
	# remove the line terminator; also drops the CR of Windows-style (CRLF)
	# files so it cannot end up inside the sequence
	$line =~ s/[\r\n]+\z//;

	if($line =~ /^>/) {
		# fasta header line
		print "Sequence name = $line\n";
	}
	else {
		# convert everything into uppercase for consistency
		$sequence .= uc($line);
	}
} # end of while loop
close($fp);

print "The input sequence is:	5' $sequence 3'\n";

my $reverse_complement = rcomp($sequence);

print "The reverse complemented sequence is:	5' $reverse_complement 3'\n";

exit;


# define a new function to calculate the reverse complement of any sequence
# rcomp($sequence)
#	Returns the reverse complement of a DNA sequence string.
#	Argument: a string of bases; A/C/G/T are complemented in either case
#	          (case is preserved), any other character is left unchanged.
#	Returns:  the complemented string in reversed order, as a single string
#	          regardless of whether the caller uses scalar or list context.
#	The caller's variable is not modified (the work is done on a copy).
sub rcomp {

	my $s = shift;  # copy the first argument passed into a new variable string called $s

	# tr/// replaces each listed character by its counterpart in one pass,
	# so A->T and T->A (and C<->G) cannot clobber each other
	$s =~ tr/ACGTacgt/TGCAtgca/;	#translate A<->T, C<->G

	# "scalar" is required: a bare "reverse $s" evaluated in list context
	# reverses a one-element list and would return the string un-reversed
	return scalar reverse $s;

} #end sub rcomp

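#Note: Another way to do translation is in steps of substitution, but NOT naively like below:
#	$s=~s/A/T/g;   
#	$s=~s/T/A/g;
#	$s=~s/G/C/g;
#	$s=~s/C/G/g;
#	the last "g" stands for "global substitution"
#	Caution: run in this order, the second step turns the T's just produced by the
#	first step back into A's (likewise for G and C), so a temporary placeholder
#	character would be needed; tr/// maps all characters in a single pass instead.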

# substitution is more general than tr///: the pattern and its replacement
#	can be strings of any length, and need not have the same length
#	for example: 
#	s/cow/monkey/g   
#	will substitute all occurrences of "cow" with "monkey" in the current line


