Lab 4: solution

Click “read more” to see today’s solution…

#!/usr/bin/perl
 
# ==CONSTANTS==
# we write here the FILENAME of the multifasta 
# file containing the contigs
 
# Multifasta containing all contigs:
$multifasta = '/home/geno/tools/contigs.fna';
# Path to the PRIMER3 program:
$primer3prog= '/home/geno/tools/primer3/src/primer3_core';
# Size of target region for primer3
$targetsize = 50;  
# Maximum contig size (we trim the contig if larger than this)
$maxcontigsize = 1000;
# PCR product size
$desiredproductsize = "100-200";
# We print a MESSAGE for the user
print STDERR "
 +----------------------------------------------+
 |   Pick Primer Tool v.01                      |
 |                                              | 
 |   Parameters:                                |
 |   contig1 direction1 contig2 direction2      |
 |                                              |
 |   direction1,2 = U or C                      |
 +----------------------------------------------+
";
 
# We populate variables with USER PARAMETERS
($contig1, $dir1, $contig2, $dir2) = @ARGV;
 
# And print them for the user
print STDERR "
INPUT DATA:
contig1   $contig1\t$dir1
contig2   $contig2\t$dir2
 
";
open(I, "$multifasta") || die " ERROR: can't open $multifasta :(\n";
 
if ($dir2 eq '') {
	die " FATAL ERROR:\n This program requires 4 parameters.\n";
}
# Here we read the multifasta file
# If the HEADER is called like 'contig1' we'll keep the sequence into $seq1. Same for contig2
print STDERR "Loading multifasta...\n";
while (<I>) {
	chomp($_);
	# We check if the line is a header (>...) or not
	if ($_=~/>(\w*)/) {
 
	   if ($header eq $contig1) {
	   	 	$seq1 = $seq;
			$len1 = length($seq1);
			print STDERR "$contig1 found ($len1 bp)! ";
	   }
	   if ($header eq $contig2) {
	   		$seq2 = $seq;
			$len2 = length($seq2);
			print STDERR "$contig2 found ($len2 bp)! ";
	   }
	   # Reset header and sequecne
	   $header=$1;
	   $seq = '';
	} else {
		$seq=$seq.$_;
	}
}
print STDERR "\n";
# Final check (for last sequence)
if ($header eq $contig1) {
 	$seq1 = $seq;
}
if ($header eq $contig2) {
	$seq2 = $seq;
}
 
# Control: did we populate both $seq1 and $seq2?
# Suppose that the user mistyped one or both contig names!
 
if (length($seq1) == 0) {
	die "FATAL ERROR:\n\"$contig1\" was not found!\n";
}
if (length($seq2) == 0) {
	die "FATAL ERROR:\n\"$contig2\" was not found!\n";
}
 
# Calculate rev-compl (using a subroutine)
if ($dir1 eq 'C') {
	$seq1 = rc($seq1);
}
if ($dir2 eq 'C') {
	$seq2 = rc($seq2);
}
 
# Let's trim contigs if they are larger than $maxcontigsize. This will reduce
# primer3 execution time.
if (length($seq1)>$maxcontigsize) {
	print STDERR "$contig1 was reduced to $maxcontigsize bp to accelerate primer picking.\n";
	$startfrom = length($seq1)-$maxcontigsize;
	$seq1 = substr($seq1, $startfrom, $maxcontigsize);
}
 
if (length($seq2)>$maxcontigsize) {
	print STDERR "$contig2 was reduced to $maxcontigsize bp to accelerate primer picking.\n";
	$seq1 = substr($seq1, 0, $maxcontigsize);
}
 
$template = $seq1.$seq2;
$start_point = length($seq1)-int($targetsize/2);
 
$primer3conf = "SEQUENCE_ID=GAP-$contig1$dir1-$contig2$dir2
PRIMER_TM_FORMULA=1
PRIMER_SALT_CORRECTIONS=1
SEQUENCE_TEMPLATE=$template
SEQUENCE_TARGET=$start_point,$targetsize
PRIMER_MIN_TM=52
PRIMER_MAX_TM=62
PRIMER_TASK=pick_detection_primers
PRIMER_PICK_LEFT_PRIMER=1
PRIMER_PICK_INTERNAL_OLIGO=0
PRIMER_PICK_RIGHT_PRIMER=1
PRIMER_OPT_SIZE=19
PRIMER_MIN_SIZE=14
PRIMER_MAX_SIZE=28
PRIMER_MAX_NS_ACCEPTED=0
PRIMER_PRODUCT_SIZE_RANGE=$desiredproductsize
P3_FILE_FLAG=0
PRIMER_EXPLAIN_FLAG=0
=";
 
# Launch primer3
@primer3output = qx(echo "$primer3conf" | $primer3prog);
 
# We print the output now: in the future we'll ANALIZE IT
print @primer3output;
 
# SUB ROUTINES
 
sub rc {
	# This subroutine calculates reverse-complimentary 
 
	$sequence = $_[0];
	$sequence = reverse($sequence);
	$sequence =~tr/ACGTacgt/TGCAtgca/;
	return $sequence
}

Leave a Reply

Your email address will not be published. Required fields are marked *