Click “read more” to see today’s solution…
#!/usr/bin/perl # ==CONSTANTS== # we write here the FILENAME of the multifasta # file containing the contigs # Multifasta containing all contigs: $multifasta = '/home/geno/tools/contigs.fna'; # Path to the PRIMER3 program: $primer3prog= '/home/geno/tools/primer3/src/primer3_core'; # Size of target region for primer3 $targetsize = 50; # Maximum contig size (we trim the contig if larger than this) $maxcontigsize = 1000; # PCR product size $desiredproductsize = "100-200"; # We print a MESSAGE for the user print STDERR " +----------------------------------------------+ | Pick Primer Tool v.01 | | | | Parameters: | | contig1 direction1 contig2 direction2 | | | | direction1,2 = U or C | +----------------------------------------------+ "; # We populate variables with USER PARAMETERS ($contig1, $dir1, $contig2, $dir2) = @ARGV; # And print them for the user print STDERR " INPUT DATA: contig1 $contig1\t$dir1 contig2 $contig2\t$dir2 "; open(I, "$multifasta") || die " ERROR: can't open $multifasta :(\n"; if ($dir2 eq '') { die " FATAL ERROR:\n This program requires 4 parameters.\n"; } # Here we read the multifasta file # If the HEADER is called like 'contig1' we'll keep the sequence into $seq1. Same for contig2 print STDERR "Loading multifasta...\n"; while (<I>) { chomp($_); # We check if the line is a header (>...) or not if ($_=~/>(\w*)/) { if ($header eq $contig1) { $seq1 = $seq; $len1 = length($seq1); print STDERR "$contig1 found ($len1 bp)! "; } if ($header eq $contig2) { $seq2 = $seq; $len2 = length($seq2); print STDERR "$contig2 found ($len2 bp)! "; } # Reset header and sequecne $header=$1; $seq = ''; } else { $seq=$seq.$_; } } print STDERR "\n"; # Final check (for last sequence) if ($header eq $contig1) { $seq1 = $seq; } if ($header eq $contig2) { $seq2 = $seq; } # Control: did we populate both $seq1 and $seq2? # Suppose that the user mistyped one or both contig names! if (length($seq1) == 0) { die "FATAL ERROR:\n\"$contig1\" was not found!\n"; } if (length($seq2) == 0) { die "FATAL ERROR:\n\"$contig2\" was not found!\n"; } # Calculate rev-compl (using a subroutine) if ($dir1 eq 'C') { $seq1 = rc($seq1); } if ($dir2 eq 'C') { $seq2 = rc($seq2); } # Let's trim contigs if they are larger than $maxcontigsize. This will reduce # primer3 execution time. if (length($seq1)>$maxcontigsize) { print STDERR "$contig1 was reduced to $maxcontigsize bp to accelerate primer picking.\n"; $startfrom = length($seq1)-$maxcontigsize; $seq1 = substr($seq1, $startfrom, $maxcontigsize); } if (length($seq2)>$maxcontigsize) { print STDERR "$contig2 was reduced to $maxcontigsize bp to accelerate primer picking.\n"; $seq1 = substr($seq1, 0, $maxcontigsize); } $template = $seq1.$seq2; $start_point = length($seq1)-int($targetsize/2); $primer3conf = "SEQUENCE_ID=GAP-$contig1$dir1-$contig2$dir2 PRIMER_TM_FORMULA=1 PRIMER_SALT_CORRECTIONS=1 SEQUENCE_TEMPLATE=$template SEQUENCE_TARGET=$start_point,$targetsize PRIMER_MIN_TM=52 PRIMER_MAX_TM=62 PRIMER_TASK=pick_detection_primers PRIMER_PICK_LEFT_PRIMER=1 PRIMER_PICK_INTERNAL_OLIGO=0 PRIMER_PICK_RIGHT_PRIMER=1 PRIMER_OPT_SIZE=19 PRIMER_MIN_SIZE=14 PRIMER_MAX_SIZE=28 PRIMER_MAX_NS_ACCEPTED=0 PRIMER_PRODUCT_SIZE_RANGE=$desiredproductsize P3_FILE_FLAG=0 PRIMER_EXPLAIN_FLAG=0 ="; # Launch primer3 @primer3output = qx(echo "$primer3conf" | $primer3prog); # We print the output now: in the future we'll ANALIZE IT print @primer3output; # SUB ROUTINES sub rc { # This subroutine calculates reverse-complimentary $sequence = $_[0]; $sequence = reverse($sequence); $sequence =~tr/ACGTacgt/TGCAtgca/; return $sequence } |