scriptname.pl fofn.txt
GeneratePrimer3Input.pl fofn.txt | primer3
Code | Explanation |
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
use POSA::Read; | |
my $seq_vect_file = '/your_path_to/vector_primer'; | Location to file with vector sequence (see Staden documentation) |
my $screen_seq_file = '/your_path_to/some_seq.seq'; | Location to file with other sequence to screen (see Staden documentation) |
my $file_of_filenames = shift; | What is the name of the file containing all ab1 filenames? |
foreach my $abi_file ( <FOFN> ) { | For each of the files in the fofn, do the following: |
chomp $abi_file; | |
my $read = POSA::Read->importFromAbi(name => $abi_file, file => $abi_file ); | Create a POSA::Read object based on the ab1 file. |
$read->qclip(w => 30); | Perform quality clipping with a cutoff of 30. |
$read->seqvectclip(v => $seq_vect_file ); | Remove vector sequence. |
$read->screenseq(S => $screen_seq_file ); | Remove other sequences. |
print $read->asPrimer3, "\n"; | Print as primer3 input. |
} |
Code | Explanation |
#/usr/bin/perl | |
use strict; | |
use warnings; | |
use POSA::Read; | |
use POSA::Contig; | |
my $file_of_filenames = shift; | What is the name of the file containing all ab1 filenames? |
my @reads = (); | |
open FOFN, $file_of_filenames; | |
foreach my $abi_file ( <FOFN> ) { | For each of the files in the fofn, do the following: |
chomp $abi_file; | Remove the trailing newline. |
my $read = POSA::Read->importFromAbi(name => $abi_file, file => $abi_file); | Create a POSA::Read object based on the ab1 file. |
push @reads, $read; | Store the POSA::Read object in an array. |
} | |
close FOFN; | |
my @contigs = POSA::Contig->phrap(reads => \@reads, gap_init_penalty => -6); | Create contigs based on the reads that were generated above. |
foreach my $ctg ( @contigs ) { | For each of the contigs, do the following: |
$ctg->searchSnps(rank => 6); | Search for SNPs |
print "#####GENOTYPES#####\n"; | |
print $ctg->asGenotypes, "\n"; | Print the genotypes |
print "#####POSSIBLE SBE PRIMERS#####\n"; | |
print $ctg->asSbe(length => 35), "\n"; | Print possible SBE primers |
print "#####ALIGNMENT#####\n"; | |
print $ctg->asAlignment, "\n"; | Print the alignment |
print "#####MIPE FORMAT#####\n"; | |
print $ctg->asMipe, "\n"; | Print in MIPE format |
} |
Output from this script could look like this:
#####GENOTYPES##### Contig1 Polymorphisms: 25(1),133(3),292(2),452(3),499(3),503(2),508(1),601(1) Genotypes: 25 133 292 452 499 503 508 601 sample1.ab1 C/C C/C G/G C/C G/G sample2.ab1 G/G T/T T/T A/A T/T A/A sample3.ab1 G/G T/T sample4.ab1 T/T sample5.ab1 G/G T/T G/G C/C sample6.ab1 G/G G/T A/A sample7.ab1 G/G T/T sample8.ab1 G/G T/T G/G sample9.ab1 T/T sample10.ab1 G/G T/T G/G C/C sample11.ab1 C/G T/T G/G sample12.ab1 G/G T/T G/G sample13.ab1 G/G T/T G/G sample14.ab1 T/T sample15.ab1 G/G T/T G/G #####POSSIBLE SBE PRIMERS##### 25 FORWARD CCCTCTGCAat 25 REVERSE GCAGCAGGAAGAGGCAGGGCAGTGCCACGGGCTCC 133 FORWARD aCCCCCCCCGCGTCAAATGG*AGCAAGGTGCGCTC 133 REVERSE CAGGATGGGGACGTCCT*CCC*TCTGCCCGCTGGC 292 FORWARD ACGCTGCTGCTGCGCGCCGCCCGCGCCAGCGATGC 292 REVERSE TCGATGCCAGCCACCACCTCGCAGCGGTACAGCCC 452 FORWARD CTGCACCCGATGTGAGCTGTGCGGCCCCCGTGAGC 452 REVERSE GCTTGTAGGGTGTTCCAGGCTTGCACAGGGTGCAC 499 FORWARD GCAAGCCTGGAACACCCTACAAGCTACGCACAGCA 499 REVERSE TCACGTGCATCCCATGCATTCCCTATAAGCCTTGC 503 FORWARD GCCTGGAACACCCTACAAGCTACGCACAGCATGCA 503 REVERSE CTGTTCACGTGCATCCCATGCATTCCCTATAAGCC 508 FORWARD GAACACCCTACAAGCTACGCACAGCATGCAAGGCT 508 REVERSE CTGTGCTGTTCACGTGCATCCCATGCATTCCCTAT 601 FORWARD TGTGAGCTGTAGGGTGCATCCTCtGCAAGCTGTGC 601 REVERSE GTGCATGGCTTGCATGAGGTGAATGGCTCACAGCA #####ALIGNMENT##### sample1.ab1CCCTCTGCAa sample2.ab1 CCCtCTGCAat sample3.ab1 cCggtcaGTGCGgaGTGGGGCTGGGGGAGccCGTGGCACTGCCCTGCCT sample4.ab1 gGCACTGCCCTGCCT sample5.ab1 ccagcagtgcg*aGTGGGGCTGGGGGAGCCCGTGGCACTGCCCTGCCT sample6.ab1 cCAggcagTGCG*AGTGGGGCTGGGGGAGCCCGTGGCACTGCCCTGCCT sample7.ab1 g*AGTGGGGCTGGGGGAGCCCGTGGCACTGCCCTGCCT sample8.ab1 agTGGGGCTGGGGGAGCCCGTGGCACTGCCCTGCCT sample9.ab1 ggCACTGCCCTGcct sample10.ab1 agTGGGGCTGGGGGAGCCCGTGGCACTGCCCTGCCT sample11.ab1 tgGGGCTggcggaGCCCGTGGCACTGCCCTGCCT sample12.ab1 gtGGGGCTGGGGGAGCCCGTGGCACTGCCCTGCCT sample13.ab1 agTGGGGCTGGGGGAGCCCGTGGCACTGCCCTGCCT sample14.ab1 GGCACTGCCCTGCCT sample15.ab1 agTGGGGCTGGGGGAGCCCGTGGCACTGCCCTGCCT Contig1 cCAggcaGTGCG*AGTGGGGCTGGGGGAGCCCGTGGCACTGCCCTGCCT CCCTCTGCAat #####MIPE FORMAT##### <use> <sample> <id>sample1</id> <file>sample1.ab1</file> <genotype> <snp_id>snp_Contig1_452</snp_id> <amb>C</amb> </genotype> <genotype> <snp_id>snp_Contig1_499</snp_id> <amb>C</amb> </genotype> <genotype> <snp_id>snp_Contig1_503</snp_id> <amb>G</amb> </genotype> <genotype> <snp_id>snp_Contig1_508</snp_id> <amb>C</amb> </genotype> <genotype> <snp_id>snp_Contig1_601</snp_id> <amb>G</amb> </genotype> </sample> <sample> <id>sample2</id> <file>sample2.ab1</file> <genotype> <snp_id>snp_Contig1_292</snp_id> <amb>G</amb> </genotype> <genotype> <snp_id>snp_Contig1_452</snp_id> <amb>T</amb> </genotype> <genotype> <snp_id>snp_Contig1_499</snp_id> <amb>T</amb> </genotype> <genotype> <snp_id>snp_Contig1_503</snp_id> <amb>A</amb> </genotype> <genotype> <snp_id>snp_Contig1_508</snp_id> <amb>T</amb> </genotype> <genotype> <snp_id>snp_Contig1_601</snp_id> <amb>A</amb> </genotype> </sample> <sample> <id>sample3</id> <file>sample3.ab1</file> <genotype> <snp_id>snp_Contig1_25</snp_id> <amb>G</amb> </genotype> <genotype> <snp_id>snp_Contig1_133</snp_id> <amb>T</amb> </genotype> </sample> <sample> <id>sample4</id> <file>sample4.ab1</file> <genotype> <snp_id>snp_Contig1_133</snp_id> <amb>T</amb> </genotype> </sample> <sample> <id>sample5</id> <file>sample5.ab1</file> <genotype> <snp_id>snp_Contig1_25</snp_id> <amb>G</amb> </genotype> <genotype> <snp_id>snp_Contig1_133</snp_id> <amb>T</amb> </genotype> <genotype> <snp_id>snp_Contig1_292</snp_id> <amb>G</amb> </genotype> <genotype> <snp_id>snp_Contig1_452</snp_id> <amb>C</amb> </genotype> </sample> <sample> <id>sample6</id> <file>sample6.ab1</file> <genotype> <snp_id>snp_Contig1_25</snp_id> <amb>G</amb> </genotype> <genotype> <snp_id>snp_Contig1_133</snp_id> <amb>R</amb> </genotype> <genotype> <snp_id>snp_Contig1_292</snp_id> <amb>A</amb> </genotype> </sample> <sample> <id>sample7</id> <file>sample7.ab1</file> <genotype> <snp_id>snp_Contig1_25</snp_id> <amb>G</amb> </genotype> <genotype> <snp_id>snp_Contig1_133</snp_id> <amb>T</amb> </genotype> </sample> <sample> <id>sample8</id> <file>sample8.ab1</file> <genotype> <snp_id>snp_Contig1_25</snp_id> <amb>G</amb> </genotype> <genotype> <snp_id>snp_Contig1_133</snp_id> <amb>T</amb> </genotype> <genotype> <snp_id>snp_Contig1_292</snp_id> <amb>G</amb> </genotype> </sample> <sample> <id>sample9</id> <file>sample9.ab1</file> <genotype> <snp_id>snp_Contig_133</snp_id> <amb>T</amb> </genotype> </sample> <sample> <id>sample10</id> <file>sample10.ab1</file> <genotype> <snp_id>snp_Contig1_25</snp_id> <amb>G</amb> </genotype> <genotype> <snp_id>snp_Contig1_133</snp_id> <amb>T</amb> </genotype> <genotype> <snp_id>snp_Contig1_292</snp_id> <amb>G</amb> </genotype> <genotype> <snp_id>snp_Contig1_452</snp_id> <amb>C</amb> </genotype> </sample> <sample> <id>sample11</id> <file>sample11.ab1</file> <genotype> <snp_id>snp_Contig1_25</snp_id> <amb>S</amb> </genotype> <genotype> <snp_id>snp_Contig1_133</snp_id> <amb>T</amb> </genotype> <genotype> <snp_id>snp_Contig1_292</snp_id> <amb>G</amb> </genotype> </sample> <sample> <id>sample12</id> <file>sample12.ab1</file> <genotype> <snp_id>snp_Contig1_25</snp_id> <amb>G</amb> </genotype> <genotype> <snp_id>snp_Contig1_133</snp_id> <amb>T</amb> </genotype> <genotype> <snp_id>snp_Contig1_292</snp_id> <amb>G</amb> </genotype> </sample> <sample> <id>sample13</id> <file>sample13.ab1</file> <genotype> <snp_id>snp_Contig1_25</snp_id> <amb>G</amb> </genotype> <genotype> <snp_id>snp_Contig1_133</snp_id> <amb>T</amb> </genotype> <genotype> <snp_id>snp_Contig1_292</snp_id> <amb>G</amb> </genotype> </sample> <sample> <id>sample14</id> <file>sample14.ab1</file> <genotype> <snp_id>snp_Contig1_133</snp_id> <amb>T</amb> </genotype> </sample> <sample> <id>sample15</id> <file>sample15.ab1</file> <genotype> <snp_id>snp_Contig1_25</snp_id> <amb>G</amb> </genotype> <genotype> <snp_id>snp_Contig1_133</snp_id> <amb>T</amb> </genotype> <genotype> <snp_id>snp_Contig1_292</snp_id> <amb>G</amb> </genotype> </sample> <seq>cCAggcaGTGCG*AGTGGGGCTGGGGGAGCCCGTGGCACTGCCCTGCCT<skip>CCCTCTGCAat</seq> <snp> <id>snp_Contig1_25</id> <amb>S</amb> <pos>25</pos> <rank>1</rank> </snp> <snp> <id>snp_Contig1_133</id> <amb>K</amb> <pos>133</pos> <rank>3</rank> </snp> <snp> <id>snp_Contig1_292</id> <amb>R</amb> <pos>292</pos> <rank>2</rank> </snp> <snp> <id>snp_Contig1_452</id> <amb>Y</amb> <pos>452</pos> <rank>3</rank> </snp> <snp> <id>snp_Contig1_499</id> <amb>Y</amb> <pos>499</pos> <rank>3</rank> </snp> <snp> <id>snp_Contig1_503</id> <amb>R</amb> <pos>503</pos> <rank>2</rank> </snp> <snp> <id>snp_Contig1_508</id> <amb>Y</amb> <pos>508</pos> <rank>1</rank> </snp> <snp> <id>snp_Contig1_601</id> <amb>R</amb> <pos>601</pos> <rank>1</rank> </snp> </use>
#!/usr/bin/perl use strict; use warnings; use POSA::Read; use POSA::Contig; my $file_of_filenames = shift; my @reads = (); open FOFN, $file_of_filename; foreach my $abi_file ( <FOFN> ) { chomp $abi_file; my $read = POSA::Read->importFromAbi(name => $abi_file, file => $abi_file); push @reads, $read; } close FOFN; my @contigs = POSA::Contig->phrap(reads => \@reads, gap_init_penalty => -6); foreach my $ctg ( @contigs ) { $ctg->searchSnps(rank => 6); print $ctg->asGff, "\n"; }Output could look like this:
Contig1 POSA::Contig Consensus 1 1000 . . . Consensus Contig1 Contig1 POSA::Contig Read 1 250 . . . Read Read1 Contig1 POSA::Contig Read 150 400 . . . Read Read2 Contig1 POSA::Contig Read 300 800 . . . Read Read3 Contig1 POSA::Contig Read 600 1000 . . . Read Read4 Contig1 POSA::Contig SNP 432 432 . . . SNP 432 ; Rank 4
Directory structure: