#!/usr/bin/perl


## VERSION=20030409

use strict;
use warnings;
use POSA::Read;
use Boulder::Stream;
use Mail::Sendmail;
use ConfigReader::Simple;

my $config = ConfigReader::Simple->new('/path_to/AutomaticBlast.config');

my $root = $config->root;
my $fofn = $root . '/' . $config->fofn;
my $fastadb = $root . '/' . $config->fastadb;
my $boulderdb = $root . '/' . $config->boulderdb;
my $tmpboulder = $root . '/' . $config->tmpboulder;
my $tmpfas = $root . '/' . $config->tmpfas;
my $tmpblast = $root . '/' . $config->tmpblast;
my $tmpparsed = $root . '/' . $config->tmpparsed;
my $stderr = $root . '/' . $config->stderr;
my @mailto = split /;/, $config->mailto;

open STDERR, ">>$stderr";
print STDERR "------------------------------\n";
print STDERR "BES Pipeline run of ", date(), "\n";
print STDERR "------------------------------\n";

my $failed = 0;
my $annotationcounter = 0;

my $mailbody = '';

`ls $root/ToProcess > $root/fofn.txt`;

open FOFN, $fofn;
my @fofn = <FOFN>;
close FOFN;

open TMPFAS, ">$tmpfas";
open TMPBOULDER, ">>$tmpboulder";
open FASTADB, ">>$fastadb";
foreach my $fn ( @fofn ) {
  chomp $fn;
  $failed = 0;
  my %annotations = ();
  $annotationcounter = 0;

  my $read = CleanSeq($fn);
  
  `mv $root/ToProcess/$fn $root/Processed/`;

  $mailbody .= $read->name . "\n";
  
  if ( not $failed ) {
    $mailbody .= "\tl=" . length $read->seq;
    $mailbody .= "\n";

    # Annotations: database, expectation value, percent identity, length, relative length
    %annotations = ( %annotations, %{ Blast($read, 'gallus', 10e-12, 90, 50, 1) } );
    %annotations = ( %annotations, %{ Blast($read, 'HSrna', 10e-5, 70, 50, 1) } );
    %annotations = ( %annotations, %{ Blast($read, 'hgi', 10e-7, 80, 50, 1) } );
    %annotations = ( %annotations, %{ Blast($read, 'mgi', 10e-12, 80, 50, 1) } );
    %annotations = ( %annotations, %{ Blast($read, 'gggi', 10e-12, 90, 50, 1) } );
    
    WriteOutput($read, \%annotations);

    CleanUp();

  } else {
    $mailbody .= "\tFAILED\n";
  }
}
close FASTADB;
close TMPBOULDER;
close TMPFAS;

NotifyByMail($mailbody, @mailto);

`rm $fofn`;

close STDERR;

sub NotifyByMail {
   my $mailbody = shift;

   my (@addresses) = @_;
 
   unshift @{$Mail::Sendmail::mailcfg{'smtp'}} , 'net.wau.nl';
 
   my $date = date();
   if ( $mailbody eq '' ) { $mailbody = "No BAC ends to process\n" }
 
   foreach ( @addresses ) {
     my %mail = ( To	=> "$_",
	       From    => 'AutomaticBlastPipeline@wur.nl',
	       Subject => "BAC End Sequencing Report of $date",
	       Message => $mailbody
	      );
   
     sendmail(%mail) or die $Mail::Sendmail::error;
   }
}

sub CleanUp {
  `rm $root/Tmp/*`;
}

sub Annotate {      # For format of %annotations, see end of the script
  my ($read, $tmpparsed) = @_;
  
  my %annotations = ();
  my $blaststream = Boulder::Stream->newFh($tmpparsed);
  foreach ( my $blastrecord = <$blaststream> ) {
    my $qryname = $blastrecord->QRYNAME;
    $qryname =~ s/ .*$//g;
    if ( $read->name eq $qryname ) {
      foreach my $subject ( $blastrecord->SBJ ) {
        $annotationcounter++;
        $annotations{$annotationcounter}{'DATABASE'} = $blastrecord->DATABASE;
        $annotations{$annotationcounter}{'SUBJECT'}{'NAME'} = $subject->SBJNAME;
	$annotations{$annotationcounter}{'SUBJECT'}{'LENGTH'} = $subject->SBJLENGTH;
        my $hitcounter = 0;
	foreach ( $subject->HIT ) {
	  $hitcounter++;
	  $annotations{$annotationcounter}{'HITS'}{$hitcounter}{'PVALUE'} = $_->PVALUE;
	  $annotations{$annotationcounter}{'HITS'}{$hitcounter}{'PERCENT'} = $_->PERCENT;
	  $annotations{$annotationcounter}{'HITS'}{$hitcounter}{'LENGTH'} = $_->LENGTH;
	  $annotations{$annotationcounter}{'HITS'}{$hitcounter}{'RELLENGTH'} = $_->RELLENGTH;
	  $annotations{$annotationcounter}{'HITS'}{$hitcounter}{'QRYSTART'} = $_->QRYSTRT;
	  $annotations{$annotationcounter}{'HITS'}{$hitcounter}{'QRYEND'} = $_->QRYEND;
	  $annotations{$annotationcounter}{'HITS'}{$hitcounter}{'SBJSTART'} = $_->STRT;
	  $annotations{$annotationcounter}{'HITS'}{$hitcounter}{'SBJEND'} = $_->END;
	  $annotations{$annotationcounter}{'HITS'}{$hitcounter}{'QRYSEQ'} = $_->QRYSEQ;
	  $annotations{$annotationcounter}{'HITS'}{$hitcounter}{'SBJSEQ'} = $_->SBJSEQ;
	  $annotations{$annotationcounter}{'HITS'}{$hitcounter}{'HOMSEQ'} = $_->HOMSEQ;
	}
      }
    }
  }

  \%annotations;
}

sub Blast {
  my ($read, $database, $exp, $percid, $length, $rellength) = @_;
  
  `blastall -p blastn -d $database -i $tmpfas -o $tmpblast`;
  `blast2boulder -i $tmpblast -o $tmpparsed -e $exp -p $percid -l $length -r $rellength`;
  my %annotations = %{ Annotate($read, $tmpparsed) };
  
  \%annotations;
}

sub WriteOutput {
  my ($read, $annref) = @_;
  
  my %annotations = %{ $annref };
  
  `chmod u+w $root/Database/$boulderdb`;
  open BOULDER, ">>$root/Database/$boulderdb";
  print BOULDER "NAME=", $read->name, "\n";
  print BOULDER "DATE=", date(), "\n";
  print BOULDER "SEQ=", $read->seq, "\n";
  foreach my $counter ( keys %annotations ) {
    $mailbody .= "\t" . $annotations{$counter}{'DATABASE'} . "\t";
    foreach ( keys %{ $annotations{$counter}{'HITS'} } ) {
      $mailbody .= $annotations{$counter}{'HITS'}{$_}{'PVALUE'} . ',';
    }
    $mailbody .= "\t" . $annotations{$counter}{'SUBJECT'}{'NAME'} . "\n";
    print BOULDER "BLASTHIT={\n";
    print BOULDER "\tDATABASE=", $annotations{$counter}{'DATABASE'}, "\n";
    print BOULDER "\tSUBJECT={\n";
    print BOULDER "\t\tNAME=", $annotations{$counter}{'SUBJECT'}{'NAME'}, "\n";
    print BOULDER "\t\tLENGTH=", $annotations{$counter}{'SUBJECT'}{'LENGTH'}, "\n";
    print BOULDER "\t\t}\n";
    foreach ( keys %{ $annotations{$counter}{'HITS'} } ) {
      print BOULDER "\tHIT={\n";
      print BOULDER "\t\tPVALUE=", $annotations{$counter}{'HITS'}{$_}{'PVALUE'}, "\n";
      print BOULDER "\t\tPERCENT=", $annotations{$counter}{'HITS'}{$_}{'PERCENT'}, "\n";
      print BOULDER "\t\tLENGTH=", $annotations{$counter}{'HITS'}{$_}{'LENGTH'}, "\n";
      print BOULDER "\t\tRELLENGTH=", $annotations{$counter}{'HITS'}{$_}{'RELLENGTH'}, "\n";
      print BOULDER "\t\tQRYSTART=", $annotations{$counter}{'HITS'}{$_}{'QRYSTART'}, "\n";
      print BOULDER "\t\tQRYEND=", $annotations{$counter}{'HITS'}{$_}{'QRYEND'}, "\n";
      print BOULDER "\t\tSBJSTART=", $annotations{$counter}{'HITS'}{$_}{'SBJSTART'}, "\n";
      print BOULDER "\t\tSBJEND=", $annotations{$counter}{'HITS'}{$_}{'SBJEND'}, "\n";
      print BOULDER "\t\tQRYSEQ=", $annotations{$counter}{'HITS'}{$_}{'QRYSEQ'}, "\n";
      print BOULDER "\t\tSBJSEQ=", $annotations{$counter}{'HITS'}{$_}{'SBJSEQ'}, "\n";
      print BOULDER "\t\tHOMSEQ=", $annotations{$counter}{'HITS'}{$_}{'HOMSEQ'}, "\n";
      print BOULDER "\t\t}\n";
    }
    print BOULDER "\t}\n";
  }
  print BOULDER "=\n";

  close BOULDER;
  `chmod a-w $root/Database/besDB.boulder`;
}


sub CleanSeq {
  my $fn = shift;
  
  my $read = POSA::Read->importFromAbi($fn, $root . "/ToProcess/" . $fn);
  $read->qclip();
  $read->seqvectclip(v => '/some_path/vector_primer');
  $read->screenseq(S => '/some_path/ecoli.seq');
  $read->clonvectclip();

  my $start = max($read->seqvectleft, $read->qualleft, 0);
  my $stop = min($read->seqvectright, $read->qualright, length $read->seq);
  my $seq = substr($read->seq, $start, $stop-$start);
  if ( $stop-$start > 20 ) {
    open FASTADB, ">>$fastadb";
    open TMPFAS, ">$tmpfas";
    print FASTADB $read->asFasta;
    print TMPFAS $read->asFasta;
    close TMPFAS;
    close FASTADB;
    $read->seq($seq);
  } else {
    $failed = 1;;
    open BOULDER, ">>$root/Database/besDB.boulder";
    print BOULDER "NAME=", $read->name, "\n";
    print BOULDER "DATE=", date(), "\n";
    print BOULDER "FAILED=1\n";
    print BOULDER "=\n";
    close BOULDER;
  }
  
  $read;
}


sub min {
  my @list = @_;
  my $min = 9999999999999999999999999;
  
  foreach ( @list ) {
    $min = ( $_ < $min ) ? $_ : $min;
  }
  
  $min;
}

sub max {
  my @list = @_;
  my $max = 0;
  
  foreach ( @list ) {
     $max = ( $_ > $max ) ? $_ : $max;
  }
  
  $max;
}

sub date {
  my $date = `date +%Y%m%d`;
  chomp $date;
  
  $date;
}


# Format of %annotations:
# 
# %annotations = ( 1 => {
#                        DATABASE => umistCluster
#                       ,SUBJECT => {
# 		                   NAME => 052635.1 Incyte Unique
# 				  ,LENGTH => 755
# 				  }
#                       ,HITS => {
# 		                1 => {
# 				      LENGTH => 17
# 				     ,PVALUE => 10e-12
# 				     ,...
# 				     }
# 			       ,2 => {
# 			              LENGTH => 25
# 				     ,PVALUE => 10e-12
# 				     ,...
# 				     }
# 			       }
# 		      }
# 	       , 2 => {
#                        DATABASE => umistCluster
#                       ,SUBJECT => {
# 		                   NAME => 052738.1 Incyte Unique
# 				  ,LENGTH => 602
# 				  }
#                       ,HITS => {
# 		                1 => {
# 				      LENGTH => 35
# 				     ,PVALUE => 10e-11
# 				     ,...
# 				     }
# 			       ,2 => {
# 			              LENGTH => 102
# 				     ,PVALUE => 10e-49
# 				     ,...
# 				     }
# 			       }
# 		      }
#                )
