#!/usr/bin/env python

"""
 blblastout.py - Translate NCBI blast output into other formats, and open those
     files using the appropriate application, or write output to a file.
     We call blast_formatter to perform the format conversion.

 Synopsis:
  blblastout.py --archive filename  [--delete] --outfmt format --destination dest [--outfile filename]
  blblastout.py --rid rid  [--delete] --outfmt format --destination dest [--outfile filename]

       --archive filename - filename is output from BLAST in ASN.1 format (-outfmt 11)

       --rid rid - rid is the name of a file containing a line of the form
            'RID: <rid>', giving the RID of a blast search done at NCBI.

       --archive and --rid are mutually exclusive

       --delete - Delete infile when finished. This is mainly intended for running
            from BioLegato, where infile is a temporary file.
       
       --outfmt format - format is any output format supported by blast_formatter

       --destination dest - dest is one of the following:
           For BLAST viewable Report:
                 textedit - open output files in text editor
                   specified by the $BL_TextEditor environment variable
                 browser - open in web browser specified by $BL_browser
                 textfile - write to a text file, using the basename
                    specified by --outfile ('.txt' is appended)
                 htmlfile - write to HTML file
            For BLAST tsv Report:
                 blnfetch - Open in blnfetch, BioLegato interface for retrieving DNA/RNA
                     entries using ACCESSION numbers
                 blpfetch - Open in blpfetch, BioLegato interface for retrieving protein
                     entries using ACCESSION numbers
                 tsvfile - write to a tsvfile.

       --outfile - base name for saving an output file; the extension .txt, .html
            or .tsv is appended according to --destination.

@modified: April 16, 2020
@author: Brian Fristensky
@contact: brian.fristensky@umanitoba.ca
"""

import os
import subprocess
import sys

#optparse is deprecated in favor of argparse as of Python 2.7. However,
# since 2.7 is not always present on many systems, at this writing,
# it is safer to stick with optparse for now. It should be easy
# to change later, since the syntax is very similar between argparse and optparse.
from optparse import OptionParser


# The directory named by $BIRCHPYLIB is added to the module search path so
# that birchlib can be imported below; the order of these statements matters.
# NOTE(review): if BIRCHPYLIB is unset, blib is None and None is appended to
# sys.path -- presumably the import then fails; confirm the intended behaviour.
blib = os.environ.get("BIRCHPYLIB")
sys.path.append(blib)

from birchlib import Birchmod

# Program name and usage text handed to Birchmod.
PROGRAM = "blblastout.py : "
USAGE = "\n\tUSAGE: blblastout.py --archive filename [--delete] --outfmt format --destination dest [--outfile filename]"
USAGE = USAGE + "\n\tUSAGE: blblastout.py --rid rid [--delete] --outfmt format --destination dest [--outfile filename]"

# BM supplies documentor() and exit_success(), used at the bottom of this file.
BM = Birchmod(PROGRAM, USAGE)
# When True, Parameters.read_args() prints the parsed command line options.
DEBUG = True

class Parameters:
    """
    Wrapper class for command line parameters.

    Attributes:
        IFN         - value of --archive (BLAST archive file, -outfmt 11)
        RFN         - value of --rid; elsewhere in this script it is opened
                      as a file and searched for an 'RID:' line
        DELETEFILE  - True if --delete was given
        OUTFMT      - value of --outfmt
        DESTINATION - value of --destination
        OFN         - value of --outfile
        PID         - process id of this program, as a string
    """
    def __init__(self, argv=None):
        """
        Set every attribute to its default, then call read_args() to fill
        in the values from the command line.

        argv - optional list of command line arguments (without the program
               name). When None, sys.argv[1:] is parsed, as before.
        """
        self.IFN = ""
        self.RFN = ""
        self.DELETEFILE = False
        self.OUTFMT = ""
        self.DESTINATION = ""
        self.OFN = ""
        self.PID = str(os.getpid())
        self.read_args(argv)

    def read_args(self, argv=None):
        """
        Read command line arguments into this Parameters object.

        argv - optional list of arguments; None means use sys.argv[1:].

        Exits through OptionParser.error() (SystemExit, status 2) if both
        --archive and --rid are given, since they are mutually exclusive.
        """
        parser = OptionParser()
        parser.add_option("--archive", dest="archive", action="store", type="string", default="",
                          help="BLAST output file in ASN.1 format")
        parser.add_option("--rid", dest="rid", action="store", type="string", default="",
                          help="file containing the RID of a BLAST search run remotely at NCBI")
        parser.add_option("--delete", dest="delete", action="store_true", default=False,
                          help="Delete archive when done")
        parser.add_option("--outfmt", dest="outfmt", action="store", type="string", default="",
                          help="output format specified by BLAST -outfmt option")
        parser.add_option("--destination", dest="destination", action="store", type="string", default="",
                          help="destination for output")
        parser.add_option("--outfile", dest="outfile", action="store", type="string", default="",
                          help="output filename")
        # Positional arguments are not used by this program.
        options = parser.parse_args(argv)[0]

        # Enforce the documented contract instead of silently preferring --rid.
        if options.archive != "" and options.rid != "":
            parser.error("--archive and --rid are mutually exclusive")

        self.IFN = options.archive
        self.RFN = options.rid
        self.DELETEFILE = options.delete
        self.OUTFMT = options.outfmt
        self.DESTINATION = options.destination
        self.OFN = options.outfile

        if DEBUG:
            print('------------ Parameters from command line ------' + '\n')
            print('    IFN: ' + self.IFN)
            print('    RFN: ' + self.RFN)
            print('    DELETEFILE: ' + str(self.DELETEFILE))
            print('    OUTFMT: ' + self.OUTFMT)
            print('    DESTINATION: ' + self.DESTINATION)
            print('    OFN: ' + self.OFN)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def ParseRID(RFN):
    """
    Parse the RID from the first line of file RFN that contains 'RID:'
    followed by a non-blank value.

    RFN - name of a text file to search.

    Returns the RID as a string, or "" if no such line is found.
    The result is also printed to standard output as 'RID: <value>'.
    """
    RIDNUM = ""
    # The with-block closes the file even if reading fails, and iterating
    # the file lets us stop as soon as the RID has been found.
    with open(RFN, "r") as infile:
        for line in infile:
            if "RID:" in line:
                # Take the text that follows the 'RID:' tag itself, so that a
                # colon appearing earlier on the line cannot shift the result.
                # Anything after a further colon is dropped.
                AFTER_TAG = line.split("RID:", 1)[1]
                RIDNUM = AFTER_TAG.split(":")[0].strip()
                if RIDNUM != "":
                    break

    print('RID: ' + RIDNUM)
    return RIDNUM

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RunBlast_Formatter(SOURCE,SOURCENAME,OUTFMT,FILETYPE,OFN):
    """
    Run blast_formatter to convert an NCBI ASN.1 archive (outfmt=11)
    into other supported formats. If blast results came from NCBI,
    rather than a local database, blast_formatter will use the RID
    to send a request to NCBI for output file in the specified format.

    SOURCE - "-archive" or "-rid"
    SOURCENAME - filename or RID string
    OUTFMT - -outfmt field used by BLAST or blast_formatter
    FILETYPE - htmlfile|tsvfile|textfile; 'htmlfile' adds the -html flag
    OFN - name of the output file, passed to blast_formatter as -out

    Waits for blast_formatter to finish. A non-zero exit status is reported
    on standard error; nothing is returned.
    """

    # For the tabular formats (6, 7, 10) blblastout.py takes a comma-separated
    # list, eg. 7,sacc,slen,evalue, which has to become the space-separated
    # string blast_formatter recognizes, eg. '7 sacc slen evalue'.
    # Other formats are passed through unchanged.
    # The command is run without a shell, so the value must NOT be wrapped in
    # quotes; join() also avoids putting a stray blank in front of the format.
    TOKENS = OUTFMT.split(",")
    if TOKENS[0] in ["6","7","10"] :
        PRFMT = " ".join(TOKENS)
    else :
        PRFMT = OUTFMT

    # Only the HTML report needs an extra flag; every file type writes to OFN.
    COMMAND = ['blast_formatter', SOURCE, SOURCENAME, '-outfmt', PRFMT]
    if FILETYPE == 'htmlfile' :
        COMMAND.append('-html')
    COMMAND.extend(['-out', OFN])

    STATUS = subprocess.call(COMMAND)
    if STATUS != 0 :
        # Don't abort: callers carry on as before, but the failure is visible.
        sys.stderr.write("blblastout.py : blast_formatter exited with status "
                         + str(STATUS) + "\n")

    return

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def ParseHeaders(IFN,OFN):
    """
    For tabular BLAST output, parse the Fields line so that the names of
    fields appear as column headings.

    IFN - name of the tabular file to read
    OFN - name of the file to write

    The leading run of '#' comment lines is copied to OFN, except that the
    '# Fields:' line is held back, has its 'Fields: ' label removed and its
    commas replaced by tabs, and is written as the last comment line.
    Everything after the leading comment run is copied unchanged.
    If there is no '# Fields:' line (eg. formats without comment lines),
    the input is copied as is.
    """

    with open(IFN,"r") as infile :
        lines = infile.readlines()

    # Find where the leading block of comment lines ends
    NUMCOMMENTS = 0
    while (NUMCOMMENTS < len(lines)) and (lines[NUMCOMMENTS].startswith("#")) :
        NUMCOMMENTS += 1

    # None means "no Fields line seen"; without this, input lacking a
    # Fields line would fail on an unbound variable.
    FieldLine = None
    with open(OFN,"w") as outfile :
        for line in lines[:NUMCOMMENTS] :
            if line.startswith("# Fields:") :
                FieldLine = "# " + line[10:] # get rid of ' Fields: '
                FieldLine = FieldLine.replace(",","\t")
            else :
                outfile.write(line)
        if FieldLine is not None :
            outfile.write(FieldLine)

        # Now write the rest of the file
        outfile.writelines(lines[NUMCOMMENTS:])

    return

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RUNTEXTEDIT(OFN):
    """
    Open OFN in a text editor as a background shell job, then run
    $RM_CMD on OFN once the editor command has finished.

    OFN - name of the file to open
    """
    # choose_edit_wrapper.sh prints the editor command to run; it deals with
    # the many issues of launching multiple files in a text editor.
    EDIT_STEP = 'nohup `choose_edit_wrapper.sh` ' + OFN
    # $RM_CMD is expanded by the shell -- presumably a file-removal command.
    REMOVE_STEP = '$RM_CMD ' + OFN + ' > /dev/null'
    # Parentheses plus '&' run both steps, in order, in a background subshell.
    os.system('(' + EDIT_STEP + '; ' + REMOVE_STEP + ')&')
    return

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RUNBROWSER(OFN,DELETEFILE):
    """
    Open OFN with chooseviewer.py and wait for that program to exit.

    OFN - name of the file to open
    DELETEFILE - if true, '--delete' is passed to chooseviewer.py
        (presumably asking it to remove OFN when done)
    """
    VIEWER_ARGS = ['chooseviewer.py', OFN]
    if DELETEFILE :
        VIEWER_ARGS.append('--delete')
    # No shell is involved; the arguments are passed as a list.
    subprocess.Popen(VIEWER_ARGS).wait()
    return

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RunBioLegato(DESTINATION, FN):
    """
    Launch the program named by DESTINATION (blnfetch or blpfetch) on FN as
    a background shell job, and remove FN after that program has finished.

    DESTINATION - name of the program to run
    FN - name of the file handed to the program, deleted afterwards
    """
    LAUNCH_STEP = 'nohup ' + DESTINATION + ' ' + FN
    REMOVE_STEP = 'rm -f ' + FN + ' > /dev/null'
    # Parentheses plus '&' run both steps, in order, in a background subshell.
    os.system('(' + LAUNCH_STEP + '; ' + REMOVE_STEP + ')&')

    return

           
#======================== MAIN PROCEDURE ==========================
def main():
    """
    Called when not in documentation mode.

    Reads the command line, runs blast_formatter on the archive or RID,
    and sends the result to the place named by --destination:
        textedit, browser            - temporary file opened in a viewer
        textfile, htmlfile           - <outfile>.txt or <outfile>.html
        blnfetch, blpfetch, tsvfile  - tabular output with parsed headers,
                                       opened in BioLegato or saved as
                                       <outfile>.tsv
    With --delete, the input file is removed afterwards.
    Finishes by calling BM.exit_success().
    """
    # Local import: shutil.move() still works when the output file is on a
    # different filesystem from the temporary file, where os.rename() fails.
    import shutil

    P = Parameters ()
    if P.RFN != "" :
        SOURCE="-rid"
        SOURCENAME=ParseRID(P.RFN)
    else :
        SOURCE="-archive"
        SOURCENAME=P.IFN

    # Write the output to a file, or send it to a window, as specified
    # in --destination. Temporary files are named after the process id.

    if P.DESTINATION == 'textedit':
        TEMPOFN = P.PID + '.' + 'txt'
        RunBlast_Formatter(SOURCE,SOURCENAME,P.OUTFMT,'textfile',TEMPOFN)
        RUNTEXTEDIT(TEMPOFN)

    elif P.DESTINATION == 'browser':
        TEMPOFN = P.PID + '.' + 'html'
        RunBlast_Formatter(SOURCE,SOURCENAME,P.OUTFMT,'htmlfile',TEMPOFN)
        RUNBROWSER(TEMPOFN,True)

    elif P.DESTINATION == 'textfile':
        RunBlast_Formatter(SOURCE,SOURCENAME,P.OUTFMT,'textfile',P.OFN + '.txt')

    elif P.DESTINATION == 'htmlfile':
        RunBlast_Formatter(SOURCE,SOURCENAME,P.OUTFMT,'htmlfile',P.OFN + ".html")

    elif P.DESTINATION in ['blnfetch','blpfetch','tsvfile'] :
        TEMPOFN = P.PID + '.' + 'tmp'
        RunBlast_Formatter(SOURCE,SOURCENAME,P.OUTFMT,'tsvfile',TEMPOFN)
        PARSEDOFN = P.PID + '.' + 'tsv'
        ParseHeaders(TEMPOFN,PARSEDOFN)
        if P.DESTINATION in ['blnfetch','blpfetch'] :
            RunBioLegato(P.DESTINATION,PARSEDOFN)
        else :
            shutil.move(PARSEDOFN,P.OFN + '.tsv')
        os.remove(TEMPOFN)

    else :
        # Previously an unknown destination produced no output and no message.
        sys.stderr.write(PROGRAM + "unrecognized --destination '"
                         + P.DESTINATION + "'; no output written\n")

    # If --delete, delete the input file when done
    if P.DELETEFILE :
        if SOURCE == "-archive" :
            os.remove(P.IFN)
        else :
            os.remove(P.RFN)
    BM.exit_success()

# Run the program unless Birchmod reports documentation mode or '-test'
# appears on the command line.
if not (BM.documentor() or "-test" in sys.argv):
    main()
