#!/usr/bin/env python3

import datetime
import getpass
import os
import re
import stat
import subprocess
import sys
import time

'''
bl_magicblast.py - Run magicblast on single-end or paired-end read files listed in a TSV file

Synopsis: bl_magicblast.py tsvfile outdir database [magicblast arguments]


@modified: March 25, 2021
@author: Brian Fristensky
@contact: Brian.Fristensky@umanitoba.ca  
'''

# Program name prefix and command line usage text for user-facing messages.
PROGRAM = "bl_magicblast.py : "
USAGE = "\n\tUSAGE: bl_magicblast.py tsvfile outdir database [magicblast arguments]"

# When DEBUG is True, the script prints diagnostic output such as the parsed
# command line parameters and the list of read files taken from the TSV file.
DEBUG = True
if DEBUG :
    print('bl_magicblast: Debugging mode on')


# - - - - - - - - - - - - - Utility functions - - - - - - - - - - - - - - - -
def chmod_ar(filename):
    """
    Add read permission for owner, group and others to filename.

    The existing permission bits are kept; the read bits are OR-ed in.
    Nothing is done if filename does not exist.
    """
    if not os.path.exists(filename):
        return
    # S_IRUSR is the POSIX name for the owner-read bit (same value as S_IREAD).
    read_bits = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
    current_mode = os.stat(filename).st_mode
    os.chmod(filename, current_mode | read_bits)

		
def chmod_arx(filename):
    """
    Add read and execute/search permission for owner, group and others
    to a file or directory.

    The existing permission bits are kept; the new bits are OR-ed in.
    Nothing is done if filename does not exist.
    """
    if not os.path.exists(filename):
        return
    # S_IRUSR / S_IXUSR are the POSIX names for S_IREAD / S_IEXEC.
    owner_bits = stat.S_IRUSR | stat.S_IXUSR
    group_bits = stat.S_IRGRP | stat.S_IXGRP
    other_bits = stat.S_IROTH | stat.S_IXOTH
    current_mode = os.stat(filename).st_mode
    os.chmod(filename, current_mode | owner_bits | group_bits | other_bits)

def LocalHostname():
    """
    Return the name of the local machine. Tries a number of methods
    to get a name other than 'localhost' or a null result.

    The sources are tried in this order, stopping at the first usable name:
        1. the HOSTNAME environment variable
        2. the node name reported by platform.uname()
        3. socket.gethostname(), as is if it contains a '.', otherwise
           the name returned by a reverse lookup of it

    If the reverse lookup fails, the unqualified socket.gethostname()
    value is returned rather than raising an exception.
    """
    import socket
    import platform

    def CheckName(name) :
        # A usable name is non-empty and does not start with "localhost".
        return bool(name) and not name.startswith("localhost")

    name = os.getenv('HOSTNAME')

    if not CheckName(name) :
        name = platform.uname()[1]

    if not CheckName(name) :
        hostname = socket.gethostname()
        if '.' in hostname :
            name = hostname
        else :
            try :
                name = socket.gethostbyaddr(hostname)[0]
            except OSError :
                # socket.herror / socket.gaierror are OSError subclasses;
                # a failed lookup should not abort the caller.
                name = hostname

    return name


def SendEmail(From,To,Subject,Text) :
    """
    Send a simple two-part (plain text + HTML) email through the SMTP
    server on localhost.

    From    - sender address
    To      - recipient address, or a list of recipient addresses
    Subject - subject line
    Text    - message body; used as is for the plain text part and
              inserted, unescaped, into a minimal HTML page for the
              HTML part

    Sending is best-effort: failures to connect or to send are reported
    on standard output and are not raised to the caller.

    Adapted from:
    http://stackoverflow.com/questions/882712/sending-html-email-using-python
    """
    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText

    Host = 'localhost'

    msg = MIMEMultipart('alternative')
    msg['Subject'] = Subject
    # Without these headers the delivered message shows no sender/recipient.
    msg['From'] = From
    msg['To'] = To if isinstance(To, str) else ", ".join(To)
    Html = """\
        <html>
          <head></head>
          <body>
            <p>
            %s
            </p>
          </body>
        </html>
        """ %(Text)
    part1 = MIMEText(Text, 'plain')
    part2 = MIMEText(Html, 'html')
    msg.attach(part1)
    msg.attach(part2)

    try:
        # The context manager issues QUIT even if sendmail() fails.
        with smtplib.SMTP(Host) as server :
            server.sendmail(From, To, msg.as_string())
    except (smtplib.SMTPException, OSError) as err :
        print("Error: unable to send email: " + str(err))
    else :
        print("Successfully sent email")


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Parameters:
    """
    Wrapper class for command line parameters.

    Attributes:
        TSVFILE   - sys.argv[1], name of the TSV file listing read files
        OUTDIR    - sys.argv[2], name of the output directory
        EMAIL     - always ""; no command line argument sets it
        BLASTARGS - list of all remaining arguments (sys.argv[3:])
    """
    def __init__(self):
        """
        Initialize all parameters to empty values, then call read_args()
        to fill in their values from the command line. When DEBUG is on,
        the resulting values are printed.
        """
        self.TSVFILE = ""
        self.OUTDIR = ""
        self.EMAIL = "" # not used
        self.BLASTARGS = []
        self.read_args()

        if DEBUG :
            print('------------ Parameters from command line ------')
            print('    TSVFILE: ' + self.TSVFILE)
            print('    OUTDIR: ' + self.OUTDIR)
            print('    BLASTARGS: ' + str(self.BLASTARGS))
            print()


    def read_args(self):
        """
        Read command line arguments from sys.argv into this object.

        Prints the usage message and exits with status 1 if tsvfile and
        outdir are not both present, instead of failing with IndexError.
        """
        if len(sys.argv) < 3 :
            print(USAGE)
            sys.exit(1)
        self.TSVFILE = sys.argv[1]
        self.OUTDIR = sys.argv[2]
        self.BLASTARGS = sys.argv[3:]

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class TSVFiles :
    """
    Methods for reading lists of paired read TSV files, and for
    writing lists to output.

    Attributes:
        READPAIRS - list of lists; each inner list holds one file name
                    (single-end) or two file names (paired-end)
    """
    def __init__(self):
        """
        Start with an empty READPAIRS list.
        """
        self.READPAIRS = []

    def ReadTSVfile(self,FN) :
        """
        Append the read file names listed in TSV file FN to READPAIRS.

        Paired-end files are on lines such as

        leftreadfile.fq.gz<TAB>rightreadfile.fq.gz

        Single-end files have each file on a separate line

        reads1.fq.gz
        reads2.fq.gz
        reads3.fq.gz

        Blank lines and lines beginning with '#' are skipped. Double
        quotes are removed. Only the first two TAB-separated fields of
        a line are used, and blank fields are ignored, so a line whose
        first field is blank but whose second is not yields a
        single-end entry.
        """
        TAB = '\t'
        # 'with' guarantees the file is closed even if parsing fails.
        with open(FN,"r") as F :
            for line in F :
                line = line.strip()
                if len(line) == 0 or line.startswith('#') :
                    continue
                # get rid of double quotes that enclose fields when some
                # programs write output, and then split by TABs.
                tokens = line.replace('"','').split(TAB)
                fnames = [t.strip() for t in tokens[:2]]
                fnames = [name for name in fnames if len(name) > 0]
                if len(fnames) > 0 :
                    self.READPAIRS.append(fnames)
        if DEBUG :
            print(str(self.READPAIRS))


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RunMagicblast(P,PR,LOGFILE) :
    """
    Run magicblast on one single-end read file or one pair of paired-end
    read files, and record the command, timing and line count of the
    output in LOGFILE.

    P       - object whose BLASTARGS attribute is a list of additional
              arguments appended to the magicblast command line
    PR      - list holding one (single-end) or two (paired-end) read
              file names
    LOGFILE - open writable file. It is also passed to the subprocess
              as stdout and stderr, so it must be a real file.

    The tabular output is written to <base>.magicblast.tsv, where <base>
    is the read file name minus '.gz' and its extension. For paired
    files, <base> is the leading part the two base names have in common.
    NOTE(review): the output name is derived from the read file path, so
    results are written next to the reads and P.OUTDIR is not consulted
    here - confirm that this is intended.

    If the two files of a pair have different extensions, a message is
    printed and nothing is run. If magicblast cannot be started, exits
    with a non-zero status, or produces no output file, that is written
    to LOGFILE instead of raising an exception.

    Returns None.
    """

    # Given an input filename, return the basename and file extension
    # (without the dot) as a list. A trailing .gz is ignored.
    def FNparts(FN) :
        if FN.endswith(".gz") : # remove .gz extension
            TempName = FN[:-3]
        else :
            TempName = FN
        # splitext only looks at the last path component, and leaves the
        # whole name as the basename when there is no extension.
        BaseName, Ext = os.path.splitext(TempName)
        return [BaseName,Ext[1:]]

    # Return the longest identical substring in common
    # between s1 and s2, reading from left to right.
    def FirstDiff(s1,s2) :
        return os.path.commonprefix([s1,s2])

    # Count the number of lines in a file. Similar to wc -l in Unix
    def lc(FN) :
        with open(FN,"r") as f :
            return sum(1 for _ in f)

    # ----- Step 1: Parse read file name(s) into components to be used for output filename.

    PAIRED = len(PR) == 2
    TFN0 = FNparts(PR[0])

    if PAIRED :
        TFN1 = FNparts(PR[1])
        if TFN0[1] != TFN1[1] : # file extensions differ
            print(" ".join([">>>Paired files ",PR[0],"and",PR[1]," appear to be of different types."]))
            print(">>>Aborting magicblast.py")
            return

    Ext = TFN0[1]

    # Create an output filename
    if PAIRED :
        Prefix = FirstDiff(TFN0[0],TFN1[0])
        # If the names share nothing beyond the directory, fall back to
        # the first file's name rather than creating ".magicblast.tsv".
        if os.path.basename(Prefix) == "" :
            Prefix = TFN0[0]
    else :
        Prefix = TFN0[0]
    OutName = ".".join([Prefix,"magicblast","tsv"])

    # Construct the command string - - - - - - - - - - - - - -
    COMMAND=["magicblast"]

    if PAIRED :
        COMMAND.extend(["-query", PR[0], "-query_mate", PR[1], "-no_discordant"])
    else :
        COMMAND.extend(["-query", PR[0]])
    if Ext.lower() in ["fastq","fq"] :
        COMMAND.extend(["-infmt","fastq"])
    else :
        COMMAND.extend(["-infmt","fasta"])

    OUTFMT = "tabular"
    COMMAND.extend(["-outfmt",OUTFMT,"-out",OutName])

    COMMAND.extend(P.BLASTARGS)

    print("COMMAND: ",COMMAND)

    # Run the command - - - - - - - - - - - - - - - - -
    LOGFILE.write('======== Magicblast ==========' + '\n')
    LOGFILE.write('COMMAND: ' + str(COMMAND) + '\n')
    StartTime = datetime.datetime.now()
    LOGFILE.write('Start time: ' + str(StartTime) + '\n')
    LOGFILE.write('\n')
    # Flush so that our header precedes anything the subprocess writes.
    LOGFILE.flush()
    try :
        ReturnCode = subprocess.call(COMMAND,stdout=LOGFILE,stderr=LOGFILE)
    except OSError as err :
        # e.g. magicblast is not installed or not on the PATH
        ReturnCode = None
        print(">>>Unable to run magicblast: " + str(err))
        LOGFILE.write('ERROR: unable to run magicblast: ' + str(err) + '\n')
    FinishTime = datetime.datetime.now()

    if ReturnCode not in (0, None) :
        LOGFILE.write('ERROR: magicblast exited with status ' + str(ReturnCode) + '\n')

    if os.path.isfile(OutName) :
        NumHits = lc(OutName)
        LOGFILE.write(OutName + "\t" + str(NumHits)+ "\tHits" + "\n")
    else :
        LOGFILE.write(OutName + "\t" + "no output file was produced" + "\n")
    LOGFILE.write('\n')
    LOGFILE.write('Finish time: ' + str(FinishTime) + '\n')
    ElapsedTime = FinishTime - StartTime
    LOGFILE.write('Elapsed time: ' + str(ElapsedTime) + '\n')
    LOGFILE.write('\n')

#======================== MAIN PROCEDURE ==========================
def main():
    """
    Read the command line, run magicblast on every read file or pair of
    read files listed in the TSV file, and write bl_magicblast.log in
    the output directory.

    If P.EMAIL is set, the contents of the log are emailed to that
    address when the run is finished. (Parameters never sets EMAIL from
    the command line, so this step is currently inactive.)
    """

    # Read parameters from command line
    P = Parameters()

    TF = TSVFiles()
    OKAY = P.TSVFILE != ""
    if OKAY :
        TF.ReadTSVfile(P.TSVFILE)

    # Create output directory, if it doesn't already exist.
    # makedirs also creates any missing parent directories.
    if OKAY and P.OUTDIR != "" :
        os.makedirs(P.OUTDIR, exist_ok=True)

    LOGFN = os.path.join(P.OUTDIR,"bl_magicblast.log")
    # 'with' closes the log even if a magicblast run raises.
    with open(LOGFN,'w') as LOGFILE :
        LOGFILE.write('\n')

        # Run magicblast
        if OKAY :
            for PR in TF.READPAIRS :
                RunMagicblast(P,PR,LOGFILE)

    # Notify user when job is done, if an email address was supplied.
    if P.EMAIL != "" :
        Sender = getpass.getuser() + '@' + LocalHostname()
        Subject = 'bl_magicblast.py completed'
        # Read back the log that was just written, i.e. LOGFN in OUTDIR,
        # not a file of the same name in the current directory.
        with open(LOGFN,'r') as LOGFILE :
            Body = ''.join(line + '<br>' for line in LOGFILE)
        Message = 'bl_magicblast.py: Completed<br>' + Body
        SendEmail(Sender,[P.EMAIL],Subject,Message)


if __name__ == "__main__":
    main()

