#!/usr/bin/env python3

import datetime
import getpass
import os
import re
import stat
import subprocess
import sys
import time

'''
bl_trimmomatic.py - Trim adaptors from Illumina reads

Synopsis: bl_trimmomatic.py tsvfile outdir threads [processing_steps]


@modified: May 18, 2019
@author: Brian Fristensky
@contact: Brian.Fristensky@umanitoba.ca  
'''

# Program name prefix and usage text (presumably intended for error and
# usage messages -- confirm against the code that reports bad arguments).
PROGRAM = "bl_trimmomatic.py : "
USAGE = "\n\tUSAGE: bl_trimmomatic.py tsvfile outdir threads [processing_steps]"

# When DEBUG is True, the classes below print the parsed command line
# parameters and the list of read files. Note that the banner is printed
# as a module-level side effect, i.e. also when this file is imported.
DEBUG = True
if DEBUG :
    print('bl_trimmomatic: Debugging mode on')


# - - - - - - - - - - - - - Utility functions - - - - - - - - - - - - - - - -
def chmod_ar(filename):
    """
    Add read permission for owner, group and others to a file.

    Permission bits that are already set are kept. If filename does not
    exist, nothing is done.
    """
    if not os.path.exists(filename):
        return
    read_bits = stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH
    current_mode = os.stat(filename).st_mode
    os.chmod(filename, current_mode | read_bits)

		
def chmod_arx(filename):
    """
    Add read and execute/search permission for owner, group and others
    to a file or directory.

    Permission bits that are already set are kept. If filename does not
    exist, nothing is done.
    """
    if not os.path.exists(filename):
        return
    read_bits = stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH
    exec_bits = stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
    current_mode = os.stat(filename).st_mode
    os.chmod(filename, current_mode | read_bits | exec_bits)

def LocalHostname():
    """
    Return the name of the local machine.

    Candidates are tried in this order until one is usable (i.e. not
    None, not empty and not starting with 'localhost'):
        1. the HOSTNAME environment variable
        2. the node name reported by platform.uname()
        3. socket.gethostname() if it contains a dot, otherwise the
           primary name returned by socket.gethostbyaddr() for it
    The result of the last step is returned without further checking.
    """
    import platform
    import socket

    def usable(candidate) :
        # A candidate is rejected when it is missing, empty or a
        # loopback-style name.
        if candidate is None :
            return False
        return not (candidate.startswith("localhost") or candidate == "")

    hostname = os.getenv('HOSTNAME')

    if not usable(hostname) :
        hostname = platform.uname()[1]

    if not usable(hostname) :
        reported = socket.gethostname()
        if '.' in reported :
            hostname = reported
        else :
            hostname = socket.gethostbyaddr(reported)[0]

    return hostname


def SendEmail(From,To,Subject,Text) :
    """
        Send a simple email through the SMTP server on localhost.

        The message is multipart/alternative: Text is sent as a plain
        text part and, wrapped in a minimal HTML page, as an HTML part.

        From    - sender address
        To      - recipient address, or a list of recipient addresses
        Subject - subject line
        Text    - message body

        Sending is best-effort: SMTP and network errors are reported on
        standard output and are not propagated to the caller.

        Adapted from:
        http://stackoverflow.com/questions/882712/sending-html-email-using-python
        There are more elaborate examples on this site for sending
        HTML messages and attachments.
    """
    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText

    Host = 'localhost'

    msg = MIMEMultipart('alternative')
    msg['Subject'] = Subject
    # Without From/To headers many mail clients show the message as having
    # no sender or recipient, even though the SMTP envelope is correct.
    msg['From'] = From
    msg['To'] = To if isinstance(To, str) else ', '.join(To)
    Html = """\
        <html>
          <head></head>
          <body>
            <p>
            %s
            </p>
          </body>
        </html>
        """ %(Text)
    part1 = MIMEText(Text, 'plain')
    part2 = MIMEText(Html, 'html')
    msg.attach(part1)
    msg.attach(part2)

    try:
        # The context manager closes the connection even if sendmail fails.
        with smtplib.SMTP(Host) as server :
            server.sendmail(From, To, msg.as_string())
    except (smtplib.SMTPException, OSError) as err :
        # Only mail/network failures are swallowed; programming errors and
        # KeyboardInterrupt still propagate.
        print("Error: unable to send email")
        print("Reason: " + str(err))
    else :
        print("Successfully sent email")


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Parameters:
    """
    Wrapper class for command line parameters.

    Attributes:
        TSVFILE  - name of the TSV file listing the read files
        OUTDIR   - output directory
        THREADS  - number of threads passed to trimmomatic
        EMAIL    - notification address; never set from the command line
        TRIMARGS - list of Trimmomatic processing steps, in execution order
    """
    def __init__(self):
        """
        Set every attribute to its default, then call read_args() to
        fill in the values from the command line. When DEBUG is on, the
        resulting values are printed.
        """
        self.TSVFILE = ""
        self.OUTDIR = ""
        self.THREADS = 1
        self.EMAIL = "" # not used 
        self.TRIMARGS = []
        self.read_args()


        if DEBUG :
            print('------------ Parameters from command line ------')
            print('    TSVFILE: ' + self.TSVFILE)
            print('    OUTDIR: ' + self.OUTDIR)
            print('    THREADS: ' + str(self.THREADS))
            print('    TRIMARGS: ' + str(self.TRIMARGS))
            print()  


    def OrderArgs(self,RawArgs) :
        """
        RawArgs is a list of Trimmomatic parameters of the form

        <step>^<param>

        where step is an integer telling at which step param should be
        used in processing.

        Returns the list of params [ <param1>,<param2>...] sorted by step.
        The sort is stable, so input order is preserved where two or more
        items have the same step. A step of 0 indicates that the item is
        to be ignored, so items with 0 as the step are not returned.

        Only the first '^' separates step from param, so a param may
        itself contain '^'.

        Raises ValueError if an item has a non-integer step, or has a
        non-zero step but no '^' separator.
        """
        Steps = []
        for Arg in RawArgs :
            StepText, Sep, Param = Arg.partition('^')
            try :
                Step = int(StepText)
            except ValueError :
                raise ValueError("processing step '" + Arg
                                 + "' is not of the form <step>^<param>") from None
            # Compare numerically so that e.g. "00" is ignored like "0".
            if Step == 0 :
                continue
            if Sep == "" :
                raise ValueError("processing step '" + Arg
                                 + "' is not of the form <step>^<param>")
            Steps.append((Step, Param))
        # Sort on the step number only; sorting whole tuples would also
        # reorder params that share a step.
        Steps.sort(key=lambda Item : Item[0])
        return [Param for Step, Param in Steps]


    def read_args(self):
        """
        Read command line arguments from sys.argv into this object:

            sys.argv[1]  -> TSVFILE
            sys.argv[2]  -> OUTDIR
            sys.argv[3]  -> THREADS (integer)
            sys.argv[4:] -> TRIMARGS, ordered by OrderArgs()

        If arguments are missing or malformed, a message followed by the
        usage text is written to standard error and the program exits
        with a non-zero status.
        """
        if len(sys.argv) < 4 :
            sys.exit(PROGRAM + "too few arguments" + USAGE)

        self.TSVFILE = sys.argv[1]
        self.OUTDIR = sys.argv[2] 
        try :
            self.THREADS = int(sys.argv[3])
        except ValueError :
            sys.exit(PROGRAM + "threads must be an integer, got '"
                     + sys.argv[3] + "'" + USAGE)
        try :
            self.TRIMARGS = self.OrderArgs(sys.argv[4:])
        except ValueError as err :
            sys.exit(PROGRAM + str(err) + USAGE)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class TSVFiles :
    """
    Methods for reading lists of paired read TSV files, and for
    writing lists to output.

    Attributes:
        READPAIRS - list of lists; each inner list holds the one
                    (single-end) or two (paired-end) filenames found
                    on one line of the TSV file
    """
    def __init__(self):
        """
        Start with an empty READPAIRS list.
        """
        self.READPAIRS = []            

    def ReadTSVfile(self,FN) :
        """
        Read the TSV file FN and append its entries to READPAIRS.

        The TSV file contains names of paired-end and/or single end read
        files. Paired-end files are on lines such as

        leftreadfile.fq.gz<TAB>rightreadfile.fq.gz

        Single-end files have each file on a separate line

        reads1.fq.gz
        reads2.fq.gz
        reads3.fq.gz

        Blank lines and lines beginning with '#' are skipped. Double
        quotes are removed. Only the first two fields of a line are
        used, and blank fields are ignored.
        """
        TAB = '\t'
        # 'with' guarantees the file is closed even if reading fails.
        with open(FN,"r") as F :
            for line in F :
                line = line.strip()
                if len(line) == 0 or line.startswith('#') :
                    continue
                # get rid of double quotes that enclose fields when some
                # programs write output, and then split by TABs.
                tokens = line.replace('"','').split(TAB)

                # Only the first two fields are considered; blank fields
                # are dropped, so a line yields zero, one or two names.
                fnames = [T.strip() for T in tokens[:2]]
                fnames = [T for T in fnames if len(T) > 0]
                if len(fnames) > 0 :
                    self.READPAIRS.append(fnames)
        if DEBUG :
            print(str(self.READPAIRS))


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RunTrimmomatic(P,PR,LOGFILE) :
    """
    Run trimmomatic on one single-end read file or one pair of
    paired-end read files.

    P       - object providing OUTDIR, THREADS and TRIMARGS
    PR      - list holding two filenames (paired-end mode) or one
              filename (single-end mode)
    LOGFILE - open, writable file backed by a real file descriptor. The
              command, start/finish/elapsed times and exit status are
              written to it, and trimmomatic's stdout and stderr are
              redirected to it.

    Output files are written to P.OUTDIR. Their names are derived from
    the input file names by removing a trailing .gz and inserting a tag
    before the extension: _1P, _1U, _2P, _2U for paired-end input and
    _S for single-end input.

    Returns the exit status of trimmomatic. An OSError (e.g. trimmomatic
    not found) is propagated to the caller.
    """

    # Given an input filename, return an output filename in P.OUTDIR
    def OutName(IFN,PairedTag) :
        # Drop the directory part of the input name. Otherwise an absolute
        # input path would make os.path.join() discard P.OUTDIR entirely.
        TFN = os.path.basename(IFN)
        if TFN.endswith(".gz") : # remove .gz extension
            TFN = TFN[:-3]
        # splitext leaves names without an extension intact, so the tag
        # is always appended after the base name.
        BaseName, Ext = os.path.splitext(TFN)
        return os.path.join(P.OUTDIR,BaseName + PairedTag + Ext)


    # Construct the command - - - - - - - - - - - - - - 

    if len(PR) == 2 :
        Mode = "PE"
        Inputs = [PR[0],PR[1]]
        Outputs = [OutName(PR[0],"_1P"), OutName(PR[0],"_1U"),
                   OutName(PR[1],"_2P"), OutName(PR[1],"_2U")]
    else :
        Mode = "SE"
        Inputs = [PR[0]]
        Outputs = [OutName(PR[0],"_S")]

    COMMAND = ["trimmomatic", Mode, "-threads", str(P.THREADS)]
    COMMAND.extend(Inputs)
    COMMAND.extend(Outputs)
    COMMAND.extend(P.TRIMARGS)  

    # Run the command - - - - - - - - - - - - - - - - -
    LOGFILE.write('======== Trimmomatic ==========' + '\n')
    LOGFILE.write('COMMAND: ' + str(COMMAND) + '\n')
    StartTime = datetime.datetime.now()
    LOGFILE.write('Start time: ' + str(StartTime) + '\n')
    LOGFILE.write('\n')
    # Flush before the child process starts writing to the same file, so
    # that the header appears ahead of trimmomatic's own output.
    LOGFILE.flush()
    Result = subprocess.run(COMMAND,stdout=LOGFILE,stderr=LOGFILE)
    FinishTime = datetime.datetime.now()
    LOGFILE.write('\n')
    LOGFILE.write('Exit status: ' + str(Result.returncode) + '\n')
    LOGFILE.write('Finish time: ' + str(FinishTime) + '\n')
    ElapsedTime = FinishTime - StartTime
    LOGFILE.write('Elapsed time: ' + str(ElapsedTime) + '\n')
    return Result.returncode

#======================== MAIN PROCEDURE ==========================
def main():
    """
        Main procedure: read the command line parameters, read the list
        of read files from the TSV file, run trimmomatic on each single
        file or pair, and write everything to bl_trimmomatic.log in the
        output directory.

        If an email address is set in the parameters, the log is mailed
        to that address when all runs have finished.
        """

    # Read parameters from command line
    P = Parameters()

    TF = TSVFiles()
    OKAY = P.TSVFILE != ""
    if OKAY :
        TF.ReadTSVfile(P.TSVFILE)

    # Create output directory, if it doesn't already exist. The log file
    # is always written there, so this is done even if there is nothing
    # to run. makedirs also creates missing parent directories.
    if P.OUTDIR != "" :
        os.makedirs(P.OUTDIR, exist_ok=True)

    LOGFN = os.path.join(P.OUTDIR,"bl_trimmomatic.log")
    # 'with' closes the log even if one of the runs raises.
    with open(LOGFN,'w') as LOGFILE :
        LOGFILE.write('\n')

        # Run trimmomatic
        if OKAY :
            for PR in TF.READPAIRS :
                RunTrimmomatic(P,PR,LOGFILE)

    # Notify user when job is done, if an email address is set.
    # NOTE: Parameters currently never sets EMAIL from the command line,
    # so this branch only runs if EMAIL is assigned some other way.
    if P.EMAIL != "" :
        Sender = getpass.getuser() + '@' + LocalHostname()
        Subject = 'bl_trimmomatic.py completed'
        Message = 'bl_trimmomatic.py: Completed<br>'
        # Read back the log that was just written (LOGFN, in the output
        # directory), not a file of the same name in the current directory.
        with open(LOGFN,'r') as LOGFILE :
            Message = Message + ''.join(line + '<br>' for line in LOGFILE)
        SendEmail(Sender,[P.EMAIL],Subject,Message)


# Run the main procedure only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
#else:
    #used to generate documentation
#    import doctest
#    doctest.testmod()

#if (BM.documentor() or "-test" in sys.argv):
#    pass
#else:
#    main()
