#!/usr/bin/env python3

"""
optparse is deprecated in favor of argparse as of Python 2.7. However,
 since 2.7 is not always present on many systems, at this writing,
 it is safer to stick with optparse for now. It should be easy
 to change later, since the syntax is very similar between argparse and optparse.
 from optparse import OptionParser
"""

import datetime
import getpass
import os
import re
import shutil
import stat
import subprocess
import sys
import time

'''
bl_soapdenovo2.py - Run SOAPdenovo2

Synopsis: bl_soapdenovo2.py infile outdir prefix program kmer genomesize [SOAPdenov2 options] 

@modified: June 3, 2021
@author: Brian Fristensky
@contact: Brian.Fristensky@umanitoba.ca  
'''


PROGRAM = "bl_soapdenovo2.py : "
USAGE = "\n\tUSAGE: bl_soapdenovo2.py infile outdir prefix program kmer genomesize [SOAPdenov2 options]"

DEBUG = True
if DEBUG :
    print('bl_soapdenovo2: Debugging mode on')


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Parameters:
    """
      	Wrapper class for command line parameters
      	"""
    def __init__(self):
        """
     	  Initializes arguments:
                INFILE = ""
                OUTDIR = ""
                PREFIX = ""
                PROGRAM = "SOAPdenovo-63mer"
                KMER = 23
                GENOME = 20
                SOAPARGS = []

     	  Then calls read_args() to fill in their values from command line
        """
        self.INFILE = "" 
        self.OUTDIR = ""
        self.PREFIX = ""
        self.PROGRAM = "SOAPdenovo-63mer"
        self.KMER = 23
        self.GENOME = 20
        self.SOAPARGS = []             
        self.read_args()

        if DEBUG :
            print('------------ Parameters from command line ------') 
            print('    INFILE: ' + self.INFILE)
            print('    OUTDIR: ' + self.OUTDIR)
            print('    PREFIX: ' + self.PREFIX)
            print('    PROGRAM: ' + self.PROGRAM)
            print('    KMER: ' + str(self.KMER))
            print('    GENOME: ' + str(self.GENOME))
            print('    SOAPARGS: ' + str(self.SOAPARGS))
            print()  

    def read_args(self):
        """
        	Read command line arguments into a Parameter object
    	"""                  
        self.INFILE = sys.argv[1]
        self.OUTDIR = sys.argv[2]
        self.PREFIX = sys.argv[3]
        self.PROGRAM = sys.argv[4]
        self.KMER = int(sys.argv[5])
        self.GENOME = int(sys.argv[6]) * 1000000
        self.SOAPARGS = sys.argv[7:]

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class ConfigLib :
    """
    Library for a config fiile as described in SOAPdenovo2 documentation.
    """
    def __init__(self):
        """
     	  Initializes arguments:
                READPAIRS = []
                PARAMS = []

          """
        self.READPAIRS = []
        self.PARAMS = []        

    def ParseLib(self,LINE) :
        """
        BioLegato writes all information for each lib on a single LIBLINE, which must be parsed. 
        Each line is structured accordingly:

        <LIBLINE> ::= <READFILES>^<PARAMS>
        <READFILES> ::= <LEFT>,<RIGHT>[|<LEFT>,<RIGHT>] #ie. one or more comma-separated read pairs
        <PARAMS> ::= <P>[|<P>]
        <P> ::= <PNAME>=<PVAL>

        The pipe character is used as a separator between read pairs and between parameters.

        For example, Paired-end reads:

        leftreadfile.fq.gz,rightreadfile.fq.gz

        Single-end reads:

        reads1.fq.gz

        """

        # Ugly workround for BioLegato bug that prevents recursive evaluation of variables 
        # when there are several tabs containing multiple panels
        # In this case variables witn no value will get written as %<string>%, so
        # we get rid of anything that matches that pattern.
        # When BioLegato is fixed, these lines can be deleted.
        pattern = re.compile('%\S+%')
        CleanLine = re.sub(pattern,'',LINE)

        # Get rid of quotes that might enclose the string, then split it into Filename and Parameter
        # parts for separate parsing.       
        LTOKENS = CleanLine.replace('"','').split('^')
        FTOKENS = LTOKENS[0]
        PTOKENS = LTOKENS[1]

        # Parse Read Pairs
        PAIRS = FTOKENS.split('|')
        for P in PAIRS :
            tokens = P.split(',')
            # ignore blank fields. Add either single or pair of filenames
            # to list. Only process names from first two fields on a line
            # and ignore other fields. 
            if len(tokens) > 0 :
                r1 = tokens[0].strip()
                if len(r1) > 0 :
                    fnames = [r1]
                else :
                    fnames = []
                if len(tokens) > 1 :
                    r2 = tokens[1].strip()
                    if len(r2) > 0 :
                        fnames.append(r2)
                if len(fnames) > 0 :
                    self.READPAIRS.append(fnames)

        # Parse Parameters
        PARAMS = PTOKENS.split('|')
        for P in PARAMS :
            testP = P.strip()
            if len(testP) > 0 :
                self.PARAMS.append(testP)        

        if DEBUG :
            print(str(self.READPAIRS))
            print(str(self.PARAMS))

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Create config file for running SOAPdenovo2
def BuildConfig(P,Libs,CFN) :

    # Return a code indicating the type of file(s), based on file exension(s)
    def FileType(PR) :
        FT = ""
        Fastq = ['.fq','.fastq']
        # There are too many file extensions that have been used for fasta files,
        # so we assume that it it's not a fastq file, it must be a fasta file.
        if len(PR) == 2 :
            Ext1 = os.path.splitext(PR[0])[1].lower()
            Ext2 = os.path.splitext(PR[1])[1].lower()
            if Ext1 in Fastq and Ext2 in Fastq :
                FT = 'qn'
            else :
                FT = 'fn'               
        else :
            Ext1 = os.path.splitext(PR[0])[1].lower()
            if Ext1 in Fastq :
                FT = 'q'
            else :
                FT = 'f'     
        return FT

    NL = '\n'
    CFILE = open(CFN,'w')
    CFILE.write('#=====  SOAPdenovo2 Config file generated by bl_soapdenovo2.py =====' + NL)
    CFILE.write(NL)
    CFILE.write('# - - - - - GLOBAL PARAMETERS - - - - -' + NL)
    #CFILE.write('max_rd_len=' + str(P.MAXREADLEN) + NL)
    CFILE.write(NL)

    for L in Libs :
        CFILE.write('[LIB]' + NL)        
        # Write parameters
        CFILE.write('# - - - - - PARAMETERS - - - - -' + NL)
        for P in L.PARAMS : 
            CFILE.write(P + NL)     
        # Write read files
        CFILE.write('# - - - - - READ FILES - - - - -' + NL)
        for PR in L.READPAIRS :
            FT = FileType(PR)
            if FT == 'qn' : # paired fastq files
                CFILE.write('q1=' + PR[0] + NL)
                CFILE.write('q2=' + PR[1] + NL)
            elif FT == 'fn' : # paired fasta files
                CFILE.write('f1=' + PR[0] + NL)
                CFILE.write('f2=' + PR[1] + NL)
            elif FT == 'q' : # single fastq file
                CFILE.write('q=' + PR[0] + NL)
            elif FT == 'f' : # single fasta file
                CFILE.write('f=' + PR[0] + NL)
        CFILE.write(NL)
        CFILE.write(NL)

    CFILE.close()

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RunSOAPdenovo2(P,CFN,LOGFILE) :

        # k-mer must be an odd number (default 23)
        # for SOAPdenovo-63mer, 13 >= k-mer <= 63 
        # for SOAPdenovo-127mer, 13 >= k-mer <= 127 
        KMER = P.KMER
        if KMER % 2 == 0 : # modulo operator returns 0 if even
            KMER += 1      # force even number to next highest odd number
        if P.PROGRAM == "SOAPdenovo-63mer" :
            if KMER > 63 :
                KMER = 63
        else :
            if KMER > 127 :
                KMER = 127

        COMMAND=[P.PROGRAM]
        COMMAND.extend(["all", "-s", CFN, "-o", P.PREFIX, "-K", str(KMER), "-N", str(P.GENOME)])
        COMMAND.extend(P.SOAPARGS)

        # Run the command - - - - - - - - - - - - - - - - -
        LOGFILE.write('======== SOAPdenovo2 ==========' + '\n')
        LOGFILE.write(str(COMMAND) + '\n')
        StartTime = datetime.datetime.now()
        LOGFILE.write('Start time: ' + str(StartTime) + '\n')
        LOGFILE.write('\n')
        LOGFILE.flush()
        p = subprocess.Popen(COMMAND,stdout=LOGFILE, stderr=LOGFILE)
        p.wait()
        FinishTime = datetime.datetime.now()
        LOGFILE.write('\n')
        LOGFILE.write('Finish time: ' + str(FinishTime) + '\n')
        ElapsedTime = FinishTime - StartTime
        LOGFILE.write('Elapsed time: ' + str(ElapsedTime) + '\n')
      

#======================== MAIN PROCEDURE ==========================
def main():
    """
        Called when not in documentation mode.
        """
	
    # Read parameters from command line
    P = Parameters()

    # Read libraries from input file
    Libs = []
    if not P.INFILE == "" :
        F = open(P.INFILE,"r")
        for line in F.readlines() :
            line = line.strip()
            if line != "" :
                NewLib = ConfigLib()
                NewLib.ParseLib(line)
                Libs.append(NewLib)
        F.close()  
        OKAY = True
        # We'll be working in the output directory, so we need
        # absolute file paths to the input files.
        # While we're at it, we need to find out which read is the longest,
        # for setting max_rd_len, the only global varialble in config file.
        # The latter is important, because if we don't set max_rd_len, 
        # SOAPdenovo2 will truncate reads in ALL libraries to 100 !
        for L in Libs :
            for PR in L.READPAIRS :
                PR[0] = os.path.abspath(PR[0])
                if len(PR) == 2 :
                    PR[1] = os.path.abspath(PR[1])   
    else :
        OKAY = False


    # From now on, everything happens in the output directory.    
    if not os.path.exists(P.OUTDIR) :
        os.mkdir(P.OUTDIR) 
    os.chdir(P.OUTDIR)             
    LOGFN = 'soapdenovo2.log'

    # First create the file, then re-open it so that we can append to it.
    # We first create with 'w' just to make sure that we're creating a fresh copy,
    # rather than appending to an existing copy from a previous run.
    LOGFILE = open(LOGFN,'w')
    LOGFILE.close()
    LOGFILE = open(LOGFN,'a')
    LOGFILE.write('\n')
    if not OKAY :
        LOGFILE.write('bl_soapdenovo2.py: >>> Need to specify a list of sequencing read files <<<' + '\n')  
          
    # Create config file for running SOAPdenovo2
    CFN = P.PREFIX + '.config'
    BuildConfig(P,Libs,CFN)  

    # Run SOAPdenovo2
    if OKAY :
        RunSOAPdenovo2(P,CFN,LOGFILE)
          
    LOGFILE.close()


if __name__ == "__main__":
    main()

