#!/usr/bin/env python3

'''
bl_rcorrector.py - Given a series of paired-end read files, run rcorrector for each pair of files, and
    generate output files for each pair.

Synopsis: bl_rcorrector.py tsvfile threads outdir [rcorrector options]

    For example, a line in tsvfile such as

    control1_R1.fastq.gz<TAB>control1_R2.fastq.gz

    would be corrected as a pair, with the corrected read files
    written to outdir.

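    tsvfile - a tab-separated value file with each pair of filenames (or a
        single filename, for single-end reads) on a separate line.
        MUST be the first argument. All rcorrector arguments follow.

    threads - number of threads for rcorrector to use (passed as -t)

    outdir - directory for writing corrected files (passed as -od)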

    [rcorrector options] - options to be passed to rcorrector

@modified: March 2, 2019
@author: Brian Fristensky
@contact: Brian.Fristensky@umanitoba.ca  
'''

"""
optparse is deprecated in favor of argparse as of Python 2.7. However,
 since 2.7 is not always present on many systems, at this writing,
 it is safer to stick with optparse for now. It should be easy
 to change later, since the syntax is very similar between argparse and optparse.
 from optparse import OptionParser
"""
from optparse import OptionParser

import os
#import re
#import stat
import subprocess
import sys

# Prefix string naming this program.
PROGRAM = "bl_rcorrector.py : "
# One-line usage summary of the command line.
USAGE = "\n\tUSAGE: bl_rcorrector.py tsvfile threads outdir [rcorrector options] "

# When True, diagnostic messages are printed to standard output,
# starting with the banner below as soon as the module is loaded.
DEBUG = True
if DEBUG :
    print('bl_rcorrector: Debugging mode on')

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Parameters:
    """
    Wrapper class for command line parameters.

    Attributes:
        TSVFILE - name of the TSV file listing the read files
            ("" when not given on the command line)
        THREADS - number of threads, kept as a string (default "2")
        OUTDIR - output directory (default "reads.corrected")
        rcorrectorargs - list of all remaining arguments, to be passed
            on to rcorrector unchanged
    """
    def __init__(self):
        """
        Set every parameter to its default:
            TSVFILE = ""
            THREADS = "2"
            OUTDIR = "reads.corrected"
            rcorrectorargs = []

        Then call read_args() to replace the defaults with the values
        found on the command line. When DEBUG is on, print the result.
        """
        self.TSVFILE = ""
        self.THREADS = "2"
        self.OUTDIR = "reads.corrected"
        self.rcorrectorargs = []
        self.read_args()

        if DEBUG :
            print('------------ Parameters from command line ------')
            print('    TSVFILE: ' + self.TSVFILE)
            print('    THREADS: ' + str(self.THREADS))
            print('    OUTDIR: ' + self.OUTDIR)
            print('    rcorrectorargs: ' + str(self.rcorrectorargs))
            print()

    def read_args(self):
        """
        Read the positional command line arguments from sys.argv:

            tsvfile threads outdir [rcorrector options]

        Only the arguments actually present are stored, so a short
        command line leaves the remaining attributes untouched instead
        of raising IndexError. Everything after the third argument is
        stored in rcorrectorargs.
        """
        args = sys.argv[1:]     # drop the program name
        if len(args) > 0:
            self.TSVFILE = args[0]
        if len(args) > 1:
            self.THREADS = args[1]
        if len(args) > 2:
            self.OUTDIR = args[2]
        # Slicing past the end yields [], so this is safe for any length.
        self.rcorrectorargs = args[3:]

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class TSVFiles :
    """
    Reads a TSV file listing single-end and/or paired-end read files
    and keeps the file names in READPAIRS.
    """
    def __init__(self):
        """
        Initializes attributes:
            READPAIRS = []

        READPAIRS is a list of lists; each inner list holds the one or
        two file names taken from one line of the TSV file.
        """
        self.READPAIRS = []

    def ReadTSVfile(self,FN) :
        """
        Read the TSV file named FN, which contains names of paired-end
        and/or single-end read files, and append one entry per usable
        line to READPAIRS.

        Paired-end files are on lines such as

        leftreadfile.fq.gz<TAB>rightreadfile.fq.gz

        Single-end files have each file on a separate line

        reads1.fq.gz
        reads2.fq.gz
        reads3.fq.gz

        Blank lines and lines beginning with '#' are skipped. Only the
        first two fields of a line are used, and blank fields are
        ignored. When DEBUG is on, READPAIRS is printed after reading.
        """
        TAB = '\t'
        # 'with' closes the file even if reading raises part-way through.
        with open(FN,"r") as F :
            for line in F :
                line = line.strip()
                if not line or line.startswith('#') :
                    continue
                # Get rid of double quotes that enclose fields when some
                # programs write output, and then split by TABs.
                tokens = line.replace('"','').split(TAB)
                # Keep the non-blank names among the first two fields.
                fnames = [tok.strip() for tok in tokens[:2]]
                fnames = [name for name in fnames if name]
                if fnames :
                    self.READPAIRS.append(fnames)
        if DEBUG :
            print(str(self.READPAIRS))


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def Runrcorrector(PR,THREADS,OUTDIR,rcorrectorargs,LOGFILE) :
    """
    Run run_rcorrector.pl on one single-end read file or on one pair of
    paired-end read files, and wait for it to finish.

    PR - list holding one file name (single-end, passed with -s) or
        two file names (left and right reads, passed with -1 and -2)
    THREADS - number of threads, passed with -t
    OUTDIR - output directory, passed with -od; created if missing
    rcorrectorargs - list of further arguments appended to the command
    LOGFILE - open, writable file; receives the command line as well as
        the standard output and standard error of run_rcorrector.pl

    Returns the exit status of run_rcorrector.pl.
    """
    # Make sure output directory exists, including any missing parents.
    # exist_ok=True because we allow overwriting results from a
    # previous run of Rcorrector.
    os.makedirs(OUTDIR, exist_ok=True)

    # Construct the command - - - - - - - - - - - - - -
    # Each list element becomes exactly one argument of the child
    # process, so no padding or separator elements may be added.
    COMSTR = ["run_rcorrector.pl"]

    # Add the names of the read files
    if len(PR) == 1 :
        COMSTR.extend(['-s', PR[0]])
    elif len(PR) > 1 :
        COMSTR.extend(['-1', PR[0], '-2', PR[1]])

    # Append the rcorrector options to the command.
    # str() so that a numeric thread count is accepted too.
    COMSTR.extend(['-t', str(THREADS)])
    COMSTR.extend(['-od', OUTDIR])
    COMSTR.extend(rcorrectorargs)

    if DEBUG :
        print('COMSTR: ' + str(COMSTR))

    # Run rcorrector - - - - - - - - - - - - - - - - -
    LOGFILE.write('======== rcorrector ==========' + '\n')
    LOGFILE.write(str(COMSTR) + '\n')
    LOGFILE.write('\n')
    # The child process is handed LOGFILE for its own output; flush our
    # buffered header first so that it appears ahead of that output.
    LOGFILE.flush()
    p = subprocess.Popen(COMSTR,stdout=LOGFILE,stderr=LOGFILE)
    status = p.wait()
    if status != 0 :
        LOGFILE.write('run_rcorrector.pl exited with status ' + str(status) + '\n')
    LOGFILE.write('\n')
    return status
    

#======================== MAIN PROCEDURE ==========================
def main():
    """
    Read the parameters from the command line, read the list of read
    files from the TSV file, and run rcorrector once for each entry.

    The commands and the output of rcorrector are written to
    bl_rcorrector.log in the current directory, which is overwritten
    on each run.
    """
    # Read parameters from command line
    P = Parameters()

    TF = TSVFiles()
    if P.TSVFILE != "" :
        TF.ReadTSVfile(P.TSVFILE)
    else :
        # Nothing to process; tell the user what was expected.
        sys.stderr.write(USAGE + '\n')

    LOGFN = 'bl_rcorrector.log'
    # 'with' closes the log even if a run of rcorrector raises.
    with open(LOGFN,'w') as LOGFILE :
        LOGFILE.write('\n')

        # Run rcorrector
        for PR in TF.READPAIRS :
            Runrcorrector(PR,P.THREADS,P.OUTDIR,P.rcorrectorargs,LOGFILE)


# Run main() only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
#else:
    #used to generate documentation
#    import doctest
#    doctest.testmod()

#if (BM.documentor() or "-test" in sys.argv):
#    pass
#else:
#    main()
