#!/usr/bin/env python3

# Convert reads to a different file format
"""
    bl_seqkit_convert.py - convert the quality encoding of one or more FASTQ files using 'seqkit convert'

@modified: May 21 2021
@author: Brian Fristensky
@contact: brian.fristensky@umanitoba.ca

"""

import argparse
import datetime
import os
import subprocess
import sys

# - - - - - - -  GLOBAL VARIABLES - - - - - - - -
PROGRAM = os.path.basename(sys.argv[0]) + ": "   # precedes print messages
# Option names shown here must match those registered with argparse (--dry-run, not --dry_run).
USAGE = "\n\t USAGE: bl_seqkit_convert.py [--filelist] infile --dry-run --threads n --outfmt string --outdir string"
DEBUG = True   # when True, the parsed command line parameters are echoed to stdout
NL = "\n"
# Output formats accepted for --outfmt
FORMATS = ['sanger', 'solexa', 'illumina-1.3+', 'illumina-1.5+', 'illumina-1.8+']

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
"Wrapper class for command line parameters"
class Parameters:
    """
    Wrapper class for command line parameters.

    Attributes:
        IFN      -- input file name; if FILELIST is True, a file listing input file names
        FILELIST -- True if --filelist was set
        DRYRUN   -- True if --dry-run was set
        THREADS  -- number of threads, stored as a string so it can be placed
                    directly into a subprocess argument list
        OUTFMT   -- output format; must be one of FORMATS
        OUTDIR   -- output directory ("" if --outdir was not given)
        OKAY     -- False if any parameter was rejected
    """

    def __init__(self):
        """
                Initializes arguments:
                Then calls read_args() to fill in their values from command line
                """
        self.IFN = ""
        self.FILELIST = False # True if --filelist was set.
        self.DRYRUN = False
        self.THREADS = "2"    # string, same type as the value read from the command line
        self.OUTFMT = "illumina-1.8+"
        self.OUTDIR = ""
        self.OKAY = True
        self.read_args()

    def read_args(self):
        """
                Read command line arguments into a Parameters object.

                Returns self.OKAY, which is False if --threads is not an integer
                or --outfmt is not one of FORMATS.
                """
        parser = argparse.ArgumentParser()
        parser.add_argument("--filelist", dest="filelist", action="store_true", help="if set, file contains list of filenames")
        parser.add_argument("infile", action="store", default="", help="input file")
        parser.add_argument("--dry-run", dest="dryrun", action="store_true", help="if set, check file format only")
        parser.add_argument("--outfmt", dest="outfmt", action="store", default="illumina-1.8+", help="output format")
        parser.add_argument("--outdir", dest="outdir", action="store", default="", help="output directory")
        parser.add_argument("--threads", dest="threads", action="store", default="2", help="# of CPUs to use")

        # argparse reports a malformed command line itself and exits, so
        # parse_args() does not need to be wrapped in a try block.
        args = parser.parse_args()
        self.FILELIST = args.filelist
        self.IFN = args.infile
        self.DRYRUN = args.dryrun
        self.OUTFMT = args.outfmt
        self.OUTDIR = args.outdir
        self.THREADS = args.threads

        if DEBUG :
            print("FILELIST: " + str(self.FILELIST))
            print("IFN: " + self.IFN)
            print("DRYRUN: " + str(self.DRYRUN))
            print("OUTFMT: " + self.OUTFMT)
            print("OUTDIR: " + self.OUTDIR)
            print("THREADS: " + str(self.THREADS))

        # Reject a non-numeric thread count here rather than passing it on.
        # The value is kept as a string because it goes straight into a
        # subprocess argument list.
        try:
            self.THREADS = str(int(args.threads))
        except ValueError:
            print(PROGRAM + "--threads must be an integer, not '" + str(args.threads) + "'")
            print(USAGE)
            self.OKAY = False

        if self.OUTFMT not in FORMATS :
            print(PROGRAM + " output format must be one of " + str(FORMATS))
            self.OKAY = False

        return self.OKAY

# --------------------------------------------
def ReadList(F) :
    """
    Read a list of file names from the text file F, one name per line.

    Leading and trailing whitespace is stripped from each line. Blank lines
    are skipped so that, for example, a trailing empty line does not produce
    an empty file name.

    Returns a list of file names in the order they appear in F.
    Raises OSError if F cannot be opened.
    """
    # 'with' guarantees the file is closed even if reading fails part way through
    with open(F, "r") as lfile :
        stripped = (line.strip() for line in lfile)
        return [name for name in stripped if name]

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
"Run SeqKit convert"
def RunSeqKitConvert(P,F):
    """
    Run 'seqkit convert' on a single input file.

    P -- parameter object; OUTDIR, OUTFMT, THREADS and DRYRUN are read from it
    F -- path of the input file

    The output file is written to P.OUTDIR and is named
    <name of F without directory or extension>.<P.OUTFMT>.fq

    Returns the exit status of the seqkit process.
    """
    # Construct output filename from the file-name part of F only. If the
    # directory part were kept, os.path.join would discard P.OUTDIR for an
    # absolute F, or point into a subdirectory of P.OUTDIR for a relative one.
    basename = os.path.basename(os.path.splitext(F)[0])
    OFN = os.path.join(P.OUTDIR, basename + "." + P.OUTFMT + ".fq")

    # Every element of the argument list must be a string, hence str(P.THREADS)
    COMMAND = ["seqkit", "convert", F, "--to", P.OUTFMT, "-o", OFN, "--threads", str(P.THREADS)]

    if P.DRYRUN :
        COMMAND.append("--dry-run")

    print(COMMAND)
    p = subprocess.Popen(COMMAND)
    returncode = p.wait()
    if returncode != 0 :
        # Report the failure instead of silently ignoring the exit status
        print("seqkit convert exited with status " + str(returncode) + " for " + F)
    return returncode


#========================    MAIN   =============================

print("========== " + PROGRAM + " ==========")

# Parse the command line. P.OKAY is False if a parameter was rejected.
P = Parameters()

if P.OKAY :

    # The infile argument is either one input file, or (with --filelist)
    # a file that lists the input files.
    if P.FILELIST :
        FILES = ReadList(P.IFN)
    else:
        FILES = [P.IFN]

    # OUTDIR is "" when --outdir is not given; os.mkdir("") raises
    # FileNotFoundError, so a directory is only created when one was named.
    if P.OUTDIR and not os.path.exists(P.OUTDIR) :
        os.makedirs(P.OUTDIR)

    start_time = datetime.datetime.now()
    print("Start time: " + str(start_time))

    for F in FILES :
        print("---------- " + F + " ----------")
        RunSeqKitConvert(P,F)

    finish_time = datetime.datetime.now()
    print("Finish time: " + str(finish_time))
    # time_elapsed is a timedelta; str() renders it as H:MM:SS.ffffff
    time_elapsed = finish_time - start_time
    print("Elapsed time on " + os.uname()[1] + ": " + str(time_elapsed) + " seconds")



