#!/usr/bin/env python3

import os
import os.path
import phylip
import shutil
import subprocess
import sys
import time

#Version  February 17, 2021
# Run dnapars as a command
#Synopsis: dnapars.py infile soption method replicates percent blocksize
#            jumble numjum outgroup parstype threshold transversion termout alltrees printdata outfile treefile alltreefile

#Convert arguments to variables
INFILE        = sys.argv[1]

# blnalign: If no file has been chosen for UFN, UFN will be blank.
# Simply substituting %UFN% into the command for dnapars would
# therefore result in a missing parameter. Here we concatenate
# UTREE and UFN in the form UTREE_UFN. We can read both parameters from the UTREE variable,
# and UFN is no longer passed as a separate variable.
UTREEPARAM = sys.argv[2]
UTREE = UTREEPARAM[0]
UFN           = UTREEPARAM[1:]

SOPTION       = sys.argv[3]
METHOD        = sys.argv[4]
REPLICATES    = sys.argv[5]
PERCENT       = int(sys.argv[6])
BLOCKSIZE     = sys.argv[7]
JUMBLE        = sys.argv[8]
NUMJUM        = sys.argv[9]
OUTGROUP      = sys.argv[10]
PARSTYPE      = sys.argv[11]
THRESHOLD     = sys.argv[12]
TRANSVERSION  = sys.argv[13]
TERMOUT       = sys.argv[14]
ALLTREES      = sys.argv[15]
PRINTDATA     = sys.argv[16]
OUTFILE       = sys.argv[17]
TREEFILE      = sys.argv[18]
ALLTREEFILE   = sys.argv[19]

# Make a temporary directory in which to run the program
STARTDIR = os.getcwd()
TEMPDIR = 'DNAPARS.' + str(os.getpid())
os.mkdir(TEMPDIR)
shutil.copyfile(INFILE, os.path.join(TEMPDIR, 'infile.original'))

if UTREE == 'y':
    # Turn off bootstrapping when evaluating a user tree.
    # If bootstrapping is on, an empty file may be generated
    # which could cause drawtree or drawgram to loop infinitely
    METHOD = 'n'

    # Make sure that treefile begins with number of trees on first
    # line of file. If first line in file has parentheses, the
    # number must be added.
    phylip.ufn(UFN, TEMPDIR)

# From here on, everything is done in TEMPDIR
os.chdir(TEMPDIR)

# Phylip can't deal with sequence names longer than 10 characters. 
# Therefore, we have to replace the names with randomly assigned names, and save the
# original names and random names in idkeys.tsv. We can use idkey.tsv at the end
# to restore the original names to the output files. 
subprocess.call(["uniqid.py","--encode", "infile.original","infile.encoded","idkeys.tsv"])
subprocess.call(["phylcnv.py","-inf", "fasta", "-outf", "pint", "infile.encoded", "infile.temp"])

# Debug statement.
#p_testinfile = subprocess.call(["nedit", "infile.temp"])
if UTREE == 'y' :
    subprocess.call(["uniqid.py","--encodesame", "intree","intree.temp","idkeys.tsv"])
    shutil.move("intree.temp","intree")
    # Debug statement.
    #p_testinfile = subprocess.call(["nedit", "intree"])

msgfile_h = open('MSGFILE', 'a')
msgfile_h.write("---------------------  DNAPARS - DNA PARSIMONY METHODS ---------------------\n")
msgfile_h.write("\n")

#----------------- generated resampled datasets, if specified  -----
# Choose resampling method
   
# Percent of characters to sample
if PERCENT < 5 or PERCENT > 100:
    PERCENT = 100

if METHOD == "n":
    msgfile_h.write(" \n")
    shutil.copyfile('infile.temp', 'infile')
if METHOD in ("b", "d"):
    phylip.stdresample(METHOD, PERCENT, REPLICATES, '1', msgfile_h, 's', 'i')
elif METHOD in ("ps", "po", "pw"):
    phylip.weightless_resample(METHOD, PERCENT, REPLICATES, '1',  msgfile_h, 's', 'i')
# Debug statement.
#p_testinfile = subprocess.call(["nedit", "infile"])

#------------------------------- DNAPARS ---------------------------------
termout_h = open(TERMOUT, 'w')
RSEED = phylip.phylip_random()
msgfile_h.write("DNAPARS:  SEED= " + str(RSEED) + "\n")

# This statement reads from infile, so we have to run it before
# running TREEPROGRAM, which also reads infile.
NUMSEQ = phylip.get_numseq("infile")

os.nice(4)
start_time = time.time()


#----------------- generate keyboard input to send to program -----

# Jumble - When multiple datasets are analyzed, some programs automatically
# jumbles, and prompts for a random number seed for jumbling. Othersise,
# jumbling must be explicitly set. From a scripting perspecitve, it is safest therefore
# to set jumbling first, so that we don't need to handle a bunch of special cases in
# which the program asks for jumbling at a later time.
comfile_h = open('Dnaparsfile', 'w')

if UTREE == 'y':
    comfile_h.write('u\n')

# Jumble - When multiple datasets are analyzed, some programs automatically
# jumbles, and prompts for a random number seed for jumbling. Othersise,
# jumbling must be explicitly set. From a scripting perspecitve, it is safest therefore
# to set jumbling first, so that we don't need to handle a bunch of special cases in
# which the program asks for jumbling at a later time.
if METHOD != "n":
    JUMBLE = "J"
if JUMBLE == "J": 
    phylip.jumble(comfile_h, msgfile_h, NUMJUM)

if METHOD in ("b", "d"):
    comfile_h.write('m\n')
    comfile_h.write('w\n')
    comfile_h.write(REPLICATES + '\n')
    comfile_h.write(str(RSEED) + '\n')
elif METHOD in ("ps", "po", "pw"):
    comfile_h.write('m\n')
    comfile_h.write('d\n')
    comfile_h.write(REPLICATES + '\n')
    comfile_h.write(str(RSEED) + '\n')

# Search option
if SOPTION == "m":
    msgfile_h.write('Search option: THOROUGH\n') 
elif SOPTION == "l":	
    msgfile_h.write('Search option: LESS THOROUGH\n')
    comfile_h.write('s\n')
    comfile_h.write('n\n')
elif SOPTION == "r":
    msgfile_h.write('Search option: REARRANGE ON JUST ONE BEST TREE\n')
    comfile_h.write('s\n')
    comfile_h.write('y\n')

# Outgroup
OUTGROUP = phylip.do_outgroup(OUTGROUP, NUMSEQ, comfile_h, msgfile_h)

# Threshold or ordinary parsimony
if PARSTYPE == 't':
    if int(THRESHOLD) < 2:
        THRESHOLD = 2
    comfile_h.write(PARSTYPE + '\n')
    comfile_h.write(str(THRESHOLD) + '\n')
    msgfile_h.write('Using threshold parsimony, THRESHOLD= ' + str(THRESHOLD) + '\n')
# Count all steps: a; Count only transversions: t
if TRANSVERSION == 't':
    comfile_h.write('n\n')

# Should sequence data be printed?
if PRINTDATA == 'y' and METHOD == 'n':
    comfile_h.write('1\n')


# When resampling or jumbling, turn off printing trees to outfile
if METHOD in ('b', 'd', 'ps', 'po', 'pw'):
   comfile_h.write('3\n')

#accept current settings and do the analysis
comfile_h.write('y\n')
comfile_h.close()

#-------- Run dnapars using keyboard input from Dnaparsfile -----------

comfile_h = open('Dnaparsfile', 'r')
p_PROGRAM = subprocess.Popen(['dnapars'], stdin=comfile_h)
p_PROGRAM.wait()
comfile_h.close()
termout_h.close()
msgfile_h.close()
time_elapsed = time.time() - start_time
os.nice(0)

outfile_h = open('outfile', 'a')
outfile_h.write("Elapsed time on " + os.uname()[1] + ": " + str(time_elapsed) + " seconds")
outfile_h.close()

#----------- Return results to calling directory----------------
# When using resampling or jumbling, filter the outtree through
# consense to generate an unrooted consensus tree.
if METHOD == 'b' or METHOD == 'd' or METHOD == 'ps' or METHOD == 'po' or METHOD == 'pw':
    outfile_h = open('outfile', 'a')
    outfile_h.write(" \n")
    outfile_h.write('-------------------------------------------\n')
    outfile_h.close()

    #subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), 'n', 'outfile.consense', os.path.join(STARTDIR, TREEFILE)])
    #phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), True, True)

    subprocess.call(["uniqid.py","--decode", "outfile","outfile.decoded","idkeys.tsv"])
    shutil.move("outfile.decoded","outfile")
    subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), 'n', 'outfile.consense.tmp', 'outtree.consense'])
    subprocess.call(["uniqid.py","--decode", "outfile.consense.tmp","outfile.consense","idkeys.tsv"])
    subprocess.call(["uniqid.py","--decode", "outtree.consense","outtree.decoded","idkeys.tsv"])
    shutil.move("outtree.decoded", os.path.join(STARTDIR, TREEFILE))
    phylip.merge_msg(os.path.join(STARTDIR, OUTFILE))

    if ALLTREES == 'y':
        subprocess.call(["uniqid.py","--decode", "outtree","alltrees.decoded","idkeys.tsv"])
        shutil.move('alltrees.decoded', os.path.join(STARTDIR, ALLTREEFILE))
else:
    #shutil.move('outtree', os.path.join(STARTDIR, TREEFILE))
    #phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False)
    subprocess.call(["uniqid.py","--decode", "outfile","outfile.decoded","idkeys.tsv"])
    shutil.move("outfile.decoded","outfile")
    subprocess.call(["uniqid.py","--decode", "outtree","outtree.decoded","idkeys.tsv"])
    shutil.move("outtree.decoded", os.path.join(STARTDIR, TREEFILE))
    phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False)

os.chdir(STARTDIR)
shutil.rmtree(TEMPDIR)

print('DNAPARS Completed')
