#!/usr/bin/env python3

import os
import os.path
import phylip
import shutil
import subprocess
import sys
import time

#Version   Feb. 17, 2021
# Run protpars as a command
#Synopsis: protpars.csh infile utree_ufn method replicates percent blocksize
#            jumble numjum outgroup parstype threshold gc termout printdata outfile treefile alltreefile

#Convert arguments to variables
INFILE        = sys.argv[1]

# blpalign: If no file has been chosen for UFN, UFN will be blank.
# Simply substituting %UFN% into the command for protpars would
# therefore result in a missing parameter. Here we concatenate
# UTREE and UFN in the form UTREE_UFN. We can read both parameters from the UTREE variable,
# and UFN is no longer passed as a separate variable.
UTREEPARAM = sys.argv[2]
UTREE = UTREEPARAM[0]
UFN           = UTREEPARAM[1:]

METHOD        = sys.argv[3]
REPLICATES    = sys.argv[4]
PERCENT       = sys.argv[5]
BLOCKSIZE     = sys.argv[6]
JUMBLE        = sys.argv[7]
NUMJUM        = sys.argv[8]
OUTGROUP      = sys.argv[9]
PARSTYPE      = sys.argv[10]
THRESHOLD     = sys.argv[11]
GC            = sys.argv[12]
TERMOUT       = sys.argv[13]
ALLTREES      = sys.argv[14]
PRINTDATA     = sys.argv[15]
OUTFILE       = sys.argv[16]
TREEFILE      = sys.argv[17]
ALLTREEFILE   = sys.argv[18]

# Make a temporary directory in which to run the program
STARTDIR = os.getcwd()
TEMPDIR = 'PROTPARS.' + str(os.getpid())
os.mkdir(TEMPDIR)
shutil.copyfile(INFILE, os.path.join(TEMPDIR, 'infile.original'))

if UTREE == 'y':
    # Turn off bootstrapping when evaluating a user tree.
    # If bootstrapping is on, an empty file may be generated
    # which could cause drawtree or drawgram to loop infinitely
    METHOD = 'n'

    # protpars does NOT want the first line to have a number.
    # This is distinct from proml, which requires that the first line
    # of intree tell the number of trees. So we copy the file as is.
    shutil.copyfile(UFN, os.path.join(TEMPDIR, "intree"))

# From here on, everything is done in TEMPDIR
os.chdir(TEMPDIR)

# Phylip can't deal with sequence names longer than 10 characters. 
# Therefore, we have to replace the names with randomly assigned names, and save the
# original names and random names in idkeys.tsv. We can use idkey.tsv at the end
# to restore the original names to the output files. 
subprocess.call(["uniqid.py","--encode", "infile.original","infile.encoded","idkeys.tsv"])
subprocess.call(["phylcnv.py","-inf", "fasta", "-outf", "pint", "infile.encoded", "infile.temp"])

# Debug statement.
#p_testinfile = subprocess.call(["nedit", "infile.temp"])
if UTREE == 'y' :
    subprocess.call(["uniqid.py","--encodesame", "intree","intree.temp","idkeys.tsv"])
    shutil.move("intree.temp","intree")
    # Debug statement.
    #p_testinfile = subprocess.call(["nedit", "intree"])


msgfile_h = open('MSGFILE', 'a')
msgfile_h.write("---------------------  PROTPARS - PROTEIN PARSIMONY METHODS ---------------------\n")
msgfile_h.write("\n")


# Percent of characters to sample
if int(PERCENT) < 5 or int(PERCENT) > 100:
    PERCENT = 100

#----------------- generated resampled datasets, if specified  -----

# Choose resampling method
if METHOD == "n":
    msgfile_h.write(" \n")
    shutil.copyfile('infile.temp', 'infile')
if METHOD in ("b", "d"):
    phylip.stdresample(METHOD, PERCENT, REPLICATES, '1', msgfile_h, 's', 'i')
elif METHOD in ("ps", "po", "pw"):
    phylip.weightless_resample(METHOD, PERCENT, REPLICATES, '1',  msgfile_h, 's', 'i')
# Debug statement.
#p_testinfile = subprocess.call(["nedit", "infile"])

#----------------- generate keyboard input to send to program -----

termout_h = open(TERMOUT, 'w')
RSEED = phylip.phylip_random()
msgfile_h.write("PROTPARS:  SEED= " + str(RSEED) + '\n')

# This statement reads from infile, so we have to run it before
# running TREEPROGRAM, which also reads infile.
NUMSEQ = phylip.get_numseq("infile")

# Jumble - When multiple datasets are analyzed, some programs automatically
# jumbles, and prompts for a random number seed for jumbling. Othersise,
# jumbling must be explicitly set. From a scripting perspecitve, it is safest therefore
# to set jumbling first, so that we don't need to handle a bunch of special cases in
# which the program asks for jumbling at a later time.
comfile_h = open('Protparsfile', 'w')

if UTREE == 'y':
    comfile_h.write('u\n')

# Jumble - When multiple datasets are analyzed, some programs automatically
# jumbles, and prompts for a random number seed for jumbling. Othersise,
# jumbling must be explicitly set. From a scripting perspecitve, it is safest therefore
# to set jumbling first, so that we don't need to handle a bunch of special cases in
# which the program asks for jumbling at a later time.
if METHOD != "n":
    JUMBLE = "J"
if JUMBLE == "J":
    phylip.jumble(comfile_h, msgfile_h, NUMJUM)

# Outgroup
OUTGROUP = phylip.do_outgroup(OUTGROUP, NUMSEQ, comfile_h, msgfile_h)

# Threshold or ordinary parsimony
if PARSTYPE == 't':
    if int(THRESHOLD) < 2:
        THRESHOLD = 2
    comfile_h.write(PARSTYPE + '\n')
    comfile_h.write(str(THRESHOLD) + '\n')
    msgfile_h.write('Using threshold parsimony, THRESHOLD= ' + str(THRESHOLD) + '\n')

if METHOD in ("b", "d"):
    comfile_h.write('m\n')
    comfile_h.write('w\n')
    comfile_h.write(REPLICATES + '\n')
    comfile_h.write(str(RSEED) + '\n')
elif METHOD in ("ps", "po", "pw"):
    comfile_h.write('m\n')
    comfile_h.write('d\n')
    comfile_h.write(REPLICATES + '\n')
    comfile_h.write(str(RSEED) + '\n')

# Genetic code
if GC == "u" :
    msgfile_h.write('Using Genetic Code: UNIVERSAL\n')
elif GC == "m" :
    msgfile_h.write('Using Genetic Code: MITOCHONDRIAL\n')
    comfile_h.write("c\n")
    comfile_h.write("m\n")
elif GC == "v" :
    msgfile_h.write('Using Genetic Code: VERTEBRATE MITOCHONDRIAL\n')
    comfile_h.write("c\n")
    comfile_h.write("v\n")
elif GC == "f" :
    msgfile_h.write('Using Genetic Code: FLY MITOCHONDRIAL\n')
    comfile_h.write("c\n")
    comfile_h.write("f\n")
elif GC == "y" :
    msgfile_h.write('Using Genetic Code: YEAST MITOCHONDRIAL\n')
    comfile_h.write("c\n")
    comfile_h.write("y\n")
else:
    msgfile_h.write('Using Genetic Code: UNIVERSAL\n')

# Should sequence data be printed?
if PRINTDATA == 'y' and METHOD == 'n':
    comfile_h.write('1\n')


# When resampling or jumbling, turn off printing trees to outfile
if METHOD in ('b', 'd', 'ps', 'po', 'pw'):
    comfile_h.write("3\n")

#accept current settings and do the analysis
comfile_h.write("y\n")
comfile_h.close()

#-------- Run protpars -----------

os.nice(4)
start_time = time.time()
comfile_h = open('Protparsfile', 'r')
p_PROGRAM = subprocess.Popen(['protpars'], stdin=comfile_h)
p_PROGRAM.wait()
comfile_h.close()
termout_h.close()
msgfile_h.close()
time_elapsed = time.time() - start_time
outfile_h = open('outfile', 'a')
outfile_h.write("Elapsed time on " + os.uname()[1] + ": " + str(time_elapsed) + " seconds")
outfile_h.close()
os.nice(0)

#----------- Return results to calling directory----------------
# When using resampling or jumbling, filter the outtree through
# consense to generate an unrooted consensus tree.
if METHOD == 'b' or METHOD == 'd' or METHOD == 'ps' or METHOD == 'po' or METHOD == 'pw':
    outfile_h = open('outfile', 'a')
    outfile_h.write(" \n")
    outfile_h.write('-------------------------------------------\n')
    outfile_h.close()

    #subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), 'n', 'outfile.consense', os.path.join(STARTDIR, TREEFILE)])
    #phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), True, True)

    subprocess.call(["uniqid.py","--decode", "outfile","outfile.decoded","idkeys.tsv"])
    shutil.move("outfile.decoded","outfile")
    subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), 'n', 'outfile.consense.tmp', 'outtree.consense'])
    subprocess.call(["uniqid.py","--decode", "outfile.consense.tmp","outfile.consense","idkeys.tsv"])
    subprocess.call(["uniqid.py","--decode", "outtree.consense","outtree.decoded","idkeys.tsv"])
    shutil.move("outtree.decoded", os.path.join(STARTDIR, TREEFILE))
    phylip.merge_msg(os.path.join(STARTDIR, OUTFILE))

    if ALLTREES == 'y':
        subprocess.call(["uniqid.py","--decode", "outtree","alltrees.decoded","idkeys.tsv"])
        shutil.move('alltrees.decoded', os.path.join(STARTDIR, ALLTREEFILE))
else:
    #shutil.move('outtree', os.path.join(STARTDIR, TREEFILE))
    #phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False)
    subprocess.call(["uniqid.py","--decode", "outfile","outfile.decoded","idkeys.tsv"])
    shutil.move("outfile.decoded","outfile")
    subprocess.call(["uniqid.py","--decode", "outtree","outtree.decoded","idkeys.tsv"])
    shutil.move("outtree.decoded", os.path.join(STARTDIR, TREEFILE))
    phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False)

os.chdir(STARTDIR)
shutil.rmtree(TEMPDIR)

print('protpars Completed')
