#!/usr/bin/env python3

import birchenv
import os
import os.path
import phylip
import shutil
import subprocess
import sys
import time
import random

#Version  Feb. 26, 2020
# Run discrete data parsimony programs as a command
#Synopsis: discpars.py infile utree ufn method replicates percent\
#             TCONMETH PARSTYPE THRESHOLD outgroup jumble numjum\
#             termout alltrees outfile treefile alltreefile

""" ensure that there are enough command line arguments to parse """
if len(sys.argv) < 16:
    print("Usage: discpars.py  INFILE  UFN  METHOD REPLICATES  PERCENT")
    print("         TCONMETH  PARSTYPE  THRESHOLD  OUTGROUP  JUMBLE  NUMJUM  TERMOUT")
    print("         OUTFILE  TREEFILE  ALLTREEFILE")
    exit();

#Convert arguments to variables
INFILE        = sys.argv[1]
# bltree: If no file has been chosen for UFN, UFN will be blank.
# Simply substituting $UFN into the command for restml.csh would
# therefore result in a missing parameter. Here we concatenate
# UTREE and UFN in the form UTREE_UFN. restml.csh has been
# modified to read both parameters from the UTREE variable,
# and UFN is no longer passed as a separate variable.
UTREEPARAM = sys.argv[2]
UTREE = UTREEPARAM[0]
UFN           = UTREEPARAM[1:]
METHOD        = sys.argv[3]
REPLICATES    = sys.argv[4]
PERCENT       = sys.argv[5]
TCONMETH      = sys.argv[6]
PARSTYPE      = sys.argv[7]
THRESHOLD     = sys.argv[8]
OUTGROUP      = sys.argv[9]
JUMBLE        = sys.argv[10]
NUMJUM        = sys.argv[11]
TERMOUT       = sys.argv[12]
ALLTREES      = sys.argv[13]
OUTFILE       = sys.argv[14]
TREEFILE      = sys.argv[15]
ALLTREEFILE   = sys.argv[16]

# Debug statement.
#p_testinfile = subprocess.call(["nedit", UFN])

# Make a temporary directory in which to run the program
STARTDIR = os.getcwd()
TEMPDIR = 'DISCPARS.' + str(os.getpid())
os.mkdir(TEMPDIR)
shutil.copyfile(INFILE, os.path.join(TEMPDIR, 'infile.temp'))

if UTREE == 'y':
    # Turn off bootstrapping when evaluating a user tree.
    # If bootstrapping is on, an empty file may be generated
    # which could cause drawtree or drawgram to loop infinitely
    METHOD = 'n'

    # Make sure that treefile begins with number of trees on first
    # line of file. If first line in file has parentheses, the
    # number must be added.
    phylip.ufn(UFN, TEMPDIR)

os.chdir(TEMPDIR)

# Debug statement.
#p_testinfile = subprocess.call(["nedit", "infile.temp"])

# ------------------ Choose program ------------------------------
PROGRAM = 'dollop'
INFORMAT = 's'
if TCONMETH == "d":
    PROGRAM = 'dollop'
    INFORMAT = 's'
elif TCONMETH == "p":
    PROGRAM = 'dollop'
    INFORMAT = 's'
elif TCONMETH == "c":
    PROGRAM = 'mix'
    INFORMAT = 's'
elif TCONMETH == "w":
    PROGRAM = 'mix'
    INFORMAT = 's'
elif TCONMETH == "P":
    PROGRAM = 'pars'
    INFORMAT = 'i'
elif TCONMETH == "L":
    PROGRAM = 'pars'
    INFORMAT = 'i'
elif TCONMETH == "1":
    PROGRAM = 'pars'
    INFORMAT = 'i'
else:
    pass

# x requires a file called 'mixed', which specifies, for each character,
# which method to use. Tricky to implement, at present, so we'll leave
# this commented out.
# elif TCONMETH == "x"
#    PROGRAM = 'mix'

msgfile_h = open('MSGFILE', 'a')
msgfile_h.write("---------------------  DISCRETE DATA PARSIMONY METHODS ---------------------\n")
msgfile_h.write("\n")
msgfile_h.write('PROGRAM: ' + PROGRAM + '\n')
msgfile_h.write("\n")

# SHAZZBOT! DOLLOP and MIX can only read files in Phylip sequential format,
# while PARS can only read files in Phylip interleaved format. Therefore,
# we have to convert to the correct format, depending on the program.
# The original infile.temp is in interleaved format.
#
# Dollop - can read multiple datasets either as datasets or weights
# Mix -
# Pars -
#
# If the programs read multiple datasets, the datasets must be in infile.
# If the programs read weights, the original sequence is in infile, and  weights
# are in weights.

if INFORMAT == "s":
    subprocess.call(['python', os.path.join(birchenv.BIRCH, 'script','phylcnv.py'), '-inf', 'pint', '-outf', 'pseq', 'infile.temp', 'infile.s.temp'])
    shutil.move('infile.s.temp', 'infile.temp')
else:
    # For bootstrapping, both formats are needed. See below.
    subprocess.call(['python', os.path.join(birchenv.BIRCH, 'script','phylcnv.py'), '-inf', 'pint', '-outf', 'pseq', 'infile.temp', 'infile.s.temp'])


#----------------- generated resampled datasets, if specified  -----
# Choose resampling method

#if PROGRAM == "pars":
#    shutil.move("infile.s.temp", "infile.temp")

if METHOD == "n":
    msgfile_h.write(" ")
    shutil.copyfile('infile.temp', 'infile')
if METHOD in ("b", "d"):
    phylip.stdresample(METHOD, PERCENT, REPLICATES, '1', msgfile_h, 'r', INFORMAT)
elif METHOD in ("ps", "po", "pw"):
    phylip.weightless_resample(METHOD, PERCENT, REPLICATES, '1',  msgfile_h, 'r', INFORMAT)

# This statement reads from infile, so we have to run it before
# running TREEPROGRAM, which also reads infile.
NUMSEQ = phylip.get_numseq("infile")

RSEED = phylip.phylip_random()
msgfile_h.write(PROGRAM + ":  SEED= " + str(RSEED) + "\n")

#----------------- generate keyboard input to send to parsimony program -----
os.nice(8)
comfile_h = open("DiscparsComfile", 'w')

if UTREE == 'y':
    comfile_h.write('u\n')
    #Resampling makes no sense when evaluating a user-defined tree.
    METHOD = "n"
    #As well, when evaluating a user-defined tree, the Jumble option doesn't
    #appear in the menu, so we must turn jumble off. Otherwise, "J" will be
    #treated as an illegal option, and the program won't run.
    JUMBLE = "n"

# Jumble - When multiple datasets are analyzed, some programs automatically
# jumbles, and prompts for a random number seed for jumbling. Othersise,
# jumbling must be explicitly set. From a scripting perspecitve, it is safest therefore
# to set jumbling first, so that we don't need to handle a bunch of special cases in
# which the program asks for jumbling at a later time.
if METHOD != "n":
    JUMBLE = "J"
if JUMBLE == "J":
    phylip.jumble(comfile_h, msgfile_h, NUMJUM)

if METHOD in ("b", "d"):
    comfile_h.write('m\n')
    comfile_h.write('w\n')
    comfile_h.write(REPLICATES + '\n')
    comfile_h.write(str(RSEED) + '\n')
elif METHOD in ("ps", "po", "pw"):
    comfile_h.write('m\n')
    comfile_h.write('d\n')
    comfile_h.write(REPLICATES + '\n')
    comfile_h.write(str(RSEED) + '\n')

# Set program-specific parameters
if TCONMETH == "p":
    comfile_h.write('p\n')
elif TCONMETH == "c":
    comfile_h.write('p\n')
elif TCONMETH == "w":
    pass
elif TCONMETH == "P":
    pass
elif TCONMETH == "L":
    comfile_h.write('s\n')
    comfile_h.write('n\n')
elif TCONMETH == "1":
    comfile_h.write('s\n')
    comfile_h.write('y\n')
else:
    pass
# skip because it's the same as the default values
# elif TCONMETH == "d":
#     set INFORMAT = 's'
# x requires a file called 'mixed', which specifies, for each character,
# which method to use. Tricky to implement, at present, so we'll leave
# this commented out.
# elif TCONMETH == "x"
#    comfile_h.write('x\n')


# Outgroup
if PROGRAM == 'mix' or PROGRAM == 'pars':
    # Outgroup
    OUTGROUP = phylip.do_outgroup(OUTGROUP, NUMSEQ, comfile_h, msgfile_h)

# Threshold or ordinary parsimony
if PARSTYPE == 't':
    msgfile_h.write('Using threshold parsimony, THRESHOLD= ' + THRESHOLD + '\n')

    if THRESHOLD < 2:
        THRESHOLD = 2

    comfile_h.write(PARSTYPE + '\n')
    comfile_h.write(THRESHOLD + '\n')


# When resampling, turn off printing trees to outfile

if METHOD == 'b' or METHOD == 'd' or METHOD == 'ps' or METHOD == 'po' or METHOD == 'pw':
    comfile_h.write('3\n')

#accept current settings and do the analysis
comfile_h.write('y\n')
comfile_h.close()

#----------------- Run DOLLOP, MIX or PARS  -----
# Debug statement.
#p_testinfile = subprocess.call(["nedit", "infile"])
comfile_h = open("DiscparsComfile", 'r')
start_time = time.time()
p_PROGRAM = subprocess.Popen([PROGRAM], stdin=comfile_h)
#Send program output to the terminal
termout_h = open(TERMOUT, 'w')
p_PROGRAM.wait()
time_elapsed = time.time() - start_time
#os.nice(0)

termout_h.close()
outfile_h = open('outfile', 'a')
outfile_h.write("Execution times on " + os.uname()[1] + ": " + str(time_elapsed))
outfile_h.close()
OUTfile2_h = open(os.path.join(STARTDIR, OUTFILE), 'w')

msgfile_h.close()


#----------- Return results to calling directory----------------
# When using resampling, filter the treefile through
# consense to generate an unrooted consensus tree.
if METHOD == 'b' or METHOD == 'd' or METHOD == 'ps' or METHOD == 'po' or METHOD == 'pw':
    outfile_h = open('outfile', 'a')
    outfile_h.write(" \n")
    outfile_h.write('-------------------------------------------\n')
    outfile_h.close()

    if PROGRAM == "dollop":
        ROOTEDTREE = "y"
    else:
        ROOTEDTREE = "n"

    subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), ROOTEDTREE, 'outfile.consense', os.path.join(STARTDIR, TREEFILE)])

    phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), True, True)

    if ALLTREES == 'y':
        shutil.move('outtree', os.path.join(STARTDIR, ALLTREEFILE))
else:
    shutil.move('outtree', os.path.join(STARTDIR, TREEFILE))
    phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False)

OUTfile2_h.close()

os.chdir(STARTDIR)
shutil.rmtree(TEMPDIR)

print(PROGRAM + ' completed.')
