#!/usr/bin/env python3

import os
import os.path
import phylip
import shutil
import subprocess
import sys
import time

# Run discrete data distance programs as a command
#Synopsis: restdist.py infile model transratio sitelen method replicates percent\
#             tconmeth power subrep global negbranch outgroup jumble numjum\
#             termout printdata outfile treefile

""" ensure that there are enough command line arguments to parse """
if len(sys.argv) < 19:
    print("Usage: restdist.py  INFILE  MODEL  TRANSRATIO  SITELEN  METHOD ")
    print("         REPLICATES  PERCENT  TCONMETH  POWER  GLOBAL  NEGBRANCH")
    print("         OUTGROUP  JUMBLE NUMJUM  TERMOUT  PRINTDATA  OUTFILE")
    print("         TREEFILE")
    exit();

#Convert arguments to variables
INFILE        = sys.argv[1]
MODEL         = sys.argv[2]
TRANSRATIO    = sys.argv[3]
SITELEN       = sys.argv[4]
METHOD        = sys.argv[5]
REPLICATES    = sys.argv[6]
PERCENT       = sys.argv[7]
TCONMETH      = sys.argv[8]
POWER         = sys.argv[9]
#SUBREP        = sys.argv[10]
GLOBAL        = sys.argv[10]
NEGBRANCH     = sys.argv[11]
OUTGROUP      = sys.argv[12]
JUMBLE        = sys.argv[13]
NUMJUM        = sys.argv[14]
TERMOUT       = sys.argv[15]
PRINTDATA     = sys.argv[16]
OUTFILE       = sys.argv[17]
TREEFILE      = sys.argv[18]

#Send program output to the terminal
termout_h = open(TERMOUT, 'w')

# Make a temporary directory in which to run the program
STARTDIR = os.getcwd()
TEMPDIR = 'RESTDIST.' + str(os.getpid())
os.mkdir(TEMPDIR)
shutil.copy(INFILE, os.path.join(TEMPDIR, 'infile.temp'))
os.chdir(TEMPDIR)
# Debug statement - did infile.temp get written to TEMPDIR?
#p_testinfile = subprocess.call(["nedit", "infile.temp"])

#----------------- generated resampled datasets, if specified  -----
# Choose resampling method
# Note: For bootstrapping, as of Phylip 3.63, restdist cannot utilize the weights file generated by
# seqboot. Therefore, complete datasets must be generated by seqboot, which
# in turn will be used to create distance matrices.


#----------------- generate keyboard input to send to RESTDIST -----
msgfile_h = open('MSGFILE', 'w')
msgfile_h.write("""Discrete Data Distance Matrix Phylogeny Methods

---------------------  DISTANCE MATRIX ---------------------
""")

if METHOD == 'n':
    shutil.move('infile.temp', 'infile')
else:
    phylip.weightless_resample(METHOD, PERCENT, REPLICATES, '1', msgfile_h, 'r', 'i')

# Allows us to examine the contents of infile produced by seqboot.
#p_testinfile = subprocess.call(["nedit", "infile"])

#----------------- generate restdist keyboard input   -----
# Generate keyboard input for restdist and save it to RestdistComfile.
# While it is possible to pipe the output directly to protdist, we have
# discovered that on systems with a high load average, the program doesn't
# always complete. Besides, it is WAY easier to debug when we save keyboard
# output to a file that can be examined.
comfile_h = open('RestdistComfile', 'w')

if METHOD != 'n':
    comfile_h.write('m\n')
    comfile_h.write(str(REPLICATES) + '\n')
#    comfile_h.write(str(RSEED) + '\n')

# Choose method for constructing distance matrix
if MODEL == "S":
    msgfile_h.write("Markers represent alleles at polymorphic sites\n")
    msgfile_h.write("Distance matrix constructed using modified Nei/Li model\n")
elif MODEL == "s":
    msgfile_h.write("Markers represent alleles at polymorphic sites\n")
    msgfile_h.write("Distance matrix constructed using original Nei/Li model\n")
    comfile_h.write('n\n')
elif MODEL == "R":
    msgfile_h.write("Markers represent restriction fragments\n")
    msgfile_h.write("Modified Nei/Li change model\n")
    msgfile_h.write("Distance matrix constructed modified Nei/Li model\n")
    comfile_h.write('r\n')
elif MODEL == "r":
    msgfile_h.write("Markers represent restriction fragments\n")
    msgfile_h.write("Distance matrix constructed original Nei/Li model\n")
    comfile_h.write('r\n')
    comfile_h.write('n\n')
else:
    msgfile_h.write("Markers represent alleles at polymorphic sites\n")
    msgfile_h.write("Distance matrix constructed using modified Nei/Li model\n")

if MODEL == 'S' or MODEL == 'R':
    # Transition/transversion ratio
    if TRANSRATIO != 2.0:
        comfile_h.write('t\n')
        comfile_h.write(str(TRANSRATIO) + '\n')
    msgfile_h.write("Transition/Transversion ratio = " + str(TRANSRATIO) + "\n")

# Set length of polymorphic site eg. length of restriction site or length of primer
# binding sites
comfile_h.write('s\n')
comfile_h.write(str(SITELEN) + '\n')
msgfile_h.write("Site length = " + str(SITELEN) + "\n")

#accept current settings and do the analysis
comfile_h.write('y\n')
comfile_h.close()

# Debug:  Allows us to examine the contents of RestdistComfile
#p_testinfile = subprocess.call(["nedit", "RestdistComfile"])


#-------------------- run restdist --------------
comfile_h = open('RestdistComfile', 'r')
restdist_p = subprocess.Popen('restdist', stdin=comfile_h)
restdist_p.wait()
comfile_h.close()
shutil.move('outfile', 'infile')

# Allows us to examine the contents of the matrix file produced by restdist.
#p_testinfile = subprocess.call(["nedit", "infile"])

msgfile_h.write("---------------------  CONSTRUCTING TREE(S) ---------------------\n")
msgfile_h.write("\n")

#----------------- generate keyboard input to send to distance tree program -----
# Generate keyboard input for restdist and save it to DistanceComfile.
# This statement reads from infile, so we have to run it before
# running TREEPROGRAM, which also reads infile.
NUMSEQ = phylip.get_numseq("infile")

comfile_h = open('DistanceComfile', 'w')

#-----------------  Construct tree from matrix   -----
# Choose method for constructing distance matrix
# F = FITCH, Fitch-Margoliash method
# f = FITCH, Minimum evolution
# K = KITSCH, Fitch-Margoliash method
# k = KITSCH, Minimum evolution method
# N = Neighbor-joining
# U = Neighbor-joining
# default --> FITCH, Fitch-Margoliash method
if TCONMETH in ("K", "k"):
    PROGRAM = "kitsch"
elif TCONMETH in ("N", "U"):
    PROGRAM = "neighbor"
else:
    PROGRAM = "fitch"

# Jumble - When multiple datasets are analyzed, some programs automatically
# jumbles, and prompts for a random number seed for jumbling. Othersise,
# jumbling must be explicitly set. From a scripting perspecitve, it is safest therefore
# to set jumbling first, so that we don't need to handle a bunch of special cases in
# which the program asks for jumbling at a later time.
if METHOD != "n":
    JUMBLE = "J"
if JUMBLE == "J":
    phylip.jumble(comfile_h, msgfile_h, NUMJUM)

if TCONMETH in ("k", "f"):
    comfile_h.write("d\n")
elif TCONMETH == "U":
    comfile_h.write("n\n")

RSEED = phylip.phylip_random()
if METHOD in ("b", "d"):
    comfile_h.write('m\n')
    comfile_h.write(str(REPLICATES) + '\n')
    comfile_h.write(str(RSEED) + '\n')
elif METHOD in ("ps", "po", "pw"):
    comfile_h.write('m\n')
    comfile_h.write(str(REPLICATES) + '\n')
    comfile_h.write(str(RSEED) + '\n')

# Subreplicates
# Phylip documentation doesn't tell us what this parameter does, so
# fo rnoe we leave it commented out.
#if SUBREP == 'n':
#    comfile_h.write('s\n')

if PROGRAM in ('fitch', 'kitsch'):
    # Global rearrangements
    if GLOBAL == 'y':
        comfile_h.write('g\n')

    # Negative branch lengths
    if NEGBRANCH == 'y':
        comfile_h.write('-\n')

# Outgroup
OUTGROUP = phylip.do_outgroup(OUTGROUP, NUMSEQ, comfile_h, msgfile_h)

# Should sequence data be printed?
if PRINTDATA == "y":
    comfile_h.write("1\n")

# When resampling, turn off printing trees to outfile
if METHOD in ('b', 'd', 'ps', 'po', 'pw'):
    comfile_h.write("3\n")

#accept current settings and do the analysis
comfile_h.write("y\n")
comfile_h.close()
msgfile_h.write(PROGRAM + ":  SEED= " + str(RSEED))

# Debug:  Allows us to examine the contents of DistanceComfile
#p_testinfile = subprocess.call(["nedit", "DistanceComfile"])

#-----------------------  run FITCH, KITCH, or NEIGHBOR
print('Running ' + PROGRAM)
os.nice(4)
comfile_h = open('DistanceComfile', 'r')
start_time = time.time()
p_TREEPROGRAM = subprocess.Popen([PROGRAM], stdin=comfile_h)
p_TREEPROGRAM.wait()
comfile_h.close()
termout_h.close()
msgfile_h.close()
time_elapsed = time.time() - start_time
os.nice(0)
# Debug:  Allows us to examine the contents of output from PROGRAM
#p_testinfile = subprocess.call(["nedit", "outfile"])

outfile_h = open('outfile', 'a')
# There is no standard way to time a subprocess in Python.
outfile_h.write("Elapsed time on " + os.uname()[1] + ": " + str(time_elapsed) + " seconds")
outfile_h.close()


#----------- Return results to calling directory----------------
# When using resampling, filter the treefile through
# consense to generate an unrooted consensus tree.
if METHOD in ('b', 'd', 'ps', 'po', 'pw'):
    outfile_h = open('outfile', 'a')
    outfile_h.write(" \n")
    outfile_h.write('-------------------------------------------\n')
    outfile_h.close()

    subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), 'n', 'outfile.consense', TREEFILE])
    shutil.move(TREEFILE, os.path.join(STARTDIR, TREEFILE))
    phylip.merge_msg(os.path.join(STARTDIR, OUTFILE))
else:
    shutil.move('outtree', os.path.join(STARTDIR, TREEFILE))
    phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False)

os.chdir(STARTDIR)
shutil.rmtree(TEMPDIR)

termout_h.close()
print(PROGRAM + ' completed.')
