#!/usr/bin/env python

import os
import os.path
import phylip
import shutil
import subprocess
import sys
import time

#Version  February 13, 2021
# Run dna distance programs as a command
#Synopsis: dnadist.py infile dmethod transratio method replicates blocksize percent\
#             tconmeth power subrep global negbranch outgroup jumble numjum\
#             termout printdata outfile treefile

#Convert arguments to variables
INFILE        = sys.argv[1]
DMETHOD       = sys.argv[2]
TRANSRATIO    = sys.argv[3]
METHOD        = sys.argv[4]
REPLICATES    = sys.argv[5]
BLOCKSIZE     = sys.argv[6]
PERCENT       = sys.argv[7]
TCONMETH      = sys.argv[8]
POWER         = sys.argv[9]
SUBREP        = sys.argv[10]
GLOBAL        = sys.argv[11]
NEGBRANCH     = sys.argv[12]
OUTGROUP      = sys.argv[13]
JUMBLE        = sys.argv[14]
NUMJUM        = sys.argv[15]
TERMOUT       = sys.argv[16]
PRINTDATA     = sys.argv[17]
OUTFILE       = sys.argv[18]
TREEFILE      = sys.argv[19]


#Send program output to the terminal
termout_h = open(TERMOUT, 'w')

# Remember where we started
STARTDIR = os.getcwd()

# Make a temporary directory in which to run the program
TEMPDIR = 'DNADIST.' + str(os.getpid())
os.mkdir(TEMPDIR)
shutil.copy(INFILE, os.path.join(TEMPDIR, 'infile.original'))
# From here on, everything is done in TEMPDIR
os.chdir(TEMPDIR)

# Phylip can't deal with sequence names longer than 10 characters. 
# Therefore, we have to replace the names with randomly assigned names, and save the
# original names and random names in idkeys.tsv. We can use idkey.tsv at the end
# to restore the original names to the output files. 
subprocess.call(["uniqid.py","--encode", "infile.original","infile.encoded","idkeys.tsv"])
subprocess.call(["phylcnv.py","-inf", "fasta", "-outf", "pint", "infile.encoded", "infile.temp"])

# Debug statement - did infile.temp get written to TEMPDIR?
#p_testinfile = subprocess.call(["nedit", "infile.temp"])

msgfile_h = open('MSGFILE', 'w')
msgfile_h.write("DNA Distance Matrix Phylogeny Methods\n")
msgfile_h.write("\n")
msgfile_h.write("---------------------  DISTANCE MATRIX ---------------------\n")
msgfile_h.write("\n")


if METHOD == 'n':
    msgfile_h.write(" \n")
    shutil.copyfile('infile.temp', 'infile')
if METHOD in ('b', 'd'):
    phylip.stdresample(METHOD, PERCENT, REPLICATES, '1', msgfile_h, 's', 'i')
elif METHOD in ("ps", "po", "pw"):
    phylip.weightless_resample(METHOD, PERCENT, REPLICATES, '1',  msgfile_h, 's', 'i')

# Debug:  Allows us to examine the contents of infile produced by either by seqboot,
# or just copied, if METHOD = n.
#p_testinfile = subprocess.call(["nedit", "infile"])

# This statement reads from infile, so we have to run it before
# running the program, which also reads infile.
NUMSEQ = phylip.get_numseq("infile")

#----------------------------  DNADIST  -----------------------------------
termout_h = open(TERMOUT, 'w')

#----------------- generate keyboard input to send to DNADIST -----
# Generate keyboard input for protdist and save it to ProtdistComfile.
# While it is possible to pipe the output directly to protdist, we have
# discovered that on systems with a high load average, the program doesn't
# always complete. Besides, it is WAY easier to debug when we save keyboard
# output to a file that can be examined.
comfile_h = open('DnadistComfile', 'w')

if METHOD in ('b', 'd'):
    comfile_h.write("m\n")
    comfile_h.write("w\n")
    comfile_h.write(str(REPLICATES) + "\n")
    #comfile_h.write(str(RSEED) + "\n")
elif METHOD in ("ps", "po", "pw"):
    comfile_h.write("m\n")
    comfile_h.write("d\n")
    comfile_h.write(str(REPLICATES) + "\n")

# Choose method for constructing distance matrix
if DMETHOD == "k": # Kimura 2-parameter
    msgfile_h.write("Distance matrix constructed using Kimura 2-parameter method\n")
    comfile_h.write("d\n")
elif DMETHOD == "j": # Jukes - Cantor
    msgfile_h.write("Distance matrix constructed using Jukes-Cantor method\n")
    comfile_h.write("d\n")
    comfile_h.write("d\n")
elif DMETHOD == "l": # LogDet
    msgfile_h.write("Distance matrix constructed using Log of the determinant of the joint\n")
    msgfile_h.write("nucleotide frequencies\n")
    comfile_h.write("d\n")
    comfile_h.write("d\n")
    comfile_h.write("d\n")
else: # default (DMETHOD == "f") # F84
    msgfile_h.write("Distance matrix constructed using F84 method\n")

if DMETHOD in ("f", "k"):
    # Transition/transversion ratio
    if TRANSRATIO != 2.0:
        comfile_h.write("t\n")
        comfile_h.write(str(TRANSRATIO) + "\n")
    msgfile_h.write("Transition/Transversion ratio = " + str(TRANSRATIO) + "\n")

#accept current settings and do the analysis
comfile_h.write("y" + "\n")
comfile_h.close()
# Debug:  Allows us to examine the contents of DnadistComfile
#p_testinfile = subprocess.call(["nedit", "DnadistComfile"])

os.nice(4)
#----------------- Run DNADIST   -----
comfile_h = open('DnadistComfile', 'r')

p_MATRIX = subprocess.Popen(['dnadist'], stdin=comfile_h)
p_MATRIX.wait()
comfile_h.close()

shutil.move('outfile', 'infile')

# Allows us to examine the contents of matrix file produced by dnadist.
#p_testinfile = subprocess.call(["nedit", "infile"])

msgfile_h.write("---------------------  CONSTRUCTING TREE(S) ---------------------\n")
msgfile_h.write("\n")

#----------------- generate keyboard input to send to distance tree program -----

# Choose method for constructing distance matrix
# w = weighbor, weighted Neighbor-Joining
# F = FITCH, Fitch-Margoliash method
# f = FITCH, Minimum evolution
# K = KITSCH, Fitch-Margoliash method
# k = KITSCH, Minimum evolution method
# N = Neighbor-joining
# U = Neighbor-joining
# default = FITCH, Fitch-Margoliash method
if TCONMETH in ("w"):
    PROGRAM = "weighbor"
    msgfile_h.write('Please cite:\n')
    msgfile_h.write('WEIGHBOR - Weighted Neighbor Joining.\n')
    msgfile_h.write('William J. Bruno, Nicholas D. Socci, and Aaron L. Halpern\n')
    msgfile_h.write('Weighted Neighbor Joining: A Likelihood-Based Approach to\n')
    msgfile_h.write('Distance-Based Phylogeny Reconstruction,\n')
    msgfile_h.write('Mol. Biol. Evol. 17 (1): 189-197 (2000).\n')
    msgfile_h.write(" \n")
    PROGRAM = "weighbor"

    #Read length of sequence alignment from INFILE
    h_infile = open("infile.temp", "r")
    SEQLENGTH = h_infile.readline().split()[1]
    h_infile.close()

    print("SEQLENGTH = " + str(SEQLENGTH))
    start_time = time.time()
    subprocess.call(["weighbor", "-L", str(SEQLENGTH), "-b", "4", "-i", "infile", "-o", "outtree", "-v"], stdout=open(TERMOUT, "a"))
    shutil.move("weighbor.out", "outfile")
    time_elapsed = time.time() - start_time
    h_smalloutfile = open("outfile", "a")
    h_smalloutfile.write("Execution times on " + os.uname()[1] + ": " + str(time_elapsed))
    h_smalloutfile.close()
    msgfile_h.close()
else:
    if TCONMETH in ("K", "k"):
        PROGRAM = "kitsch"
    elif TCONMETH in ("N", "U"):
        PROGRAM = "neighbor"
    else:
        PROGRAM = "fitch"

    #----------------- Run FITCH, KITCH or NEIGHBOR   -----
    termout_h = open(TERMOUT, 'a')


    # Generate keyboard input for dnadist and save it to DistanceComfile.
    comfile_h = open('DistanceComfile', 'w')
    # Jumble - When multiple datasets are analyzed, some programs automatically
    # jumbles, and prompts for a random number seed for jumbling. Othersise,
    # jumbling must be explicitly set. From a scripting perspecitve, it is safest therefore
    # to set jumbling first, so that we don't need to handle a bunch of special cases in
    # which the program asks for jumbling at a later time.
    if METHOD != "n":
        JUMBLE = "J"
    if JUMBLE == "J":
        phylip.jumble(comfile_h, msgfile_h, NUMJUM)

    if TCONMETH in ("k", "f"):
        comfile_h.write("d\n")
    elif TCONMETH == "U":
        comfile_h.write("n\n")

    RSEED = phylip.phylip_random()
    if METHOD in ("b", "d"):
        comfile_h.write("m\n")
        comfile_h.write(str(REPLICATES) + '\n')
        comfile_h.write(str(RSEED) + '\n')
    elif METHOD in ("ps", "po", "pw"):
        comfile_h.write("m\n")
        comfile_h.write(str(REPLICATES) + '\n')
        comfile_h.write(str(RSEED) + '\n')
    #case "n": DO NOTHING

    # Subreplicates
    # Phylip documentation doesn't tell us what this parameter does, so
    # fo rnoe we leave it commented out.
    #if SUBREP == 'n':
    #    comfile_h.write('s\n')

    if PROGRAM in ('fitch', 'kitsch'):
        # Global rearrangements
        if GLOBAL == 'y':
            comfile_h.write('g\n')

        # Negative branch lengths
        if NEGBRANCH == 'y':
            comfile_h.write('-\n')

    # Outgroup
    OUTGROUP = phylip.do_outgroup(OUTGROUP, NUMSEQ, comfile_h, msgfile_h)

    # Should sequence data be printed?
    if PRINTDATA == "y":
        comfile_h.write("1\n")

    # When resampling, turn off printing trees to outfile
    if METHOD in ('b', 'd', 'ps', 'po', 'pw'):
        comfile_h.write("3\n")

    #accept current settings and do the analysis
    comfile_h.write("y" + '\n')
    comfile_h.close()
    # Debug:  Allows us to examine the contents of DistanceComfile
    #p_testinfile = subprocess.call(["nedit", "DistanceComfile"])
    msgfile_h.write(PROGRAM + ":  SEED= " + str(RSEED))

    #----------------- Run FITCH, KITCH or NEIGHBOR   -----
    comfile_h = open('DistanceComfile', 'r')
    start_time = time.time()
    p_TREE = subprocess.Popen([PROGRAM], stdin=comfile_h)
    p_TREE.wait()
    comfile_h.close()
    termout_h.close()
    msgfile_h.close()
    time_elapsed = time.time() - start_time


    outfile_h = open('outfile', 'a')
    # There is no standard way to time a subprocess in Python.
    outfile_h.write("Elapsed time on " + os.uname()[1] + ": " + str(time_elapsed) + " seconds")
    outfile_h.close()
    # Debug:  Allows us to examine the contents of DnadistComfile
    #p_testinfile = subprocess.call(["nedit", "outfile"])

os.nice(0)
#----------- Return results to calling directory----------------
# When using resampling, filter the treefile through
# consense to generate an unrooted consensus tree.
if METHOD in ('b', 'd', 'ps', 'po', 'pw'):
    outfile_h = open('outfile', 'a')
    outfile_h.write(" \n")
    outfile_h.write('-------------------------------------------\n')
    outfile_h.close()

    #subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), 'n', 'outfile.consense', TREEFILE])
    #shutil.move(TREEFILE, os.path.join(STARTDIR, TREEFILE))
    #phylip.merge_msg(os.path.join(STARTDIR, OUTFILE))

    subprocess.call(["uniqid.py","--decode", "outfile","outfile.decoded","idkeys.tsv"])
    shutil.move("outfile.decoded","outfile")
    subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), 'n', 'outfile.consense.tmp', 'outtree.consense'])
    subprocess.call(["uniqid.py","--decode", "outfile.consense.tmp","outfile.consense","idkeys.tsv"])
    subprocess.call(["uniqid.py","--decode", "outtree.consense","outtree.decoded","idkeys.tsv"])
    shutil.move("outtree.decoded", os.path.join(STARTDIR, TREEFILE))
    phylip.merge_msg(os.path.join(STARTDIR, OUTFILE))

else:
    #shutil.move('outtree', os.path.join(STARTDIR, TREEFILE))
    #phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False)

    subprocess.call(["uniqid.py","--decode", "outfile","outfile.decoded","idkeys.tsv"])
    shutil.move("outfile.decoded","outfile")
    subprocess.call(["uniqid.py","--decode", "outtree","outtree.decoded","idkeys.tsv"])
    shutil.move("outtree.decoded", os.path.join(STARTDIR, TREEFILE))
    phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False)

os.chdir(STARTDIR)
shutil.rmtree(TEMPDIR)

print(PROGRAM + " completed.")
