#!/usr/bin/env python

import os
import os.path
import phylip
import shutil
import socket
import subprocess
import sys
import time

#Version  4/16/2007
# Run protein distance programs as a command
#Synopsis: protdist.py infile dmethod transratio gc categorization method replicates blocksize percent\
#             tconmeth power subrep global negbranch outgroup jumble numjum\
#             termout printdata outfile treefile

#Convert arguments to variables
INFILE         = sys.argv[1]
DMETHOD        = sys.argv[2]
TRANSRATIO     = sys.argv[3]
GC             = sys.argv[4]
CATEGORIZATION = sys.argv[5]
METHOD         = sys.argv[6]
REPLICATES     = sys.argv[7]
BLOCKSIZE      = sys.argv[8]
PERCENT        = sys.argv[9]
TCONMETH       = sys.argv[10]
POWER          = sys.argv[11]
SUBREP         = sys.argv[12]
GLOBAL         = sys.argv[13]
NEGBRANCH      = sys.argv[14]
OUTGROUP       = sys.argv[15]
JUMBLE         = sys.argv[16]
NUMJUM         = sys.argv[17]
TERMOUT        = sys.argv[18]
PRINTDATA      = sys.argv[19]
OUTFILE        = sys.argv[20]
TREEFILE       = sys.argv[21]


#Send program output to the terminal
termout_h = open(TERMOUT, 'w')

# Remember where we started
STARTDIR = os.getcwd()

# Make a temporary directory in which to run the program
TEMPDIR = 'PROTDIST.' + str(os.getpid())
os.mkdir(TEMPDIR)
shutil.copy(INFILE, os.path.join(TEMPDIR, 'infile.temp'))
os.chdir(TEMPDIR)
# Debug statement
#p_testinfile = subprocess.call(["nedit", "infile.temp"])

msgfile_h = open('MSGFILE', 'w')
msgfile_h.write("Protein Distance Matrix Phylogeny Methods\n")
msgfile_h.write("\n")
msgfile_h.write("---------------------  DISTANCE MATRIX ---------------------\n")
msgfile_h.write("\n")

if METHOD == 'n':
    msgfile_h.write(" \n")
    shutil.copyfile('infile.temp', 'infile')
if METHOD in ('b', 'd'):
    phylip.stdresample(METHOD, PERCENT, REPLICATES, '1', msgfile_h, 's', 'i')
elif METHOD in ("ps", "po", "pw"):
    phylip.weightless_resample(METHOD, PERCENT, REPLICATES, '1',  msgfile_h, 's', 'i')

# Debug:  Allows us to examine the contents of infile produced by seqboot.
#p_testinfile = subprocess.call(["nedit", "infile"])

# This statement reads from infile, so we have to run it before
# running the program, which also reads infile.
NUMSEQ = phylip.get_numseq("infile")

#-------------------------- PROTDIST   ---------------------------


# Generate keyboard input for protdist and save it to ProtdistComfile.
# While it is possible to pipe the output directly to protdist, we have
# discovered that on systems with a high load average, the program doesn't
# always complete. Besides, it is WAY easier to debug when we save keyboard
# output to a file that can be examined.
comfile_h = open('ProtdistComfile', 'w')

if METHOD in ('b', 'd'):
    comfile_h.write('m\n')
    comfile_h.write('w\n')
    comfile_h.write(str(REPLICATES) + '\n')
    #comfile_h.write(str(RSEED) + '\n')
elif METHOD in ("ps", "po", "pw"):
    comfile_h.write('m\n')
    comfile_h.write('d\n')
    comfile_h.write(str(REPLICATES) + '\n')

# Choose method for constructing distance matrix
if DMETHOD == "J": # Jones-Taylor-Thornton
    msgfile_h.write("Distance matrix constructed using Jones-Taylor-Thornton method\n")
elif DMETHOD == "H": # Kimura 2-parameter
    msgfile_h.write("Distance matrix constructed using Henikoff-Tiller PMB matrix method\n")
    comfile_h.write("p\n")
elif DMETHOD == "D": # Jukes - Cantor
    msgfile_h.write("Distance matrix constructed using Dayhoff PAM method\n")
    comfile_h.write("p\n")
    comfile_h.write("p\n")
elif DMETHOD == "K": # Kimura
    msgfile_h.write("Distance matrix constructed using Kimura method\n")
    comfile_h.write("p\n")
    comfile_h.write("p\n")
    comfile_h.write("p\n")
elif DMETHOD == "C": # Categories
    msgfile_h.write("Distance matrix constructed using Categories method\n")
    comfile_h.write("p\n")
    comfile_h.write("p\n")
    comfile_h.write("p\n")
    comfile_h.write("p\n")
    comfile_h.write("p\n")

    # Transition/transversion ratio
    if TRANSRATIO != 2.0:
        comfile_h.write("t\n")
        comfile_h.write(TRANSRATIO + '\n')
    msgfile_h.write("Transition/Transversion ratio = " + TRANSRATIO + '\n')

    # Genetic code
    if GC == "m" :
        msgfile_h.write("Using Genetic Code: MITOCHONDRIAL\n")
        comfile_h.write("u\n")
        comfile_h.write("m\n")
    elif GC == "v" :
        msgfile_h.write("Using Genetic Code: VERTEBRATE MITOCHONDRIAL\n")
        comfile_h.write("u\n")
        comfile_h.write("v\n")
    elif GC == "f" :
        msgfile_h.write("Using Genetic Code: FLY MITOCHONDRIAL\n")
        comfile_h.write("u\n")
        comfile_h.write("f\n")
    elif GC == "y" :
        msgfile_h.write("Using Genetic Code: YEAST MITOCHONDRIAL\n")
        comfile_h.write("u\n")
        comfile_h.write("y\n")
    else:
        # also: if GC == "u" :
        msgfile_h.write("Using Genetic Code: UNIVERSAL\n")

    # Categorization method
    if CATEGORIZATION == "G" :
        msgfile_h.write("Categories of amino acids: George-Hunt-Barker\n")
    elif CATEGORIZATION == "C" :
        msgfile_h.write("Categories of amino acids: Chemical\n")
        comfile_h.write("a\n")
        comfile_h.write("c\n")
    elif CATEGORIZATION == "H" :
        msgfile_h.write("Categories of amino acids: Hall\n")
        comfile_h.write("a\n")
        comfile_h.write("h\n")
    else:
        msgfile_h.write("Categories of amino acids: George-Hunt-Barker\n")
else: # Jones-Taylor-Thornton
    msgfile_h.write("Distance matrix constructed using Jones-Taylor-Thornton method\n")

#accept current settings and do the analysis
comfile_h.write("y\n")
comfile_h.close()
# Debug:  Allows us to examine the contents of comfile
#p_testinfile = subprocess.call(["nedit", "ProtdistComfile"])

#Run protdist using ProtdistComfile for keyboard input.
os.nice(4)
comfile_h = open('ProtdistComfile', 'r')
p_protdist = subprocess.Popen(["protdist"], stdin=comfile_h)
p_protdist.wait()
comfile_h.close()
shutil.move("outfile", "infile")

# Debug:  Allows us to examine the contents of infile produced by protdist.
#p_testinfile = subprocess.call(["nedit", "infile"])


msgfile_h.write("---------------------  CONSTRUCTING TREE(S) ---------------------\n")
msgfile_h.write("\n")

#----------------- generate keyboard input to send to distance tree program -----

if TCONMETH == "w":
    msgfile_h.write("Please cite:\n")
    msgfile_h.write("WEIGHBOR - Weighted Neighbor Joining.\n")
    msgfile_h.write("William J. Bruno, Nicholas D. Socci, and Aaron L. Halpern\n")
    msgfile_h.write("Weighted Neighbor Joining: A Likelihood-Based Approach to\n")
    msgfile_h.write("Distance-Based Phylogeny Reconstruction,\n")
    msgfile_h.write("Mol. Biol. Evol. 17 (1): 189-197 (2000).\n")
    msgfile_h.write(" \n")
    PROGRAM = "weighbor"

    #Read length of sequence alignment from INFILE
    h_infile = open("infile.temp", "r")
    SEQLENGTH = h_infile.readline().split()[1]
    h_infile.close()

    print("SEQLENGTH = " + SEQLENGTH)

    start_time = time.time()
    subprocess.call(["weighbor", "-L", SEQLENGTH, "-b", "20", "-i", "infile", "-o", "outtree", "-v"], stdout=open(TERMOUT, "a"))
    shutil.move("weighbor.out", "outfile")
    time_elapsed = time.time() - start_time
    h_smalloutfile = open("outfile", "a")
    h_smalloutfile.write("Execution times on " + socket.gethostname() + ": " + str(time_elapsed) + "\n")
    h_smalloutfile.close()
else:
    # Choose method for constructing distance matrix
    if TCONMETH in ("K", "k"): # "K" = KITSCH, Fitch-Margoliash method; "k" = KITSCH, Minimum evolution method
        PROGRAM = "kitsch"
    elif TCONMETH in ("N", "U"): # Neighbor-joining
        PROGRAM = "neighbor"
    else: # FITCH, Fitch-Margoliash method
        PROGRAM = "fitch"

    #----------------- Run FITCH, KITCH or NEIGHBOR   -----
    termout_h = open(TERMOUT, 'a')

    # Generate keyboard input for protdist and save it to ProtdistComfile.
    comfile_h = open('DistanceComfile', 'w')

    # Jumble - When multiple datasets are analyzed, some programs automatically
    # jumbles, and prompts for a random number seed for jumbling. Othersise,
    # jumbling must be explicitly set. From a scripting perspecitve, it is safest therefore
    # to set jumbling first, so that we don't need to handle a bunch of special cases in
    # which the program asks for jumbling at a later time.
    if METHOD != "n":
        JUMBLE = "J"
    if JUMBLE == "J":
        phylip.jumble(comfile_h, msgfile_h, NUMJUM)

    if TCONMETH == "f": # FITCH, Minimum evolution
        comfile_h.write("d\n")
    elif TCONMETH == "k": # KITSCH, Minimum evolution method
        comfile_h.write("d\n")
    elif TCONMETH == "U": # Neighbor-joining
        comfile_h.write("n\n")

    RSEED = phylip.phylip_random()
    if METHOD in ("b", "d"):
        comfile_h.write("m\n")
        comfile_h.write(str(REPLICATES) + '\n')
        comfile_h.write(str(RSEED) + '\n')
    elif METHOD in ("ps", "po", "pw"):
        comfile_h.write("m\n")
        comfile_h.write(str(REPLICATES) + '\n')
        comfile_h.write(str(RSEED) + '\n')
    #case "n": DO NOTHING

    # Subreplicates
    if SUBREP == "n":
        comfile_h.write("s\n")

    if PROGRAM in ('fitch', 'kitsch'):
        # Global rearrangements
        if GLOBAL == 'y':
            comfile_h.write('g\n')

        # Negative branch lengths
        if NEGBRANCH == 'y':
            comfile_h.write('-\n')

    # Outgroup
    OUTGROUP = phylip.do_outgroup(OUTGROUP, NUMSEQ, comfile_h, msgfile_h)

    # Should sequence data be printed?
    if PRINTDATA == "y":
        comfile_h.write("1\n")

    # When resampling, turn off printing trees to outfile
    if METHOD in ("b", "d", "ps", "po", "pw"):
        comfile_h.write("3\n")

    #accept current settings and do the analysis
    comfile_h.write("y\n")
    comfile_h.close()
    # Debug:  Allows us to examine the contents of comfile
    #p_testinfile = subprocess.call(["nedit", "DistanceComfile"])

    #----------------- Run FITCH, KITCH or NEIGHBOR   -----
    comfile_h = open('DistanceComfile', 'r')
    start_time = time.time()
    p_program = subprocess.Popen([PROGRAM], stdin=comfile_h)
    p_program.wait()
    comfile_h.close()
    termout_h.close()
    msgfile_h.close()
    time_elapsed = time.time() - start_time
    h_smalloutfile = open("outfile", "a")
    h_smalloutfile.write("Elapsed time on " + os.uname()[1] + ": " + str(time_elapsed) + " seconds")
    h_smalloutfile.close()

msgfile_h.close()

# Debug:  Allows us to examine the contents of outfile
#p_testinfile = subprocess.call(["nedit", "outfile"])

os.nice(0)

#----------- Return results to calling directory----------------
# When using resampling, filter the treefile through
# consense to generate an unrooted consensus tree.
if METHOD in ('b', 'd', 'ps', 'po', 'pw'):
    outfile_h = open('outfile', 'a')
    outfile_h.write(" \n")
    outfile_h.write('-------------------------------------------\n')
    outfile_h.close()

    subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), 'n', 'outfile.consense', TREEFILE])
    shutil.move(TREEFILE, os.path.join(STARTDIR, TREEFILE))
    phylip.merge_msg(os.path.join(STARTDIR, OUTFILE))
else:
    shutil.move('outtree', os.path.join(STARTDIR, TREEFILE))
    phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False)

os.chdir(STARTDIR)
#p_testinfile = subprocess.call(["ls", "-la", TEMPDIR])
shutil.rmtree(TEMPDIR)

print(PROGRAM + " completed.")
