#!/usr/bin/env python3

import re
import os
import os.path
import phylip
import shutil
import subprocess
import sys
import time

#Version  November 17, 2014
# Run restml as a command
#Synopsis: restml.py infile utree_ufn method replicates percent\
#             allsites speedy global sitenum sitelen outgroup jumble numjum\
#             termout printdata outfile treefile

#Convert arguments to variables
INFILE        = sys.argv[1]
#print 'INFILE: ' + INFILE
# bltree: If no file has been chosen for UFN, UFN will be blank.
# Simply substituting $UFN into the command for restml.csh would
# therefore result in a missing parameter. Here we concatenate
# UTREE and UFN in the form UTREE_UFN. restml.csh has been
# modified to read both parameters from the UTREE variable,
# and UFN is no longer passed as a separate variable.
UTREEPARAM = sys.argv[2]
UTREE = UTREEPARAM[0]
UFN           = UTREEPARAM[1:]
METHOD        = sys.argv[3]
REPLICATES    = sys.argv[4]
PERCENT       = sys.argv[5]
ALLSITES      = sys.argv[6]
SPEEDY        = sys.argv[7]
GLOBAL        = sys.argv[8]
SITENUM       = sys.argv[9]
SITELEN       = sys.argv[10]
OUTGROUP      = sys.argv[11]
JUMBLE        = sys.argv[12]
NUMJUM        = sys.argv[13]
TERMOUT       = sys.argv[14]
PRINTDATA     = sys.argv[15]
OUTFILE       = sys.argv[16]
TREEFILE      = sys.argv[17]

#print 'UTREE: ' + UTREE
#print 'UFN: ' + UFN
#print 'METHOD: ' + METHOD

# Make a temporary directory in which to run the program
STARTDIR = os.getcwd()
TEMPDIR  = 'RESTML.' + str(os.getpid())
os.mkdir(TEMPDIR)
shutil.copyfile(INFILE, os.path.join(TEMPDIR, 'infile.temp'))

if UTREE == 'y':
    # Turn off bootstrapping when evaluating a user tree.
    # If bootstrapping is on, an empty file may be generated
    # which could cause drawtree or drawgram to loop infinitely
    METHOD = 'n'

    # Make sure that treefile begins with number of trees on first
    # line of file. If first line in file has parentheses, the
    # number must be added.
    phylip.ufn(UFN, TEMPDIR)

os.chdir(TEMPDIR)
# Debug statement.
#p_testinfile = subprocess.call(["nedit", "infile.temp"])

#----------------- generate keyboard input to send to program -----
msgfile_h = open("MSGFILE", "w")


#----------------- generated resampled datasets, if specified  -----
# Choose resampling method

# NOTE!!: Unlike most other programs in PHYLIP, RESTML can NOT use weights
# when analyzing multiple datasets. Therefore, seqboot.csh must generate bootstrapped
# datasets, rather than weights.
# Also note that the 'r' option to seqboot.csh tells it that the datafile
# contains an extra number, telling the number of  restriction enzymes used.
if METHOD == 'n':
    shutil.move('infile.temp', 'infile')
else:
    phylip.weightless_resample(METHOD, PERCENT, REPLICATES, '1',  msgfile_h, 'r', 'i')

# RESTML expects a third number to appear on the first line of infile,
# indicating the number of restriction enzymes used. This has to be added
# to the first line of EACH dataset, when there are multiple datasets in
# infile.
shutil.move('infile', 'infile.bak')
h_infile_bak = open('infile.bak', 'r')
h_infile = open('infile', 'w')
for line in h_infile_bak:
    rawline = line.strip()
    exp1 = '( *[0-9][0-9]*  *[0-9][0-9]*)'
    #exp2 = '\g<1>'
    if re.match(exp1,rawline) :
        h_infile.write(rawline + ' ' + SITENUM + '\n')
    else:
        h_infile.write(rawline + '\n')

h_infile_bak.close()
h_infile.close()

# Debug statement.
#p_testinfile = subprocess.call(["nedit", "infile"])


# This statement reads from infile, so we have to run it before
# running TREEPROGRAM, which also reads infile.
NUMSEQ = phylip.get_numseq("infile")

#----------------- Run RESTML  -----
#----------------- generate keyboard input to send to restml program -----
comfile_h = open('RestMLComfile', 'w')


# !!!!!!!!! Workaround for bug in restml3.69 !!!!!!!!!!!!!
#
# This section should be deleted, and the corresponding section
# for SITELEN below, uncommented, once the bug is fixed.
#
# Problem: Site length - Broken in restml 3.69. Attempting to set this
# parameter has no effect.
#
# Workaround: Joe Felsenstein says that the 'l' option does work
# if the usertree option has been selected. So we set the
# usertree option, set sitelen, and then unset usertree.
# If usertree needs to be set, that will happen normally
# as below.

if int(SITELEN) < 1 or int(SITELEN) > 50:
    SITELEN = 6
comfile_h.write('u\n')
comfile_h.write('l\n')
comfile_h.write(str(SITELEN) + '\n')
comfile_h.write('u\n')
#msgfile_h.write( 'SITELENGTH = ' + str(SITELEN))
# !!!!!!!!!!!! end workaround

if UTREE == 'y':
    comfile_h.write('u\n')

msgfile_h.write( "---------------------  RESTML  ---------------------")
msgfile_h.write( "")



# RESTML 3.69 documentation claims to be able to read sequential
# format but sequential files result in an EOF error. Use Interleaved.
# Input file is not interleaved
#comfile_h.write('i\n')

# Jumble - When multiple datasets are analyzed, some programs automatically
# jumbles, and prompts for a random number seed for jumbling. Othersise,
# jumbling must be explicitly set. From a scripting perspecitve, it is safest therefore
# to set jumbling first, so that we don't need to handle a bunch of special cases in
# which the program asks for jumbling at a later time.
if METHOD != "n":
    JUMBLE = "J"
if JUMBLE == "J":
    phylip.jumble(comfile_h, msgfile_h, NUMJUM)

# RESTML doesn't seem to prompt for a random number seed. We'll keep
# this code in case that changes in the future.
#RSEED = phylip.phylip_random()
#msgfile_h.write( PROGRAM + ":  SEED= " + str(RSEED))

if METHOD in ("b", "d"):
    comfile_h.write('m\n')
#    comfile_h.write('w\n')
    comfile_h.write(REPLICATES + '\n')
#    comfile_h.write(str(RSEED) + '\n')
elif METHOD in ("ps", "po", "pw"):
    comfile_h.write('m\n')
#    comfile_h.write('d\n')
    comfile_h.write(REPLICATES + '\n')
#    comfile_h.write(str(RSEED) + '\n')

# All sites are detected
if ALLSITES == 'y':
    comfile_h.write('a\n')
    msgfile_h.write( 'Assuming all sites detected')
#else:
#    msgfile_h.write( 'Assuming NOT all sites detected')

# Speedier but rougher analysis
if SPEEDY == 'n':
    comfile_h.write('s\n')
    msgfile_h.write( 'Doing thorough analysis')
else:
    msgfile_h.write( 'Doing speedy but rougher analysis')

# Global rearrangements
if GLOBAL == 'y':
    comfile_h.write('g\n')
    msgfile_h.write( 'Doing global rearrangements')
else:
    msgfile_h.write( 'No global rearrangements done')


# Site length - Broken in restml 3.69. Attempting to set this
# parameter has no effect.
#if int(SITELEN) < 1 or int(SITELEN) > 50:
#    SITELEN = 6
#comfile_h.write('l\n')
#comfile_h.write(str(SITELEN) + '\n')
#msgfile_h.write( 'SITELENGTH = ' + str(SITELEN))

# Outgroup
OUTGROUP = phylip.do_outgroup(OUTGROUP, NUMSEQ, comfile_h, msgfile_h)

# Should sequence data be printed?
if PRINTDATA == 'y':
    comfile_h.write('1\n')

# When resampling, turn off printing trees to outfile

if METHOD in ('b', 'd', 'ps', 'po', 'pw'):
    comfile_h.write('3\n')

#accept current settings and do the analysis
comfile_h.write('y\n')



#----------------- Finish RESTML  -----
comfile_h.close()
# Debug:  Allows us to examine the contents of RestdistComfile
#p_testinfile = subprocess.call(["nedit", "RestdistComfile"])

comfile_h = open('RestMLComfile', 'r')
termout_h = open(TERMOUT, 'w')
#os.nice(8)
start_time = time.time()
p_PROGRAM = subprocess.Popen(['restml'], stdin=comfile_h)

p_PROGRAM.wait()
termout_h.close()
msgfile_h.close()
time_elapsed = time.time() - start_time
#os.nice(0)

outfile_h = open('outfile', 'a')
outfile_h.write("Execution times on " + os.uname()[1] + ": " + str(time_elapsed))
outfile_h.close()

# Debug statement.
#p_testinfile = subprocess.call(["nedit", "outfile"])

#----------- Return results to calling directory----------------
# When using resampling, filter the treefile through
# consense to generate an unrooted consensus tree.
if METHOD in ('b', 'd', 'ps', 'po', 'pw'):
    outfile_h = open('outfile', 'a')
    outfile_h.write(" \n")
    outfile_h.write('-------------------------------------------\n')
    outfile_h.close()

    subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), 'n', 'outfile.consense', TREEFILE])
    shutil.move(TREEFILE, os.path.join(STARTDIR, TREEFILE))
    phylip.merge_msg(os.path.join(STARTDIR, OUTFILE))
else:
    shutil.move('outtree', os.path.join(STARTDIR, TREEFILE))
    phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False)

os.chdir(STARTDIR)
shutil.rmtree(TEMPDIR)

print("RESTML completed.")
