#!/usr/bin/env python

import os
import os.path
import random
import shutil
import subprocess
import sys

# To change this template, choose Tools | Templates
# and open the template in the editor.

__author__="Graham Alvare, Brian Fristensky"
__date__ ="Sun Mar 18 14:57:59 CDT 2012"

if __name__ == "__main__":
    print "PHYLIP Library module"


#----------------------------------------------------------------------------
def merge_msg(outfile_path, consense = True, extra = False):
    '''
    Merge messages from MSGFILE to the outfile generated by a Phylip program.
    If extra is included as a parameter, this appends a message indicating
    that the output is a consensus tree, whose branch lengths are the
    bootstrap values, NOT the actual branch lengths. 
    '''
    h_OUTFILE = open(outfile_path, "w")

    # First we write the contents of MSGFILE the OUTFILE.
    h_MSGFILE = open("MSGFILE", "r")
    h_OUTFILE.writelines(h_MSGFILE.readlines())
    h_MSGFILE.close()

    # Next, we write the contents of 'outfile' to the OUTFILE
    h_smalloutfile = open("outfile", "r")
    h_OUTFILE.writelines(h_smalloutfile.readlines())
    h_smalloutfile.close()

    # For consensus trees, add an extra message at the end
    if consense:
        h_outfile_consense = open("outfile.consense", "r")
        h_OUTFILE.writelines(h_outfile_consense.readlines())
        h_outfile_consense.close()
        
        if extra:
            h_OUTFILE.write("" + '\n')
            h_OUTFILE.write("" + '\n')
            h_OUTFILE.write(">>>> THIS TREEFILE IS A CONSENSUS TREE, WHOSE BRANCH LENGTHS" + '\n')
            h_OUTFILE.write(">>>> ARE BOOTSTRAP VALUES, NOT ACTUAL BRANCH LENGTHS." + '\n')
            h_OUTFILE.write(">>>> TO GENERATE BRANCH LENGTHS" + '\n')
            h_OUTFILE.write(">>>> USE TREE FILE AS INPUT FOR DNAML OR OTHER PROGRAM" + '\n')
            h_OUTFILE.write(">>>> USING THE USERTREE OPTION" + '\n')

    h_OUTFILE.close()

#----------------------------------------------------------------------------
def get_numseq(infile):
    '''Read first line of a Phylip file to find out how many sequences there are'''
    h_infile = open(infile, "r")
    NUMSEQ = h_infile.readline().split()[0]
    h_infile.close()
    return NUMSEQ

#----------------------------------------------------------------------------
def do_outgroup(OUTGROUP, NUMSEQ, comfile, h_msgfile):
    '''Make sure OUTGROUP is not greater than NUMSEQ'''
    tempoutgroup = int(OUTGROUP)
    if tempoutgroup > 1 and tempoutgroup <= NUMSEQ:
        comfile.write("o" + '\n')
        comfile.write(OUTGROUP + '\n')
    else:
        tempoutgroup = 1
    h_msgfile.write("OUTGROUP = " + str(tempoutgroup) + '\n')	
    return tempoutgroup

#----------------------------------------------------------------------------
def phylip_random():
    '''Generate a random integer as needed by Phylip programs.
    These numbers are used by Phylip programs as seeds for a random
    number stream. They must be odd, in the form 4n + 1. Return value
    is an integer between 0 and 2e16 -1 (which is 65535). Although the
    Phylip Main document claims that 32-bit random numbers are acceptible,
    at least one program, PARS, will only take 16-bit random numbers'''
    #pseed = random.randint(0,4294967295) # 32-bit random number
    pseed = random.randint(0,65535) 
    prand = ( ( ( int(pseed) / 4 ) * 4 ) + 1 )
    return prand



#----------------------------------------------------------------------------
def seqboot (INFILE, DATATYPE, RSEED, METHOD, REPLICATES, PERCENT, BLOCKSIZE, OUTWEIGHTS, OUTFORMAT, OUTFILE):
    '''Run Seqboot. Notes: SEQBOOT reads interleaved sequences by default, but can read
    sequential files using the "I" setting. By default, SEQBOOT
    writes datasets to outfile, but will write weights to 'outweights'
    if you set the "S" option. Weight files are always sequential.
    '''
    # Make a temporary directory to run the program in
    STARTDIR = os.getcwd()
    TEMPDIR = 'SEQBOOT.' + str(os.getpid())
    os.mkdir(TEMPDIR)
    shutil.copy(INFILE, os.path.join(TEMPDIR, 'infile'))
    os.chdir(TEMPDIR)


    #----------------- generate keyboard input to send to program -----

    # Generate keyboard input for protdist and save it to SeqbootComfile.
    # While it is possible to pipe the output directly to seqboot, we have
    # discovered that on systems with a high load average, the program doesn't
    # always complete. Besides, it is WAY easier to debug when we save keyboard
    # output to a file that can be examined.
    comfile_h = open('SeqbootComfile', 'w')

    # Percent of characters to sample
    if PERCENT < 5 or PERCENT > 100:
        PERCENT = 100

    comfile_h.write('%\n')
    comfile_h.write(str(PERCENT) + '\n')


    # Block size for resampling
    if BLOCKSIZE < 1 or BLOCKSIZE > 100:
        BLOCKSIZE = 1
    if BLOCKSIZE > 1:
        comfile_h.write('B\n')
        comfile_h.write(str(BLOCKSIZE) + '\n')

    # Choose datatype type
    if DATATYPE == "m":  # Discrete morphology or molecular markers
        comfile_h.write('d\n')
    elif DATATYPE == "r":  # Restriction sites
        comfile_h.write('d\n')
        comfile_h.write('d\n')
        #comfile_h.write('e\n') # num. of enzymes is on  1st line of infile
    elif DATATYPE == "R": #  Restriction sites, no enzyme number in infile
        comfile_h.write('d\n')
        comfile_h.write('d\n')
    elif DATATYPE == "g": # Allele frequencies
        comfile_h.write('d\n')
        comfile_h.write('d\n')
        comfile_h.write('d\n')
    elif DATATYPE == "s":  # Molecular sequences
        pass


    # Choose resampling method
    # Only bootstrapping and jacknifing can generate weights.
    # Others must generate complete datasets
    if METHOD == "b":                # bootstrap
        if OUTWEIGHTS == "yes":
            comfile_h.write('s\n')
    elif METHOD == "d":                # delete-half jacknife
        comfile_h.write('j\n')
        if OUTWEIGHTS == "yes":
            comfile_h.write('s\n')
    elif METHOD == "ps":                # permute species for each character
        comfile_h.write('j\n')
        comfile_h.write('j\n')
        OUTWEIGHTS = "no"
    elif METHOD == "po":                # permute character order
        comfile_h.write('j\n')
        comfile_h.write('j\n')
        comfile_h.write('j\n')
        OUTWEIGHTS = "no"
    elif METHOD == "pw":                # permute within species
        comfile_h.write('j\n')
        comfile_h.write('j\n')
        comfile_h.write('j\n')
        comfile_h.write('j\n')
        OUTWEIGHTS = "no"
    elif METHOD == "rew":                # rewrite data
        comfile_h.write('j\n')
        comfile_h.write('j\n')
        comfile_h.write('j\n')
        comfile_h.write('j\n')
        comfile_h.write('j\n')
        OUTWEIGHTS = "no"
    else:
        OUTWEIGHTS = "no"

    # Number of replicates
    comfile_h.write('r\n')
    comfile_h.write(str(REPLICATES) + '\n')

    # Output format: interleaved (default), sequential
    if OUTFORMAT == 's':
        comfile_h.write('i\n')

    #accept current settings and do the analysis
    comfile_h.write('y\n')

    # Random seed, odd, of the form 4n + 1
    tempseed = ( ( ( int(RSEED) / 4 ) * 4 ) + 1 )
    #tempseed = phylip_random()
    comfile_h.write(str(tempseed) + '\n')
    comfile_h.close()

    #-------- Run seqboot, sending terminal output to /dev/null -----------
    comfile_h = open('SeqbootComfile', 'r')
    p = subprocess.Popen(['seqboot'], stdin=comfile_h, stdout=subprocess.PIPE)
    p.wait()
    comfile_h.close()
    if OUTWEIGHTS == "yes":
        # Debug statement.
        #p_testinfile = subprocess.call(["nedit", "outweights"])
        shutil.move('outweights', os.path.join(STARTDIR, OUTFILE))
    else:
        # Debug statement.
        #p_testinfile = subprocess.call(["nedit", "outfile"])
        shutil.move('outfile', os.path.join(STARTDIR, OUTFILE))

    os.chdir(STARTDIR)
    shutil.rmtree(TEMPDIR, True)



#----------------------------------------------------------------------------
def jumble(comfile, h_msgfile, NUMJUM):
    '''Jumble - When multiple datasets are analyzed, protpars automatically
    jumbles, and prompts for a random number seed for jumbling. Otherwise,
    jumbling must be explicitly set.'''
    # Random seeds, odd, of the form 4n + 1
    tempjseed = str(phylip_random())
    h_msgfile.write("JUMBLING SEQUENCE ORDER " + str(NUMJUM) + " ITERATIONS, SEED=" + tempjseed + '\n')
    comfile.write('j' + '\n')
    comfile.write(tempjseed + '\n')
    comfile.write(str(NUMJUM) + '\n')

#----------------------------------------------------------------------------
def stdresample(METHOD, PERCENT, REPLICATES, BLOCKSIZE, h_msgfile, DATATYPE, OUTFORMAT):
    '''Run SEQBOOT to generate resampled datasets. Output is weights, used by Phylip programs 
    to generate datasets on the fly. If you want SEQBOOT to generate actual datasets instead of 
    weights, use weightless_resample instead.'''
    Bseed = phylip_random()
    BseedStr = str(Bseed)
    RepStr = str(REPLICATES)
    # Choose resampling methodaz1
    if METHOD == "n":
        h_msgfile.write(" " + '\n')
        #shutil.copyfile("infile.temp", "infile")
    elif METHOD in ("b", "d"):
        if METHOD == "b":
            #differences from METHOD b
            h_msgfile.write("RESAMPLING: Bootstrap, " + RepStr + " REPLICATES, SEED=" + BseedStr + '\n')
            if int(BLOCKSIZE) > 1:
                h_msgfile.write('Resampling in blocks of ' + BLOCKSIZE + '\n')
        else:
            #differences from METHOD d
            h_msgfile.write("RESAMPLING: Delete-half Jacknifing, " + RepStr + " REPLICATES, SEED=" + BseedStr + '\n')
        # shared code
        if PERCENT < 100:
            h_msgfile.write('Partial Resampling: ' + PERCENT + 'percent of sites sampled' + '\n')
        shutil.copyfile("infile.temp", "infile")
        seqboot('infile.temp', DATATYPE, Bseed, METHOD, REPLICATES, PERCENT, BLOCKSIZE, 'yes', OUTFORMAT, 'weights')

    elif METHOD in ("ps", "po", "pw"):
        # print the appropriate header in the MSGFILE
        if METHOD == "ps":
            h_msgfile.write("RESAMPLING: Permute species for each character, " + RepStr + " REPLICATES, SEED=" + BseedStr + '\n')
        elif METHOD == "po":
            h_msgfile.write("RESAMPLING: Permute character order, " + RepStr + " REPLICATES, SEED=" + BseedStr + '\n')
        else:
            # pw
            h_msgfile.write("RESAMPLING: Permute within species, " + RepStr + " REPLICATES, SEED=" + BseedStr + '\n')
        # shared code
        seqboot('infile.temp', DATATYPE, Bseed, METHOD, REPLICATES, PERCENT, BLOCKSIZE, 'no', OUTFORMAT, 'infile')


#----------------------------------------------------------------------------
def weightless_resample(METHOD, PERCENT, REPLICATES, BLOCKSIZE, msgfile_h, DATATYPE, OUTFORMAT):
    ''' Generate terminal input for running seqboot to generate randomized sequence files, rather than weight files
    and run seqboot'''

    Bseed = phylip_random()
    BseedStr = str(Bseed)
    RepStr = str(REPLICATES)
    # Choose resampling method
    if METHOD == "n":
        msgfile_h.write(" " + '\n')
        #shutil.copyfile("infile.temp", "infile")
    elif METHOD in ("b", "d", "ps", "po", "pw"):
        if METHOD == "ps":
            msgfile_h.write("RESAMPLING: Permute species for each character, " + RepStr + " REPLICATES, SEED=" + BseedStr + '\n')
        elif METHOD == "po":
            msgfile_h.write("RESAMPLING: Permute character order, " + RepStr + " REPLICATES, SEED=" + BseedStr + '\n')
        elif METHOD == "pw":
            msgfile_h.write("RESAMPLING: Permute within species, " + RepStr + " REPLICATES, SEED=" + BseedStr + '\n')
        else:
            if METHOD == "b":
                msgfile_h.write("RESAMPLING: Bootstrap, " + RepStr + " REPLICATES, SEED=" + BseedStr + '\n')
            else:
                msgfile_h.write("RESAMPLING: Delete-half Jacknifing, " + RepStr + " REPLICATES, SEED=" + BseedStr + '\n')
            # shared code
            if PERCENT < 100:
                msgfile_h.write("Partial Resampling: " + PERCENT + "percent of sites sampled" + '\n')
                
        # shared code
        seqboot('infile.temp', DATATYPE, Bseed, METHOD, REPLICATES, PERCENT, BLOCKSIZE, 'no', OUTFORMAT, 'infile')
	
	
#----------------------------------------------------------------------------
def ufn(UFN, TEMPDIR, infile = True):
    '''Used by dnaml.py and protml.py. 
    Make sure that treefile begins with number of trees on first
    line of file. If first line in file has parentheses, the
    number must be added.'''
    if infile:
        infile_h = open(os.path.join(TEMPDIR, 'intree'), 'a')
    else:
        infile_h = sys.stdout

    ufn_h = open (UFN, 'r')

    ufn_lines = ufn_h.readlines()
    ufn_h.close()

    sc_count = 0
    if ufn_lines[0].find('(') > -1:
        for line in ufn_lines:
            if line.find(';') > -1:
                sc_count = sc_count + 1
        infile_h.write(str(sc_count) + "\n")

    for line in ufn_lines:
        infile_h.write(line)
    infile_h.close()
