#!/usr/bin/env python3
"""
Jan.  4, 2006, Dr. Brian Fristensky, University of Manitoba

 Description: Convert a file of trees to a GDE
 flat file. 

 Synopsis: tree2flat.py infile outfile

 Files: infile      file of trees in Phylip tree format

        outfile     GDE flat file, containing one or more
                    trees
 
@modified: May 26 2010
@author: Dale Hamel
@contact: umhameld@cc.umanitoba.ca
"""
import sys
import os
import re



# Directory named by the BIRCHPYLIB environment variable; it is added to the
# module search path before birchlib is imported below.
# os.environ.get() returns None when the variable is not set.
blib = os.environ.get("BIRCHPYLIB")
sys.path.append(blib)

# These imports must come after the sys.path change above.
from birchlib import Birchmod
from birchlib import Argument

# Program name and usage message handed to Birchmod.
PROGRAM = "tree2flat.py "
USAGE = "\n\tUSAGE: tree2flat.py infile outfile"
# Shared Birchmod object; the rest of this file calls its printusage(),
# fileError(), exit_success() and documentor() methods.
BM = Birchmod(PROGRAM, USAGE)

class Parameters:
    """
    Command line parameters of tree2flat.py.

    Attributes:
        IFN: value fetched from the Argument at position 1 (input file)
        OFN: value fetched from the Argument at position 2 (output file)
    """

    def __init__(self):
        """
        Initializes arguments:
            IFN=""
            OFN=""
        Then calls read_args() to fill in their values from command line
        """
        self.IFN = ''
        self.OFN = ''
        self.read_args()

    def read_args(self):
        """
        Read command line arguments into a Parameters object.

        If fetching either argument raises an exception, BM.printusage()
        is called.
        """
        infile = Argument("", str, BM)
        outfile = Argument("", str, BM)

        infile.set_position(1)
        outfile.set_position(2)

        try:
            self.IFN = infile.fetch()
            self.OFN = outfile.fetch()
        except Exception:
            # A bare "except:" would also trap SystemExit and
            # KeyboardInterrupt; those are now allowed to propagate.
            BM.printusage()

def GETTREES(FN):
    """
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Read a file containing one or more trees and return them as a list.

    A tree is everything up to a terminating semicolon (;) and may be
    spread over several lines. Every line is stripped of leading and
    trailing whitespace (including the newline), the stripped lines are
    concatenated into one long string, and that string is split at the
    semicolons.

    Parameters:
        FN: name of the file of trees

    Returns:
        A list of strings, one for each semicolon-delimited piece of the
        file. The semicolons themselves are removed, so they have to be
        added back when the trees are written. Text ending in a semicolon
        leaves an empty string as the last item, and an empty file gives
        [''], so empty items must be skipped when the list is used.
        If the file cannot be opened, BM.fileError(FN) is called and, should
        that call return, an empty list is returned.
    """
    TREES = []
    try:
        FILE = open(FN, 'r')
    except (OSError, ValueError):
        # OSError: missing or unreadable file; ValueError: malformed name.
        BM.fileError(FN)
        # Do not fall through to reading from a file that was never opened.
        return TREES

    # Read the entire file into a single, very long line. The with
    # statement closes the file even if reading fails part-way through.
    with FILE:
        BIGLINE = "".join(LINE.strip() for LINE in FILE)

    # A pattern such as \(.*\); cannot be used to pick out the trees:
    # .* is greedy, so on the single joined line it runs from the first
    # parenthesis to the last semicolon and returns all trees as one match.
    #
    # Splitting loses the ;, so we have to add it back when
    # writing to the output file
    TREES = BIGLINE.split(';')
    return TREES

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

def WRITETREES(OUTFILE, IFN, TREES):
    """
    For each tree in TREES, write an entry in the form
     "name
     tree;
    where the name is derived from the input file name, followed by a
    number, and tree is the tree on a single line.

    Parameters:
        OUTFILE: object with a write() method that receives the output
        IFN:     input file name, used to build the tree names
        TREES:   list of tree strings without the terminating semicolon

    Items of TREES that are empty or contain only whitespace are skipped
    and do not use up a number; numbering starts at 1.
    """

    # Create a base name for the trees, using IFN.
    # Truncate the file extension, if any, and truncate
    # to 8 characters, to leave some room so that we
    # can add a number to the name for each tree.
    # os.path.splitext only accepts a dot in the last path component as the
    # start of an extension, and ignores a leading dot, so names such as
    # "./trees" or "dir.d/trees" are no longer cut at the wrong dot.
    # NOTE(review): any directory part of IFN is still kept in the name,
    # as it was before.
    TREENAME = os.path.splitext(IFN)[0][:8]

    # Splitting at ; can leave an empty item at the end of the list
    # of trees. It is therefore necessary to filter out empty items.
    STRIPPED = (T.strip() for T in TREES)
    NONEMPTY = (T for T in STRIPPED if T)

    # Write the trees
    for J, OUTTREE in enumerate(NONEMPTY, start=1):
        OUTFILE.write('"' + TREENAME + str(J) + '\n')
        OUTFILE.write(OUTTREE + ';\n')

    
#======================== MAIN PROCEDURE ==========================

def main():
    """
    Called when not in documentation mode.

    Reads the input and output file names from the command line, writes
    the trees found in the input file to the output file, and finishes
    by calling BM.exit_success().
    """
    P = Parameters()

    # The output file is opened, and so created or emptied, before the
    # input file is checked. The with statement closes it on every path,
    # including a missing input file or an error while converting.
    with open(P.OFN, 'w') as OUTFILE:

        # Read in the list of trees
        if os.path.exists(P.IFN):
            TREES = GETTREES(P.IFN)

            # Write each tree as a name line followed by the tree
            # on a single line
            WRITETREES(OUTFILE, P.IFN, TREES)

    BM.exit_success()


# Run the conversion unless BM.documentor() returns a true value or
# "-test" is present among the command line arguments.
if not (BM.documentor() or "-test" in sys.argv):
    main()


