#!/usr/bin/env python

"""
 rebasecnv.py - Create subsets of REBASE based on types of cutting sites.

 Synopsis:
    rebasecnv.py --infile infile  --outfile outfile [-outfmt name|number ] [--prototypes] [--commercial] [--ends 5|3|b] [--symmetric a|s|b]

@modified: January 4, 2017
@author: Brian Fristensky
@contact: frist@cc.umanitoba.ca
"""

import os
import subprocess
import sys

#optparse is deprecated in favor of argparse as of Python 2.7. However,
# since 2.7 is not always present on many systems, at this writing,
# it is safer to stick with optparse for now. It should be easy
# to change later, since the syntax is very similar between argparse and optparse.
from optparse import OptionParser


blib = os.environ.get("BIRCHPYLIB")
sys.path.append(blib)

from birchlib import Birchmod

PROGRAM = "rebasecnv.py : "
USAGE = "\n\tUSAGE: rebasecnv.py --infile infile  --outfile outfile   [-outfmt name|number ] [--prototypes] [--commercial] [--ends 5|3|b] [--symmetry a|s]"


BM = Birchmod(PROGRAM, USAGE)
DEBUG = True

class Parameters:
    """
      	Wrapper class for command line parameters
      	
      	"""
    def __init__(self):
        """
     	  Initializes arguments:
     		IFN=""
     		OFN=""
                OUTFMT="bairoch"
     		PROTOTYPES=False
     		COMMERCIAL=False
                ENDS=["5","3","b"]
                SYMMETRY=["a","s"]
     		PID = str(os.getpid())
     	  Then calls read_args() to fill in their values from command line
          """
        self.IFN = ""
        self.OFN = ""
        self.OUTFMT="bairoch"
        self.PROTOTYPES=False
     	self.COMMERCIAL=False
        self.ENDS=["5","3","b"]
        self.SYMMETRY=["a","s"]
        self.PID = str(os.getpid())
        self.read_args()


    def read_args(self):
        """
        	Read command line arguments into a Parameter object

        	"""
        parser = OptionParser()
        parser.add_option("--infile", dest="infile", action="store", type="string", default="",
                          help="input file in Bairoch format")
        parser.add_option("--outfile", dest="outfile", action="store", type="string", default="",
                          help="output file")
        parser.add_option("--outfmt", dest="outfmt", action="store", type="string", default="bairoch",
                          help="name or number of output format")
        parser.add_option("--prototypes", dest="prototypes", action="store_true", default=False,
                          help="Write prototype enzymes only to output")
        parser.add_option("--commercial", dest="commercial", action="store_true", default=False,
                          help="Write commercially available enzymes only to output")
        parser.add_option("--ends", dest="ends", action="store", type="string", default="53b",
                          help="Type of ends")
        parser.add_option("--symmetry", dest="symmetry", action="store", type="string", default="as",
                          help="asymmetric or symmetric sites")

        (options, args) = parser.parse_args() 
        self.IFN = options.infile 
        self.OFN = options.outfile
        self.OUTFMT = options.outfmt
        self.PROTOTYPES = options.prototypes
        self.COMMERCIAL = options.commercial
        self.ENDS=list(options.ends) #break up string into a list of characters
        self.SYMMETRY=list(options.symmetry) #break up string into a list of characters

        if DEBUG :
            print('------------ Parameters from command line ------' + '\n')
            print('    IFN: ' + self.IFN)
            print('    OFN: ' + self.OFN)
            print('    OUTFMT: ' + self.OUTFMT)
            print('    PROTOTYPES: ' + str(self.PROTOTYPES))
            print('    COMMERCIAL: ' + str(self.COMMERCIAL))   
            print('    ENDS: ' + str(self.ENDS))
            print('    SYMMETRY: ' + str(self.SYMMETRY)) 

class Enzyme :
 
    def __init__(self):
        """
        Initialize enzyme
          """
        self.LINES = []
        self.ID = ""
        self.PROTOTYPE = False
     	self.COMMERCIAL = False
        self.ENDS=""
        self.SYMMETRY=""

    
    def readenz(self,INFILE,LINE,OUTFILE) :
        """
        Read the next enzyme from the file.
        """
        while (LINE != "") and (not LINE.startswith('ID')) :
            #OUTFILE.write(LINE)
            LINE=INFILE.readline()
        while not LINE.startswith('//') :
           self.LINES.append(LINE)
           LINE = INFILE.readline()

    def getID(self,ELINE) :
        """
        Get the ID from ELINE.
        """
        return ELINE[5:].strip()

    def getCOMMERCIAL(self,ELINE) :
        """
        Get COMMERCIAL from ELINE.
        """
        COM=ELINE[5:].strip()
        if COM == "." :
            return False
        else :
            return True

    def getPROTO(self,ELINE) :
        """
        Get PROTOTYPE from ELINE.
        """
        PID = ELINE[5:].strip()
        if PID == self.ID :
            return True

    def writeenz(self,OUTFILE) :
        """
        Write enzyme to output file.
        """
        OUTFILE.write('\n')
        for LINE in self.LINES :
             OUTFILE.write(LINE)
        OUTFILE.write('//' + '\n')

           
#======================== MAIN PROCEDURE ==========================
def main():
    """
        Called when not in documentation mode.
        """
	
    P = Parameters ()

    
    INFILE=open(P.IFN,'r')
    OUTFILE=open(P.OFN,'w')

    #Copy header comment lines from infile to outfile
    LINE = INFILE.readline()
    while LINE.startswith('CC') :
        OUTFILE.write(LINE)
        LINE = INFILE.readline()
    
    # Read an enzyme at a time. For each enzyme, find out whether it
    # meets criteria for prototypes, commercial, symmetry and ends. 
    # If so, print the enzyme to the output.
    E = Enzyme ()
    while LINE != "" :
        E.readenz(INFILE,LINE,OUTFILE)
        #OUTFILE.write(LINE)

        # Get criteria information from the enzyme
        OKAY = True
        for ELINE in E.LINES :
            FIELD=ELINE[0:2]
            if FIELD == 'ID' :
                E.ID = E.getID(ELINE)
            elif FIELD == 'PT' :  
                E.PROTOTYPE = E.getPROTO(ELINE)
            elif FIELD == 'CR' :
                E.COMMERCIAL = E.getCOMMERCIAL(ELINE)

        # Set OKAY if criteria from enzyme E match criteria from command line parameters P       
        if P.PROTOTYPES and not E.PROTOTYPE :
            OKAY = False  
        if P.COMMERCIAL and not E.COMMERCIAL :
            OKAY = False 

        # If enzyme meets all criteria, print it to output
        if OKAY :
            if DEBUG: 
                print(E.ID)
            E.writeenz(OUTFILE)        

        # Get the next enzyme
        LINE = INFILE.readline()
        E.__init__()

    

    INFILE.close()
    OUTFILE.close()

    BM.exit_success()

if (BM.documentor() or "-test" in sys.argv):
    pass
else:
    main()
