#!/usr/bin/env python3

import os
import os.path
import re
import sys
import subprocess

#Version   Jan. 10, 2020
# Run NUMSEQ as a command
#Synopsis: numseq.py infile outfile gcfile start finish nuccase startno group gpl which strands kind numbers nucs peptides frames form
#
#NUMSEQ is an interactive program, so we first generate a file containing the
#keystrokes the user would normally type, and then run the program using that file.

""" ensure that there are enough command line arguments to parse """
if len(sys.argv) < 18:
    print("Usage: numseq.py  INFILE  OUTFILE  GCFILE  START  FINISH  NUCCASE  STARTNO")
    print("         GROUP  GPL  WHICH  STRANDS  KIND  NUMBERS  NUCS  PEPTIDES  FRAMES  FORM")
    sys.exit();

#Convert arguments to variables
INFILE   = sys.argv[1]
OUTFILE  = sys.argv[2]
GCFILE   = sys.argv[3]
START    = int(sys.argv[4])
FINISH   = int(sys.argv[5])
NUCCASE  = sys.argv[6]
STARTNO  = int(sys.argv[7])
GROUP    = int(sys.argv[8])
GPL      = int(sys.argv[9])
WHICH    = sys.argv[10]
STRANDS  = sys.argv[11]
KIND     = sys.argv[12]
NUMBERS  = sys.argv[13]
NUCS     = sys.argv[14]
PEPTIDES = sys.argv[15]
FRAMES   = sys.argv[16]
FORM     = sys.argv[17]

CFN = INFILE + '.' + str(os.getpid())

# Abort if INFILE does not exist or is of zero length
if os.path.exists(INFILE) and os.path.getsize(INFILE) > 1 :

    # Get the sequence length from line 1 of a GenBank file
    file = open(INFILE, 'r')
    firstline = file.readline()
    if (re.search('^LOCUS', firstline)):
        tokens = re.split('[ ]+', firstline)
        SEQLENGTH = int(tokens[2])
    file.close() 

    # a bit of sanity checking
    if (START > SEQLENGTH):
        START = SEQLENGTH
    if (FINISH > SEQLENGTH):
        FINISH = SEQLENGTH

    # ------------------- Generate a file of commands to be read by NUMSEQ ----------
    comfile = open(CFN, 'w')

    #initial filenames
    comfile.write(INFILE + '\n')  #input filename
    comfile.write('g\n') #GenBank format

    if (os.path.exists(OUTFILE)):
        comfile.write(OUTFILE + '\n')
        comfile.write('Y' + '\n') #outfile
    else:
        comfile.write(OUTFILE + '\n') #outfile
        # HACK!! For reasons unclear, in Fedora 31 the C access function seems to
        # return true when checking whether a file exists, even if it doesn't exist.
        # This means tha NUMSEQ will write a prompt asking if the user wants to overwrite
        # an existing file. As a workaround, we need to include an answer of Y to
        # respond to that prompt.
        comfile.write('Y' + '\n') #outfile        
    comfile.write('3\n' + GCFILE + '\n') #Genetic Code file

    # Set parameters
    comfile.write('4\n')                     #Choose parameter menu
    comfile.write('1\n' + str(START) + '\n')      #choose start
   
    comfile.write('2\n' + str(FINISH) + '\n')     #choose finish

    comfile.write('3\n' + str(NUCCASE) + '\n')    #choose nuccase
   
    # STARTNO = 0 means do not set STARTNO (default = START)
    if STARTNO != 0:
        comfile.write('4\n' + str(STARTNO) + '\n')    #choose startno

    #if PEPTIDES = Yes, round GROUP to nearest multiple of three.
    if PEPTIDES == 'Y':
        remainder = int(GROUP) % 3
        GROUP = int(GROUP) - remainder
      
    comfile.write('5\n' + str(GROUP) + '\n')    #choose group
    comfile.write('6\n' + str(GPL) + '\n')            #choose gpl
    comfile.write('7\n' + str(WHICH) + '\n')          #choose which
    comfile.write('8\n' + str(STRANDS) + '\n')        #choose strands
    comfile.write('9\n' + str(KIND) + '\n')           #choose kind
    comfile.write('10\n' + str(NUMBERS) + '\n')       #choose numbers
    comfile.write('11\n' + str(NUCS) + '\n')          #choose nucs
    comfile.write('12\n' + str(PEPTIDES) + '\n')      #choose peptides
    comfile.write('13\n' + str(FRAMES) + '\n')        #choose frames
    comfile.write('14\n' + str(FORM) + '\n')          #choose form

    # exit from NUMSEQ
    comfile.write('0\n')                   #exit parameter menu
    comfile.write('6\n')                         #Print numbered sequence
    comfile.write('\n')                          #dummy prompt line
    comfile.write('0\n')                         #exit program
    comfile.close()

    #---------------- Run NUMSEQ using the command file as input --------------------   
    comfile=open(CFN,'r')

    # Use giganumseq for anything larger than numseq can handle.
    if (START > 750000) or (FINISH > 750000 ) :
        PROGRAM='giganumseq'
    else :
        PROGRAM='numseq'

    p = subprocess.Popen([PROGRAM], stdin=comfile)
    p.wait()
    comfile.close()
    os.remove(CFN)
else:
    print("Error: Cannot open file!")

