#!/usr/bin/env python

import os
import os.path
import subprocess
import sys

""" ensure that there are enough command line arguments to parse """
# Two positional arguments (INFILE and OUTFILE) are required.
# sys.exit() is used instead of the interactive-only exit() helper, which
# is injected by the site module and may be missing (e.g. python -S).
# Passing the message to sys.exit() prints it to stderr and terminates
# with a non-zero status, so callers can detect the usage error.
if len(sys.argv) < 3:
    sys.exit("Usage: free2fasta.py  INFILE  OUTFILE")

#Version 10/28/09
# Convert free format file to FASTA format
# to be read by GDE.
#Synopsis: free2fasta.py infile outfile
#Convert arguments to variables
INFILE  = sys.argv[1]
OUTFILE = sys.argv[2]

# Abort if INFILE does not exist or is of zero length
if not (os.path.isfile(INFILE) and os.path.getsize(INFILE) > 0):
    sys.exit("free2fasta.py: " + INFILE + " does not exist or is empty")

# The process id makes the temporary file names unique per run.
PID = str(os.getpid())
RAWFILE = PID + '.raw'   # output of funnel
WRPFILE = PID + '.wrp'   # only exists if a readseq step has used it

try:
    # Run funnel to delete non-sequence characters. funnel takes its
    # parameters from stdin, one per line:
    #   input filename, output filename, number of nt per line.
    # universal_newlines=True opens the pipe in text mode so that str
    # can be sent on Python 3 as well as Python 2; communicate() sends
    # the answers, closes stdin and waits for funnel to finish.
    p = subprocess.Popen(['funnel'], stdin=subprocess.PIPE,
                         universal_newlines=True)
    p.communicate(INFILE + '\n' + RAWFILE + '\n' + '50\n')

    # INFILE could be a fully qualified path, so we don't want to use
    # that as the sequence name. Get rid of the path and the file
    # extension, if any, and just keep the bare file name.
    SEQNAME = os.path.splitext(os.path.basename(INFILE))[0]

    # Create the Fasta format file. The with-statements guarantee that
    # both files are flushed and closed, even if an error occurs.
    with open(OUTFILE, 'w') as h_fastaout:
        h_fastaout.write('>' + SEQNAME + '\n')

        # Copy any non-comment text to the end of the fasta file.
        # Everything from the first ';' to the end of a line is a comment.
        with open(RAWFILE, 'r') as h_raw:
            for line in h_raw:
                seq, semicolon, _comment = line.partition(';')
                if semicolon:
                    # Cutting at ';' also cut off the line terminator.
                    # Put it back when sequence text remains, so this
                    # line is not glued onto the next one; a line that
                    # was only a comment is dropped entirely.
                    seq = seq.rstrip()
                    if seq:
                        seq += '\n'
                h_fastaout.write(seq)

    # NOTE: conversion to pseudo GenBank format is currently disabled.
    # It used to be done with:
    #subprocess.call(['readseq', '-a', '-fGenBank', '-o=' + OUTFILE, PID + '.wrp'])
finally:
    # Delete temporary files. Only remove those that actually exist:
    # the .wrp file is not created while the readseq step is disabled,
    # and the .raw file may be missing if funnel failed.
    for tmpfile in (RAWFILE, WRPFILE):
        if os.path.exists(tmpfile):
            os.remove(tmpfile)
