#!/usr/bin/env python3
# uncompyle6 version 3.7.1
# Python bytecode 2.6 (62161)
# Decompiled from: Python 3.6.9 (default, Apr 18 2020, 01:56:04) 
# [GCC 8.4.0]
# Embedded file name: ./phylcnv.py
# Compiled at: 2020-06-20 13:24:18
"""
Dr. Brian Fristensky, University of Manitoba

 Description: Convert Phylip file or fasta file into other file formats

 Synopsis: phylcnv.py [-inf format] [-outf format] [-inv] [infile] [outfile]

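 -inf    input format (default: pint)
         pint - Phylip interleaved
         pseq - Phylip sequential
         fasta - Fasta file
         csv - comma-separated value file of molecular markers
         tsv - tab-separated value file of molecular markers

 -outf   output format (default: tsv)
         pint - Phylip interleaved
         pseq - Phylip sequential
         fasta - Fasta file
         csv - comma-separated value file of molecular markers
         tsv - tab-separated value file of molecular markers
         flatdna - flat file, names flagged with '#'
         flatpro - flat file, names flagged with '%'
         flattext - flat file, names flagged with '"'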

@modified: Jun. 20, 2020
@author: Brian Fristensky
@contact: frist@cc.umanitoba.ca
"""
import os.path, sys
# Directory named by the BIRCHPYLIB environment variable; it is added to the
# import path so that the birchlib imports below can be resolved.
# NOTE(review): when BIRCHPYLIB is unset, blib is None and None is appended
# to sys.path.
blib = os.environ.get('BIRCHPYLIB')
sys.path.append(blib)
from birchlib import Birchmod
from birchlib import Argument
# Program-name prefix and usage text handed to Birchmod.
PROGRAM = 'phylcnv.py: '
USAGE = '\n\t USAGE: phylcnv.py [options] infile outfile'
# Shared Birchmod instance. This file uses it for command-line look-ups
# (arg_given), usage and file-error reporting (printusage, file_error),
# documentation mode (documentor) and the final exit (exit_success).
BM = Birchmod(PROGRAM, USAGE)

class Options:
    """
    Settings for one run of phylcnv.py.

    Attributes:
        Ifn:       input file name
        Ofn:       output file name
        InFormat:  input format keyword (default 'pint')
        OutFormat: output format keyword (default 'tsv')
        Invert:    result of BM.arg_given('-inv')
    """

    def __init__(self):
        """
        Set every option to its default, then call read_args() so that
        values given on the command line replace the defaults.
        """
        self.Ifn, self.Ofn = '', ''
        self.InFormat, self.OutFormat = 'pint', 'tsv'
        self.Invert = False
        self.read_args()

    def read_args(self):
        """
        Declare the command-line arguments as Argument objects and copy
        the values that were supplied into this object.

        BM.printusage() is called when fetching a value raises ValueError.
        """
        # Optional keyword arguments that carry a value.
        self.AInf = Argument('-inf', str, BM)
        self.AInf.set_optional()
        self.AOutf = Argument('-outf', str, BM)
        self.AOutf.set_optional()

        # -inv is declared as an optional switch.
        self.AInvert = Argument('-inv', str, BM)
        self.AInvert.set_is_switch()
        self.AInvert.set_optional()

        # Unnamed arguments at positions -2 and -1
        # (presumably the last two command-line tokens -- confirm in birchlib).
        in_arg = Argument('', str, BM)
        in_arg.set_position(-2)
        out_arg = Argument('', str, BM)
        out_arg.set_position(-1)

        try:
            if BM.arg_given('-inf'):
                self.InFormat = self.AInf.fetch()
            if BM.arg_given('-outf'):
                self.OutFormat = self.AOutf.fetch()
            self.Invert = BM.arg_given('-inv')
            self.Ifn = in_arg.fetch()
            self.Ofn = out_arg.fetch()
        except ValueError:
            BM.printusage()


class Sequence:
    """One record: a name and the sequence (or marker string) that goes with it."""

    def __init__(self):
        """Create an empty record; Name and Seq both start as ''."""
        self.Name = self.Seq = ''


class SeqData:
    """
    Holds the sequence records read from a file together with the counts
    that describe them.

    Attributes:
        SeqLst: list of record objects, each with Name and Seq strings
        NumSeq: number of sequences
        SeqLen: sequence length
        NumEnz: optional third value of a Phylip header line; 0 when absent
                (presumably the number of enzymes -- confirm)
    """

    # Symbols exchanged by InvertSeq(); every other character is kept.
    _INVERT = {'0': '1', '1': '0', '+': '-', '-': '+'}

    def __init__(self):
        """Start with no sequences and all counts at zero."""
        self.SeqLst = []
        self.NumSeq = 0
        self.SeqLen = 0
        self.NumEnz = 0

    def AllSeqsSameLength(self):
        """
        Return True when every sequence in SeqLst has the same length.

        An empty SeqLst also yields True instead of raising IndexError.
        """
        return len(set(len(entry.Seq) for entry in self.SeqLst)) <= 1

    def InvertSeq(self):
        """
        Invert the marker scores of every sequence in place:
        '0' <-> '1' and '+' <-> '-'.

        All sequences are processed, including the last one, and each
        character is mapped exactly once so that '+' and '-' really are
        exchanged rather than both collapsing to '-'.
        """
        swap = self._INVERT
        for entry in self.SeqLst:
            entry.Seq = ''.join([swap.get(ch, ch) for ch in entry.Seq])


def ReadPhylipInterleaved(O, S):
    """
    Read a Phylip interleaved file named by O.Ifn into S.

    The first line holds the number of sequences, the sequence length and,
    optionally, a third integer stored in S.NumEnz. In the first group of
    lines each line starts with a 10-column name field followed by the
    first part of that sequence; lines of later groups hold sequence data
    only and are appended to the sequences in the same order.

    Blank lines (such as those separating groups) are skipped rather than
    being treated as the end of the file.

    Parameters:
        O: options object; only O.Ifn (input file name) is read
        S: SeqData-like object; NumSeq, SeqLen, NumEnz and SeqLst are filled

    If the file cannot be opened, BM.file_error(O.Ifn) is called and
    nothing is read.
    """
    NameWidth = 10  # width of the fixed name field at the start of a line

    try:
        in_file = open(O.Ifn, 'r')
    except (IOError, OSError):
        BM.file_error(O.Ifn)
        # Presumably file_error() does not return; the guard keeps the
        # code below from touching an unopened file in case it does.
        return

    with in_file:
        values = in_file.readline().split()
        S.NumSeq = int(values[0])
        S.SeqLen = int(values[1])
        if len(values) > 2:
            S.NumEnz = int(values[2])
        for _ in range(S.NumSeq):
            S.SeqLst.append(Sequence())

        j = 0
        FirstGroup = True
        for line in in_file:
            if not line.strip():
                continue
            entry = S.SeqLst[j]
            if FirstGroup:
                # Name occupies columns 0-9, sequence data starts at 10.
                entry.Name = line[:NameWidth].strip()
                entry.Seq = line[NameWidth:].strip()
            else:
                entry.Seq = entry.Seq + line.strip()
            j += 1
            if j == S.NumSeq:
                # One full group read; later lines carry no names.
                FirstGroup = False
                j = 0


def ReadPhylipSequential(O, S):
    """
    Read a Phylip sequential file named by O.Ifn into S.

    The first line holds the number of sequences, the sequence length and,
    optionally, a third integer stored in S.NumEnz. Each sequence then
    consists of a line holding its name, followed by as many lines of
    sequence data as are needed to reach the declared length.

    Parameters:
        O: options object; only O.Ifn (input file name) is read
        S: SeqData-like object; NumSeq, SeqLen, NumEnz and SeqLst are filled

    If the file cannot be opened, BM.file_error(O.Ifn) is called and
    nothing is read.
    """
    try:
        in_file = open(O.Ifn, 'r')
    except (IOError, OSError):
        BM.file_error(O.Ifn)
        # Presumably file_error() does not return; the guard keeps the
        # code below from touching an unopened file in case it does.
        return

    # The with-block closes the file even when the header fails to parse.
    with in_file:
        values = in_file.readline().split()
        S.NumSeq = int(values[0])
        S.SeqLen = int(values[1])
        if len(values) > 2:
            S.NumEnz = int(values[2])
        for _ in range(S.NumSeq):
            S.SeqLst.append(Sequence())

        line = in_file.readline()
        for j in range(S.NumSeq):
            entry = S.SeqLst[j]
            entry.Name = line.strip()
            line = in_file.readline()

            # Gather data lines until the declared length is reached or
            # the file ends; the line that stops the loop is the next name.
            chunks = []
            SeqRead = 0
            while line != '' and SeqRead < S.SeqLen:
                piece = line.strip()
                chunks.append(piece)
                SeqRead += len(piece)
                line = in_file.readline()
            entry.Seq = entry.Seq + ''.join(chunks)


def ReadFasta(O, S):
    """
    Read sequences from the fasta file named by O.Ifn into S.

    Expected layout:

    >name
    sequence
    sequence
    sequence...

    Every line starting with '>' opens a new record whose name is the rest
    of that line; the following lines, stripped of surrounding whitespace,
    are joined to form its sequence. Lines that appear before the first
    '>' line are ignored, so such a file no longer stalls the reader.

    Parameters:
        O: options object; only O.Ifn (input file name) is read
        S: SeqData-like object; records are appended to S.SeqLst, S.NumSeq
           is set to the number of records read and S.SeqLen to the length
           of the last sequence read (left unchanged if there is none)

    If the file cannot be opened, BM.file_error(O.Ifn) is called and
    nothing is read.
    """
    try:
        in_file = open(O.Ifn, 'r')
    except (IOError, OSError):
        BM.file_error(O.Ifn)
        # Presumably file_error() does not return; the guard keeps the
        # code below from touching an unopened file in case it does.
        return

    count = 0
    record = None
    chunks = []
    with in_file:
        for line in in_file:
            if line.startswith('>'):
                if record is not None:
                    record.Seq = record.Seq + ''.join(chunks)
                record = Sequence()
                record.Name = line[1:].strip()
                S.SeqLst.append(record)
                chunks = []
                count += 1
            elif record is not None:
                chunks.append(line.strip())

    # Finish the record that was still open at end of file.
    if record is not None:
        record.Seq = record.Seq + ''.join(chunks)
        S.SeqLen = len(record.Seq)
    S.NumSeq = count


def ReadCSV(O, S, Sep):
    """
    Read the comma- or tab-separated value file named by O.Ifn into S.

    Each non-blank line becomes one record: double quotes are removed, the
    text before the first separator is the name and the remaining fields,
    with the separators removed, are concatenated into the sequence.
    Blank lines are skipped so that, for example, a trailing empty line
    does not create an empty record.

    Parameters:
        O:   options object; only O.Ifn (input file name) is read
        S:   SeqData-like object; records are appended to S.SeqLst,
             S.NumSeq is set to the number of records read and S.SeqLen to
             the length of the last sequence read
        Sep: field separator, e.g. ',' or a tab character

    If the file cannot be opened, BM.file_error(O.Ifn) is called and
    nothing is read.
    """
    try:
        in_file = open(O.Ifn, 'r')
    except (IOError, OSError):
        BM.file_error(O.Ifn)
        # Presumably file_error() does not return; the guard keeps the
        # code below from touching an unopened file in case it does.
        return

    count = 0
    with in_file:
        for line in in_file:
            if not line.strip():
                continue
            name, _, fields = line.replace('"', '').partition(Sep)
            record = Sequence()
            # A line without any separator would otherwise keep its
            # line ending as part of the name.
            record.Name = name.rstrip('\r\n')
            record.Seq = fields.replace(Sep, '').strip()
            S.SeqLst.append(record)
            S.SeqLen = len(record.Seq)
            count += 1
    S.NumSeq = count


def writecsvfile(O, S, Sep):
    """
    Write the data to O.Ofn as separated values, one line per sequence:
    the name first, then every character of the sequence as its own field.

    Parameters:
        O:   options object; only O.Ofn (output file name) is read
        S:   SeqData-like object; every record in S.SeqLst is written
        Sep: field separator, e.g. ',' or a tab character

    If the file cannot be opened, BM.file_error(O.Ofn) is called and
    nothing is written. The file is closed before returning.
    """
    try:
        outfile = open(O.Ofn, 'w')
    except (IOError, OSError):
        BM.file_error(O.Ofn)
        # Presumably file_error() does not return; the guard keeps the
        # code below from touching an unopened file in case it does.
        return

    with outfile:
        for entry in S.SeqLst:
            outfile.write(Sep.join([entry.Name] + list(entry.Seq)) + '\n')


def writePhylipInterleaved(O, S):
    """
    Write the data to O.Ofn in Phylip interleaved format.

    The header line holds S.NumSeq and S.SeqLen, followed by S.NumEnz when
    that is non-zero. Sequences are written in groups of lines, 50
    characters of every sequence per group. In the first group each line
    starts with the name, cut at its first blank and fitted to exactly 10
    columns (padded, or truncated when longer, so that the sequence data
    always starts in the same column).

    Parameters:
        O: options object; only O.Ofn (output file name) is read
        S: SeqData-like object supplying NumSeq, SeqLen, NumEnz and SeqLst

    If the file cannot be opened, BM.file_error(O.Ofn) is called and
    nothing is written. The file is closed before returning.
    """
    NameWidth = 10
    LineLen = 50

    try:
        outfile = open(O.Ofn, 'w')
    except (IOError, OSError):
        BM.file_error(O.Ofn)
        # Presumably file_error() does not return; the guard keeps the
        # code below from touching an unopened file in case it does.
        return

    with outfile:
        header = str(S.NumSeq) + ' ' + str(S.SeqLen)
        if S.NumEnz != 0:
            header += ' ' + str(S.NumEnz)
        outfile.write(header + '\n')

        for Start in range(0, S.SeqLen, LineLen):
            Finish = min(Start + LineLen, S.SeqLen)
            for entry in S.SeqLst[:S.NumSeq]:
                if Start == 0:
                    # Only the first group carries the names.
                    outname = entry.Name.split(' ', 1)[0]
                    outfile.write(outname[:NameWidth].ljust(NameWidth, ' '))
                outfile.write(entry.Seq[Start:Finish] + '\n')


def writePhylipSequential(O, S):
    """
    Write the data to O.Ofn in Phylip sequential format.

    The header line holds S.NumSeq and S.SeqLen, followed by S.NumEnz when
    that is non-zero. Each sequence is then written as a line holding its
    name (cut at the first blank and padded to 10 columns) followed by the
    sequence in lines of 50 characters.

    Parameters:
        O: options object; only O.Ofn (output file name) is read
        S: SeqData-like object supplying NumSeq, SeqLen, NumEnz and SeqLst

    If the file cannot be opened, BM.file_error(O.Ofn) is called and
    nothing is written. The file is closed before returning.
    """
    LineLen = 50

    try:
        outfile = open(O.Ofn, 'w')
    except (IOError, OSError):
        BM.file_error(O.Ofn)
        # Presumably file_error() does not return; the guard keeps the
        # code below from touching an unopened file in case it does.
        return

    with outfile:
        header = str(S.NumSeq) + ' ' + str(S.SeqLen)
        if S.NumEnz != 0:
            header += ' ' + str(S.NumEnz)
        outfile.write(header + '\n')

        for entry in S.SeqLst[:S.NumSeq]:
            outname = entry.Name.split(' ', 1)[0]
            outfile.write(outname.ljust(10, ' ') + '\n')
            for Start in range(0, S.SeqLen, LineLen):
                Finish = min(Start + LineLen, S.SeqLen)
                outfile.write(entry.Seq[Start:Finish] + '\n')


def writeFlat(O, S, FlagChar):
    """
    Write the data to O.Ofn as a flat file: for every sequence a line
    holding FlagChar followed by the name, then the sequence in lines of
    50 characters.

    FlagChar selects the flavour:
        fasta    - FlagChar = '>'
        flatdna  - FlagChar = '#'
        flatpro  - FlagChar = '%'
        flattext - FlagChar = '"'

    Parameters:
        O:        options object; only O.Ofn (output file name) is read
        S:        SeqData-like object supplying NumSeq, SeqLen and SeqLst
        FlagChar: string written in front of each name

    If the file cannot be opened, BM.file_error(O.Ofn) is called and
    nothing is written. The file is closed before returning.
    """
    LineLen = 50

    try:
        outfile = open(O.Ofn, 'w')
    except (IOError, OSError):
        BM.file_error(O.Ofn)
        # Presumably file_error() does not return; the guard keeps the
        # code below from touching an unopened file in case it does.
        return

    with outfile:
        for entry in S.SeqLst[:S.NumSeq]:
            outfile.write(FlagChar + entry.Name + '\n')
            for Start in range(0, S.SeqLen, LineLen):
                Finish = min(Start + LineLen, S.SeqLen)
                outfile.write(entry.Seq[Start:Finish] + '\n')


def main():
    """
    Convert the input file to the requested output format.

    Reads the options, opens the output file, and -- provided the input
    file exists -- reads it with the reader chosen by O.InFormat, checks
    that all sequences have the same length, optionally inverts them and
    writes them with the writer chosen by O.OutFormat. A message is
    printed for an unsupported format or for sequences of unequal length.
    Ends by calling BM.exit_success().
    """
    O = Options()
    outfile = open(O.Ofn, 'w')

    if os.path.exists(O.Ifn):
        S = SeqData()

        # Format keyword -> action; nothing runs until an entry is called.
        readers = {
            'pint': lambda: ReadPhylipInterleaved(O, S),
            'pseq': lambda: ReadPhylipSequential(O, S),
            'fasta': lambda: ReadFasta(O, S),
            'csv': lambda: ReadCSV(O, S, ','),
            'tsv': lambda: ReadCSV(O, S, '\t'),
        }
        writers = {
            'csv': lambda: writecsvfile(O, S, ','),
            'tsv': lambda: writecsvfile(O, S, '\t'),
            'pint': lambda: writePhylipInterleaved(O, S),
            'pseq': lambda: writePhylipSequential(O, S),
            'fasta': lambda: writeFlat(O, S, '>'),
            'flatdna': lambda: writeFlat(O, S, '#'),
            'flatpro': lambda: writeFlat(O, S, '%'),
            'flattext': lambda: writeFlat(O, S, '"'),
        }

        read_input = readers.get(O.InFormat)
        if read_input is None:
            print('>>> phylcnv.py: Input format ' + O.InFormat + ' is not supported.')
        else:
            read_input()
            if not S.AllSeqsSameLength():
                print('>>> phylcnv.py: Phylip files require that all sequences')
                print('>>> must be the same length.')
            else:
                if O.Invert:
                    S.InvertSeq()
                write_output = writers.get(O.OutFormat)
                if write_output is None:
                    print('>>> phylcnv.py: Output format ' + O.OutFormat + ' is not supported.')
                else:
                    write_output()

    outfile.close()
    BM.exit_success()


# Run the conversion unless BM.documentor() returns a true value or
# '-test' appears on the command line.
if not (BM.documentor() or '-test' in sys.argv):
    main()
