Ñò
ö[î^c           @   s1  d  Z  d d k Z d d k Z e i i d ƒ Z e i i e ƒ d d k	 l
 Z
 d d k	 l Z d Z d Z e
 e e ƒ Z d d d	 „  ƒ  YZ d
 d d „  ƒ  YZ d d d „  ƒ  YZ d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z e i ƒ  p d e i j o n e ƒ  d S(   sÍ  
Dr. Brian Fristensky, University of Manitoba

 Description: Convert Phylip file or fasta file into other file formats

 Synopsis: phylcnv.py [-inf format] [-outf format] [-inv] [infile] [outfile]

 -inf    pint Phylip interleaved
         pseq Phylip sequential
         fasta - Fasta file
         csv - comma-separated value file of molecular markers
         tsv - tab-separated value file of molecular markers

 -outf   pint Phylip interleaved
         pseq Phylip sequential
         fasta - Fasta file
         csv - comma-separated value file of molecular markers
         tsv - tab-separated value file of molecular markers

@modified: Jun. 20, 2020
@author: Brian Fristensky
@contact: frist@cc.umanitoba.ca
iÿÿÿÿNt
   BIRCHPYLIB(   t   Birchmod(   t   Arguments   phylcnv.py: s-   
	 USAGE: phylcnv.py [options] infile outfilet   Optionsc           B   s   e  Z d  „  Z d „  Z RS(   c         C   s;   d |  _  d |  _ d |  _ d |  _ t |  _ |  i ƒ  d S(   sÇ   
                Initializes arguments:
                        Ifn=""
                        Ofn=""
                Then calls read_args() to fill in their values from command line
                t    t   pintt   tsvN(   t   Ifnt   Ofnt   InFormatt	   OutFormatt   Falset   Invertt	   read_args(   t   self(    (    s   ./phylcnv.pyt   __init__)   s    					c         C   sU  t  d t t ƒ |  _ |  i i ƒ  t  d t t ƒ |  _ |  i i ƒ  t  d t t ƒ |  _ |  i i ƒ  |  i i ƒ  t  d t t ƒ } | i d ƒ t  d t t ƒ } | i d ƒ y€ t i	 d ƒ o |  i i
 ƒ  |  _ n t i	 d ƒ o |  i i
 ƒ  |  _ n t i	 d ƒ |  _ | i
 ƒ  |  _ | i
 ƒ  |  _ Wn t j
 o t i ƒ  n Xd S(   sU   
                Reads command line arguments into a Paramter object
                s   -infs   -outfs   -invR   iþÿÿÿiÿÿÿÿN(   R   t   strt   BMt   AInft   set_optionalt   AOutft   AInvertt   set_is_switcht   set_positiont	   arg_givent   fetchR	   R
   R   R   R   t
   ValueErrort
   printusage(   R   t   Ainfilet   Aoutfile(    (    s   ./phylcnv.pyR   7   s*    (   t   __name__t
   __module__R   R   (    (    (    s   ./phylcnv.pyR   '   s   	t   Sequencec           B   s   e  Z d  „  Z RS(   c         C   s   d |  _  d |  _ d S(   s1   
        Holds name and sequence
                R   N(   t   Namet   Seq(   R   (    (    s   ./phylcnv.pyR   T   s    	(   R   R   R   (    (    (    s   ./phylcnv.pyR    R   s   t   SeqDatac           B   s#   e  Z d  „  Z d „  Z d „  Z RS(   c         C   s(   g  |  _  d |  _ d |  _ d |  _ d S(   s=   
        Holds sequences and associated data
                i    N(   t   SeqLstt   NumSeqt   SeqLent   NumEnz(   R   (    (    s   ./phylcnv.pyR   ^   s    			c         C   s·   t  |  i d i ƒ } | } xŽ t d t  |  i ƒ ƒ D]t } t  |  i | i ƒ | j  o t  |  i | i ƒ } q5 t  |  i | i ƒ | j o t  |  i | i ƒ } q5 q5 W| | j S(   Ni    i   (   t   lenR$   R"   t   range(   R   t   MinReadt   MaxReadt   i(    (    s   ./phylcnv.pyt   AllSeqsSameLengthg   s     c         C   s«   x¤ t  d t |  i ƒ d ƒ D]† } |  i | i i d d ƒ } | i d d ƒ } | i d d ƒ } | i d d ƒ } | i d d ƒ } | i d d ƒ |  i | _ q Wd  S(	   Ni    i   t   1t   !t   0t   -t   _t   +(   R)   R(   R$   R"   t   replace(   R   R,   t   tempseq(    (    s   ./phylcnv.pyt	   InvertSeqr   s     (   R   R   R   R-   R6   (    (    (    s   ./phylcnv.pyR#   \   s   			c      
   C   sÄ  d „  } d „  } y t  |  i d ƒ } Wn t i |  i ƒ n X| i ƒ  } | i ƒ  } t | d ƒ | _ t | d ƒ | _ t	 | ƒ d j o t | d ƒ | _
 n x3 t d | i ƒ D] } t ƒ  } | i i | ƒ q½ W| i ƒ  } d }	 t }
 x» | d j o­ |
 oB | | ƒ | i |	 _ | | d t	 | ƒ ƒ i ƒ  | i |	 _ n% | i |	 i | i ƒ  | i |	 _ |	 d }	 |	 | i j o t }
 d }	 n | i ƒ  i ƒ  } qû W| i ƒ  d	 S(
   s,   
    Read in a Phylip Interleaved file.
    c         S   s   |  d d !i  ƒ  } | S(   Ni    i	   (   t   rstrip(   t   lineR!   (    (    s   ./phylcnv.pyt   ReadName   s    c         S   s   |  | | !} | S(   N(    (   R8   t   Startt   FinishR"   (    (    s   ./phylcnv.pyt   ReadSeq…   s    t   ri    i   i   R   i
   N(   t   openR   R   t
   file_errort   readlinet   splitt   intR%   R&   R(   R'   R)   R    R$   t   appendt   TrueR!   t   stripR"   R   t   close(   t   Ot   SR9   R<   t   in_fileR8   t   valuesR,   R5   t   jt
   FirstGroup(    (    s   ./phylcnv.pyt   ReadPhylipInterleaved|   s>    		 	 ,$

c   
      C   s²  d „  } y t  |  i d ƒ } Wn t i |  i ƒ n X| i ƒ  } | i ƒ  } t | d ƒ | _ t | d ƒ | _ t	 | ƒ d j o t | d ƒ | _
 n x3 t d | i ƒ D] } t ƒ  } | i i | ƒ q´ W| i ƒ  } d } x¸ | | i j  o§ | | ƒ | i | _ | i ƒ  } d }	 xn | d j o` |	 | i j  oP | i ƒ  } | i | i | | i | _ t	 | i | i ƒ }	 | i ƒ  } q'W| d } qì W| i ƒ  d S(   s+   
    Read in a Phylip Sequential file.
    c         S   s   |  i  ƒ  } | S(   N(   RE   (   R8   R!   (    (    s   ./phylcnv.pyR9   ¯   s    R=   i    i   i   R   N(   R>   R   R   R?   R@   RA   RB   R%   R&   R(   R'   R)   R    R$   RC   R!   RE   R"   RF   (
   RG   RH   R9   RI   R8   RJ   R,   R5   RK   t   SeqRead(    (    s   ./phylcnv.pyt   ReadPhylipSequentialª   s<    	 	  c         C   s)  y t  |  i d ƒ } Wn t i |  i ƒ n X| i ƒ  } d | _ xÓ | d j oÅ | d d j o° t ƒ  } | d i ƒ  | _ | i ƒ  } xE | d j o7 | d d j o& | i	 | i ƒ  | _	 | i ƒ  } q‘ W| i
 i | ƒ t | i
 | i i	 ƒ | _ | i d | _ qH qH W| i ƒ  d S(   sq   
    Read sequences from a fasta file in the form:

    >name
    sequence
    sequence
    sequence...
    
    R=   i    R   t   >i   N(   R>   R   R   R?   R@   R%   R    RE   R!   R"   R$   RC   R(   R&   RF   (   RG   RH   RI   R8   t   tempSeq(    (    s   ./phylcnv.pyt	   ReadFastaÓ   s(    
	 	 c         C   s  y t  |  i d ƒ } Wn t i |  i ƒ n X| i ƒ  } d | _ xÄ | d j o¶ t ƒ  } | i i | ƒ | i	 d d ƒ } | i
 | ƒ } | d | i | i _ | d i	 | d ƒ i ƒ  | i | i _ t | i | i i ƒ | _ | i d | _ | i ƒ  } qH W| i ƒ  d S(   sF   
    Read in a comma-separated value or tab-separated value file.
    R=   i    R   t   "i   i   N(   R>   R   R   R?   R@   R%   R    R$   RC   R4   t	   partitionR!   RE   R"   R(   R&   RF   (   RG   RH   t   SepRI   R8   RQ   t   templist(    (    s   ./phylcnv.pyt   ReadCSVô   s$    	 	)c      	   C   sÄ   y t  |  i d ƒ } Wn t i |  i ƒ n Xx t d t | i ƒ ƒ D]s } | i | i | i ƒ xF t d t | i | i	 ƒ ƒ D]% } | i | | i | i	 | ƒ q† W| i d ƒ qI Wd S(   sC   
    Write the data as a single line of comma-separated values
    t   wi    s   
N(
   R>   R   R   R?   R)   R(   R$   t   writeR!   R"   (   RG   RH   RU   t   outfileR,   RK   (    (    s   ./phylcnv.pyt   writecsvfile  s       #c   
   
   C   s½  y t  |  i d ƒ } Wn t i |  i ƒ n X| i t | i ƒ d t | i ƒ ƒ | i d j o | i d t | i ƒ ƒ n | i d ƒ d } t	 } d } x| | i j  oxê t
 d | i ƒ D]Ö } | ok | i | i i d ƒ } | d j o | i | i } n | i | i |  } | i | i d d ƒ ƒ n | | d }	 |	 | i j o | i d }	 n | i | i | i | |	 d !ƒ | i d ƒ qÊ Wt } |	 d } q§ Wd	 S(
   s6   
    Write the data in Phylip interleaved format.
    RX   t    i    s   
i2   iÿÿÿÿi
   i   N(   R>   R   R   R?   RY   R   R%   R&   R'   RD   R)   R$   R!   t   findt   ljustR"   R   (
   RG   RH   RZ   t   LineLenRL   R:   R,   t   blankindt   outnameR;   (    (    s   ./phylcnv.pyt   writePhylipInterleaved  s8    '  "c   	   
   C   s³  y t  |  i d ƒ } Wn t i |  i ƒ n X| i t | i ƒ d t | i ƒ ƒ | i d j o | i d t | i ƒ ƒ n | i d ƒ d } xt	 d | i ƒ D] } | i
 | i i d ƒ } | d j o | i
 | i } n | i
 | i |  } | i | i d d ƒ ƒ | i d ƒ d } x} | | i j  ol | | d } | | i j o | i d } n | i | i
 | i | | d !ƒ | i d ƒ | d } q.Wq« Wd	 S(
   s5   
    Write the data in Phylip sequential format.
    RX   R\   i    s   
i2   iÿÿÿÿi
   i   N(   R>   R   R   R?   RY   R   R%   R&   R'   R)   R$   R!   R]   R^   R"   (	   RG   RH   RZ   R_   R,   R`   Ra   R:   R;   (    (    s   ./phylcnv.pyt   writePhylipSequentialA  s4    '  "c      	   C   s  y t  |  i d ƒ } Wn t i |  i ƒ n Xd } xÈ t d | i ƒ D]´ } | i | | i | i ƒ | i d ƒ d } x} | | i	 j  ol | | d } | | i	 j o | i	 d } n | i | i | i
 | | d !ƒ | i d ƒ | d } q€ WqI Wd S(   s©   
    Write the data in various flat file formats.
    fasta - FlagChar = '>'
    flatdna - FlagChar = '#'
    flatpro - FlagChar = '%'
    flattext - FlagChar = '"'
    RX   i2   i    s   
i   N(   R>   R   R   R?   R)   R%   RY   R$   R!   R&   R"   (   RG   RH   t   FlagCharRZ   R_   R,   R:   R;   (    (    s   ./phylcnv.pyt	   writeFlata  s$      "c          C   s…  t  ƒ  }  t |  i d ƒ } t i i |  i ƒ o<t ƒ  } t } |  i	 d j o t
 |  | ƒ n¡ |  i	 d j o t |  | ƒ n€ |  i	 d j o t |  | ƒ n_ |  i	 d j o t |  | d ƒ n; |  i	 d j o t |  | d ƒ n t } d	 |  i	 d
 GH| oa| i ƒ  oF|  i o | i ƒ  n |  i d j o t |  | d ƒ qe|  i d j o t |  | d ƒ qe|  i d j o t |  | ƒ qe|  i d j o t |  | ƒ qe|  i d j o t |  | d ƒ qe|  i d j o t |  | d ƒ qe|  i d j o t |  | d ƒ qe|  i d j o t |  | d ƒ qed |  i d
 GHqid GHd GHqmn | i ƒ  t i ƒ  d S(   s8   
        Called when not in documentation mode.
        RX   R   t   pseqt   fastat   csvt   ,R   s   	s   >>> phylcnv.py: Input format s    is not supported.RP   t   flatdnat   #t   flatprot   %t   flattextRS   s   >>> phylcnv.py: Output format s7   >>> phylcnv.py: Phylip files require that all sequencess   >>> must be the same length.N(   R   R>   R   t   ost   patht   existsR   R#   RD   R	   RM   RO   RR   RW   R   R-   R   R6   R
   R[   Rb   Rc   Re   RF   R   t   exit_success(   RG   RZ   RH   t
   FormatOkay(    (    s   ./phylcnv.pyt   main|  sT    		

s   -test(    (    (    (   t   __doc__t   os.pathRo   t   syst   environt   gett   blibRp   RC   t   birchlibR   R   t   PROGRAMt   USAGER   R   R    R#   RM   RO   RR   RW   R[   Rb   Rc   Re   Rt   t
   documentort   argv(    (    (    s   ./phylcnv.pyt   <module>   s.   +
 	.	)	!			#	 		3