ó
Y_Sc           @` s€   d  Z  d d l m Z m Z m Z d d l Td d l Td d l Td Z d Z	 d	 Z
 d d
 „ Z d d „ Z d „  Z d „  Z d S(   s©  Read and write sequence information in IntelliGenetics format.
    
A sequence file in IG format can contain several sequences, each consisting of a
number of comment lines that must begin with a semicolon (";"), a line with the 
sequence name and the sequence itself terminated with the termination character 
'1' for linear or '2' for circular sequences. The termination character is
defacto optional.

--- Example IG File ---

;H.sapiens fau mRNA, 518 bases
HSFAU
ttcctctttctcgactccatcttcgcggtagctgggaccgccgttcagtc
actcttaagtcttttgtaattctggctttctctaataaaaaagccactta
gttcagtcaaaaaaaaaa1
;H.sapiens fau 1 gene, 2016 bases
HSFAU1
ctaccattttccctctcgattctatatgtacactcgggacaagttctcct
gatcgaaaacggcaaaactaaggccccaagtaggaatgccttagttttcg
gggttaacaatgattaacactgagcctcacacccacgcgatgccctcagc
tcctcgctcagcgctctcaccaacagccgtagcccgcagccccgctggac
accggttctccatccccgcagcgtagcccggaacatggtagctgccatct
ttacctgctacgccagccttctgtgcgcgcaactgtctggtcccgcccc2

i    (   t   absolute_importt   divisiont   print_functioni   (   t   *i   t   intelligeneticst   igt   stanfordsû  
;H.sapiens fau mRNA, 518 bases
HSFAU
ttcctctttctcgactccatcttcgcggtagctgggaccgccgttcagtc
actcttaagtcttttgtaattctggctttctctaataaaaaagccactta
gttcagtcaaaaaaaaaa1
;H.sapiens fau 1 gene, 2016 bases
HSFAU1
ctaccattttccctctcgattctatatgtacactcgggacaagttctcct
gatcgaaaacggcaaaactaaggccccaagtaggaatgccttagttttcg
gggttaacaatgattaacactgagcctcacacccacgcgatgccctcagc
tcctcgctcagcgctctcaccaacagccgtagcccgcagccccgctggac
accggttctccatccccgcagcgtagcccggaacatggtagctgccatct
ttacctgctacgccagccttctgtgcgcgcaactgtctggtcccgcccc2
c         C` s,   g  t  |  | ƒ D] } | ^ q } t | ƒ S(   s  Read and parse an IG file. 

    Args:
        fin -- A stream or file to read
        alphabet -- The expected alphabet of the data, if given
    Returns: 
        SeqList -- A list of sequences
    Raises: 
        ValueError -- If the file is unparsable
    (   t   iterseqt   SeqList(   t   fint   alphabett   st   seqs(    (    sK   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/intelligenetics_io.pyt   readR   s    "c   	      c` so  t  | ƒ } g  } g  } d } d	 } d „  } xt |  ƒ D]\ } } | j ƒ  } | d k rd q: n  | j d ƒ r¿ | r¥ | | | | | | ƒ Vg  } g  } d	 } n  | j | d ƒ | } q: | sÎ | } q: | d d k sî | d d k r4| j t | d d !ƒ ƒ | | | | | | ƒ Vg  } g  } d	 } q: | j t | ƒ ƒ q: W| rk| | | | | | ƒ Vn  d	 S(
   s%   Parse an IG file and generate sequences.
    
    Args:
        fin -- A stream or file to read
        alphabet -- The expected alphabet of the data, if given    
    Yeilds: 
        Seq -- One alphabetic sequence at a time.
    Raises: 
        ValueError -- If the file is unparsable
    iÿÿÿÿc         S` se   y7 d j  | ƒ } t d j  |  ƒ | d | d | ƒ} Wn' t k
 r` t d | | f ƒ ‚ n X| S(   Ns   
t    t   namet   descriptionsM   Parse failed with sequence starting at line %d: Character not in alphabet: %s(   t   joint   Seqt
   ValueError(   R   R
   R   t   commentst   linenot   descR   (    (    sK   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/intelligenetics_io.pyt	   build_seqs   s    (R   t   ;i   t   1t   2i    N(   t   Alphabett   Nonet	   enumeratet   stript
   startswitht   appendt   remove_whitespace(	   R	   R
   R   t   headert   start_linenoR   R   R   t   line(    (    sK   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/intelligenetics_io.pyR   a   s<    	
 			 	c         C` s"   x | D] } t  |  | ƒ q Wd S(   s¨   Write an IG file. 

    Args:
        fout -- A writable stream.
        seqs  -- A list of Seq's
    Raises:
        ValueError -- If a sequence is missing a name
    N(   t   writeseq(   t   foutR   R   (    (    sK   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/intelligenetics_io.pyt   write   s    	c         C` sÔ   | j  p d } x( | j ƒ  D] } t d | d |  ƒq W| j s\ t d t | ƒ ƒ ‚ n  t | j d |  ƒt | ƒ } d } x? t d | | ƒ D]) } t | | | | d | !d |  ƒq– Wt d |  ƒ d S(   sÃ    Write a single sequence in IG format.

    Args:
        afile -- A writable stream.
        seq  -- A Seq instance
    Raises:
        ValueError -- If a sequence is missing a name        
    R   R   t   files+   Write failed with missing sequence name: %siP   i   N(   R   t
   splitlinest   printR   R   t   strt   lent   range(   R&   t   seqR   t   ht   Lt   line_lengtht   n(    (    sK   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/intelligenetics_io.pyR%   ª   s    		'N(   R   R   R   (   t   __doc__t
   __future__R    R   R   t   utilsR.   R   t   namest
   extensionst   exampleR   R   R   R'   R%   (    (    (    sK   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/intelligenetics_io.pyt   <module>3   s   


<	