ó
Y_Sc           @` s¤   d  Z  d d l m Z m Z m Z d d l Z d d l Td d l Td d l Td Z	 d Z
 d Z d d „ Z d d „ Z d d „ Z d „  Z d „  Z d d „ Z d S(    se  Read and write sequence information in FASTA format.
    
This is a very common format for unannotated biological sequence data, 
accepted by many multiple sequence alignment programs. Each sequence 
consists of a single-line description, followed by lines of sequence data. 
The first character of the description line is a greater-than (">") symbol 
in the first column. The first word of the description is often the name or 
ID of the sequence. Fasta files containing multiple sequences have one 
sequence listed right after another. 


Example Fasta File ::

>Lamprey GLOBIN V - SEA LAMPREY
PIVDTGSVA-P------------------LSAAEKTKIRSAWAPVYSTY---ETSGVDILVKFFTSTPAAQEFFPKFKGL
TT-----ADQLKKSA---DVRWHA-ERIINAVNDAVASMDDTEKMS--MKL-RDLSGKH----AKSFQV-----DPQYFK
VLAAVI-AD-TVAAGD--AGFEKLMSM------I---CILLR----S-----A-----Y------------
>Hagfish GLOBIN III - ATLANTIC HAGFISH
PITDHGQPP-T------------------LSEGDKKAIRESWPQIYKNF---EQNSLAVLLEFLKKFPKAQDSFPKFSAK
KS-------HLEQDP---AVKLQA-EVIINAVNHTIGLMDKEAAMK--KYL-KDLSTKH----STEFQV-----NPDMFK
ELSAVF-VS-TMG-GK--AAYEKLFSI------I---ATLLR----S-----T-----YDA----------
>Frog HEMOGLOBIN BETA CHAIN - EDIBLE FROG
----------GS-----------------------DLVSGFWGKV--DA---HKIGGEALARLLVVYPWTQRYFTTFGNL
GSADAIC-----HNA---KVLAHG-EKVLAAIGEGLKHPENLKAHY--AKL-SEYHSNK----LHVDPANFRLLGNVFIT
VLARHF-QH-EFTPELQ-HALEAHFCA------V---GDALA----K-----A-----YH-----------


i    (   t   absolute_importt   divisiont   print_functionNi   (   t   *i   t   fastat   pearsont   fat   fastt   seqt   fsat   fstt   ntt   aat   fnat   mpfat   faat   fnnt   mfastat   tfat   pfas3  
>Lamprey GLOBIN V - SEA LAMPREY
PIVDTGSVA-P------------------LSAAEKTKIRSAWAPVYSTY---ETSGVDILVKFFTSTPAAQEFFPKFKGL
TT-----ADQLKKSA---DVRWHA-ERIINAVNDAVASMDDTEKMS--MKL-RDLSGKH----AKSFQV-----DPQYFK
VLAAVI-AD-TVAAGD--AGFEKLMSM------I---CILLR----S-----A-----Y------------

>Hagfish GLOBIN III - ATLANTIC HAGFISH
PITDHGQPP-T------------------LSEGDKKAIRESWPQIYKNF---EQNSLAVLLEFLKKFPKAQDSFPKFSAK
KS-------HLEQDP---AVKLQA-EVIINAVNHTIGLMDKEAAMK--KYL-KDLSTKH----STEFQV-----NPDMFK
ELSAVF-VS-TMG-GK--AAYEKLFSI------I---ATLLR----S-----T-----YDA----------

>Frog HEMOGLOBIN BETA CHAIN - EDIBLE FROG
----------GS-----------------------DLVSGFWGKV--DA---HKIGGEALARLLVVYPWTQRYFTTFGNL
GSADAIC-----HNA---KVLAHG-EKVLAAIGEGLKHPENLKAHY--AKL-SEYHSNK----LHVDPANFRLLGNVFIT
VLARHF-QH-EFTPELQ-HALEAHFCA------V---GDALA----K-----A-----YH-----------

c         C` sW   g  t  |  | ƒ D] } | ^ q } t d } t |  d ƒ rG |  j } n  t | d | ƒS(   s  Read and parse a fasta file. 

    Args:
        fin -- A stream or file to read
        alphabet -- The expected alphabet of the data, if given
    Returns: 
        SeqList -- A list of sequences
    Raises: 
        ValueError -- If the file is unparsable
    i    t   name(   t   iterseqt   namest   hasattrR   t   SeqList(   t   fint   alphabett   st   seqsR   (    (    sA   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/fasta_io.pyt   readV   s
    "
 c         C` s   t  t |  | ƒ ƒ S(   sN   Read one sequence from the file, starting 
    from the current file position.(   t   nextR   (   R   R   (    (    sA   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/fasta_io.pyt   readseqg   s    c   	      c` s6  t  | ƒ } g  } g  } d } d } d „  } xá t |  ƒ D]Ó \ } } | j ƒ  } | d k rd q: n  | j d ƒ r¾ | d k	 r¥ | | | | | | ƒ Vd } g  } n  | d } | } g  } q: | j d ƒ rá | j | d ƒ q: | d k r t d | ƒ ‚ n  | j | ƒ q: W| sd S| | | | | | ƒ Vd S(	   s'   Parse a fasta file and generate sequences.
    
    Args:
        fin -- A stream or file to read
        alphabet -- The expected alphabet of the data, if given    
    Yeilds: 
        Seq -- One alphabetic sequence at a time.
    Raises: 
        ValueError -- If the file is unparsable
    iÿÿÿÿc         S` sŒ   y^ | j  d d ƒ d } | r9 | d d j | ƒ 7} n  t d j |  ƒ | d | d | ƒ} Wn' t k
 r‡ t d | | f ƒ ‚ n X| S(	   Nt    i   i    s   
t    R   t   descriptionsM   Parse failed with sequence starting at line %d: Character not in alphabet: %s(   t   splitt   joint   Seqt
   ValueError(   R   R   t   headert   header_linenot   commentsR   R   (    (    sA   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/fasta_io.pyt	   build_seq   s    (R!   t   >i   t   ;s/   Parse failed on line %d: sequence before headerN(   t   Alphabett   Nonet	   enumeratet   stript
   startswitht   appendR&   (	   R   R   R   R)   R'   R(   R*   t   linenot   line(    (    sA   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/fasta_io.pyR   m   s8    	 	
	 c         C` s\   | j  r: x. | j  j ƒ  D] } t d | d |  ƒq Wn  x | D] } t |  | ƒ qA Wd S(   sh   Write a fasta file. 

    Args:
        fout -- A writable stream.
        seqs  -- A list of Seq's
    R,   t   fileN(   R"   t
   splitlinest   printt   writeseq(   t   foutR   R4   R   (    (    sA   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/fasta_io.pyt   write¤   s
    	c         C` sø   | j  p | j p d } | rƒ | j ƒ  } t d | d d |  ƒt | ƒ d k r“ x) | d D] } t d | d |  ƒq_ Wq“ n t d d |  ƒt | ƒ } d } x? t d | | ƒ D]) } t | | | | d | !d |  ƒqº Wt d |  ƒ d S(	   s|    Write a single sequence in fasta format.

    Args:
        afile -- A writable stream.
        seq  -- A Seq instance
    R!   R+   i    R5   i   R,   iP   N(   R"   R   R6   R7   t   lent   range(   t   afileR   R'   t   ht   Lt   line_lengtht   n(    (    sA   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/fasta_io.pyR8   ²   s    'c         ` s=   ‡  f d †  } t  j d ƒ ‰ ‡ f d †  } t |  | | ƒ S(   sb   Return a FileIndex for the fasta file. Sequences can be retrieved
    by item number or name.
    c         ` s   t  |  ˆ  ƒ S(   N(   R   (   R=   (   R   (    sA   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/fasta_io.pyt   parserÏ   s    s
   ^>\s*(\S*)c         ` s,   ˆ  j  |  ƒ } | d  k r d  S| j d ƒ S(   Ni   (   t   searchR.   t   group(   R4   t   k(   t   key(    sA   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/fasta_io.pyt   linekeyÓ   s     (   t   ret   compilet	   FileIndex(   R=   R   RB   RG   (    (   R   RF   sA   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/fasta_io.pyt   indexË   s    (   R   R   s   fa(   s   faR   s   fasts   seqR	   R
   s   ntR   R   R   R   R   R   R   R   (   t   __doc__t
   __future__R    R   R   RH   t   utilsR   R!   R   t
   extensionst   exampleR.   R   R   R   R:   R8   RK   (    (    (    sA   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/fasta_io.pyt   <module>6   s   


7		