ó
Y_Sc           @@  s”   d  Z  d d l m Z d d l Td d l Td d l Td Z d Z i	 e d	 6e d
 6e	 d 6e	 d 6e
 d 6e
 d 6e
 d 6e
 d 6e d 6Z d d  Z d d  Z d S(   s.  Sequence IO for NBRF/PIR format.

The format is similar to fasta. The header line consistins of '>', a two-
letter sequence type (P1, F1, DL, DC, RL, RC, or XX), a semicolon, and a
sequence ID. The next line is a textual description of the sequence, 
followed by one or more lines containing the sequence data. The end of 
the sequence is marked by a "*" (asterisk) character.

type_code -- A map between NBRF two letter type codes and Alphabets.


see:  http://www.cmbi.kun.nl/bioinf/tools/crab_pir.html

--- Example NBRF File ---

>P1;CRAB_ANAPL
ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).
  MDITIHNPLI RRPLFSWLAP SRIFDQIFGE HLQESELLPA SPSLSPFLMR 
  SPIFRMPSWL ETGLSEMRLE KDKFSVNLDV KHFSPEELKV KVLGDMVEIH 
  GKHEERQDEH GFIAREFNRK YRIPADVDPL TITSSLSLDG VLTVSAPRKQ 
  SDVPERSIPI TREEKPAIAG AQRK*

>P1;CRAB_BOVIN
ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).
  MDIAIHHPWI RRPFFPFHSP SRLFDQFFGE HLLESDLFPA STSLSPFYLR 
  PPSFLRAPSW IDTGLSEMRL EKDRFSVNLD VKHFSPEELK VKVLGDVIEV 
  HGKHEERQDE HGFISREFHR KYRIPADVDP LAITSSLSSD GVLTVNGPRK 
  QASGPERTIP ITREEKPAVT AAPKK*

i    (   t   absolute_importi   (   t   *i   t   nbrft   pirt   alit   P1t   F1t   DLt   DCt   RCt   RLt   N3t   N1t   XXc         C@  s,   g  t  |  |  D] } | ^ q } t |  S(   sm  Read and parse a NBRF seqquence file. 

    Args:
        fin -- A stream or file to read
        alphabet -- The expected alphabet of the data. If not supplied, then
                an appropriate alphabet will be inferred from the data.
    Returns: 
        SeqList -- A list of sequences
    Raises: 
        ValueError -- If the file is unparsable        
    (   t   iterseqt   SeqList(   t   fint   alphabett   st   seqs(    (    s@   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/nbrf_io.pyt   readZ   s    "c      	   c@  s“  t  d  \ } } } | } d } d } d } g  }	 xzt |   D]l\ }
 } | | k r× | d k s@ | j   rv q@ n  | d d k rÄ | d j d  \ } } | r® | } n
 t | } | } q@ n  t d |
   q@ | | k rū | j   } | } q@ q@ | | k r d j | j    } | d d	 k r|	 j	 | d   t
 d j |	  d
 | j   d | d | } | V| } d } d } g  }	 q@ q¬|	 j	 |  q@ q@ t s@ t  q@ Wd S(   sĶ    Generate sequences from an NBRF file.
    
    arguments:
        fin -- A stream or file to read
        alphabet --    
    yields :
        Seq
    raises :
        ValueError -- On a parse error.
    i   t    i    t   >i   t   ;s   Parse error on line: %di’’’’R   t   namet   descriptionR   N(   t   ranget   Nonet	   enumeratet   isspacet   splitt	   type_codet
   ValueErrort   stript   joint   appendt   Seqt   Falset   AssertionError(   R   R   t   bodyt   headert   sequencet   statet   seq_idt   seq_desct	   seq_alphaR   t   linenot   linet   seq_typet   datat   seq(    (    s@   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/nbrf_io.pyR   k   sJ    	
N(   R   R   (   R   R   R   (   t   __doc__t
   __future__R    t   utilsR2   R   t   namest
   extensionst   protein_alphabett   dna_alphabett   rna_alphabett   generic_alphabetR   R   R   R   (    (    (    s@   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/nbrf_io.pyt   <module>A   s$   



