ó
Y_Sc           @@ sŒ   d  Z  d d l m Z m Z d d l Z d d l Td d l Td d l Td Z d Z	 d Z
 e j d ƒ Z d d „ Z d d „ Z d „  Z d S(   só   Read a multiple sequence alignment in STOCKHOLM format.

This file format is used by PFAM and HMMER. At present, all annotation
information is ignored.

See: 
    - http://www.cgb.ki.se/cgb/groups/sonnhammer/Stockholm.html
    - HMMER manual

i    (   t   absolute_importt   print_functionNi   (   t   *i   sŸ  
# STOCKHOLM 1.0
#=GF ID CBS
#=GF AC PF00571
#=GF DE CBS domain
#=GF AU Bateman A
#=GF CC CBS domains are small intracellular modules mostly found  
#=GF CC in 2 or four copies within a protein. 
#=GF SQ 67
#=GS O31698/18-71 AC O31698
#=GS O83071/192-246 AC O83071
#=GS O83071/259-312 AC O83071
#=GS O31698/88-139 AC O31698
#=GS O31698/88-139 OS Bacillus subtilis
O83071/192-246          MTCRAQLIAVPRASSLAE..AIACAQKM....RVSRVPVYERS
#=GR O83071/192-246 SA  999887756453524252..55152525....36463774777
O83071/259-312          MQHVSAPVFVFECTRLAY..VQHKLRAH....SRAVAIVLDEY
#=GR O83071/259-312 SS  CCCCCHHHHHHHHHHHHH..EEEEEEEE....EEEEEEEEEEE
O31698/18-71            MIEADKVAHVQVGNNLEH..ALLVLTKT....GYTAIPVLDPS
#=GR O31698/18-71 SS    CCCHHHHHHHHHHHHHHH..EEEEEEEE....EEEEEEEEHHH
O31698/88-139           EVMLTDIPRLHINDPIMK..GFGMVINN......GFVCVENDE
#=GR O31698/88-139 SS   CCCCCCCHHHHHHHHHHH..HEEEEEEE....EEEEEEEEEEH
#=GC SS_cons            CCCCCHHHHHHHHHHHHH..EEEEEEEE....EEEEEEEEEEH
O31699/88-139           EVMLTDIPRLHINDPIMK..GFGMVINN......GFVCVENDE
#=GR O31699/88-139 AS   ________________*__________________________
#=GR_O31699/88-139_IN   ____________1______________2__________0____
//
t	   stockholmt   pfamt   stht   aligns   #\s+STOCKHOLM\s+1.\d\s+$c         C@ s   t  t |  | ƒ ƒ S(   s'   Iterate over the sequences in the file.(   t   itert   read(   t   fint   alphabet(    (    sE   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/stockholm_io.pyt   iterseqS   s    c         C@ s?  t  | ƒ } g  } g  } d } xÔ t |  ƒ D]Æ } | j d k rI d } q+ | j d k r t | ƒ | k rñ | j | j ƒ | j g  ƒ qñ q+ | j d k r+ | j | j ƒ sÐ t d | j | | j f ƒ ‚ n  | | j | j ƒ | d 7} q+ q+ Wg  t	 | | ƒ D]* \ } } t
 d j | ƒ | d | ƒ^ q} t | ƒ S(	   Ni    t   begin_blockt   seq_idt   seqs.   Character on line: %d not in alphabet: %s : %si   t    t   name(   t   Alphabett   _scant   typeoft   lent   appendt   datat
   alphabetict
   ValueErrort   linenot   zipt   Seqt   joint   SeqList(   R	   R
   t   seq_idst   seqst   block_countt   tokent   st   i(    (    sE   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/stockholm_io.pyR   Y   s(    	@c   	      c@ sÁ  t  d ƒ \ } } } t d ƒ V| } x”t |  ƒ D]†\ } } | | k r« | j ƒ  r] q3 n  t j | ƒ } | } | d  k	 r˜ t d | j ƒ  ƒ Vq3 q« t d | ƒ ‚ n  | | k rÝ | j ƒ  rÉ q3 n  t d ƒ V| } n  | | k r°| j ƒ  rt d ƒ V| } q3 n  | j	 ƒ  d k r-t d ƒ Vd  S| d d	 k rCq3 n  | j
 d  d
 ƒ } t | ƒ d k rzt d | ƒ ‚ n  t d | d j	 ƒ  ƒ Vt d | d
 j	 ƒ  ƒ Vq3 n  t ƒ  ‚ q3 Wd  S(   Ni   t   begint   headers   Parse error on line: %dR   t	   end_blocks   //i    t   #i   i   R   R   (   t   ranget   Tokent	   enumeratet   isspacet   header_linet   matcht   Nonet   groupR   t   stript   splitR   t   RuntimeError(	   R	   R%   t   bodyt   blockt   statet   Lt   linet   mt   name_seq(    (    sE   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/stockholm_io.pyR   t   sF      	(   R   R   (   R   R   s   align(   t   __doc__t
   __future__R    R   t   ret   utilsR   R   t   examplet   namest
   extensionst   compileR,   R.   R   R   R   (    (    (    sE   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/stockholm_io.pyt   <module>#   s   


