ó
Y_Sc           @@  s•   d  Z  d d l m Z d d l Z d d l Td d l Td d l Td Z d Z d Z	 e j
 d ƒ Z e j
 d ƒ Z d d „ Z d d „ Z d „  Z d S(   sN  Read sequence information in MSF format.
    
This is a file format for biological sequence data. The sequences are interweaved and each line is labeled with the sequence name. The MSF format can be identified in one or more of the following ways: 
1. The word PileUp on the first line (optional)
2. the word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT at the start of the file (optional)
3. the word MSF on the first line of the file, and the characters ".." at the end of this line (optional)
4. A header containing sequence information followed by a line with the characters "//"
i    (   t   absolute_importNi   (   t   *i   s3  

 PileUp


MSF: 64 Type: P Check: 767 ..

 Name: Cow              Len:   100  Check: 3761  Weight:  1.00
 Name: Carp             Len:   100  Check: 1550  Weight:  1.00
 Name: Chicken          Len:   100  Check: 2397  Weight:  1.00
 Name: Human            Len:   100  Check: 9021  Weight:  1.00
 Name: Loach            Len:   100  Check:  984  Weight:  1.00
 Name: Mouse            Len:   100  Check: 2993  Weight:  1.00


//

                                                            
    Cow  MAYPMQLGFQ DATSPIMEEL LHFHDHTLMI VFLISSLVLY IISLMLTTKL 
   Carp  MAHPTQLGFK DAAMPVMEEL LHFHDHALMI VLLISTLVLY IITAMVSTKL 
Chicken  MANHSQLGFQ DASSPIMEEL VEFHDHALMV ALAICSLVLY LLTLMLMEKL 
  Human  MAHAAQVGLQ DATSPIMEEL ITFHDHALMI IFLICFLVLY ALFLTLTTKL 
  Loach  MAHPTQLGFQ DAASPVMEEL LHFHDHALMI VFLISALVLY VIITTVSTKL 
  Mouse  MAYPFQLGLQ DATSPIMEEL MNFHDHTLMI VFLISSLVLY IISLMLTTKL 


                                                       
    Cow  THTSTMDAQE VETIWTILPA IILILIALPS LRILYMMDEI NNPSLTVKTM 
   Carp  TNKYILDSQE IEIVWTILPA VILVLIALPS LRILYLMDEI NDPHLTIKAM 
Chicken  S.SNTVDAQE VELIWTILPA IVLVLLALPS LQILYMMDEI DEPDLTLKAI 
  Human  TNTNISDAQE METVWTILPA IILVLIALPS LRILYMTDEV NDPSLTIKSI 
  Loach  TNMYILDSQE IEIVWTVLPA LILILIALPS LRILYLMDEI NDPHLTIKAM 
  Mouse  THTSTMDAQE VETIWTILPA VILIMIALPS LRILYMMDEI NNPVLTVKTM 
 
   t   msfs   gcg-msft   gcgt   PileUps
   (//)(\s*)$s   \s*(\S+)\s+([\S\s.?]+)$c         C@  s   t  t |  | ƒ ƒ S(   s'   Iterate over the sequences in the file.(   t   itert   read(   t   fint   alphabet(    (    s?   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/msf_io.pyt   iterseqW   s    c         C@  sZ  t  | ƒ } g  } g  } d } xÔ t |  ƒ D]Æ } | j d k rI d } q+ | j d k r t | ƒ | k rñ | j | j ƒ | j g  ƒ qñ q+ | j d k r+ | j | j ƒ sÐ t d | j | | j f ƒ ‚ n  | | j | j ƒ | d 7} q+ q+ W| g  k rt d ƒ ‚ n  g  t	 | | ƒ D]* \ } } t
 d j | ƒ | d	 | ƒ^ q } t | ƒ S(
   Ni    t   begin_blockt   seq_idt   seqs.   Character on line: %d not in alphabet: %s : %si   s"   Parse error, possible wrong formatt    t   name(   t   Alphabett   _line_ist   typeoft   lent   appendt   datat
   alphabetict
   ValueErrort   linenot   zipt   Seqt   joint   SeqList(   R   R   t   seq_idst   seqst   block_countt   tokent   st   i(    (    s?   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/msf_io.pyR   ^   s,    	@c   	      c@  sº  t  d ƒ \ } } } t d ƒ V| } xt |  ƒ D]\ } } | | k r• | j ƒ  r] q3 n  t j | ƒ } | d  k	 r3 t d ƒ V| } q3 q• q3 n  | | k rÇ | j ƒ  r³ q3 n  t d ƒ V| } n  | | k r3 | j ƒ  rö t d ƒ V| } q3 n  t j | ƒ } | d  k r$t d | ƒ ‚ n  | j	 d ƒ j
 ƒ  rZ| j	 d ƒ j ƒ  j
 ƒ  rZq3 n  t d	 | j	 d ƒ j ƒ  ƒ V| j	 d ƒ } d
 j | j ƒ  ƒ } t d | j ƒ  ƒ Vq3 q3 Wd  S(   Ni   t   begint
   end_headerR
   t	   end_blocks   Parse error on line: %di   i   R   R   R   (   t   ranget   Tokent	   enumeratet   isspaceR#   t   matcht   Nonet   seq_lineR   t   groupt   isdigitt   stripR   t   split(	   R   t   headert   bodyt   blockt   statet   Lt   linet   mR   (    (    s?   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/msf_io.pyR   x   s@      	0(   R   s   gcg-msfR   R   (   t   __doc__t
   __future__R    t   reR   t   utilsR   t   examplet   namest
   extensionst   compileR#   R+   R*   R	   R   R   (    (    (    s?   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq_io/msf_io.pyt   <module>#   s   


%