ó
Y_Sc           @`  së  d  Z  d d l m Z m Z d d l m Z d d l m Z m Z d d l m	 Z	 m
 Z
 d d d	 d
 d d d d d d d d d d d d d d g Z d e f d „  ƒ  YZ e d8 d8 ƒ Z e d e d d ƒ ƒ Z e d e d d ƒ ƒ Z e d e d d  ƒ ƒ Z e d! e d" d# ƒ ƒ Z e d$ e d% d& ƒ ƒ Z e d' e d( d) ƒ ƒ Z e d* e d+ d* ƒ ƒ Z e d, e d- d, ƒ ƒ Z e d. e d/ d0 ƒ ƒ Z e	 d1 d2 ƒ Z d e f d3 „  ƒ  YZ d e f d4 „  ƒ  YZ d5 „  Z d6 „  Z  d7 „  Z! d8 S(9   sê   Alphabetic sequences and associated tools and data.

Seq is a subclass of a python string with additional annotation and an alphabet.
The characters in string must be contained in the alphabet. Various standard
alphabets are provided.


Classes :
    Alphabet    -- A subset of non-null ascii characters
    Seq         -- An alphabetic string
    SeqList     -- A collection of Seq's
  
Alphabets :    
    o generic_alphabet  -- A generic alphabet. Any printable ASCII character.
    o protein_alphabet -- IUCAP/IUB Amino Acid one letter codes. 
    o nucleic_alphabet -- IUPAC/IUB Nucleic Acid codes 'ACGTURYSWKMBDHVN-'
    o dna_alphabet -- Same as nucleic_alphabet, with 'U' (Uracil) an 
        alternative for 'T' (Thymidine).
    o rna_alphabet -- Same as nucleic_alphabet, with 'T' (Thymidine) an
        alternative for 'U' (Uracil).
    o reduced_nucleic_alphabet -- All ambiguous codes in 'nucleic_alphabet' are
        alternative to 'N' (aNy)
    o reduced_protein_alphabet -- All ambiguous ('BZJ') and non-canonical amino 
        acids codes ( 'U', Selenocysteine and 'O', Pyrrolysine)  in 
        'protein_alphabet' are alternative to 'X'.
    o unambiguous_dna_alphabet -- 'ACGT'
    o unambiguous_rna_alphabet -- 'ACGU'
    o unambiguous_protein_alphabet -- The twenty canonical amino acid one letter
        codes, in alphabetic order, 'ACDEFGHIKLMNPQRSTVWY'

Amino Acid Codes:
    Code  Alt.  Meaning
    -----------------
    A           Alanine
    B           Aspartic acid or Asparagine
    C           Cysteine
    D           Aspartate
    E           Glutamate
    F           Phenylalanine
    G           Glycine
    H           Histidine
    I           Isoleucine
    J           Leucine or Isoleucine    
    K           Lysine
    L           Leucine
    M           Methionine
    N           Asparagine
    O           Pyrrolysine    
    P           Proline
    Q           Glutamine
    R           Arginine
    S           Serine
    T           Threonine
    U           Selenocysteine
    V           Valine
    W           Tryptophan
    Y           Tyrosine
    Z           Glutamate or Glutamine
    X    ?      any
    *           translation stop
    -    .~     gap 

Nucleotide Codes:
    Code  Alt.  Meaning
    ------------------------------
    A           Adenosine
    C           Cytidine
    G           Guanine
    T           Thymidine
    U           Uracil
    R           G A (puRine)
    Y           T C (pYrimidine)
    K           G T (Ketone)
    M           A C (aMino group)
    S           G C (Strong interaction)
    W           A T (Weak interaction)
    B           G T C (not A) (B comes after A)
    D           G A T (not C) (D comes after C)
    H           A C T (not G) (H comes after G)
    V           G C A (not T, not U) (V comes after U)
    N   X?      A G C T (aNy)
    -   .~      A gap 
    



Refs:
    http://www.chem.qmw.ac.uk/iupac/AminoAcid/A2021.html
    http://www.chem.qmw.ac.uk/iubmb/misc/naseq.html    
Status:
    Beta    
Authors:
    GEC 2004,2005
i    (   t   absolute_importt   division(   t   arrayi   (   t   argmaxt   sqrt(   t	   maketranst	   _as_bytest   Alphabett   Seqt   rnat   dnat   proteint   SeqListt   generic_alphabett   protein_alphabett   nucleic_alphabett   dna_alphabett   rna_alphabett   reduced_nucleic_alphabett   reduced_protein_alphabett   unambiguous_dna_alphabett   unambiguous_rna_alphabett   unambiguous_protein_alphabetc           B`  sÎ   e  Z d  Z d d d d g Z d d „ Z d „  Z d „  Z d „  Z d	 „  Z	 d
 „  Z
 d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z e d d „ ƒ Z RS(   sp   An ordered subset of printable ascii characters.

    Status:
        Beta
    Authors: 
        - GEC 2005
    t   _letterst   _alternativest
   _ord_tablet
   _chr_tablec         C`  s  t  j |  ƒ } d j g  t d d ƒ D] } t | ƒ ^ q% ƒ } | d k rU | } n  | | _ t d d ƒ } | d k r‚ | } n  t d g d ƒ } xj t	 | ƒ D]\ \ } }	 t
 |	 ƒ }
 |
 d k rÕ t d	 ƒ ‚ n  | |
 d k rô t d
 ƒ ‚ n  | | |
 <q¢ Wg  } g  } xp | D]h \ } } | | k rt
 | ƒ }
 | |
 d k r}| t
 | ƒ | |
 <| j | ƒ | j | ƒ q}qqWd j | ƒ d j | ƒ f | _ | d d k s¸t ‚ | | _ t d g d ƒ } x* t	 | ƒ D] \ } }	 t
 |	 ƒ | | <qáW| j ƒ  | _ | S(   sû  Create a new, immutable Alphabet.
        
        arguments:
        - letters -- the letters in the alphabet. The ordering determines
            the ordinal position of each character in this alphabet.
        - alt -- A list of (alternative, canonical) letters. The alternatives
            are given the same ordinal position as the canonical characters. 
            e.g. (('?','X'),('x', 'X')) states that '?' and 'x' are synonomous 
            with 'X'.  Values that are not in 'letters' are ignored. Alternatives
            that are already in 'letters' are also ignored. If the same
            alternative character is used twice then the alternative is assigned
            to the canonical character that occurs first in 'letters'. The 
            default is to assume that upper and lower case characters are
            equivalent, unless both cases are included in 'letters'.                   
        raises:
            ValueError : Repetitive or otherwise illegal set of letters.        
        t    i    i€   t   abcdefghijklmnopqrstuvwxyzt   ABCDEFGHIJKLMNOPQRSTUVWXYZiÿ   i   i    s)   Alphabet cannot contain null character \0s   Repetitive alphabetN(   t   objectt   __new__t   joint   ranget   chrt   NoneR   t   zipt	   bytearrayt	   enumeratet   ordt
   ValueErrort   appendR   t   AssertionErrorR   t   decodeR   (   t   clst   letterst   alternativest   selft   _Alphabet__it   ascii_letterst   equivalent_by_caset	   ord_tablet   it   at   nt   _fromt   _tot   et   ct	   chr_table(    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR   ©   sD    1 			 	!	c         C`  s>   |  j  } x. t | ƒ D]  } | t | ƒ d k r t Sq Wt S(   s:   True if all characters of the string are in this alphabet.iÿ   (   R   t   strR'   t   Falset   True(   R/   t   stringt   tablet   s(    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt
   alphabeticñ   s
    	c         C`  s   |  j  | S(   s9    The n'th character in the alphabet (zero indexed) or \0 (   R   (   R/   R6   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR"   ù   s    c         C`  s   |  j  t | ƒ S(   sg   The ordinal position of the character c in this alphabet,
        or 255 if no such character.
        (   R   R'   (   R/   R:   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR'   ý   s    c         C`  s<   g  | D] } |  j  | ^ q } d j | ƒ } t | |  ƒ S(   s9   Convert a sequence of ordinals into an alphabetic string.R   (   R   R    R   (   R/   t   sequence_of_intsR6   R:   RA   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   chrs  s     c         C`  s7   t  | ƒ } | j |  j ƒ } t d t | ƒ ƒ } | S(   s;   Convert an alphabetic string into a byte array of ordinals.t   B(   R<   t	   translateR   R   R   (   R/   R?   RA   R5   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   ords	  s    c         C`  s4   |  j  | ƒ s t d ƒ ‚ n  |  j |  j | ƒ ƒ S(   s€   Normalize an alphabetic string by converting all alternative symbols 
        to the canonical equivalent in 'letters'.
        s   Not an alphabetic string.(   RB   R(   RD   RG   (   R/   R?   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt	   normalize  s    c         C`  s
   t  |  ƒ S(   s%    Letters of the alphabet as a string.(   R<   (   R/   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR-     s    c         C`  sJ   g  } | j  |  j ƒ x$ |  j D] \ } } | j  | ƒ q  Wd j | ƒ S(   s-    All allowed letters, including alternatives.R   (   R)   R   R   R    (   R/   t   lett   keyt   value(    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   _all_letters  s
    c         C`  s    d |  j  d t |  j ƒ d S(   Ns   Alphabet( 's   ', zips    )(   R   t   reprR   (   R/   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   __repr__%  s    c         C`  s   t  |  j ƒ S(   N(   R<   R   (   R/   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   __str__(  s    c         C`  s   t  |  j ƒ S(   N(   t   lenR   (   R/   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   __len__+  s    c         C`  s#   t  | d ƒ s t S|  j | j k S(   NR   (   t   hasattrR=   R   (   R/   t   other(    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   __eq__.  s     c         C`  s   |  j  | ƒ S(   N(   RT   (   R/   RS   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   __ne__2  s    c         C`  s   t  |  j ƒ S(   N(   t   iterR   (   R/   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   __iter__5  s    c         C`  s   |  j  | S(   N(   R   (   R/   RJ   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   __getitem__8  s    c         C`  s   t  t |  j ƒ ƒ S(   N(   t   hasht   tupleR   (   R/   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   __hash__;  s    c         C`  sˆ   | d k r t t t f } n  d d l } g  | D]. } t |  j | ƒ ƒ | j t | ƒ ƒ ^ q1 } | j	 t
 | ƒ ƒ } | | } | S(   sœ   Returns the most appropriate unambiguous protein, RNA or DNA alphabet
        for a Seq or SeqList. If a list of alphabets is supplied, then the best alphabet
        is selected from that list.

        The heuristic is to count the occurrences of letters for each alphabet and 
        downweight longer alphabets by the log of the alphabet length. Ties
        go to the first alphabet in the list.

        i    N(   R#   R   R   R   t   matht   sumt   tallyt   logRP   t   indext   max(   t   seqst	   alphabetsR\   R5   t   scoret   best(    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   which>  s    ;
N(   t   __name__t
   __module__t   __doc__t	   __slots__R#   R   RB   R"   R'   RD   RG   RH   R-   RL   RN   RO   RQ   RT   RU   RW   RX   R[   t   staticmethodRf   (    (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR   ž   s*   H																s   ACDEFGHIKLMNOPQRSTUVWYBJZX*-s   acdefghiklmnopqrstuvwybjzx?.~s   ACDEFGHIKLMNOPQRSTUVWYBJZXX--s   ACGTURYSWKMBDHVN-s   acgturyswkmbdhvnXx?.~s   ACGTURYSWKMBDHVNNNN--s   ACGTRYSWKMBDHVN-s   acgtryswkmbdhvnXx?.~Uus   ACGTRYSWKMBDHVNNNN--TTs   ACGURYSWKMBDHVN-s   acguryswkmbdhvnXx?.~Tts   ACGURYSWKMBDHVNNNN--UUs   ACGTN-s    acgtryswkmbdhvnXx?.~TtRYSWKMBDHVs    ACGTNNNNNNNNNNNNNN--TTNNNNNNNNNNs   ACDEFGHIKLMNPQRSTVWYX*-s   acdefghiklmnpqrstvwyx?.~BbZzUus   ACDEFGHIKLMNPQRSTVWYXX--XXXXCCt   ACGTt   acgtt   ACGUt   acgut   ACDEFGHIKLMNPQRSTVWYt   acdefghiklmnopqrstuvwyt   ACDEFGHIKLMNOPQRSTUVWYs&   ACGTRYSWKMBDHVN-acgtUuryswkmbdhvnXx?.~s&   TGCAYRSWMKVHDBN-tgcaAayrswmkvhdbnXx?.~c           B`  s  e  Z d  Z e d d d „ Z e d „  ƒ Z d „  Z d d „ Z	 d „  Z
 d „  Z d „  Z d „  Z d	 „  Z d
 „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d d d „ Z d „  Z d „  Z d „  Z d „  Z d d „ Z d d „ Z RS(   sd   An alphabetic string. A subclass of "str" consisting solely of
    letters from the same alphabet.

    Attributes:
        alphabet    -- A string or Alphabet of allowed characters.
        name        -- A short string used to identify the sequence.
        description -- A string describing the sequence   
        
    Authors :
        GEC 2005
    c         C`  sŒ   t  j |  | ƒ } | d  k r' t } n  t | t ƒ sE t | ƒ } n  | j | ƒ sm t d | | f ƒ ‚ n  | | _ | | _	 | | _
 | S(   Ns    Sequence not alphabetic %s, '%s'(   R<   R   R#   R   t
   isinstanceR   RB   R(   t	   _alphabett   namet   description(   R,   t   objt   alphabetRu   Rv   R/   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR   ’  s    				c         C`  s   |  j  S(   N(   Rt   (   R/   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyRx   §  s    c         C`  s   |  j  j |  ƒ S(   s]    Convert sequence to an array of integers 
        in the range [0, len(alphabet) ) 
        (   Rx   RG   (   R/   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyRG   ®  s    c         C`  sn   | s |  j  } n  t | ƒ } d g | } | j |  ƒ } x- | D]% } | | k  rA | | c d 7<qA qA W| S(   sã   Counts the occurrences of alphabetic characters.
                
        Arguments:
        - alphabet -- an optional alternative alphabet

        Returns :
            A list of character counts in alphabetic order.
        i    i   (   Rx   RP   RG   (   R/   Rx   t   Lt   countsRG   R6   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR^   ´  s    
  c         C`  s(   |  j  } | t j |  | | ƒ |  j ƒ S(   N(   t	   __class__R<   t   __getslice__Rx   (   R/   R4   t   jR,   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR|   Ë  s    	c         C`  s%   |  j  } | t j |  | ƒ |  j ƒ S(   N(   R{   R<   RX   Rx   (   R/   RJ   R,   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyRX   Ï  s    	c         C`  s%   |  j  } | t j |  | ƒ |  j ƒ S(   N(   R{   R<   t   __add__Rx   (   R/   RS   R,   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR~   Ó  s    	c         C`  s%   |  j  } | t j |  | ƒ |  j ƒ S(   N(   R{   R<   R~   Rx   (   R/   RS   R,   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   __radd__Ø  s    	c         C`  s+   |  j  } | t t |  ƒ j | ƒ |  j ƒ S(   N(   R{   t   superR   R    Rx   (   R/   t   str_listR,   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR    Ý  s    	c         C`  s9   t  | d ƒ s t S|  j | j k r) t St j |  | ƒ S(   NRx   (   RR   R=   Rx   R<   RT   (   R/   RS   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyRT   á  s
     c         C`  s   |  j  | ƒ S(   N(   RT   (   R/   RS   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyRU   ç  s    c         C`  s
   t  |  ƒ S(   s(    Converts Seq to a raw string. 
        (   R<   (   R/   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   tostringê  s    c         C`  s&   |  j  } | |  d d d … |  j ƒ S(   sª   Return the reversed sequence. 
        
        Note that this method returns a new object, in contrast to
        the in-place reverse() method of list objects.
        Niÿÿÿÿ(   R{   Rx   (   R/   R,   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   reverseñ  s    	c         C`  s   |  j  d ƒ S(   Ns   -.~(   t   remove(   R/   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   ungapú  s    c         `  sP   |  j  } d j ‡  f d †  t |  ƒ Dƒ ƒ } | | j t d d ƒ ƒ |  j ƒ S(   s]   Return a new alphabetic sequence with all characters in 'delchars'
         removed.
        R   c         3`  s'   |  ] } | t  ˆ  ƒ k r | Vq d  S(   N(   t   set(   t   .0t   char(   t   delchars(    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pys	   <genexpr>  s    (   R{   R    R<   RF   R   Rx   (   R/   R‰   R,   t   cleanseq(    (   R‰   s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR„   þ  s    	%c         C`  s7   |  j  } t d d ƒ } | t |  ƒ j | ƒ |  j ƒ S(   s*   Return a lower case copy of the sequence. R   R   (   R{   R   R<   RF   Rx   (   R/   R,   t   trans(    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   lower  s    	c         C`  s7   |  j  } t d d ƒ } | t |  ƒ j | ƒ |  j ƒ S(   s*   Return a lower case copy of the sequence. R   R   (   R{   R   R<   RF   Rx   (   R/   R,   R‹   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   upper  s    	R   t   Xc         C`  sn   t  | ƒ } t  | ƒ d k r- t d ƒ ‚ n  | | } t | | ƒ } |  j } | t |  ƒ j | ƒ |  j ƒ S(   s‡   Replace all occurrences of letters with the mask character.
        The default is to replace all lower case letters with 'X'.
        i   s   Mask should be single character(   RP   R(   R   R{   R<   RF   Rx   (   R/   R-   t   maskt   LLt   toR‹   R,   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR     s    
	c         C`  s#   d d l  m } | j ƒ  j |  ƒ S(   sð   Translate a nucleotide sequence to a polypeptide using full
        IUPAC ambiguities in DNA/RNA and amino acid codes, using the
        standard genetic code. See corebio.transform.GeneticCode for
        details and more options.
        i   (   t   GeneticCode(   t	   transformR’   t   stdRF   (   R/   R’   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyRF     s    c         C`  s#   d d l  m } | j ƒ  j |  ƒ S(   s¬   Translate a protein sequence back into coding DNA, using the
        standard genetic code. See corebio.transform.GeneticCode for
        details and more options.
        i   (   R’   (   R“   R’   R”   t   back_translate(   R/   R’   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR•   )  s    c         C`  s   |  j  ƒ  j ƒ  S(   sq   Returns reversed complementary nucleic acid sequence (i.e. the other
        strand of a DNA sequence.) 
        (   Rƒ   t
   complement(   R/   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   reverse_complement2  s    c         C`  sX   t  j |  j ƒ s! t d ƒ ‚ n  t j |  t ƒ } |  j } | | |  j |  j |  j	 ƒ S(   s,   Returns complementary nucleic acid sequence.s   Incompatable alphabets(
   R   RB   Rx   R(   R<   RF   t   _complement_tableR{   Ru   Rv   (   R/   RA   R,   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR–   8  s
    	c         c`  sŽ   t  |  ƒ | k  r d S|  j j |  ƒ j ƒ  } xY t d t  | ƒ | d ƒ D]: } | | | | !} | d k s~ | j | ƒ rL | VqL qL Wd S(   sû   Return an iteration over all subwords of length k in the sequence. If an optional
        alphabet is provided, only words from that alphabet are returned.
        
        >>> list(Seq("abcabc").words(3))
        ['abc', 'bca', 'cab', 'abc']
        Ni    i   (   RP   Rx   RH   R‚   R!   R#   RB   (   R/   t   kRx   t   seqR4   t   word(    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   words@  s     $c         C`  s2   d d l  m } t |  j | | ƒ ƒ } | | ƒ S(   s¼   Return a count of all subwords in the sequence.
        
        >>> from corebio.seq import *
        >>> Seq("abcabc").word_count(3)
        [('abc', 2), ('bca', 1), ('cab', 1)]
        i   (   t   group_count(   t   utilsR   t   sortedRœ   (   R/   R™   Rx   R   Rœ   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt
   word_countS  s    N(   Rg   Rh   Ri   R   R#   R   t   propertyRx   RG   R^   R|   RX   R~   R   R    RT   RU   R‚   Rƒ   R…   R„   RŒ   R   R   RF   R•   R—   R–   Rœ   R    (    (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR   ƒ  s4   																	
				c           B`  s_   e  Z d  Z d d d g Z g  d	 d	 d	 d „ Z d „  Z d	 d „ Z d	 d „ Z d	 d „ Z	 RS(
   s    A list of sequences. 
    Rx   Ru   Rv   c         C`  s/   t  j |  | ƒ | |  _ | |  _ | |  _ d  S(   N(   t   listt   __init__Rx   Ru   Rv   (   R/   t   alistRx   Ru   Rv   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR£   h  s    		c         C`  s‰   t  |  ƒ d k r t S|  j } | d k r; |  d j } n  t  |  d ƒ } x7 |  D]/ } t  | ƒ | k rn t S| j | k rR t SqR Wt S(   s2   Are all sequences of the same length and alphabet?i    N(   RP   R>   Rx   R#   R=   (   R/   t   ARy   RA   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt	   isaligneds  s     	   c         C`  sX   | s |  j  } n  | s' t d ƒ ‚ n  g  } x$ |  D] } | j | j | ƒ ƒ q4 W| S(   s<    Convert sequence list into a 2D array of ordinals.
        s   No alphabet(   Rx   R(   R)   RG   (   R/   Rx   R™   RA   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyRG     s      c         C`  sl   | s |  j  } n  | s' t d ƒ ‚ n  g  t g  |  D] } | j | ƒ ^ q4 Œ  D] } t | ƒ ^ qP } | S(   sÓ   Counts the occurrences of alphabetic characters.

        Arguments:
            - alphabet -- an optional alternative alphabet

        Returns :
        A list of character counts in alphabetic order.
        s   No alphabet(   Rx   R(   R$   R^   R]   (   R/   Rx   RA   R:   Rz   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR^   ‹  s    	  Ac         C`  s
  | s |  j  } n  | s' t d ƒ ‚ n  t | ƒ } |  j | ƒ } t | d ƒ } g  t d | ƒ D] } d g | ^ qb } xo | D]g } t | ƒ | k r© t d ƒ ‚ n  x= t | ƒ D]/ \ } }	 |	 | k  r¶ | | |	 c d 7<q¶ q¶ Wq‚ Wd d l m }
 |
 | | ƒ S(   sg   Counts the occurrences of characters in each column.

        Returns: Motif(counts, alphabet)
        s   No alphabeti    s6   Sequences are of incommensurate lengths. Cannot tally.i   (   t   Motif(   Rx   R(   RP   RG   R!   R&   t   matrixR§   (   R/   Rx   t   NRG   Ry   t   lRz   t   oR}   R6   R§   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   profile›  s       )  N(
   Rg   Rh   Ri   Rj   R#   R£   R¦   RG   R^   R¬   (    (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR   b  s   	
c         C`  s   t  |  d t ƒS(   sE   Create an alphabetic sequence representing a stretch of DNA.    
    Rx   (   R   R   (   R?   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR
   ²  s    c         C`  s   t  |  d t ƒS(   sE   Create an alphabetic sequence representing a stretch of RNA.    
    Rx   (   R   R   (   R?   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR	   ·  s    c         C`  s   t  |  d t ƒS(   sM   Create an alphabetic sequence representing a stretch of polypeptide.    
    Rx   (   R   R   (   R?   (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyR   ¼  s    N("   Ri   t
   __future__R    R   R   t   moremathR   R   t   _py3kR   R   t   __all__R   R   R#   R   R$   R   R   R   R   R   R   R   R   R   R˜   R<   R   R¢   R   R
   R	   R   (    (    (    s5   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/seq.pyt   <module>y   sh   			»	ßP		