ó
Z_Sc           @@ sÌ   d  Z  d d l m Z m Z d d l Z d d l m Z d d l m	 Z	 d d l m
 Z
 m Z m Z d d l m Z d Z d	 e f d „  ƒ  YZ e	 d ƒ Z d e f d „  ƒ  YZ d e f d „  ƒ  YZ d S(   s(    
Arrays indexed by alphabetic strings.
i    (   t   absolute_importt   print_functionN(   t   zipi   (   t   Alphabet(   t   unambiguous_dna_alphabett   unambiguous_rna_alphabett   unambiguous_protein_alphabet(   t   isintt   AlphabeticArrayt   submatrix_alphabett	   SubMatrixt   Motifc           B@ sh   e  Z d  Z d d g Z d d d „ Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d	 „  Z d
 „  Z RS(   sI  An alphabetic array. Wraps a numpy array so that each dimension
    can be associated with an alphabet and indexed with characters or strings.
    
    Attributes :
    - alphabets -- A sequence of alphabets used to index the array
    - array     -- The underlying array object that is indexed.
    
    Examples : 
    
    >>> from corebio.seq import *
    >>> from corebio.matrix import AlphabeticArray
    >>>
    >>> str(protein_alphabet)
    'ACDEFGHIKLMNOPQRSTUVWYBJZX*-'
    >>> matrix = AlphabeticArray( (protein_alphabet, protein_alphabet) )
    >>>
    >>> # Index by character or integer:
    >>> matrix['A', 'C'] = 10 
    >>> matrix[0,1]
    10
    >>>
    >>> # Different alphabets on each dimension:
    >>> import numpy as na    
    >>> a234 = na.zeros( shape = (2,3,4) )
    >>> alpha = ( "AB", "ABC", "ABCD")
    >>> aa = AlphabeticArray(alpha,a234)
    >>> aa['A', 'B', 'C'] = 22
    >>>
    >>> # String indices are converted to integer index arrays:
    ...
    >>> aa['A', 'B', 'ABCD']
    array([ 0,  0, 22,  0])
    
    
    Authors: 
    o GEC 2005, JXG 2006
    
    t	   alphabetst   arrayc         C@ sª  d t  f d „  ƒ  Y} g  } g  } xª | D]¢ } t | t ƒ rM t | ƒ } n  | d k ry | j d ƒ | j | ƒ  ƒ q) t | t ƒ r« | j t | ƒ ƒ | j | ƒ q) | j t | ƒ ƒ | j d ƒ q) Wt | ƒ } | d k rt	 j
 d | d | ƒ } nŒ t	 j | d | ƒ} | j } t | ƒ t | ƒ k rGt d ƒ ‚ n  xD t | | ƒ D]3 \ }	 }
 |	 d k	 rW|	 |
 k rWt d ƒ ‚ qWqWW| |  _ t | ƒ |  _ d S(   sé  
        Args:
        - alphabets -- a list of alphabets (as string or Alphabet objects) to
                    be used to convert strings into indices. The lengths of 
                    the alphabets match the shape of the indexed array. 
                    Alternatively, an integer or None in the list indicate a 
                    non-alphabetic dimension. If None the dimension length is 
                    taken from values argument.
        - values -- An array of values to be indexed. If None a new  
                 array is created. If this argument is not a numpy array
                 then the alphabet list must be explicit (cannot contain 
                 None.)
        - dtype -- An optional numpy type code.
        t   NullAlphabetc           B@ s   e  Z d  „  Z d „  Z RS(   c         S@ s   t  d ƒ ‚ d  S(   Ns(   This dimension does not have an alphabet(   t
   IndexError(   t   selft   key(    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyt   ordl   s    c         S@ s   t  d ƒ ‚ d  S(   Ns(   This dimension does not have an alphabet(   R   (   R   R   (    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyt   ordsn   s    (   t   __name__t
   __module__R   R   (    (    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyR   k   s   	t   shapet   dtypes$   The values array is the wrong shape.N(   t   objectt
   isinstancet   strR   t   Nonet   appendt   lent   intt   tuplet   nat   zerost   asarrayR   t
   ValueErrorR   R   R   (   R   R   t   valuesR   R   t   alphaR   t   at   vshapet   s1t   s2(    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyt   __init__X   s4     		c         C@ s   |  j  j |  j | ƒ ƒ S(   N(   R   t   __getitem__t   _ordkey(   R   R   (    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyR+      s    c         C@ s    |  j  j |  j | ƒ | ƒ d  S(   N(   R   t   __setitem__R,   (   R   R   t   value(    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyR-   ’   s    c         @ sn   ‡  f d †  ‰  t  | t ƒ rV t g  t | |  j ƒ D] \ } } ˆ  | | ƒ ^ q4 ƒ Sˆ  | |  j d ƒ Sd S(   s¤   Convert string indices into integers. Handles characters, strings
        slices with strings, and tuples of the same. Anything else is 
        unchanged.
        c         @ sÙ   |  d  k r d  St |  t ƒ s. t |  t ƒ r… t |  ƒ }  t |  ƒ d k rY | j |  ƒ St |  ƒ d k ro d  St j | j |  ƒ ƒ St |  t	 ƒ rÑ ˆ  |  j
 | ƒ } ˆ  |  j | ƒ } |  j } t	 | | | ƒ S|  Sd  S(   Ni   i    (   R   R   R   R   R   R   R    R"   R   t   slicet   startt   stopt   step(   R   R%   R0   R1   R2   (   t   norm(    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyR3   ›   s      	i    N(   R   R   R   R   (   R   R   t   kR&   (    (   R3   s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyR,   •   s    8c         C@ sÄ   |  j  | ƒ } g  } x– t | ƒ D]ˆ \ } } | d k rV t d |  j j | ƒ } n  t j | ƒ } x5 t t | ƒ | d ƒ D] } | d t j	 f } q€ W| j
 | ƒ q" W|  j j t | ƒ ƒ S(   sµ    Return an array of shape (len(key1), len(key2), ...) whose values       
        are indexed by keys.

        a.outerindex( (I,J,K) )[i,j,k] == a.array[I[i],J[j],K[k]]  

        i    i   .N(   R,   t	   enumerateR   t   rangeR   R   R    R"   R   t   newaxisR   R+   R   (   R   t   keyst	   outerkeyst   iR4   t   j(    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyt   index°   s    	!c         C@ s   |  j  | ƒ } t | | ƒ S(   sÌ   Create a new AlphabeticArray with the given alphabets. The new 
        alphabet must be a subset of the current alphabet. Useful for
        extracting a submatrix or for permuting the alphabet.
        (   R<   R   (   R   t   new_alphabetst	   new_array(    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyt   reindexÅ   s    c         C@ s9   y t  j |  | ƒ SWn t k
 r4 t |  j | ƒ SXd  S(   N(   R   t   __getattr__t   AttributeErrort   getattrR   (   R   t   name(    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyR@   Ñ   s    c         C@ s?   y t  j |  | | ƒ SWn! t k
 r: t |  j | | ƒ SXd  S(   N(   R   t   __setattr__RA   t   setattrR   (   R   RC   R.   (    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyRD   ×   s    N(   R   R   t   __doc__t	   __slots__R   R*   R+   R-   R,   R<   R?   R@   RD   (    (    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyR   ,   s   &7						t   ARNDCQEGHILKMFPSTWYVBZXc           B@ s\   e  Z d  Z d d d d d g Z d	 d	 d	 d	 d	 d „ Z d „  Z e d	 e j	 d „ ƒ Z
 RS(
   sp  A two dimensional array indexed by an Alphabet. Used to hold substitution
    matrices and similar information. 
    
    Various standard substitution matrices are available from the data package
    >>> from corebio import data
    >>> mat = SubMatrix.read(data.data_stream('blosum100'))   
    
    Attr:
    - alphabet     -- An Alphabet
    - array        -- A numpy array
    - name         -- The name of this matrix (if any) as a string.
    - description  -- The description, if any.
    - scale        -- The scale constant of a log-odds matrix, if known.     
    
    Authors: 
    o GEC 2005, JXG 2006
    
    t   alphabetR   RC   t   descriptiont   scalec         C@ sJ   t  j |  | | f | | ƒ t | ƒ |  _ | |  _ | |  _ | |  _ d  S(   N(   R   R*   R   RI   RC   RJ   RK   (   R   RI   R   t   typeofRC   RJ   RK   (    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyR*      s
    		c         C@ s   t  j |  | | f ƒ S(   N(   R   R?   (   R   RI   (    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyR?   	  s    c   
   
   C@ s@  | d k r t } n  t | ƒ } t j | | f | ƒ } d } xit |  ƒ D][\ } } | j ƒ  sL | d d k sL | d d k rŠ qL n  | j ƒ  } | d | d k r° qL n  | d j ƒ  rø | d | | k rø t	 d | | d | | f ƒ ‚ n  | d j ƒ  r| d } n  t | ƒ d k r4| d  } n  t | ƒ | k rYt	 d | ƒ ‚ n  x1 t
 d | ƒ D]  }	 t | |	 ƒ | | |	 f <qiW| d 7} | | k rL PqL qL W| | k rÆt	 d	 ƒ ‚ n  xj t
 d | ƒ D]Y } xP t
 d | ƒ D]? }	 | | |	 f | |	 | f k rìt	 d
 | |	 f ƒ ‚ qìqìWqÖWt | | ƒ S(   sf   Parse and return a substitution matrix
        
        Arguments:
        - fin       --  matrix file 
        - alphabet  -- The set of substitution characters. Default: ''
        -  typeof    -- A numpy type or typecode.
        Returns:
        -  A numpy matrix of substitution scores
        Raises:
        -  ValueError on unreadable input
        i    t   #t   *i   s(   Incompatible alphabet: line %d : %s %s: i   i   s%   SubMatrix matrix parse error: line %ds   Premature EOFs*   Substitution matrix is asymmetric! (%d,%d)N(   R   R	   R   R    R!   R5   t   isspacet   splitt   isalphaR#   R6   t   floatR
   (
   t   finRI   RL   t   Lt   matrixR:   t   linenumt   linet   cellsR;   (    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyt   read  sD     	, $  
  N(   R   R   RF   RG   R   R*   R?   t   staticmethodR    t   float64RY   (    (    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyR
   ä   s   		c           B@ sk   e  Z d  Z d d d d d d „ Z e d „  ƒ Z d „  Z d „  Z d „  Z	 d „  Z
 e d d „ ƒ Z RS(	   s_  A two dimensional array where the second dimension is indexed by an 
    Alphabet. Used to represent sequence motifs and similar information.

    
    Attr:
    - alphabet     -- An Alphabet
    - array        -- A numpy array
    - name         -- The name of this motif (if any) as a string.
    - description  -- The description, if any.
    
    c         C@ s;   t  j |  d  | f | | ƒ | |  _ | |  _ | |  _ d  S(   N(   R   R*   R   RC   RJ   RK   (   R   RI   R   R   RC   RJ   RK   (    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyR*   i  s    		c         C@ s   |  j  d S(   Ni   (   R   (   R   (    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyRI   p  s    c         C@ s   t  | t j |  d  | f ƒ ƒ S(   N(   R   R   R?   R   (   R   RI   (    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyR?   t  s    c         C@ s   |  j  d d d … |  _  d S(   s   Reverse sequence dataNiÿÿÿÿ(   R   (   R   (    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyt   reverse{  s    c         C@ sw   d d l  m } m } |  j } | | | | ƒ j ƒ  ƒ } d | f |  _ |  j | ƒ } d | f |  _ | j |  _ d S(   s!   Complement nucleic acid sequence.i    (   t   SeqR   N(	   t   corebio.seqR]   R   RI   t
   complementR   R   R?   R   (   R   R]   R   RI   t   complement_alphabett   m(    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyR_   €  s    	c         C@ s   |  j  ƒ  |  j ƒ  d S(   sm   Complements and reverses nucleic acid sequence (i.e. the other strand
         of a DNA sequence.) 
         N(   R\   R_   (   R   (    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyt   reverse_complementŒ  s    
c         C@ sÖ  g  } t  } xŸ |  D]— } | j ƒ  s | d d k r; q n  | j ƒ  } | rs | d d k rs | d d k rs q n  | d d k s“ | d d k r— Pn  t } | j | ƒ q Wt | ƒ d k  rÏ t d ƒ ‚ n  | j d ƒ } t | ƒ } t | ƒ } t | d ƒ }	 | d d k pC| d d k pC| |	 d	 k pC| |	 d k sUt d
 ƒ ‚ n  t | d ƒ }	 xF t d	 t | ƒ ƒ D]/ }
 |	 t | |
 ƒ k r{t d |
 ƒ ‚ q{q{W| d d k sÎ| d d k rÞ| j d ƒ n  t  } t  } x; | D]3 } t	 | ƒ st } n  t
 j | ƒ sñt } qñqñW| rO| rOt d t
 | ƒ ƒ ‚ n  | rj| rjt d ƒ ‚ n  | räxñ t | ƒ D]` \ }
 } t	 | d ƒ rÁ| d d d k rÁt d |
 ƒ ‚ n  | j d ƒ d j | ƒ } q}Wn€ g  } xh t | ƒ D]Z \ }
 } t | d ƒ r;| d d d k r;t d |
 ƒ ‚ n  | j | j d ƒ ƒ q÷Wd j | ƒ } t | ƒ } | r¡| j | ƒ sét d | | f ƒ ‚ qénH t t t f } x' | D] } | j | ƒ r·| } Pq·q·W| sé| } n  t | d ƒ t | ƒ d	 k r'x | D] } | j ƒ  qWn  t | ƒ } t | d ƒ }	 t j | |	 f d t j ƒ} xI t | ƒ D]; } x2 t |	 ƒ D]$ } t | | | ƒ | | | f <qWqnW| rÀ| j ƒ  n  t | | ƒ j | ƒ S(   s]    Parse a sequence matrix from a file. 
        Returns a tuple of (alphabet, matrix)
        i    RM   t   POt   P0t   XXs   //i   s   Vacuous file.i   s   Missing header line!s   Inconsistant length, row %d: s   Can't parse header: %ss   Can't parse headert   Ps*   Expected position as first item on line %dt    s)   Incompatible alphabets: %s , %s (defacto)R   (   t   TrueRO   RP   t   FalseR   R   R#   t   popR6   R   R   RQ   R5   t   joint   ischarR   t
   alphabeticR   R   R   R    R!   R[   RR   t	   transposeR   R?   (   RS   RI   t   itemsR0   RW   t   stufft   headert   hcolst   rowst   colsR:   t   position_headert   alphabet_headert   ht   rt   defacto_alphabetR&   R   RU   t   c(    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyt   read_transfac–  sš     &   @   	 %%		 &N(   R   R   RF   R   R*   t   propertyRI   R?   R\   R_   Rb   RZ   R{   (    (    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyR   \  s   					
(   s   AlphabeticArrays   submatrix_alphabets	   SubMatrixs   Motif(   RF   t
   __future__R    R   t   numpyR    t   corebio._py3kR   t   seqR   R   R   R   t   utilsR   t   __all__R   R   R	   R
   R   (    (    (    s8   /home/psgendb/BIRCHDEV/pkg/weblogo-3.4/corebio/matrix.pyt   <module>   s   ¶x