ó
¯V^c           @   sÆ   d  d l  Z  d  d l Z  d  d l Z d  d l Z d  d l Z d  d l Z d Z d Z e d k rh d GHn  e	 e
 d „ Z d „  Z d „  Z d	 „  Z d
 „  Z d „  Z d „  Z d „  Z e	 d „ Z d S(   iÿÿÿÿNs   Graham Alvare, Brian Fristenskys   Sun Mar 18 14:57:59 CDT 2012t   __main__s   PHYLIP Library modulec         C   s"  t  |  d ƒ } t  d d ƒ } | j | j ƒ  ƒ | j ƒ  t  d d ƒ } | j | j ƒ  ƒ | j ƒ  | rt  d d ƒ } | j | j ƒ  ƒ | j ƒ  | r| j d ƒ | j d ƒ | j d d ƒ | j d	 d ƒ | j d
 d ƒ | j d d ƒ | j d d ƒ qn  | j ƒ  d S(   s  
    Merge messages from MSGFILE to the outfile generated by a Phylip program.
    If extra is included as a parameter, this appends a message indicating
    that the output is a consensus tree, whose branch lengths are the
    bootstrap values, NOT the actual branch lengths.
    t   wt   MSGFILEt   rt   outfiles   outfile.consenset    s   
s<   >>>> THIS TREEFILE IS A CONSENSUS TREE, WHOSE BRANCH LENGTHSs5   >>>> ARE BOOTSTRAP VALUES, NOT ACTUAL BRANCH LENGTHS.s   >>>> TO GENERATE BRANCH LENGTHSs6   >>>> USE TREE FILE AS INPUT FOR DNAML OR OTHER PROGRAMs   >>>> USING THE USERTREE OPTIONNs   
s   
(   t   opent
   writelinest	   readlinest   closet   write(   t   outfile_patht   consenset   extrat	   h_OUTFILEt	   h_MSGFILEt   h_smalloutfilet   h_outfile_consense(    (    s'   /home/psgendb/BIRCHDEV/script/phylip.pyt	   merge_msg   s(    


c         C   s3   t  |  d ƒ } | j ƒ  j ƒ  d } | j ƒ  | S(   sI   Read first line of a Phylip file to find out how many sequences there areR   i    (   R   t   readlinet   splitR	   (   t   infilet   h_infilet   NUMSEQ(    (    s'   /home/psgendb/BIRCHDEV/script/phylip.pyt
   get_numseq:   s    
c         C   sj   t  |  ƒ } | d k rE | | k rE | j d ƒ | j |  d ƒ n d } | j d t | ƒ d ƒ | S(   s-   Make sure OUTGROUP is not greater than NUMSEQi   t   os   
s   OUTGROUP = s   o
(   t   intR
   t   str(   t   OUTGROUPR   t   comfilet	   h_msgfilet   tempoutgroup(    (    s'   /home/psgendb/BIRCHDEV/script/phylip.pyt   do_outgroupB   s    c          C   s.   t  j d d ƒ }  t |  d d ƒ d } | S(   s1  Generate a random integer as needed by Phylip programs.
    These numbers are used by Phylip programs as seeds for a random
    number stream. They must be odd, in the form 4n + 1. Return value
    is an integer between 0 and 2e16 -1 (which is 65535). Although the
    Phylip Main document claims that 32-bit random numbers are acceptible,
    at least one program, PARS, will only take 16-bit random numbers.
    Use floor division by 4 to generate a whole number quotient, multiply the quotient
    by 4 to generate an even number, and add 1 to make it odd.  i    iÿÿ  i   i   (   t   randomt   randintR   (   t   pseedt   prand(    (    s'   /home/psgendb/BIRCHDEV/script/phylip.pyt   phylip_randomN   s    
c
         C   sg  t  j ƒ  }
 d t t  j ƒ  ƒ } t  j | ƒ t j |  t  j j | d ƒ ƒ t  j	 | ƒ t
 d d ƒ } t | ƒ d k  sŽ t | ƒ d k r— d } n  | j d ƒ | j t | ƒ d ƒ t | ƒ d	 k  sß t | ƒ d k rè d	 } n  t | ƒ d	 k r!| j d
 ƒ | j t | ƒ d ƒ n  | d k r=| j d ƒ n— | d k rf| j d ƒ | j d ƒ nn | d k r| j d ƒ | j d ƒ nE | d k rÅ| j d ƒ | j d ƒ | j d ƒ n | d k rÔn  | d k rÿ| d k rG| j d ƒ qGnH| d k r7| j d ƒ | d k rG| j d ƒ qGn| d k rf| j d ƒ | j d ƒ d } ná | d k r¢| j d ƒ | j d ƒ | j d ƒ d } n¥ | d k rë| j d ƒ | j d ƒ | j d ƒ | j d ƒ d } n\ | d k rA| j d ƒ | j d ƒ | j d ƒ | j d ƒ | j d ƒ d } n d } | j d ƒ | j t | ƒ d ƒ | d k r‡| j d ƒ n  | j d ƒ t ƒ  } | j t | ƒ d ƒ | j ƒ  t
 d d ƒ } t j d g d | ƒ} | j ƒ  | j ƒ  | d k r't j d  t  j j |
 |	 ƒ ƒ n t j d! t  j j |
 |	 ƒ ƒ t  j	 |
 ƒ t j | t ƒ d" S(#   s  Run Seqboot. Notes: SEQBOOT reads interleaved sequences by default, but can read
    sequential files using the "I" setting. By default, SEQBOOT
    writes datasets to outfile, but will write weights to 'outweights'
    if you set the "S" option. Weight files are always sequential.
    s   SEQBOOT.R   t   SeqbootComfileR   i   id   s   %
s   
i   s   B
t   ms   d
R   t   Rt   gt   st   bt   yess   s
t   ds   j
t   pst   not   pot   pwt   rews   r
s   i
s   y
t   seqboott   stdint
   outweightsR   N(   t   ost   getcwdR   t   getpidt   mkdirt   shutilt   copyt   patht   joint   chdirR   R   R
   R%   R	   t
   subprocesst   Popent   waitt   movet   rmtreet   True(   t   INFILEt   DATATYPEt   RSEEDt   METHODt
   REPLICATESt   PERCENTt	   BLOCKSIZEt
   OUTWEIGHTSt	   OUTFORMATt   OUTFILEt   STARTDIRt   TEMPDIRt	   comfile_ht   tempseedt   p(    (    s'   /home/psgendb/BIRCHDEV/script/phylip.pyR3   _   s˜    
$	$						


"c         C   sk   t  t ƒ  ƒ } | j d t  | ƒ d | d ƒ |  j d ƒ |  j | d ƒ |  j t  | ƒ d ƒ d S(   s´   Jumble - When multiple datasets are analyzed, protpars automatically
    jumbles, and prompts for a random number seed for jumbling. Otherwise,
    jumbling must be explicitly set.s   JUMBLING SEQUENCE ORDER s    ITERATIONS, SEED=s   
t   jNs   j
(   R   R%   R
   (   R   R   t   NUMJUMt	   tempjseed(    (    s'   /home/psgendb/BIRCHDEV/script/phylip.pyt   jumbleà   s
    #c   
      C   sÏ  t  ƒ  } t | ƒ } t | ƒ }	 |  d k r= | j d ƒ nŽ|  d k r"|  d k rŸ | j d |	 d | d ƒ t | ƒ d k r¼ | j d	 | d ƒ q¼ n | j d
 |	 d | d ƒ t | ƒ d k  rê | j d | d d ƒ n  t j d d ƒ t d | | |  | | | d | d ƒ
 n© |  d k rË|  d k rZ| j d |	 d | d ƒ nI |  d k r†| j d |	 d | d ƒ n | j d |	 d | d ƒ t d | | |  | | | d | d ƒ
 n  d S(   sä   Run SEQBOOT to generate resampled datasets. Output is weights, used by Phylip programs
    to generate datasets on the fly. If you want SEQBOOT to generate actual datasets instead of
    weights, use weightless_resample instead.t   nt    s   
R+   R-   s   RESAMPLING: Bootstrap, s    REPLICATES, SEED=i   s   Resampling in blocks of s$   RESAMPLING: Delete-half Jacknifing, id   s   Partial Resampling: s   percent of sites sampleds   infile.tempR   R,   t   weightsR.   R0   R1   s0   RESAMPLING: Permute species for each character, s%   RESAMPLING: Permute character order, s$   RESAMPLING: Permute within species, R/   Ns    
(   R+   R-   (   s   pss   pos   pw(   R%   R   R
   R   R:   t   copyfileR3   (
   RH   RJ   RI   RK   R   RF   RM   t   Bseedt   BseedStrt   RepStr(    (    s'   /home/psgendb/BIRCHDEV/script/phylip.pyt   stdresampleì   s,    	(  c   
      C   sp  t  ƒ  } t | ƒ } t | ƒ }	 |  d k r= | j d ƒ n/|  d k rl|  d k ru | j d	 |	 d
 | d ƒ nÏ |  d k r¡ | j d |	 d
 | d ƒ n£ |  d k rÍ | j d |	 d
 | d ƒ nw |  d k rù | j d |	 d
 | d ƒ n | j d |	 d
 | d ƒ t | ƒ d k  rD| j d | d d ƒ n  t d | | |  | | | d | d ƒ
 n  d S(   s€    Generate terminal input for running seqboot to generate randomized sequence files, rather than weight files
    and run seqbootRX   RY   s   
R+   R-   R.   R0   R1   s0   RESAMPLING: Permute species for each character, s    REPLICATES, SEED=s%   RESAMPLING: Permute character order, s$   RESAMPLING: Permute within species, s   RESAMPLING: Bootstrap, s$   RESAMPLING: Delete-half Jacknifing, id   s   Partial Resampling: s   percent of sites sampleds   infile.tempR/   R   Ns    
(   R+   R-   s   pss   pos   pw(   R%   R   R
   R   R3   (
   RH   RJ   RI   RK   t	   msgfile_hRF   RM   R\   R]   R^   (    (    s'   /home/psgendb/BIRCHDEV/script/phylip.pyt   weightless_resample  s$    	    c         C   sí   | r' t  t j j | d ƒ d ƒ } n	 t j } t  |  d ƒ } | j ƒ  } | j ƒ  d } | d j d ƒ d k rÁ x0 | D]( } | j d ƒ d k r{ | d } q{ q{ W| j	 t
 | ƒ d	 ƒ n  x | D] } | j	 | ƒ qÈ W| j ƒ  d
 S(   s·   Used by dnaml.py and protml.py.
    Make sure that treefile begins with number of trees on first
    line of file. If first line in file has parentheses, the
    number must be added.t   intreet   aR   i    t   (iÿÿÿÿt   ;i   s   
N(   R   R6   R<   R=   t   syst   stdoutR   R	   t   findR
   R   (   t   UFNRP   R   t   infile_ht   ufn_ht	   ufn_linest   sc_countt   line(    (    s'   /home/psgendb/BIRCHDEV/script/phylip.pyt   ufn4  s    !	
(   R6   t   os.pathR!   R:   R?   Rf   t
   __author__t   __date__t   __name__RD   t   FalseR   R   R    R%   R3   RW   R_   Ra   Ro   (    (    (    s'   /home/psgendb/BIRCHDEV/script/phylip.pyt   <module>   s$   %						(	 