3
c14                 @   s   d dl Z d dlZ d dlZd dlZd dlZd dlZdZdZedkrHe	d ddd	Z
d
d Zdd Zdd Zdd Zdd Zdd Zdd ZdddZdS )    NzGraham Alvare, Brian FristenskyzSun Mar 18 14:57:59 CDT 2012Z__main__zPHYLIP Library moduleTFc             C   s   t | d}t dd}|j|j  |j  t dd}|j|j  |j  |rt dd}|j|j  |j  |r|jd |jd |jd |jd |jd |jd |jd |j  dS )a  
    Merge messages from MSGFILE to the outfile generated by a Phylip program.
    If extra is included as a parameter, this appends a message indicating
    that the output is a consensus tree, whose branch lengths are the
    bootstrap values, NOT the actual branch lengths.
    wMSGFILEroutfilezoutfile.consense 
z<>>>> THIS TREEFILE IS A CONSENSUS TREE, WHOSE BRANCH LENGTHSz5>>>> ARE BOOTSTRAP VALUES, NOT ACTUAL BRANCH LENGTHS.z>>>> TO GENERATE BRANCH LENGTHSz6>>>> USE TREE FILE AS INPUT FOR DNAML OR OTHER PROGRAMz>>>> USING THE USERTREE OPTIONNr   r   z=>>>> THIS TREEFILE IS A CONSENSUS TREE, WHOSE BRANCH LENGTHS
z6>>>> ARE BOOTSTRAP VALUES, NOT ACTUAL BRANCH LENGTHS.
z >>>> TO GENERATE BRANCH LENGTHS
z7>>>> USE TREE FILE AS INPUT FOR DNAML OR OTHER PROGRAM
z>>>> USING THE USERTREE OPTION
)open
writelines	readlinesclosewrite)Zoutfile_pathZconsenseZextraZ	h_OUTFILEZ	h_MSGFILEZh_smalloutfileZh_outfile_consense r   '/home/psgendb/BIRCHDEV/script/phylip.py	merge_msg   s(    










r   c             C   s&   t | d}|j j d }|j  |S )zIRead first line of a Phylip file to find out how many sequences there arer   r   )r   readlinesplitr   )infileZh_infileNUMSEQr   r   r   
get_numseq:   s    
r   c             C   sP   t | }|dkr2||kr2|jd |j| d  nd}|jdt| d  |S )z-Make sure OUTGROUP is not greater than NUMSEQ   or   zOUTGROUP = zo
)intr   str)OUTGROUPr   comfile	h_msgfileZtempoutgroupr   r   r   do_outgroupB   s    
r   c              C   s$   t jdd} t| d d d }|S )a1  Generate a random integer as needed by Phylip programs.
    These numbers are used by Phylip programs as seeds for a random
    number stream. They must be odd, in the form 4n + 1. Return value
    is an integer between 0 and 2e16 -1 (which is 65535). Although the
    Phylip Main document claims that 32-bit random numbers are acceptible,
    at least one program, PARS, will only take 16-bit random numbers.
    Use floor division by 4 to generate a whole number quotient, multiply the quotient
    by 4 to generate an even number, and add 1 to make it odd.  r   i     r   )randomZrandintr   )ZpseedZprandr   r   r   phylip_randomN   s    
r   c
             C   s>  t j }
dtt j  }t j| tj| t jj|d t j	| t
dd}t|dk sdt|dkrhd}|jd |jt|d  t|d	k st|dkrd	}t|d	kr|jd
 |jt|d  |dkr|jd nr|dkr|jd |jd nT|dkr|jd |jd n4|dkrD|jd |jd |jd n
|dkrN|dkrn|dkrh|jd n|dkr|jd |dkrh|jd n|dkr|jd |jd d}n|dkr|jd |jd |jd d}n~|dkr"|jd |jd |jd |jd d}nF|dkrd|jd |jd |jd |jd |jd d}nd}|jd |jt|d  |dkr|jd |jd t }|jt|d  |j  t
dd}tjdg|d}|j  |j  |dkrtjd t jj|
|	 ntjd!t jj|
|	 t j	|
 tj|d" d#S )$a  Run Seqboot. Notes: SEQBOOT reads interleaved sequences by default, but can read
    sequential files using the "I" setting. By default, SEQBOOT
    writes datasets to outfile, but will write weights to 'outweights'
    if you set the "S" option. Weight files are always sequential.
    zSEQBOOT.r   ZSeqbootComfiler      d   z%
r   r   zB
mzd
r   Rgsbyeszs
dzj
psnopopwZrewzr
zi
zy
seqboot)stdinZ
outweightsr   TN)osgetcwdr   getpidmkdirshutilcopypathjoinchdirr   r   r   r   r   
subprocessPopenwaitmovermtree)INFILEDATATYPERSEEDMETHOD
REPLICATESPERCENT	BLOCKSIZEZ
OUTWEIGHTS	OUTFORMATOUTFILESTARTDIRTEMPDIR	comfile_hZtempseedpr   r   r   r-   _   s    










































r-   c             C   sV   t t }|jdt | d | d  | jd | j|d  | jt |d  dS )zJumble - When multiple datasets are analyzed, protpars automatically
    jumbles, and prompts for a random number seed for jumbling. Otherwise,
    jumbling must be explicitly set.zJUMBLING SEQUENCE ORDER z ITERATIONS, SEED=r   jNzj
)r   r   r   )r   r   NUMJUMZ	tempjseedr   r   r   jumble   s
    

rL   c       
      C   sh  t  }t|}t|}	| dkr,|jd n8| dkr| dkrv|jd|	 d | d  t|dkr|jd	| d  n|jd
|	 d | d  t|dk r|jd| d d  tjdd td||| |||d|d
 n| dkrd| dkr
|jd|	 d | d  n@| dkr0|jd|	 d | d  n|jd|	 d | d  td||| |||d|d
 dS )zRun SEQBOOT to generate resampled datasets. Output is weights, used by Phylip programs
    to generate datasets on the fly. If you want SEQBOOT to generate actual datasets instead of
    weights, use weightless_resample instead.n r   r&   r(   zRESAMPLING: Bootstrap, z REPLICATES, SEED=r   zResampling in blocks of z$RESAMPLING: Delete-half Jacknifing, r!   zPartial Resampling: zpercent of sites sampledzinfile.tempr   r'   Zweightsr)   r+   r,   z0RESAMPLING: Permute species for each character, z%RESAMPLING: Permute character order, z$RESAMPLING: Permute within species, r*   Nz 
)r&   r(   )r)   r+   r,   )r   r   r   r   r3   Zcopyfiler-   )
r@   rB   rA   rC   r   r>   rD   BseedBseedStrRepStrr   r   r   stdresample   s,    


rR   c       
      C   s   t  }t|}t|}	| dkr*|jd n| dkr| dkrX|jd	|	 d
 | d  n| dkr||jd|	 d
 | d  n| dkr|jd|	 d
 | d  nb| dkr|jd|	 d
 | d  n|jd|	 d
 | d  t|dk r|jd| d d  td||| |||d|d
 dS )z Generate terminal input for running seqboot to generate randomized sequence files, rather than weight files
    and run seqbootrM   rN   r   r&   r(   r)   r+   r,   z0RESAMPLING: Permute species for each character, z REPLICATES, SEED=z%RESAMPLING: Permute character order, z$RESAMPLING: Permute within species, zRESAMPLING: Bootstrap, z$RESAMPLING: Delete-half Jacknifing, r!   zPartial Resampling: zpercent of sites sampledzinfile.tempr*   r   Nz 
)r&   r(   r)   r+   r,   )r   r   r   r   r-   )
r@   rB   rA   rC   	msgfile_hr>   rD   rO   rP   rQ   r   r   r   weightless_resample  s$    
rT   c             C   s   |rt tjj|dd}ntj}t | d}|j }|j  d}|d jdd
krx"|D ]}|jddkrV|d }qVW |j	t
|d  x|D ]}|j	| qW |j  d	S )zUsed by dnaml.py and protml.py.
    Make sure that treefile begins with number of trees on first
    line of file. If first line in file has parentheses, the
    number must be added.Zintreear   r   (r   ;r   NrX   )r   r/   r5   r6   sysstdoutr
   r   findr   r   )ZUFNrG   r   Zinfile_hZufn_hZ	ufn_linesZsc_countliner   r   r   ufn4  s    


r]   )TF)T)r/   os.pathr   r3   r8   rY   
__author__Z__date____name__printr   r   r   r   r-   rL   rR   rT   r]   r   r   r   r   <module>   s&   
% ( 