3
bC                 @   sH  d dl mZ d dlZd dlZd dlZd dlZyd dlmZ W n  ek
r\   d dl	mZ Y nX yd dl
mZ W n   d dlmZ Y nX d dlmZmZmZmZmZ d dlmZmZ d dlmZ d dlmZ d d	lmZmZmZmZmZm Z m!Z!m"Z"m#Z# eej$d
Z%eej$dZ&eej$ddZ'eej$dZ(da)dZ*de* d Z+eej,de+Z-ej.ee-ej, Z/dd Z0dd Z1dd Z2dd Z3dJddZ4dKddZ5dLd!d"Z6dMd#d$Z7dNd%d&Z8dOd'd(Z9d)d* Z:d+d, Z;d-d. Z<dPd/d0Z=d1d2 Z>d3d4 Z?dQd5d6Z@d7d8 ZAd9d: ZBd;d< ZCd=d> ZDdRd?d@ZEdAdB ZFdSdDdEZGdTdFdGZHdHdI ZIdS )U    )with_statementN)OrderedDict)urlopen)joinisfilebasenamedirnamegetsize)qconfigqutils)compile_minimap)get_chr_lengths_from_fastafile)	compile_toolget_dir_for_downloadrelpathget_path_to_programdownload_filedownload_external_toolis_non_empty_filecorrect_nameget_free_memorybwabedtoolsbinsambambaz1.4.1zgridss-z.jarzexternal_tools/gridssc             C   s
   t | tS )N)r   bwa_dirpath)fname r   B/home/psgendb/BIRCHDEV/pkg/quast-5.2.0/quast_libs/ra_utils/misc.py	bwa_fpath-   s    r   c             C   s    t jdkrdnd}tt| | S )NmacosxZ_osxZ_linux)r
   platform_namer   sambamba_dirpath)r   Zplatform_suffixr   r   r   sambamba_fpath1   s    r#   c             C   s
   t | tS )N)r   bedtools_bin_dirpath)r   r   r   r   bedtools_fpath6   s    r%   c               C   s   t sd S tt tS )N)gridss_dirpathr   gridss_fnamer   r   r   r   get_gridss_fpath:   s    r(   gzc             C   s0   t ||| ddr,t|||d |jd dS dS )NT)	move_file)extz  DoneF)r   
unpack_tar	main_info)nameZdownload_urldownloaded_fpathZfinal_dirpathloggerr+   r   r   r   download_unpack_compressed_tar@   s
    
r1   bz2c             C   sj   dd l }|j| d| }|j| |j  t||jd j}ddlm} ||| t	j
| tj|  dS )Nr   zr:)	copy_treeT)tarfileopen
extractallcloser   membersr.   Zdistutils.dir_utilr3   shutilrmtreeosremove)fpathZdst_dirpathr+   r4   tarZtemp_dirpathr3   r   r   r   r,   H   s    



r,   Fc             C   s   t dptdtdg|| dS )Nr   ZBWA)
only_cleanr0   )r   r   r   )r0   r?   r   r   r   compile_bwaU   s    r@   c             C   s"   t dp tdttddg|| dS )Nr   ZBEDtoolsr   )r?   r0   )r%   r   bedtools_dirpathr   )r0   r?   r   r   r   compile_bedtoolsY   s    rB   c             C   s   t ddtg| |datsdS |r>tjjtr:tjtdd dS t }t	j
 r||d kr|t| r|tttds|| jdt  dS dS )NZgridssZGRIDSS)r?   FT)ignore_errorsa  Failed to download binary distribution from https://github.com/ablab/quast/tree/master/external_tools/gridss. QUAST SV module will be able to search trivial deletions only. You can try to download it manually, save the jar archive under %s, and restart QUAST.)r   r'   r&   r;   pathisdirr9   r:   r(   r
   no_svr   r   warning)r0   	bed_fpathr?   Zgridss_fpathr   r   r   download_gridss^   s    rI   c             C   s&   t | |r"t| |r"t| |r"dS dS )NTF)r@   rB   r   )r0   r?   r   r   r   compile_reads_analyzer_toolsr   s    rJ   c             C   s   t |st| t|S |S )N)r   r   r   )output_dirpathr=   r   r   r   get_safe_fpathx   s    rL   c             C   s  t jj| \}}y@|dkr8tj| dd}t|t|}nt| }t|t| }W n tk
rd   dS X t|r|j	d|  |S |}t|dj}	xbt
|D ]V\}
}|
d d	kr|j d	 | }|	j|d
  q|
d d	kr|	jd q|	j| qW W d Q R X W d Q R X |S )N.gz.gziprt)modeFzUsing existing FASTQ file w   r   
   z+
)rM   rN   )r;   rD   splitextgzipr5   r   r   IOErrorr   info	enumeratesplitwrite)r=   name_ending
output_dirr0   r.   r+   handlercorrected_fpathfout_filinefull_read_namer   r   r   correct_paired_reads_names~   s,    "re   c             C   s  g }xTt | D ]F\}}d|d  }d}|r2d}tjj|\}}	y$|	dkr\tj|dd}
nt|}
W n tk
r   |jd	 d
S X |
j }|
j	  |j
 j d }t|dk s|j| rF|j  |jd| d | d | d  t||||}|s|jd dS |dkr*|tjtjj|< n|dkrF|tjtjj|< |j|dd  qW t|dkrx|jd dS |d |d kr|jd|d |d f  dS d
S )Nz/%d   Zforwardreverse.gz.gziprO   )rP   zTCannot check equivalence of paired reads names, BWA may fail if reads are discordantTr      zImproper read names in z (zT reads)! Names should end with /1 (for forward reads) or /2 (for reverse reads) but z1 was found!
QUAST will attempt to fix read names.zFailed correcting read names. FrT   zASomething bad happened and we failed to check paired reads names!z0Paired read names do not match! Check %s and %s!)rh   ri   )rY   r;   rD   rU   rV   r5   rW   noticereadliner7   striprZ   lenendswithrX   re   rG   r
   forward_readsindexreverse_readsappend)reads_fpathstemp_output_dirr0   Zfirst_read_namesidxr=   r\   
reads_type_r+   r^   
first_linerd   r_   r   r   r   paired_reads_names_are_equal   sJ    




r{   c             C   s  t  }t|}t }	tt| t|d }
t| rBt|
 rBd S t|r|jdrtj	t
dddd|gt|
dt|d|d	 n*tj	t
ddd|gt|
dt|d|d	 d
}d}t|
J}xB|D ]:}|jdrtj||d }tj||d }t||	|< qW W d Q R X d}t|t|	kr(d}nxxvt|j |	j D ]`\}}t||d t| krz|	| || krz|||< n |	| || krd}P nd}P q<W |r|r|j|d | d | d d |rdn|  n0|j|d | d | d d |rdn|  d S |S )Nz.headerz.samr   viewz-Hz-SrQ   a)stdoutstderrr0   zSN:(\S+)zLN:(\d+)z@SQr    zNumber of chromosomeszChromosome lengthszChromosome namesz in z and corresponding file z do not match. z#QUAST will try to realign reads to zthe reference genomez+Use SAM file obtained by aligning reads to )dictr   r   r   r   r   r   rp   r   call_subprocessr#   r5   
startswithrefindallintro   zipkeysr   rG   error)rK   fasta_fpathZalignment_fpatherr_pathru   r0   is_referencecorrect_chr_namesZfasta_chr_lengthsZalignment_chr_lengthsZheader_fpathZchr_name_patternZchr_len_patternr`   lchr_namechr_leninconsistencyZ	fasta_chrZsam_chrr   r   r   get_correct_names_for_chroms   sN    



,
.,r   c             C   sv   t | d}x\t|D ]P\}}|dkr(dS |s.q|jd}|d }|d	d  dks`|d
d  dkrdS qW W d Q R X dS )Ni@B T	r   rT   z/1z/2Frk   rk   )r5   rY   rZ   )	sam_fpathsam_inrb   r   fs	read_namer   r   r   all_read_names_correct   s    

 r   c             C   s   t | }t |dp}xh|D ]`}|s&q|jd}|d }|dd  dksX|dd  dkrr|d d	 |d< dj|}|j| qW W d Q R X W d Q R X |S )
NrQ   r   r   rT   z/1z/2rk   rk   rk   )r5   rZ   r   r[   )r   correct_sam_fpathr   Zsam_outr   r   r   r   r   r   clean_read_names  s    


 
"r   c             C   sn   |s
t j}dtdtdt  }tdddt|dt|d|d	|| g}|rT||g7 }tj	|t
|d
|d d S )Nz%dGBd   rT   r   sortz-tz--tmpdirz-mz-or}   )r   r0   )r
   max_threadsminmaxr   r#   strr   r   r   r5   )	bam_fpathZsorted_bam_fpathr   r0   threads	sort_rulememcmdr   r   r   sort_bam  s    

r   c             C   sX   t ddd| | g}t| dkr*|ddg7 }t| d	 sTtj|t|d
t|d
|d d S )Nr   rr   z-prT   i   rj   z-aZbwtswz.bwtr}   )r~   r   r0   i   @l        )r   r	   r   r   r   r5   )	ref_fpathr   r0   r   r   r   r   	bwa_index  s
    r   c              C   s.   t  } | dkrdS | dkrdS | dkr*dS dS )N@                rT   )r   )free_memr   r   r   get_gridss_memory%  s    r   c       
      C   s  d }t | }t |d}|jdjddddddd	g xt|D ]\}}|d
krf|dd  jd}qB|jdsB|jd}ydtt||}d}	|d |d krd}	|jdj|d |d |d |d |d |d |	gd  W qB tk
r   Y qBX qBW W d Q R X W d Q R X d S )NrQ   r   ZCHROM_AZSTART_AZEND_AZCHROM_BZSTART_BZEND_BzTYPE
r   rf   #BNDstrand1strand2INVZchrom1Zstart1Zend1Zchrom2Zstart2Zend2rS   )	r5   r[   r   rY   rZ   r   r   r   
ValueError)
raw_bed_fpathrH   headerr`   ra   rb   rc   r   svsv_typer   r   r   reformat_bedpe0  s&    


$r   c             C   s^   | d }t | dB}x:|D ]2}t|j dkrHtj| | tj|  dS dS qW W d Q R X d S )N_rawrrT   FT)r5   ro   rZ   r9   copyr;   r<   )	cov_fpathraw_cov_fpathcoveragerc   r   r   r   check_cov_fileG  s    

r   c             C   sJ  t | |d }|rt | |d }t| rt| rtjtddd|dgt|dt|d|d	 t|d
b}t|dL}xD|D ]<}	|	j }
|
d |
d  }}|jdj |
d ||d g q~W W d Q R X W d Q R X n4t|stjtddd|gt|dt|d|d	 t | |d }t|sFtjddd|gt|dt|d|d	 |S )Nz.bedz.bedper   Zbamtobedz-iz-bedperQ   r}   )r~   r   r0   r   rf      r   r   rS   z.sorted.bedr   z-k1,1z-k2,2n)r   r   r   r   r%   r5   rZ   r[   )rK   r.   r   r   r0   bedper   Zbedpe_fpathZbed_filerc   r   startendZsorted_bed_fpathr   r   r   
bam_to_bedS  s(    
8

r   c             C   s   g }ddddg}t | `}xXt|D ]L\}}|dkr6P |jdrBq$|jd}|d }||kr^q$|jt|d	  q$W W d Q R X t|d
 t| S )N9914783163i@B @r   rf   	   g      ?)r5   rY   r   rZ   rt   ro   sum)r   Zread_lengthsmapped_flagsr   rb   r   r   flagr   r   r   calculate_read_lenl  s    


 r   Tc             C   sR   t dd| jdrdnd| d|g}|r0|dg7 }tj|t|dt|d	|d
 d S )Nr   Z	genomecovz.bamz-ibamz-iz-gz-bgarQ   r}   )r~   r   r0   )r%   rp   r   r   r5   )in_fpath	out_fpathchr_len_fpath	err_fpathr0   print_all_positionsr   r   r   r   calculate_genome_cov}  s    "
r   c             C   s|   t dddt|dg}| jdr*|dg7 }|jdr@|dd	g7 }|rP|d
|g7 }|j|  tj|t|dt|d|d d S )Nr   r|   z-tz-hz.samz-Sz.bamz-fZbamz-FrQ   r}   )r~   r   r0   )r#   r   rp   rt   r   r   r5   )r   r   r   r   r0   filter_ruler   r   r   r   sambamba_view  s    



r   c       	      C   s  t | dt}g }d}d}xXt|dk rpy|j|| tj W n( tk
rd   |jd |j }P Y nX |j }qW |d8 }W d Q R X x|D ]}|jds|jd}yTt	|d t	|d t
|d |d	 f}t	|d
 t	|d t
|d |d	 f}W q tk
r   dS X qW dS )Nr   rf   i   
   r   r   r   rT      rR   r   rj   FT)r5   ro   seekr;   SEEK_ENDrW   	readlinesr   rZ   r   r   
IndexError)	rH   r`   Zlines_foundZblock_counter_bufferrc   r   align1align2r   r   r   is_valid_bed  s,    



(,
r   )r)   )r2   )F)F)NF)F)F)NN)F)T)N)J
__future__r   rV   r;   r   r9   collectionsr   ImportError%quast_libs.site_packages.ordered_dicturllib2r   urllib.requestos.pathr   r   r   r   r	   
quast_libsr
   r   quast_libs.ca_utils.miscr   quast_libs.fastaparserr   quast_libs.qutilsr   r   r   r   r   r   r   r   r   LIBS_LOCATIONr   rA   r$   r"   r&   Zgridss_versionr'   
QUAST_HOMEZgridss_external_fpathGIT_ROOT_URLZ
gridss_urlr   r#   r%   r(   r1   r,   r@   rB   rI   rJ   rL   re   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sh   ,





,
0



