a
    bC                     @   sF  d dl mZ d dlZd dlZd dlZd dlZzd dlmZ W n eyZ   d dl	mZ Y n0 zd dl
mZ W n   d dlmZ Y n0 d dlmZmZmZmZmZ d dlmZmZ d dlmZ d dlmZ d d	lmZmZmZmZmZm Z m!Z!m"Z"m#Z# eej$d
Z%eej$dZ&eej$ddZ'eej$dZ(da)dZ*de* d Z+eej,de+Z-ej.ee-ej, Z/dd Z0dd Z1dd Z2dd Z3dJddZ4dKddZ5dLd!d"Z6dMd#d$Z7dNd%d&Z8dOd'd(Z9d)d* Z:d+d, Z;d-d. Z<dPd/d0Z=d1d2 Z>d3d4 Z?dQd5d6Z@d7d8 ZAd9d: ZBd;d< ZCd=d> ZDdRd?d@ZEdAdB ZFdSdDdEZGdTdFdGZHdHdI ZIdS )U    )with_statementN)OrderedDict)urlopen)joinisfilebasenamedirnamegetsize)qconfigqutils)compile_minimap)get_chr_lengths_from_fastafile)	compile_toolget_dir_for_downloadrelpathget_path_to_programdownload_filedownload_external_toolis_non_empty_filecorrect_nameget_free_memorybwabedtoolsbinsambambaz1.4.1zgridss-z.jarzexternal_tools/gridssc                 C   s
   t | tS N)r   bwa_dirpathfname r   B/home/psgendb/BIRCHDEV/pkg/quast-5.2.0/quast_libs/ra_utils/misc.py	bwa_fpath-   s    r!   c                 C   s    t jdkrdnd}tt| | S )NmacosxZ_osxZ_linux)r
   platform_namer   sambamba_dirpath)r   Zplatform_suffixr   r   r    sambamba_fpath1   s    r%   c                 C   s
   t | tS r   )r   bedtools_bin_dirpathr   r   r   r    bedtools_fpath6   s    r'   c                   C   s   t sd S tt tS r   )gridss_dirpathr   gridss_fnamer   r   r   r    get_gridss_fpath:   s    r*   gzc                 C   s0   t ||| ddr,t|||d |d dS dS )NT)	move_file)extz  DoneF)r   
unpack_tar	main_info)nameZdownload_urldownloaded_fpathZfinal_dirpathloggerr-   r   r   r    download_unpack_compressed_tar@   s
    
r3   bz2c                 C   sj   dd l }|| d| }|| |  t||jd j}ddlm} ||| t	
| t|  dS )Nr   zr:)	copy_treeT)tarfileopen
extractallcloser   membersr0   distutils.dir_utilr5   shutilrmtreeosremove)fpathZdst_dirpathr-   r6   tarZtemp_dirpathr5   r   r   r    r.   H   s    



r.   Fc                 C   s   t dptdtdg|| dS )Nr   ZBWA
only_cleanr2   )r!   r   r   r2   rC   r   r   r    compile_bwaU   s    rE   c                 C   s"   t dp tdttddg|| dS )Nr   ZBEDtoolsr   rB   )r'   r   bedtools_dirpathr   rD   r   r   r    compile_bedtoolsY   s    rG   c                 C   s|   t ddtg| |datsdS |r>tjtr:tjtdd dS t }t	j
sx|d u rxt|sxtttdsx| dt  dS dS )NZgridssZGRIDSS)rC   FT)ignore_errorsa  Failed to download binary distribution from https://github.com/ablab/quast/tree/master/external_tools/gridss. QUAST SV module will be able to search trivial deletions only. You can try to download it manually, save the jar archive under %s, and restart QUAST.)r   r)   r(   r>   pathisdirr<   r=   r*   r
   no_svr   r   warning)r2   	bed_fpathrC   Zgridss_fpathr   r   r    download_gridss^   s    rN   c                 C   s&   t | |r"t| |r"t| |r"dS dS )NTF)rE   rG   r   rD   r   r   r    compile_reads_analyzer_toolsr   s    rO   c                 C   s   t |st| t|S |S r   )r   r   r   )output_dirpathr@   r   r   r    get_safe_fpathx   s    rQ   c              	   C   s6  t j| \}}z@|dv r8tj| dd}t|t|}nt| }t|t| }W n tyd   Y dS 0 t|r|	d|  |S |}t|dp}	t
|D ]V\}
}|
d dkr| d | }|	|d	  q|
d
 dkr|	d q|	| qW d    n1 s0    Y  W d    n1 s(0    Y  |S )Nz.gzz.gziprtmodeFzUsing existing FASTQ file w   r   
   z+
)r>   rI   splitextgzipr7   r   r   IOErrorr   info	enumeratesplitwrite)r@   name_ending
output_dirr2   r0   r-   handlercorrected_fpathfout_filinefull_read_namer   r   r    correct_paired_reads_names~   s,    Lrj   c              	   C   s  g }t | D ]H\}}d|d  }d}|r.d}tj|\}}	z$|	dv rXtj|dd}
nt|}
W n  ty   |d Y  d	S 0 |
 }|
	  |
  d
 }t|dk s||sD|  |d| d | d | d  t||||}|s
|d  dS |dkr(|tjtj|< n|dkrD|tjtj|< ||dd  qt|dkrt|d dS |d
 |d kr|d|d
 |d f  dS d	S )Nz/%d   forwardreverserR   rS   rT   zTCannot check equivalence of paired reads names, BWA may fail if reads are discordantTr      zImproper read names in z (zT reads)! Names should end with /1 (for forward reads) or /2 (for reverse reads) but z1 was found!
QUAST will attempt to fix read names.zFailed correcting read names. FrY   zASomething bad happened and we failed to check paired reads names!z0Paired read names do not match! Check %s and %s!)r^   r>   rI   rZ   r[   r7   r\   noticereadliner9   stripr_   lenendswithr]   rj   rL   r
   forward_readsindexreverse_readsappend)reads_fpathstemp_output_dirr2   Zfirst_read_namesidxr@   ra   
reads_type_r-   rc   
first_lineri   rd   r   r   r    paired_reads_names_are_equal   sP    





r   c                 C   s(  t  }t|}t }	tt| t|d }
t|s>t|
s>d S t|r|dr~tj	t
dddd|gt|
dt|d|d	 n*tj	t
ddd|gt|
dt|d|d	 d
}d}t|
P}|D ]:}|drt||d }t||d }t||	|< qW d    n1 s0    Y  d}t|t|	kr6d}n|t| |	 D ]h\}}t||d t| kr|	| || kr|||< n(|	| || krd} qn
d} qqH|r$|r||d | d | d d |rdn|  n0||d | d | d d |rdn|  d S |S )Nz.header.samr   viewz-H-SrV   astdoutstderrr2   zSN:(\S+)zLN:(\d+)z@SQr    zNumber of chromosomeszChromosome lengthszChromosome namesz in z and corresponding file z do not match. z#QUAST will try to realign reads to zthe reference genomez+Use SAM file obtained by aligning reads to )dictr   r   r   r   r   r   rt   r   call_subprocessr%   r7   
startswithrefindallintrs   zipkeysr   rL   error)rP   fasta_fpathZalignment_fpatherr_pathry   r2   is_referencecorrect_chr_namesZfasta_chr_lengthsZalignment_chr_lengthsZheader_fpathZchr_name_patternZchr_len_patternre   lchr_namechr_leninconsistencyZ	fasta_chrZsam_chrr   r   r    get_correct_names_for_chroms   s^    


.,

r   c                 C   s   t | }t|D ]l\}}|dkr4 W d    dS |s:q|d}|d }|dd  dksl|dd  dkr W d    dS qW d    n1 s0    Y  dS )	N@B T	r   ro   /1/2F)r7   r^   r_   )	sam_fpathsam_inrg   r   fs	read_namer   r   r    all_read_names_correct   s    

 2r   c              	   C   s   t | }t |dv}|D ]`}|s$q|d}|d }|dd  dksV|dd  dkrp|d d |d< d|}|| qW d    n1 s0    Y  W d    n1 s0    Y  |S )NrV   r   r   ro   r   r   )r7   r_   r   r`   )r   correct_sam_fpathr   Zsam_outr   r   r   r   r   r    clean_read_names  s    

 
Hr   c                 C   sn   |s
t j}dtdtdt  }tdddt|dt|d|d	|| g}|rT||g7 }tj	|t
|d
|d d S )Nz%dGBd   rY   r   sort-tz--tmpdirz-mz-or   )r   r2   )r
   max_threadsminmaxr   r%   strr   r   r   r7   )	bam_fpathZsorted_bam_fpathr   r2   threads	sort_rulememcmdr   r   r    sort_bam  s    
r   c                 C   sX   t ddd| | g}t| dkr*|ddg7 }t| d sTtj|t|dt|d|d	 d S )
Nr   rv   z-pl        z-aZbwtswz.bwtr   r   )r!   r	   r   r   r   r7   )	ref_fpathr   r2   r   r   r   r    	bwa_index  s
    r   c                  C   s.   t  } | dkrdS | dkrdS | dkr*dS dS )N@                rY   )r   )free_memr   r   r    get_gridss_memory%  s    r   c           
      C   s,  d }t | }t |d}|dg d t|D ]\}}|dkr\|dd  d}q8|ds8|d}zdtt||}d}	|d |d	 krd
}	|d|d |d |d |d |d |d |	gd  W q8 ty   Y q80 q8W d    n1 s0    Y  W d    n1 s0    Y  d S )NrV   r   )ZCHROM_AZSTART_AZEND_AZCHROM_BZSTART_BZEND_BzTYPE
r   rk   #BNDstrand1strand2INVZchrom1Zstart1Zend1Zchrom2Zstart2Zend2rX   )	r7   r`   r   r^   r_   r   r   r   
ValueError)
raw_bed_fpathrM   headerre   rf   rg   rh   r   svsv_typer   r   r    reformat_bedpe0  s,    


r   c                 C   s   | d }t | dd}|D ]N}t| dkrTt| | t|   W d    dS  W d    dS qW d    n1 s|0    Y  d S )N_rawrrY   FT)r7   rs   r_   r<   copyr>   r?   )	cov_fpathraw_cov_fpathcoveragerh   r   r   r    check_cov_fileG  s    
r   c                 C   sj  t | |d }|rt | |d }t|st|stjtddd|dgt|dt|d|d	 t|d
|}t|dR}|D ]<}	|	 }
|
d |
d  }}|d |
d ||d g qxW d    n1 s0    Y  W d    n1 s0    Y  n4t|s(tjtddd|gt|dt|d|d	 t | |d }t|sftjddd|gt|dt|d|d	 |S )Nz.bedz.bedper   Zbamtobed-iz-bedperV   r   r   r   rk      r   r   rX   z.sorted.bedr   z-k1,1z-k2,2n)r   r   r   r   r'   r7   r_   r`   )rP   r0   r   r   r2   bedper   Zbedpe_fpathZbed_filerh   r   startendZsorted_bed_fpathr   r   r    
bam_to_bedS  s.    ^

r   c                 C   s   g }g d}t | h}t|D ]N\}}|dkr2 qn|dr>q|d}|d }||vrZq|t|d  qW d    n1 s0    Y  t|d t| S )N)9914783163r   @r   rk   	   g      ?)r7   r^   r   r_   rx   rs   sum)r   Zread_lengthsmapped_flagsr   rg   r   r   flagr   r   r    calculate_read_lenl  s    


2r   Tc                 C   sR   t dd| drdnd| d|g}|r0|dg7 }tj|t|dt|d	|d
 d S )Nr   Z	genomecov.bamz-ibamr   z-gz-bgarV   r   r   )r'   rt   r   r   r7   )in_fpath	out_fpathchr_len_fpath	err_fpathr2   print_all_positionsr   r   r   r    calculate_genome_cov}  s    "
r   c                 C   s|   t dddt|dg}| dr*|dg7 }|dr@|dd	g7 }|rP|d
|g7 }||  tj|t|dt|d|d d S )Nr   r   r   z-hr   r   r   z-fZbamz-FrV   r   r   )r%   r   rt   rx   r   r   r7   )r   r   r   r   r2   filter_ruler   r   r   r    sambamba_view  s    



r   c           	   	   C   s$  t | dz}g }d}d}t|dk rnz||| tj W n( tyb   |d | }Y qnY n0 | }q|d8 }W d    n1 s0    Y  |D ]}|ds|d}zTt	|d t	|d	 t
|d |d
 f}t	|d t	|d t
|d |d
 f}W q ty   Y  dS 0 qdS )Nr   i   
   r   rk   r   r   rY      rW   r   rn   FT)r7   rs   seekr>   SEEK_ENDr\   	readlinesr   r_   r   r   
IndexError)	rM   re   Zlines_foundZblock_counter_bufferrh   r   align1align2r   r   r    is_valid_bed  s,    


&

(,r   )r+   )r4   )F)F)NF)F)F)NN)F)T)N)J
__future__r   r[   r>   r   r<   collectionsr   ImportError%quast_libs.site_packages.ordered_dicturllib2r   urllib.requestos.pathr   r   r   r   r	   
quast_libsr
   r   quast_libs.ca_utils.miscr   quast_libs.fastaparserr   quast_libs.qutilsr   r   r   r   r   r   r   r   r   LIBS_LOCATIONr   rF   r&   r$   r(   Zgridss_versionr)   
QUAST_HOMEZgridss_external_fpathGIT_ROOT_URLZ
gridss_urlr!   r%   r'   r*   r3   r.   rE   rG   rN   rO   rQ   rj   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    <module>   sh   ,





,
0



