3
b\C                 @   sP  d dl mZ d dl mZ d dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZmZ d dlmZmZmZ d dlmZmZ d dlmZ d d	lmZmZmZmZ d d
lmZ dZdZdZdZ dZ!dZ"dZ#e	eej$e#ej%Z&ee&da'ee&da(dd Z)dd Z*dd Z+dd Z,dd Z-dd Z.dd Z/d d! Z0d,d#d$Z1d-d&d'Z2d(d) Z3d*d+ Z4dS ).    )with_statement)divisionN)defaultdict)joinabspathexistsbasenameisdir)qconfig	reportingqutils)compile_minimapminimap_fpath)
read_fasta)get_free_memorymd5download_external_toolget_dir_for_download)
save_kmersgMbP?i  i'     i kmc	kmc_toolsc
             C   s   t j|}
t| |
d }t| |
d }t|d*}|jdt|  |jdt|  W d Q R X t|dp}|jd|  |s|r|jd|  |jd|  |jd	|  |jd
|  |jd|  |jd|	  W d Q R X d S )Nz.sfz.statwzAssembly md5 checksum: %s
zReference md5 checksum: %s
zCompleteness: %s
zK-mer-based correct length: %d
z!K-mer-based misjoined length: %d
z!K-mer-based undefined length: %d
zTotal length: %d
z# translocations: %d
z# 100 kbp relocations: %d
)r   label_from_fpath_for_fnamer   openwriter   )
output_dircontigs_fpath	ref_fpathcompletenesscorr_lenmis_len	undef_len	total_lentranslocationsrelocationslabelkmc_check_fpathkmc_stats_fpathZcheck_fZstats_f r)   A/home/psgendb/BIRCHDEV/pkg/quast-5.2.0/quast_libs/unique_kmers.pycreate_kmc_stats_file$   s    
r+   c             C   s   t j|}t| |d }t|s$dS t|j jd}t|dk rFdS |d j j d t	t
|krjdS |d j j d	 t	t
|krdS dS )
Nz.sfF
   r      Tr/   )r   r   r   r   r   readsplitlenstripstrr   )r   r   contigs_fpathsr   r&   r'   Zsuccessful_check_contentr)   r)   r*   check_kmc_successful_check7   s    
  r6   c             C   sL   t | t|d }td||g|| d}t|rHtt|j j d }|S )Nz
.histo.txtZ	histogramr   r.   r/   )r   r   run_kmcr   intr   r0   r1   )tmp_dirpathkmc_db_fpath	log_fpath	err_fpathZhisto_fpathZ	kmers_cntr)   r)   r*   get_kmers_cntF   s    r=   c          
   C   sT   t | t|d }tdt }tdt| ddt| ddd||| g	||d	d
 |S )Nz.kmcr-   z-mz-n128z-kz-fmz-cx1z-ci1F)use_kmc_tools)r   r   maxr   r7   r4   )r9   fpathkmer_lenr;   r<   kmc_out_fpathZmax_memr)   r)   r*   count_kmersO   s
    &rC   c             C   s   t | d}t dddttjd  ddt|||g	}tj|t|dt|d	d
d tt	}tt	}t|l}	xd|	D ]\}
|
j
d}t|dk rqr|d |d |d   }}}|| jt| || jt| qrW W d Q R X ||fS )Nzkmers.coordsz-cxsrz-sr-   z	--frag=noz-tr   az  )stdoutstderrindent	
   r         )r   r   r4   r
   unique_kmer_lenr   call_subprocessr   r   listr1   r2   appendr8   )r   r   Zkmers_fpathZlog_err_fpathmax_threadsZ	out_fpathZcmdlineZkmers_pos_by_chromZkmers_by_chromflinefscontigchromposr)   r)   r*   align_kmersW   s    



 rX   c             C   s  t | d}t|dj  t }d}xt|D ]|\}	}
t | d|	 d }t|
| d }t|dH}x@t|D ]4}|jdt| d  |j|
|||  d  qpW W d Q R X t | d|	 d	 }t	|||||dd
 t
 }xt|D ]\}}|j| qW t|d}d}xzt|D ]n\}}
||kr| sDt|| tkrt|}|jdt||  d  |j|
d  |	|f||| < qW W d Q R X ||7 }tjr.tj| q.W ||fS )Nzkmc.downsampled.txtr   r   Zkmers_z.fastar.   >r,   z.filtered.fasta)	min_kmersrE   )r   r   closedictr   r2   ranger   r4   filter_contigssetaddr8   KMERS_INTERVALr
   space_efficientosremove)r9   r   r:   rA   r;   r<   Zdownsampled_txt_fpath	ref_kmersZprev_kmer_idxrV   seqZkmc_fasta_fpathZnum_kmers_in_seqout_fiZfiltered_fpathZfiltered_kmersidx_Zkmer_ir)   r)   r*   downsample_kmersi   s:    
(
 rk   c             C   s   t | jddS )Nz.kmc )r   replace)r@   r)   r)   r*   get_clear_name   s    rn   c             C   s   t | dj dd |D d }t|dkrFtdg| d|g || n`|d }xRtd	t|D ]@}t | t|d t| d }td||| d|g|| |}q^W |}|S )
Nrj   c             S   s   g | ]}t |d d qS )N   )rn   ).0rB   r)   r)   r*   
<listcomp>   s    z#intersect_kmers.<locals>.<listcomp>z.kmcr-   ZsimpleZ	intersectr   r.   )r   r2   r7   r]   rn   r4   )r9   kmc_out_fpathsr;   r<   intersect_out_fpathZprev_kmc_out_fpathrh   Ztmp_out_fpathr)   r)   r*   intersect_kmers   s    rt   r.   c             C   s6   | j drd|  } td|| dt| d|g|| d S )Nz.txt@filterz-ciz-fa)endswithr7   r4   )Zinput_fpathZoutput_fpathZdb_fpathr;   r<   rZ   r)   r)   r*   r^      s    
r^   Tc             C   s@   |rt nt}tj|dttj dg|  t|dt|dd d S )Nz-tz-hprE   )rF   rG   )kmc_tools_fpathkmc_bin_fpathr   rN   r4   r
   rQ   r   )paramsr;   r<   r>   Z
tool_fpathr)   r)   r*   r7      s    r7   c             C   sX   t t | | t ||  }|rT||k rT|| |k rTt t | | t || |  }|S )N)abs)rW   prev_posref_posprev_ref_poscyclic_ref_lensdistr)   r)   r*   _get_dist_inconstistency   s     r   c       4         s$  |j   tj}|jdt| d  g  x|D ]}tj|}t| |||r0t| |d }t	|j
 jd}t|dk r|q0|jd| d  tj|}	|	jtjjdt|d	 j jd
d3   t|dkrt|d j jd
d4 }
t|d j jd
d5 }t|d j jd
d6 }t|d j jd
d7 }t|d j jd
d8 }t|d j jd
d9 }|	jtjjd|
d |   |	jtjjd|d |   |	jtjjd|d |   |	jtjj| |	jtjj| |	jtjj||   j| q0W  fdd|D }t|d	kr8t|  |jd d S tjdkrR|jd d S t t!dddg|}t"d|dddda#t"d|dddda$t%t# st%t$ st&| r|jd d S |jd t'| st(j)|  t| d}t| d}t	|d j*  t	|d j*  t| d!}t'|s&t(j)| t+|||||}t,||||}|sh|jd"| d# | d$  d S |jd% g }xt-|D ]\}}tj.|}|jd&tj/| |  tj|}	t+|||||}t0|||g||}t,||||}|d | }|	jtjjd|  |j| qW |jd' d(d t1|D }|jd) t2||||||\}}xt-t3||D ]\}\}} tj.|}|jd&tj/| |  tj|}	d }
d }d }d:\}}d	}t4 }!x.t1|D ]"\}"}#|t|#7 }t|#|!|"< qW t|t5kr|jd* nd	}
d	}t6||||tj7\}$}%tj8o2tj9 }&|&rH|	j:tjj;nd }'d	}d	}t	t|tj|d+ d }(x|$j< D ]})g }*d;\}+},}-}.xt=t3|%|) |$|) d,d- d.D ]\}/}0||0 \}1}2|+r,|-r,|-|1krt>t>|/|+ t>|2|,  d d/kr|/|2|1f}.n|.r,|*j|. d<\}/}2}1}.|/|2|1  }+},}-qW |.rR|*j|. d=\}+},}-d0}3x|*D ]}.|.\}/}2}1|+r|-r|1|-kr|d7 }|(j?d1|)|-|+|1|/f  d}3n:t@|/|+|2|,|'tAkr|d7 }|(j?d2|)|+|,|/|2f  d}3|/|2|1  }+},}-qfW |3r||!|) 7 }nt|*d	kr~|
|!|) 7 }
q~W W d Q R X ||
 | }|	jtjjd|
d |   |	jtjjd|d |   |	jtjjd|d |   |	jtjj| |	jtjj| |	jtjj||  tB| |||	j:tjj|
|||||
 qbW t|  tjCstDjE| |jd d S )>Nz!Running analysis based on unique z-mers...z.statr,   r.   z  Using existing results for z... z%.2fr   z: rL   r-         rK      g      Y@c                s   g | ]}| kr|qS r)   r)   )rp   r@   )checked_assembliesr)   r*   rq      s    zdo.<locals>.<listcomp>zDone.Zlinux_32z4  Sorry, can't run KMC on this platform, skipping...ZKMCr   r   T)platform_specificis_executablez#  Sorry, can't run KMC, skipping...z  Running KMC on reference...zkmc.logzkmc.errr   tmpzKMC failed, check z and z. Skipping...z&  Analyzing assemblies completeness...z    z%  Analyzing assemblies correctness...c             S   s   g | ]\}}|qS r)   r)   )rp   namerj   r)   r)   r*   rq      s    z    Downsampling k-mers...zGReference is too fragmented. Scaffolding accuracy will not be assessed.z.misjoins.txtc             S   s   | d S )Nr   r)   )xr)   r)   r*   <lambda>!  s    zdo.<locals>.<lambda>)keyg?Fz#Translocation in %s: %s %d | %s %d
z$Relocation in %s: %d (%d) | %d (%d)
r/   r/   r/   r/   r/   r/   r/   )NN)NNNN)NNNN)NNN)Fprint_timestampr
   rM   	main_infor4   r   r   r6   r   r   r0   r1   r2   infor   get	add_fieldFieldsKMER_COMPLETENESSfloatr3   r8   KMER_CORR_LENGTHKMER_MIS_LENGTHKMER_UNDEF_LENGTHKMER_TRANSLOCATIONSKMER_RELOCATIONSKMER_MISASSEMBLIESrP   r   platform_namewarningr   kmc_dirnamer   ry   rx   r   r   r	   rc   makedirsr[   rC   r=   	enumeratelabel_from_fpathindex_to_strrt   r   rk   zipr\   MAX_REF_CONTIGS_NUMrX   rQ   
prokaryotecheck_for_fragmented_ref	get_fieldREFLENkeyssortedr{   r   r   EXT_RELOCATION_SIZEr+   debugshutilrmtree)4r   r   r5   loggerrA   r   r&   r(   Zstats_contentreportr    r!   r"   r#   r$   r%   kmc_dirpathr;   r<   r9   Zref_kmc_out_fpathunique_kmersrr   idZassembly_labelrB   rs   Zmatched_kmersr   Zref_contigsre   Zdownsampled_kmers_fpathr:   Zcontig_lensr   rf   Zkmers_by_contigZkmers_pos_by_contig	is_cyclicr   outrU   Zcontig_markersr|   r~   Z
prev_chromZmarkerrW   ZkmerZ	ref_chromr}   Zis_misassembledr)   )r   r*   do   s"   

*

$













 


(0






r   )r.   )T)5
__future__r   r   rc   r   collectionsr   Zos.pathr   r   r   r   r	   
quast_libsr
   r   r   quast_libs.ca_utils.miscr   r   Zquast_libs.fastaparserr   quast_libs.qutilsr   r   r   r   Zquast_libs.reportingr   ZKMER_FRACTIONra   ZMAX_CONTIGS_NUMr   ZMIN_CONTIGS_LENr   r   LIBS_LOCATIONr   r   ry   rx   r+   r6   r=   rC   rX   rk   rn   rt   r^   r7   r   r   r)   r)   r)   r*   <module>   s@   

	 

