a
    £žb\C  ã                   @   sP  d dl mZ d dl mZ d dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZmZ d dlmZmZmZ d dlmZmZ d dlmZ d d	lmZmZmZmZ d d
lmZ dZdZdZdZ dZ!dZ"dZ#e	eej$e#ej%ƒƒZ&ee&dƒa'ee&dƒa(dd„ Z)dd„ Z*dd„ Z+dd„ Z,dd„ Z-dd„ Z.dd„ Z/d d!„ Z0d,d#d$„Z1d-d&d'„Z2d(d)„ Z3d*d+„ Z4dS ).é    )Úwith_statement)ÚdivisionN)Údefaultdict)ÚjoinÚabspathÚexistsÚbasenameÚisdir)ÚqconfigÚ	reportingÚqutils)Úcompile_minimapÚminimap_fpath)Ú
read_fasta)Úget_free_memoryÚmd5Údownload_external_toolÚget_dir_for_download)Ú
save_kmersgü©ñÒMbP?iè  i'  éÈ   i † ÚkmcÚ	kmc_toolsc
                 C   s  t  |¡}
t| |
d ƒ}t| |
d ƒ}t|dƒ4}| dt|ƒ ¡ | dt|ƒ ¡ W d   ƒ n1 sj0    Y  t|dƒz}| d| ¡ |s–|rê| d| ¡ | d| ¡ | d	| ¡ | d
| ¡ | d| ¡ | d|	 ¡ W d   ƒ n1 sþ0    Y  d S )Nú.sfú.statÚwzAssembly md5 checksum: %s
zReference md5 checksum: %s
zCompleteness: %s
zK-mer-based correct length: %d
z!K-mer-based misjoined length: %d
z!K-mer-based undefined length: %d
zTotal length: %d
z# translocations: %d
z# 100 kbp relocations: %d
)r   Úlabel_from_fpath_for_fnamer   ÚopenÚwriter   )Ú
output_dirÚcontigs_fpathÚ	ref_fpathÚcompletenessÚcorr_lenÚmis_lenÚ	undef_lenÚ	total_lenÚtranslocationsÚrelocationsÚlabelÚkmc_check_fpathÚkmc_stats_fpathZcheck_fZstats_f© r+   úA/home/psgendb/BIRCHDEV/pkg/quast-5.2.0/quast_libs/unique_kmers.pyÚcreate_kmc_stats_file$   s    
0r-   c                 C   s’   t  |¡}t| |d ƒ}t|ƒs$dS t|ƒ ¡  d¡}t|ƒdk rFdS |d  ¡  ¡ d t	t
|ƒƒkrjdS |d  ¡  ¡ d t	t
|ƒƒkrŽdS dS )	Nr   FÚ
é   r   éÿÿÿÿé   T)r   r   r   r   r   ÚreadÚsplitÚlenÚstripÚstrr   )r   r   Úcontigs_fpathsr    r(   r)   Zsuccessful_check_contentr+   r+   r,   Úcheck_kmc_successful_check7   s    
  r8   c                 C   sL   t | t|ƒd ƒ}td||g||ƒ d}t|ƒrHtt|ƒ ¡  ¡ d ƒ}|S )Nz
.histo.txtZ	histogramr   r0   )r   r   Úrun_kmcr   Úintr   r2   r3   )Útmp_dirpathÚkmc_db_fpathÚ	log_fpathÚ	err_fpathZhisto_fpathZ	kmers_cntr+   r+   r,   Úget_kmers_cntF   s    r?   c              
   C   sT   t | t|ƒd ƒ}tdtƒ ƒ}tdt|ƒ ddt|ƒ ddd||| g	||d	d
 |S )Nú.kmcr/   z-mz-n128z-kz-fmz-cx1z-ci1F)Úuse_kmc_tools)r   r   Úmaxr   r9   r6   )r;   ÚfpathÚkmer_lenr=   r>   Úkmc_out_fpathZmax_memr+   r+   r,   Úcount_kmersO   s    &ÿrF   c              	   C   sô   t | dƒ}tƒ dddttjd ƒ ddt|ƒ||g	}tj|t|dƒt|d	ƒd
d tt	ƒ}tt	ƒ}t|ƒr}	|	D ]\}
|
 
d¡}t|ƒdk rŒqp|d |d |d   }}}||  t|ƒ¡ ||  t|ƒ¡ qpW d   ƒ n1 sâ0    Y  ||fS )Nzkmers.coordsz-cxÚsrz-sr/   z	--frag=noú-tr   Úaz  )ÚstdoutÚstderrÚindentú	é
   r   é   é   )r   r   r6   r
   Úunique_kmer_lenr   Úcall_subprocessr   r   Úlistr3   r4   Úappendr:   )r   r    Zkmers_fpathZlog_err_fpathÚmax_threadsZ	out_fpathZcmdlineZkmers_pos_by_chromZkmers_by_chromÚfÚlineÚfsÚcontigÚchromÚposr+   r+   r,   Úalign_kmersW   s     
ÿ

2r\   c              	   C   sÎ  t | dƒ}t|dƒ ¡  tƒ }d}t|ƒD ]˜\}	}
t | d|	 d ƒ}t|
ƒ| d }t|dƒN}t|ƒD ]4}| dt|ƒ d ¡ | |
||| … d ¡ qjW d   ƒ n1 s´0    Y  t | d|	 d	 ƒ}t	|||||dd
 t
ƒ }t|ƒD ]\}}| |¡ qòt|dƒŠ}d}t|ƒD ]l\}}
||v r|rHt|ƒ| tkrt|ƒ}| dt|| ƒ d ¡ | |
d ¡ |	|f||| < qW d   ƒ n1 s¢0    Y  ||7 }tjr*t |¡ q*||fS )Nzkmc.downsampled.txtr   r   Zkmers_z.fastar1   ú>r.   z.filtered.fasta)Ú	min_kmersrI   )r   r   ÚcloseÚdictr   r4   Úranger   r6   Úfilter_contigsÚsetÚaddr:   ÚKMERS_INTERVALr
   Úspace_efficientÚosÚremove)r;   r    r<   rD   r=   r>   Zdownsampled_txt_fpathÚ	ref_kmersZprev_kmer_idxrZ   ÚseqZkmc_fasta_fpathZnum_kmers_in_seqÚout_fÚiZfiltered_fpathZfiltered_kmersÚidxÚ_Zkmer_ir+   r+   r,   Údownsample_kmersi   s:    
:
4ro   c                 C   s   t | ƒ dd¡S )Nr@   Ú )r   Úreplace)rC   r+   r+   r,   Úget_clear_name‰   s    rr   c                 C   s¦   t | d  dd„ |D ƒ¡d ƒ}t|ƒdkrFtdg| d|g ||ƒ n\|d }td	t|ƒƒD ]@}t | t|ƒd t|ƒ d ƒ}td||| d|g||ƒ |}q\|}|S )
Nrn   c                 S   s   g | ]}t |ƒd d… ‘qS )Né   )rr   )Ú.0rE   r+   r+   r,   Ú
<listcomp>Ž   ó    z#intersect_kmers.<locals>.<listcomp>r@   r/   ÚsimpleZ	intersectr   r1   )r   r4   r9   ra   rr   r6   )r;   Úkmc_out_fpathsr=   r>   Úintersect_out_fpathZprev_kmc_out_fpathrl   Ztmp_out_fpathr+   r+   r,   Úintersect_kmers   s    rz   r1   c                 C   s6   |   d¡rd|  } td|| dt|ƒ d|g||ƒ d S )Nz.txtú@Úfilterz-ciz-fa)Úendswithr9   r6   )Zinput_fpathZoutput_fpathZdb_fpathr=   r>   r^   r+   r+   r,   rb   ›   s    
rb   Tc                 C   s@   |rt nt}tj|dttjƒ dg|  t|dƒt|dƒd d S )NrH   z-hprI   )rJ   rK   )Úkmc_tools_fpathÚkmc_bin_fpathr   rR   r6   r
   rU   r   )Úparamsr=   r>   rA   Z
tool_fpathr+   r+   r,   r9   ¡   s    ÿr9   c                 C   sX   t t | | ƒt || ƒ ƒ}|rT||k rT|| |k rTt t | | ƒt || | ƒ ƒ}|S )N)Úabs)r[   Úprev_posÚref_posÚprev_ref_posÚcyclic_ref_lensÚdistr+   r+   r,   Ú_get_dist_inconstistency§   s     r‡   c           4         s  |  ¡  tj}| dt|ƒ d ¡ g ‰ |D ]Ì}t |¡}t| |||ƒr,t| |d ƒ}t	|ƒ 
¡  d¡}t|ƒdk rxq,| d| d ¡ t |¡}	|	 tjjdt|d	  ¡  d
¡d ƒ ¡ t|ƒdkrðt|d  ¡  d
¡d ƒ}
t|d  ¡  d
¡d ƒ}t|d  ¡  d
¡d ƒ}t|d  ¡  d
¡d ƒ}t|d  ¡  d
¡d ƒ}t|d  ¡  d
¡d ƒ}|	 tjjd|
d |  ¡ |	 tjjd|d |  ¡ |	 tjjd|d |  ¡ |	 tjj|¡ |	 tjj|¡ |	 tjj|| ¡ ˆ  |¡ q,‡ fdd„|D ƒ}t|ƒd	kr2t| ƒ | d¡ d S tjdkrL| d¡ d S t t!dddg|ƒ}t"d|dddda#t"d|dddda$t%t#ƒr t%t$ƒr t&|ƒs®| d¡ d S | d¡ t'| ƒsÌt( )| ¡ t| dƒ}t| d ƒ}t	|d!ƒ *¡  t	|d!ƒ *¡  t| d"ƒ}t'|ƒst( )|¡ t+|||||ƒ}t,||||ƒ}|s\| d#| d$ | d% ¡ d S | d&¡ g }t-|ƒD ]\}}t .|¡}| d't /|¡ | ¡ t |¡}	t+|||||ƒ}t0|||g||ƒ}t,||||ƒ}|d | }|	 tjjd| ¡ | |¡ qr| d(¡ d)d„ t1|ƒD ƒ}| d*¡ t2||||||ƒ\}}t-t3||ƒƒD ]˜\}\}} t .|¡}| d't /|¡ | ¡ t |¡}	d }
d }d }d+\}}d	}t4ƒ }!t1|ƒD ]"\}"}#|t|#ƒ7 }t|#ƒ|!|"< q®t|ƒt5krî| d,¡ nÔd	}
d	}t6||||tj7ƒ\}$}%tj8otj9 }&|&r0|	 :tjj;¡nd }'d	}d	}t	t|t |¡d- ƒd!ƒÄ}(|$ <¡ D ]¨})g }*d.\}+},}-}.t=t3|%|) |$|) ƒd/d0„ d1D ]Š\}/}0||0 \}1}2|+r|-r|-|1kròt>t>|/|+ ƒt>|2|, ƒ d ƒd2krò|/|2|1f}.n|.r|* |.¡ d.\}/}2}1}.|/|2|1  }+},}-q–|.r2|* |.¡ d3\}+},}-d4}3|*D ]–}.|.\}/}2}1|+rÈ|-rÈ|1|-krŽ|d7 }|( ?d5|)|-|+|1|/f ¡ d}3n:t@|/|+|2|,|'ƒtAkrÈ|d7 }|( ?d6|)|+|,|/|2f ¡ d}3|/|2|1  }+},}-qD|3rð||!|) 7 }nt|*ƒd	krb|
|!|) 7 }
qbW d   ƒ n1 s$0    Y  ||
 | }|	 tjjd|
d |  ¡ |	 tjjd|d |  ¡ |	 tjjd|d |  ¡ |	 tjj|¡ |	 tjj|¡ |	 tjj|| ¡ tB| |||	 :tjj¡|
|||||ƒ
 qNt| ƒ tjCstD E|¡ | d¡ d S )7Nz!Running analysis based on unique z-mers...r   r.   r1   z  Using existing results for z... z%.2fr   z: r0   rP   r/   é   é   rO   é   g      Y@c                    s   g | ]}|ˆ vr|‘qS r+   r+   )rt   rC   ©Zchecked_assembliesr+   r,   ru   Í   rv   zdo.<locals>.<listcomp>zDone.Zlinux_32z4  Sorry, can't run KMC on this platform, skipping...ZKMCr   r   T)Úplatform_specificÚis_executablez#  Sorry, can't run KMC, skipping...z  Running KMC on reference...zkmc.logzkmc.errr   ÚtmpzKMC failed, check z and z. Skipping...z&  Analyzing assemblies completeness...z    z%  Analyzing assemblies correctness...c                 S   s   g | ]\}}|‘qS r+   r+   )rt   Únamern   r+   r+   r,   ru      rv   z    Downsampling k-mers...)NNzGReference is too fragmented. Scaffolding accuracy will not be assessed.z.misjoins.txt)NNNNc                 S   s   | d S )Nr   r+   )Úxr+   r+   r,   Ú<lambda>!  rv   zdo.<locals>.<lambda>)Úkeygš™™™™™©?)NNNFz#Translocation in %s: %s %d | %s %d
z$Relocation in %s: %d (%d) | %d (%d)
)FÚprint_timestampr
   rQ   Ú	main_infor6   r   r   r8   r   r   r2   r3   r4   Úinfor   ÚgetÚ	add_fieldÚFieldsÚKMER_COMPLETENESSÚfloatr5   r:   ÚKMER_CORR_LENGTHÚKMER_MIS_LENGTHÚKMER_UNDEF_LENGTHÚKMER_TRANSLOCATIONSÚKMER_RELOCATIONSÚKMER_MISASSEMBLIESrT   r   Úplatform_nameÚwarningr   Úkmc_dirnamer   r   r~   r   r   r	   rg   Úmakedirsr_   rF   r?   Ú	enumerateÚlabel_from_fpathÚindex_to_strrz   r   ro   Úzipr`   ÚMAX_REF_CONTIGS_NUMr\   rU   Ú
prokaryoteÚcheck_for_fragmented_refÚ	get_fieldÚREFLENÚkeysÚsortedr   r   r‡   ÚEXT_RELOCATION_SIZEr-   ÚdebugÚshutilÚrmtree)4r   r    r7   ÚloggerrD   r   r(   r*   Zstats_contentÚreportr"   r#   r$   r%   r&   r'   Úkmc_dirpathr=   r>   r;   Zref_kmc_out_fpathÚunique_kmersrx   ÚidZassembly_labelrE   ry   Zmatched_kmersr!   Zref_contigsri   Zdownsampled_kmers_fpathr<   Zcontig_lensr   rj   Zkmers_by_contigZkmers_pos_by_contigÚ	is_cyclicr…   ÚoutrY   Zcontig_markersr‚   r„   Z
prev_chromZmarkerr[   ZkmerZ	ref_chromrƒ   Zis_misassembledr+   r‹   r,   Údo®   s*   


*


















ÿ&0




ÿÿ0þ
r»   )r1   )T)5Ú
__future__r   r   rg   r²   Úcollectionsr   Zos.pathr   r   r   r   r	   Ú
quast_libsr
   r   r   Úquast_libs.ca_utils.miscr   r   Zquast_libs.fastaparserr   Úquast_libs.qutilsr   r   r   r   Zquast_libs.reportingr   ZKMER_FRACTIONre   ZMAX_CONTIGS_NUMr©   ZMIN_CONTIGS_LENr°   r£   ÚLIBS_LOCATIONr¡   r¶   r   r~   r-   r8   r?   rF   r\   ro   rr   rz   rb   r9   r‡   r»   r+   r+   r+   r,   Ú<module>   s@   

	 

