a
    b                     @   s  d dl mZ d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlmZmZmZmZmZmZmZ d dlmZmZ d dlmZmZ d d	lmZ d d
lT d dlmZmZmZm Z m!Z!m"Z"m#Z#m$Z$ d dl%m&Z& d dl'm(Z( e&ej)a*i a+dZ,G dd de-Z.G dd de-Z/d?ddZ0dd Z1dd Z2d@ddZ3dd Z4dAdd Z5d!d" Z6d#d$ Z7d%d& Z8d'd( Z9d)d* Z:d+d, Z;d-d. Z<d/d0 Z=dBd2d3Z>d4d5 Z?d6d7 Z@dCd9d:ZAd;d< ZBdDd=d>ZCdS )E    )with_statement)divisionN)defaultdict)sqrt)isfilejoinbasenameabspathisdirdirnameexists)qconfigqutils)minimap_fpathref_labels_by_chromosomes)create_fai_file)*)is_non_empty_file
add_suffixget_chr_len_fpathrun_parallelget_path_to_programcheck_java_version
percentilecalc_median)
get_logger)
save_reads
   c                   @   s$   e Zd ZdZdd Zedd ZdS )Mapping   c                 C   sX   |d t |d t |d |d t|d f\| _| _| _| _| _| j| j d | _d S )N            	      )intlenrefstartmapqref_nextend)selffields r/   C/home/psgendb/BIRCHDEV/pkg/quast-5.2.0/quast_libs/reads_analyzer.py__init__$   s    ,zMapping.__init__c                 C   s6   |  drd S t| ddk r$d S t| d}|S )N@	   )
startswithr'   splitr   )linemappingr/   r/   r0   parse)   s    
zMapping.parseN)__name__
__module____qualname__MIN_MAP_QUALITYr1   staticmethodr9   r/   r/   r/   r0   r   !   s   r   c                   @   sh   e Zd ZdZdZejde  ZdddZdd Z	d	d
 Z
dddZdddZdd Zdd Zdd ZdS )QuastDeletionay   describes situtations: GGGGBBBBBNNNNNNNNNNNNBBBBBBGGGGGG, where
    G -- "good" read (high mapping quality)
    B -- "bad" read (low mapping quality)
    N -- no mapped reads
    size of Ns fragment -- "deletion" (not less than MIN_GAP)
    size of Bs fragment -- confidence interval (not more than MAX_CONFIDENCE_INTERVAL,
        fixing last/first G position otherwise)
       r    Nc                 C   s2   ||||||f\| _ | _| _| _| _| _d| _d S )NQuastDEL)r(   	prev_goodprev_badnext_bad	next_goodnext_bad_endid)r-   r(   rB   rC   rD   rE   rF   r/   r/   r0   r1   @   s    zQuastDeletion.__init__c                 C   s:   | j d uo8| jd uo8| jd uo8| jd uo8| j| j tjkS N)rB   rC   rD   rE   r?   MIN_GAPr-   r/   r/   r0   is_validE   s    zQuastDeletion.is_validc                 C   s   |j | _| j| _| S rH   )r,   rB   rC   r-   r8   r/   r/   r0   set_prev_goodJ   s    zQuastDeletion.set_prev_goodc                 C   sD   |r|n|j | _| jd u s,| jtj | jk r@td| jtj | _| S )Nr%   )r,   rC   rB   r?   MAX_CONFIDENCE_INTERVALmaxr-   r8   positionr/   r/   r0   set_prev_badO   s    zQuastDeletion.set_prev_badc                 C   sH   |r|n|j | _| jd u r$| j| _n | jtj | jkrD| jtj | _d S rH   )r)   rE   rD   r?   rN   rP   r/   r/   r0   set_next_goodU   s
    

zQuastDeletion.set_next_goodc                 C   s   |j | _|j| _| j| _d S rH   )r)   rD   r,   rF   rE   rL   r/   r/   r0   set_next_bad\   s    zQuastDeletion.set_next_badc                 C   s4   | j d u r|j| _ |j| _t|j| j tj | _d S rH   )rD   r)   r,   rF   minr?   rN   rE   rL   r/   r/   r0   set_next_bad_enda   s    
zQuastDeletion.set_next_bad_endc                 C   s,   d tt| j| j| j| j| j| j| jgS )Nr3   )	r   mapstrr(   rB   rC   rD   rE   rG   rJ   r/   r/   r0   __str__g   s    zQuastDeletion.__str__)NNNNN)NN)NN)r:   r;   r<   __doc__rN   r   extensive_misassembly_thresholdrI   r1   rK   rM   rR   rS   rT   rV   rY   r/   r/   r/   r0   r?   3   s   	


r?   c                 C   sX  t | }|s:t||d }t||d }t||d }n|dd}t|d}|p`t||d }t|rt|std| d  tt	|d }nt
d|  |S t|st||tj|td	d
 t|||t|d t|d st jtdd|gt|dtd t|  t||d }	t|	|d }
t|
st|	rJtj|	dd t|	 t }tj }|d  tjt 7  < t| |t t jdddt| d dddddt  dd | d!|
 d"t|	|d#  d$|  d%t| d&|	 gt|dt|d' t|
rTt|d(}t|d)}t jddt  d*d |
 d!| d+| d$|  d,g	t|dtd t!|| |S )-N.sam.bamz.sorted.bamsorted.bed  Existing BED-file: 0 may be corrupted. Bed file will be re-created.   Using existing BED-file: znot unmapped and proper_pairZfilter_rule)threadsz.baisambambaindexastderrloggerZ_gridssz.vcfTignore_errorsPATHjavaz-eaz-Xmxgz-Dsamjdk.create_index=truez(-Dsamjdk.use_async_io_read_samtools=truez)-Dsamjdk.use_async_io_write_samtools=truez(-Dsamjdk.use_async_io_write_tribble=truez-cpzgridss.CallVariantszI=zO=z	ASSEMBLY=z.gridss.bamzR=zWORKER_THREADS=zWORKING_DIR=)ri   rj   envrawfilteredz#au.edu.wehi.idsv.VcfBreakendToBedpezOF=zINCLUDE_HEADER=TRUE)"r   name_from_fpathr   replacer   r   is_valid_bedrj   warning
output_dirinfor   sambamba_viewr   max_threadssort_bamcall_subprocesssambamba_fpathopenr   r
   shutilrmtreeosmakedirsZget_gridss_memoryenvironcopypathsepZbwa_dirpath	bwa_indexrX   get_gridss_fpathZreformat_bedpe)cur_ref_fpathoutput_dirpath	err_fpathrz   	bam_fpath	bed_fpathref_name	sam_fpathbam_sorted_fpathZvcf_output_dirpathZ	vcf_fpathmax_memrp   Zraw_bed_fpathZfiltered_bed_fpathr/   r/   r0   process_one_refm   sf    











r   c           	         s   t d tt| d tj }t|r>t d|  |S tdrNt	ds\t 
d d S tdsrt 
d d S |rtt|tj}td	tj|  fd
d|D }tt||dd}|rt|| nt|  tj||d t d|  |S )Nz0  Searching structural variations with GRIDSS..._    Using existing file: rn   g?z`Java 1.8 (Java version 8) or later is required to run GRIDSS. Please install it and rerun QUAST.ZRscriptz?R is required to run GRIDSS. Please install it and rerun QUAST.r%   c                    s   g | ]}| fqS r/   r/   ).0r   r   r   Zthreads_per_jobr/   r0   
<listcomp>       z)search_sv_with_gridss.<locals>.<listcomp>T)filter_results)r   r       Saving to: )rj   rx   r   r   rs   r   sv_bed_fnamer   r   r   rv   rU   r'   rz   rO   r   r   	cat_files)	main_ref_fpathr   meta_ref_fpathsr   r   Zfinal_bed_fpathn_jobsparallel_argsZ
bed_fpathsr/   r   r0   search_sv_with_gridss   s*    


r   c                 C   s  g }t | tj}td d}t|r8d}td|  |sB|rt|$}	d }
|	D ]}t|}|rV|j	dkrvqV|
r|
j	|j	kr|
j
d u r|j|
j tjkr|
| |jtjkr|
| |
 r||
 t|j	|}
n$|jtjkr|
| n
|
| n|j|
j tjkrP|
 r:||
 t|j	j|
jd}
nH|jtjkr|
| |
 r|||
 t|j	|}
n
|
| nH|
r|
j	|v r|
j||
j	 d |
 r||
 t|j	|}
|rV||j	 }|j dks|||j krV|| d urV|| | qV|
rb|
j	|v rb|
j||
j	 d |
 rb||
 W d    n1 sx0    Y  |r| D ]}|d ur|  q|rtdt|  td	|  t|d
.}|D ]}|t|d  qW d    n1 s0    Y  |S )Nz@  Looking for trivial deletions (long zero-covered fragments)...TFr   r   )rQ   =z  Trivial deletions: %d foundr   w
) r   r   trivial_deletions_fnamerj   rx   r   r~   r   r9   r(   rD   r)   rC   r?   rI   rT   r*   r=   rS   rK   appendrM   rR   rF   rV   r+   stripwritevaluescloser'   rX   )temp_output_dirsam_sorted_fpath	ref_files
ref_labelsseq_lengthsneed_ref_splitting	deletionstrivial_deletions_fpathZneed_trivial_deletionssam_fileZcur_deletionr7   r8   Zcur_refZref_handlerfdeletionr/   r/   r0   search_trivial_deletions   s|    
















*
6r   allFc                 C   s6  g }t | }tjp t||d }t|d}|dkrHt||}t||}t||d }t|sh|| t|s|dks|dkr|| t|d}	t|	st	
|	 t|d}
t|d}t| ||	|
|tjtjtj|d	d	|d
\}}}tjrtjdkrv|dkr4|r4t|||\}}}|s,td n|t_nB|dkrvt|rvz tt| }|rf|t_W n   Y n0 |s|||fS |std dS |r,t|	t|d}t|	t|d}t|rtd|  n$t||tj|tdd t|||t t|s,|r,t|	| ||||
||||dd |||fS )N.cov	uncoveredr   .is.txtpetemp_outputreads_stats.logreads_stats.errT)r   r   required_filesis_referencealignment_onlyusing_readsautoz!  Failed calculating insert size.z%  Failed detecting uncovered regions.NNNmappedr^   z"  Using existing sorted BAM-file: not unmappedrc   F)uncovered_fpathcreate_cov_files)r   rs   r   	cov_fpathr   r   r   r   r
   r   r   align_single_filerz   reference_samreference_bamoptimal_assembly_insert_sizecalculate_insert_sizerj   rx   r&   r~   readlineget_safe_fpathry   r{   get_coverage)	ref_fpathrw   r   Zcalculate_coverager   r   r   r   insert_size_fpathr   log_pathr   correct_chr_namesr   r   insert_sizer   bam_mapped_fpathr   r/   r/   r0   align_reference  sh    













r   c           %         sj  g }d\}	}
}|rlt |}tjp0t|d }	tjpDt|d }
tjpXt|d }|	|
|g}tjrzt	d d }	nt
|	rt|	std|	 d  |t|d  qt	d|	  n8tjstjstjstjst	d	 d }	d
t_n
||	 tjrZt
|
r4t|
}|r*t	d|
  n
||
 t
|rNt	d|  n
|| nt	d d }
d }tjsttjt| d }tdtj| tjpd gt|  tjpd gt|    fddt| D }nd}tjg }|r ||tjtjd |d
f |rxtt||\} tjsdd t|  t_ d t|  t_t| | t |sdS |d }d  d  }}|t_|t_|s|	|
|fS t ||gst	d dS t!t"|d}t!t"|d}t!t"|d}t
|rt	d|  n@t
|sLt#||tjtdd t$||t t#||tjt tjrt
|
rzt
|st%||||||
|
\}
}t
|	stjs|rt	d g }i }t&|}|D ]}|'ds qT|'drBd|v rBd|v rB|(dd (dd }t)|(d d (dd }|||< ||*  qW d    n1 sj0    Y  d!}i }|rr|D ]}t |}t|d" } | t+|< t
| rt	d#|| f  d ||< nt&| d$}!|d 'ds|!,|d d%  d&d' |D D ]D}"|"(dd (dd }||v r
|| |kr
|!,|"d%  q
|!,|d d%  |!||< d
}qt-|||||}#t. rt/t. rz$t0|||}$t 1|$|#g|	 W n   Y n0 t/|#rt
|	st23|#|	 tjs0t
|	rt4d(|	  n$t/|	r"t4d) n
t4d d }	t
|
rJt4d*|
  ntjs\t4d+ d }
|	|
|fS ),Nr   r_   r   z.physical.covzH  Will not search Structural Variations (--fast or --no-sv is specified)r`   ra   rb   z@  Will not search Structural Variations (needs paired-end reads)Tz&  Using existing reads coverage file: z)  Using existing physical coverage file: zf  Will not calculate coverage (--fast or --no-html, or --no-icarus, or --space-efficient is specified)r%   c                    s.   g | ]&\}}||  | |f	qS r/   r/   )r   rf   contigs_fpath
bam_fpathsr   r   Zmax_threads_per_jobrw   
sam_fpathsr   r/   r0   r   s  s   z(run_processing_reads.<locals>.<listcomp>z)  Failed searching structural variations.r^   r   z"  Using existing sorted SAM-file: r   rc   z%  Splitting SAM-file by references...r2   @SQSN:zLN:z	SN:r3   r   z	LN:Fr\   z,    Using existing split SAM-file for %s: %sr   r   c                 s   s$   | ]}| d rd|v r|V  qdS )r   r   N)r5   )r   hr/   r/   r0   	<genexpr>  r   z'run_processing_reads.<locals>.<genexpr>z  Structural variations are in z&  No structural variations were found.z9  Coverage distribution along the reference genome is in z+  Failed to calculate coverage distribution)5r   rs   r   bedr   r   phys_cov_fpathno_svrj   rx   r   ru   rv   r   forward_readsinterlaced_readsr   r   create_icarus_htmlZcheck_cov_fileno_read_statsrU   rz   r'   rO   r   r   	enumerater   r   add_statistics_to_reportr   r   r   r   ry   r{   r   r~   r5   r6   r&   r   ref_sam_fpathsr   r   r   r   r   r   r   r   	main_info)%contigs_fpathsr   r   r   r   rw   r   r   r   r   r   physical_cov_fpathr   Zis_correct_filer   Zparallel_align_argsr   r   r   r   r   r   headersr   r   r7   Zseq_nameZ
seq_lengthr   r   r   Zcur_ref_nameZref_sam_fpathZref_sam_filer   r   Zgridss_sv_fpathr/   r   r0   run_processing_readsC  s   
















 2








r   c                 C   s  t | }|d urt |nd}tj}|sF|rFt||d d d }n(|pVt||d }t||d d d }|dkrt||d | d }|dd}|s|
r|	rtdd |	D r|		| t
|rt|| |||t|
}nt|| |||t|
}|d u}|s
|s
d	S tt||d
 }|r|	r<tdd |	D r|st|rdtd| d |  nLt|rt jtdddt||gt|dt|dd t|| ||||t t|s|r|||fS td| d  t
|r|rtd| d |  t|| |||t|
}nLt
|rd|rdtd| d |  t||tj|t t|| |||t|
}|rtt
|s|r|
rtd ntd| d  t| } t|}t }t| t| |t t| |||||}t|dkrt||||| nDt|dkrBt !|d | |d dd}t
|rBt !|| td| d  t| t
|st"d|  d | d  d	S t|| |||t|
}n|rt
|sd	S |
rtd ntd| d   |rt
|rt#|rtd| d |  n4t||d | d! }t$||}t||||td d" t %|d# |st|rntd| d |  nLt|rt jtdddt||gt|dt|dd t|| ||||t |
rtd$ ntd| d%  |||fS )&N r\   r]   r   .c                 s   s   | ]}| d V  qdS )r   N)endswith)r   r   r/   r/   r0   r     r   z$align_single_file.<locals>.<genexpr>r   .statc                 s   s   | ]}t |V  qd S rH   )r   )r   fpathr/   r/   r0   r     r     z$Using existing flag statistics file re   Zflagstat-tr   rg   )stdoutri   zPre-processing reads...zUsing existing SAM-file: zUsing existing BAM-file: z  Running BWA for reference...zRunning BWA...r%   r   Done.z  Failed running BWA for z. See z for information.z#  Sorting SAM-file for reference...zSorting SAM-file...z.correct.samrc   zbam filez%  Analysis for reference is finished.zAnalysis is finished.)&r   rs   index_to_strr   reads_fpathsr   r   rt   anyr   r   Zget_correct_names_for_chromsrj   r   r   r   rx   r|   r}   rX   r~   analyse_coveragery   rz   r	   r   getcwdchdirr   align_readsr'   merge_sam_filesr   moveerrorZall_read_names_correctZclean_read_namesassert_file_exists)r   Zmain_output_dirr   r   r   rz   r   r   rf   r   r   r   r   filenameZ	index_strr   r   Z	can_reusestats_fpathprev_dirr   tmp_bam_fpathZcorrect_sam_fpathr/   r/   r0   r     s    

 









r   c              
   C   s   g }|dks|dkr.t tj| |||||dd |dks>|dkrXt tj| |||||dd |dksh|dkrt tj| |||||dd |dks|dkrt tj| |||||dd |dks|dkrt tj| |||||dd |S )Nr   r   )
reads_typempsinglepacbionanopore)run_alignerr   paired_reads
mate_pairsunpaired_readspacbio_readsnanopore_reads)r   r   r   rw   r   rz   out_sam_fpathsr/   r/   r0   r   F  s    r   c                 C   s  t dd t| }g }	g }
t| D ]\}}t|tr|dksH|dkr~|dkrVd}nd}t d t| | | d | }q||d	krd
nd | d | }n$|\}}|d | d | d | }t||t|d  }|dd}t|stj	t
|t|dt|dtd t|st|s<t||||td d |d	krt|d}tj	tddddt|d|||g	t|dtd t|rt|| |d	krt||t|\}}}|d ur|tjk r|	| |
| q$t|
dkr<t||}|dd}t|
d | t|
d dd| || n
||
 |	rt|}t||d }t|d"}|tt|	 W d    n1 s0    Y  d S )NZbwaz mem -t r  r  z -ax map-pb z -ax map-ont z -t  r   z -p r%   r\   r]   r   rg   )r   ri   rj   rc   Zdedupre   Zmarkdupz-rr   z--tmpdirrh   r   r   )Z	bwa_fpathrX   r   
isinstancer   r   rt   r   r   r|   shlexr6   r~   rj   ry   r}   r   r   r  r   rs   r   optimal_assembly_max_ISr   r'   extendr   r   rO   )Zread_fpathsr   r   r  rw   r   rz   r	  Zbwa_cmdinsert_sizesZtemp_sam_fpathsidxreadspresetcmdlineread1read2output_fpathr   Zbam_dedup_fpathr   r   Zfinal_sam_fpathZfinal_bam_fpathr   r   outr/   r/   r0   r  V  s\    
$"
$










r  c           	      C   s   g }| D ]B}t |r|dd}t|d}t |s@t|||t || qtjtdddt	||g| t
|dtd t||||t |S )	Nr\   r]   r^   re   merger   rg   rh   )r   rt   r   r{   rj   r   r   r|   r}   rX   r~   ry   )	Ztmp_sam_fpathsr   r   rz   r   Ztmp_bam_fpathsZtmp_sam_fpathr  Ztmp_bam_sorted_fpathr/   r/   r0   r    s    

r  c                 C   s  t t}g |d< t| }|D ]z}| d }d|v rHt||d< q d|v rf|d  t|8  < q d|v r|d  t|8  < q d|v r|d  t|8  < q d|v r||d< q d	|v r||d
< q d|v rd|v r||d< t||d |d< q d|v r||d< t||d |d< q d|v r>||d< t||d |d< q d|v rnd|vrn||d< t||d |d< q d|v r||d< q d|v r |d t| q W d    n1 s0    Y  |S )Ncoverage_thresholdsr   totalZ	secondaryZsupplementary
duplicatesr  rightr   leftr   %mapped_pcntzproperly pairedpairedpaired_pcnt
singletonssingletons_pcntzdifferent chrZmapQmisjointmisjoint_pcntdepthcoverage)r   r&   r~   r6   get_pcnt_readsr   float)r  reads_statsr   r7   valuer/   r/   r0   parse_reads_stats  sD    






4r7  c                 C   s$   |dkr t dt| d |  S d S )Nr   z%.2fg      Y@)r4  r&   )r  Ztotal_readsr/   r/   r0   r3    s    r3  c                    sF  ddl m} d |rVt|}t| |d }t|rVt|td dkrVt	d t
|D ]\}}||}t|}	t|}
t| |	d }r||jjd  ||jjd  ||jjd  ||jjd  ||jjd	  ||jjd
  ||jjd  ||jjd  ||jjd  d rtd ttjkr||jjfddt
tjD  ||jjd d  t|sq^t| ||jj d  ||jj d  ||jj d  ||jj  d  ||jj! d  ||jj" d  ||jj# d  t d dkrt	dt$| d d |
 d  ||jj% d	  ||jj& d
  ||jj' d  ||jj( d  ||jj) d   d r^t d ttjkr^||jj* fddt
tjD  ||jj+ d d  q^d S )Nr   )	reportingr   r   z%  BWA: nothing aligned for reference.r*  r+  r,  r-  r.  r/  r0  r1  r$  c                    s   g | ]\}} d  | qS r$  r/   r   i	threshold)ref_reads_statsr/   r0   r     r   z,add_statistics_to_report.<locals>.<listcomp>r%  r(  r'  r   zBWA: nothing aligned for 'z'.c                    s   g | ]\}} d  | qS r9  r/   r:  )r5  r/   r0   r     r   ),
quast_libsr8  r   rs   r   r   r7  r&   rj   rx   r   getlabel_from_fpath	add_fieldFieldsREF_MAPPED_READSREF_MAPPED_READS_PCNTREF_PROPERLY_PAIRED_READSREF_PROPERLY_PAIRED_READS_PCNTREF_SINGLETONSREF_SINGLETONS_PCNTREF_MISJOINT_READSREF_MISJOINT_READS_PCNT	REF_DEPTHr'   r   r$  REF_COVERAGE__FOR_THRESHOLDSREF_COVERAGE_1X_THRESHOLDTOTAL_READS
LEFT_READSRIGHT_READSMAPPED_READSMAPPED_READS_PCNTPROPERLY_PAIRED_READSPROPERLY_PAIRED_READS_PCNTr   
SINGLETONSSINGLETONS_PCNTMISJOINT_READSMISJOINT_READS_PCNTDEPTHCOVERAGE__FOR_THRESHOLDSCOVERAGE_1X_THRESHOLD)rw   r   r   r8  r   r  rf   r   reportZassembly_nameassembly_labelr/   )r5  r=  r0   r     sf    




"

$
r   c                 C   sd  t |}t| ||||}t||}	t| |d }
t||
|	||dd d}dd tjD }t|
}|D ]j}|	 }t
|d t|d  }}|d d	krh||| 7 }ttjD ] \}}||kr||  |7  < qqhW d    n1 s0    Y  t|d
R}|dt
|  ttjD ]$\}}|d|| d |f  qW d    n1 sV0    Y  d S )Nz
.genomecovF)Zprint_all_positionsr   c                 S   s   g | ]}d qS )r   r/   )r   r<  r/   r/   r0   r     r   z$analyse_coverage.<locals>.<listcomp>r%   r"   Zgenomerg   z	%s depth
z%.2f coverage >= %sx
d   )r   rs   
bam_to_bedr   r   calculate_genome_covr   r$  r~   r6   r&   r4  r   r   )r   r   	chr_namesr   r  r   rj   r  r   chr_len_fpathr   	avg_depthZcoverage_for_thresholdsr   r7   lr1  Zgenome_fractionr;  r<  out_fr/   r/   r0   r     s(    


2r   c              	   C   s   t |d}t|std t| |d }t||tj|tdtj tjf d t| |d }	t	||	|tdd t
| |d	 |	|td
d}
t|
|||t |S )Nrq   z"  Calculating physical coverage...z.physical.bamzeproper_pair and not supplementary and not duplicate and template_length > %d and template_length < %drc   z.physical.sorted.bamz-n)Z	sort_rulez	.physicalT)Zbedpe)r   r   rj   rx   r   ry   r   rz   	MAX_PE_ISr{   r`  ra  )r   r   r   r   r   r   rc  raw_cov_fpathZbam_filtered_fpathZbam_filtered_sorted_fpathr   r/   r/   r0   get_physical_coverage  s    

ri  Tc                 C   s   |d }t ||}t|s~td t|s^t|sBt|||t t||||t t|d |
rnt||
| |r~t	||| t|	s|rt
| |||||	|}t	||	| ||	fS )NZ_rawz  Calculating reads coverage...zcoverage file)r   r   rj   rx   r{   ra  r   r  print_uncovered_regionsproceed_cov_fileri  )r   r   r   r   r   r   r   r   r   r   r   r   rh  rc  r/   r/   r0   r   %  s&    

r   c                 C   s  t t}t }d}t| d}t|d~}|D ]T}t| }	|	d }
tt|	d }|
|vr|d7 }t|||
< |r||
 n|
}|d| d ||
  d  t	|	d	krt|	d t|	d
  }}||
 
|g||   n||
 | t	||
 tkr2t	||
 t	||
 t  }td|tD ]B}t||
 ||t  t }|d||
 t|d g q,||
 |t d  ||
< q2tjst|  W d    n1 s0    Y  W d    n1 s0    Y  d S )Nr   rr   r   r%   #r  r   r!   r    )r   listdictr~   r6   r&   r4  rX   r   r'   r  r   COVERAGE_FACTORrangesumr   r   debugr   remove)rh  r   r   Z	chr_depthZused_chromosomes	chr_indexin_coverageZout_coverager7   fsnamer1  correct_namer)   r,   Z	max_indexrf   Z	cur_depthr/   r/   r0   rk  ;  s4    
$rk  c                 C   s   t | d}t | d}||fS )Nr   Z   )r   )r  Zdecile_1Zdecile_9r/   r/   r0   get_max_min_isY  s    

r{  r   c                 C   s  t |||rd| nd d }t|rz`t|4}t| }t| }t| }W d    n1 sj0    Y  |r|||fW S W n   Y n0 g }	g d}
t| r}t|D ]X\}}|dkr q|drq|d}|d }||
vrqtt|d	 }|		| qW d    n1 s"0    Y  |	r|	
  t|	}|d
krPdS t|	\}}ttj|}t|dF}|t|d  |t|d  |t|d  W d    n1 s0    Y  |||fS dS )Nr   r   r   )Z99Z147Z83Z163i@B r2   r3   r%      r   r   r   r   )r   r   r~   r&   r   r   r5   r6   absr   sortr   r{  rO   r   optimal_assembly_min_ISr   rX   )r   rw   r   Zreads_suffixr   r   r   Zmin_insert_sizeZmax_insert_sizer  Zmapped_flagsZsam_inr;  re  rw  flagZ	median_isrf  r/   r/   r0   r   _  sL    
*


,
2
r   c              
   C   s  t t}t| |}|D ]f}t| }|d }tt|d }|rJ|| n|}	t|dkr|dkr||	 |d |d f qW d    n1 s0    Y  t|dJ}
| D ]0\}}|D ]"\}}|
	d
|||gd  qqW d    n1 s0    Y  d S )	Nr   r   r!   r%   r    r   r3   r   )r   rn  r~   r6   r&   r4  r'   r   itemsr   r   )rh  r   r   Zuncovered_regionsrv  r7   rw  rx  r1  ry  rf  chromZregionsr)   r,   r/   r/   r0   rj    s    
:rj  c           
   	      s0  |r|a t   t d tt s0t d dS t|sBt| tt tj	 t
|d t sjt  tjstjrt fddttjtjD st d dS t
|d}t
|d	}t|d
  t|d
  t dd||f   t|| |t |||\}}}	tjstj dd t d |||	fS )NzRunning Reads analyzer...zFailed reads analysisr   r   c                    s    g | ]\}}t ||g tqS r/   )Zpaired_reads_names_are_equalrj   )r   r  r   r   r/   r0   r     s   zdo.<locals>.<listcomp>z5  Read names are discordant, skipping reads analysis!r   r   r   r   zLogging to files %s and %s...Trk   r   )rj   print_timestampr   Zcompile_reads_analyzer_toolsr
   r   r   Zdownload_gridssr   r   r   mkdirno_checkr   r   zipreverse_readsr  r~   r   rx   r   r   rs  r   r   )
r   r   rw   r   external_loggerr   r   r   r   r   r/   r  r0   do  sB    










r  )NN)r   F)NNNNFFr   )NNT)r   )NN)D
__future__r   r   r   rer   r  collectionsr   mathr   os.pathr   r   r   r	   r
   r   r   r?  r   r   quast_libs.ca_utils.miscr   r   quast_libs.fastaparserr   Zquast_libs.ra_utils.miscquast_libs.qutilsr   r   r   r   r   r   r   r   quast_libs.logr   Zquast_libs.reportingr   LOGGER_DEFAULT_NAMErj   r   rp  objectr   r?   r   r   r   r   r   r   r   r  r  r7  r3  r   r   ri  r   rk  r{  r   rj  r  r/   r/   r/   r0   <module>   sX   $(
:
3L
<   
e5%7 

*