3
b                 @   s  d dl mZ d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlmZmZmZmZmZmZmZ d dlmZmZ d dlmZmZ d d	lmZ d d
lT d dlmZmZmZm Z m!Z!m"Z"m#Z#m$Z$ d dl%m&Z& d dl'm(Z( e&ej)a*i a+dZ,G dd de-Z.G dd de-Z/d?ddZ0dd Z1dd Z2d@ddZ3dd Z4dAdd Z5d!d" Z6d#d$ Z7d%d& Z8d'd( Z9d)d* Z:d+d, Z;d-d. Z<d/d0 Z=dBd2d3Z>d4d5 Z?d6d7 Z@dCd9d:ZAd;d< ZBdDd=d>ZCdS )E    )with_statement)divisionN)defaultdict)sqrt)isfilejoinbasenameabspathisdirdirnameexists)qconfigqutils)minimap_fpathref_labels_by_chromosomes)create_fai_file)*)is_non_empty_file
add_suffixget_chr_len_fpathrun_parallelget_path_to_programcheck_java_version
percentilecalc_median)
get_logger)
save_reads
   c               @   s$   e Zd ZdZdd Zedd ZdS )Mapping   c             C   sX   |d t |d t |d |d t|d f\| _| _| _| _| _| j| j d | _d S )N            	      )intlenrefstartmapqref_nextend)selffields r/   C/home/psgendb/BIRCHDEV/pkg/quast-5.2.0/quast_libs/reads_analyzer.py__init__$   s    BzMapping.__init__c             C   s6   | j drd S t| jddk r$d S t| jd}|S )N@	   )
startswithr'   splitr   )linemappingr/   r/   r0   parse)   s    
zMapping.parseN)__name__
__module____qualname__MIN_MAP_QUALITYr1   staticmethodr9   r/   r/   r/   r0   r   !   s   r   c               @   sh   e Zd ZdZdZejde  ZdddZdd Z	d	d
 Z
dddZdddZdd Zdd Zdd ZdS )QuastDeletionay   describes situtations: GGGGBBBBBNNNNNNNNNNNNBBBBBBGGGGGG, where
    G -- "good" read (high mapping quality)
    B -- "bad" read (low mapping quality)
    N -- no mapped reads
    size of Ns fragment -- "deletion" (not less than MIN_GAP)
    size of Bs fragment -- confidence interval (not more than MAX_CONFIDENCE_INTERVAL,
        fixing last/first G position otherwise)
       r    Nc             C   s2   ||||||f\| _ | _| _| _| _| _d| _d S )NQuastDEL)r(   	prev_goodprev_badnext_bad	next_goodnext_bad_endid)r-   r(   rB   rC   rD   rE   rF   r/   r/   r0   r1   @   s    (zQuastDeletion.__init__c             C   s:   | j d k	o8| jd k	o8| jd k	o8| jd k	o8| j| j tjkS )N)rB   rC   rD   rE   r?   MIN_GAP)r-   r/   r/   r0   is_validE   s    zQuastDeletion.is_validc             C   s   |j | _| j| _| S )N)r,   rB   rC   )r-   r8   r/   r/   r0   set_prev_goodJ   s    zQuastDeletion.set_prev_goodc             C   sD   |r|n|j | _| jd ks,| jtj | jk r@td| jtj | _| S )Nr%   )r,   rC   rB   r?   MAX_CONFIDENCE_INTERVALmax)r-   r8   positionr/   r/   r0   set_prev_badO   s    zQuastDeletion.set_prev_badc             C   sH   |r|n|j | _| jd kr$| j| _n | jtj | jkrD| jtj | _d S )N)r)   rE   rD   r?   rK   )r-   r8   rM   r/   r/   r0   set_next_goodU   s
    

zQuastDeletion.set_next_goodc             C   s   |j | _|j| _| j| _d S )N)r)   rD   r,   rF   rE   )r-   r8   r/   r/   r0   set_next_bad\   s    zQuastDeletion.set_next_badc             C   s4   | j d kr|j| _ |j| _t|j| j tj | _d S )N)rD   r)   r,   rF   minr?   rK   rE   )r-   r8   r/   r/   r0   set_next_bad_enda   s    
zQuastDeletion.set_next_bad_endc          
   C   s,   dj tt| j| j| j| j| j| j| jgS )Nr3   )	r   mapstrr(   rB   rC   rD   rE   rG   )r-   r/   r/   r0   __str__g   s    zQuastDeletion.__str__)NNNNN)NN)NN)r:   r;   r<   __doc__rK   r   extensive_misassembly_thresholdrH   r1   rI   rJ   rN   rO   rP   rR   rU   r/   r/   r/   r0   r?   3   s   


r?   c             C   sX  t j| }|s:t||d }t||d }t||d }n|jdd}t|d}|p`t||d }t|rt|stjd| d  tt	|d }ntj
d|  |S t|st||tj|td	d
 t|||t|d t|d st jtdd|gt|dtd t|  t||d }	t|	|d }
t|
st|	rJtj|	dd tj|	 t }tjj }|d  tjt 7  < t| |t t jdddt| d dddddt  dd | d!|
 d"t|	|d#  d$|  d%t| d&|	 gt|dt|d' t|
rTt|d(}t|d)}t jddt  d*d |
 d!| d+| d$|  d,g	t|dtd t!|| |S )-Nz.samz.bamz.sorted.bamsortedz.bedz  Existing BED-file: z0 may be corrupted. Bed file will be re-created. z  Using existing BED-file: znot unmapped and proper_pair)filter_rule)threadsz.baisambambaindexa)stderrloggerZ_gridssz.vcfT)ignore_errorsPATHjavaz-eaz-Xmxgz-Dsamjdk.create_index=truez(-Dsamjdk.use_async_io_read_samtools=truez)-Dsamjdk.use_async_io_write_samtools=truez(-Dsamjdk.use_async_io_write_tribble=truez-cpzgridss.CallVariantszI=zO=z	ASSEMBLY=z.gridss.bamzR=zWORKER_THREADS=zWORKING_DIR=)r^   r_   envrawZfilteredz#au.edu.wehi.idsv.VcfBreakendToBedpezOF=zINCLUDE_HEADER=TRUE)"r   name_from_fpathr   replacer   r   is_valid_bedr_   warning
output_dirinfor   sambamba_viewr   max_threadssort_bamcall_subprocesssambamba_fpathopenr   r
   shutilrmtreeosmakedirsZget_gridss_memoryenvironcopypathsepZbwa_dirpath	bwa_indexrT   get_gridss_fpathZreformat_bedpe)cur_ref_fpathoutput_dirpath	err_fpathrm   	bam_fpath	bed_fpathref_name	sam_fpathbam_sorted_fpathZvcf_output_dirpathZ	vcf_fpathmax_memrd   Zraw_bed_fpathZfiltered_bed_fpathr/   r/   r0   process_one_refm   sZ    









r   c       	         s   t jd ttj| d tj }t|r>t jd|  |S td sRt	d r`t j
d d S tdsvt j
d d S |rtt|tj}td	tj|  fd
d|D }tt||dd}|rtj|| nt|  tj||d t jd|  |S )Nz0  Searching structural variations with GRIDSS..._z    Using existing file: rb   g?z`Java 1.8 (Java version 8) or later is required to run GRIDSS. Please install it and rerun QUAST.ZRscriptz?R is required to run GRIDSS. Please install it and rerun QUAST.r%   c                s   g | ]}| fqS r/   r/   ).0r{   )r}   r|   threads_per_jobr/   r0   
<listcomp>   s    z)search_sv_with_gridss.<locals>.<listcomp>T)filter_results)r~   r   z    Saving to: )r_   rk   r   r   rf   r   sv_bed_fnamer   r   r   ri   rQ   r'   rm   rL   r   r   	cat_files)	main_ref_fpathr~   meta_ref_fpathsr|   r}   Zfinal_bed_fpathn_jobsparallel_argsZ
bed_fpathsr/   )r}   r|   r   r0   search_sv_with_gridss   s*    


r   c             C   s  g }t | tj}tjd d}t|r8d}tjd|  |sB|rpt|}	d }
x|	D ]}tj|}|rZ|j	dkrzqZ|
o|
j	|j	kr|
j
d kr|j|
j tjkr|
j| |jtjkr|
j| |
j r|j|
 t|j	j|}
n$|jtjkr|
j| n
|
j| n|j|
j tjkrR|
j r<|j|
 t|j	j|
jd}
nH|jtjkr|
j| |
j r~|j|
 t|j	j|}
n
|
j| nH|
r|
j	|kr|
j||
j	 d |
j r|j|
 t|j	j|}
|rZ||j	 }|jj dks|||j krZ|| d k	rZ|| j| qZW |
rf|
j	|krf|
j||
j	 d |
j rf|j|
 W d Q R X |rx$|j D ]}|d k	r|j  qW |rtjdt|  tjd	|  t|d
(}x |D ]}|jt|d  qW W d Q R X |S )Nz@  Looking for trivial deletions (long zero-covered fragments)...TFz    Using existing file: r   )rM   =z  Trivial deletions: %d foundz    Saving to: w
) r   r   trivial_deletions_fnamer_   rk   r   rq   r   r9   r(   rD   r)   rC   r?   rH   rP   r*   r=   rO   rI   appendrJ   rN   rF   rR   r+   stripwritevaluescloser'   rT   )temp_output_dirsam_sorted_fpath	ref_files
ref_labelsseq_lengthsneed_ref_splitting	deletionstrivial_deletions_fpathZneed_trivial_deletionssam_fileZcur_deletionr7   r8   Zcur_refZref_handlerfdeletionr/   r/   r0   search_trivial_deletions   s|    

















"r   allFc             C   s<  g }t j| }tjp t||d }t|d}|dkrHt||}t||}t||d }t|sh|j| t| r|dks|dkr|j| t|d}	t|	st	j
|	 t|d}
t|d}t| ||	|
|tjtjtj|d	d	|d
\}}}tj ptjdkrz|dkr8|r8t|||\}}}|s0tjd n|t_nB|dkrzt|rzy tt|j }|rj|t_W n   Y nX |s|||fS |stjd dS |r2t|	t|d}t|	t|d}t|rtjd|  n$t||tj|tdd t|||t t| r2|r2t|	| ||||
||||dd |||fS )Nz.cov	uncoveredr   z.is.txtpetemp_outputzreads_stats.logzreads_stats.errT)r   r~   required_filesis_referencealignment_onlyusing_readsautoz!  Failed calculating insert size.z%  Failed detecting uncovered regions.mappedrX   z"  Using existing sorted BAM-file: znot unmapped)rY   F)uncovered_fpathcreate_cov_files)NNN)r   rf   r   	cov_fpathr   r   r   r   r
   rt   ru   align_single_filerm   reference_samreference_bamoptimal_assembly_insert_sizecalculate_insert_sizer_   rk   r&   rq   readlineget_safe_fpathrl   rn   get_coverage)	ref_fpathrj   r   Zcalculate_coverager   r   r   r   insert_size_fpathr   log_pathr}   correct_chr_namesr   r~   insert_sizer   bam_mapped_fpathr   r/   r/   r0   align_reference  sd    













r   c       %         sr  g }d*\}	}
}|rxt j|}tjp0t|d }	tjpDt|d }
tjpXt|d }|	|
|g}tjrztj	d d }	nt
|	rt|	stjd|	 d  |jt|d  ntj	d|	  nDtj otj  rtj otj rtj	d d }	d	t_n
|j|	 tjrft
|
r@t|
}|r6tj	d
|
  n
|j|
 t
|rZtj	d|  n
|j| ntj	d d }
d }tjsttjt| d }tdtj| tjpd gt|  tjpd gt|    fddt| D }nd}tjg }|r,|j|tjtjd |d	f |rtt||\} tjspd t|  t_ d t|  t_t| | t |sd+S |d, }d-  d.  }}|t_|t_|s|	|
|fS t ||gstj	d d/S t!t"|d}t!t"|d}t!t"|d}t
|r*tj	d|  n@t
|sXt#||tjtdd t$||t t#||tjt tjrt
|
 st
| rt%||||||
|
\}
}t
|	 rtj r|rtj	d g }i }t&|}x|D ]~}|j'dsP |j'drTd|krTd|krT|j(dd j(dd }t)|j(dd j(dd }|||< |j|j*  qW W d Q R X d}i }|rxx|D ]}t j|}t|d  } | t+|< t
| rtj	d!|| f  d ||< nt&| d"}!|d j'ds|!j,|d d#  xVd$d% |D D ]D}"|"j(dd j(dd }||kr|| |kr|!j,|"d#  qW |!j,|d0 d#  |!||< d	}qW t-|||||}#t. rt/t. ry$t0|||}$t j1|$|#g|	 W n   Y nX t/|#rt
|	 rt2j3|#|	 tjs8t
|	rtj4d&|	  n$t/|	r*tj4d' n
tj4d d }	t
|
rRtj4d(|
  ntjsdtj4d) d }
|	|
|fS )1Nz.bedz.covz.physical.covzH  Will not search Structural Variations (--fast or --no-sv is specified)z  Existing BED-file: z0 may be corrupted. Bed file will be re-created. z  Using existing BED-file: z@  Will not search Structural Variations (needs paired-end reads)Tz&  Using existing reads coverage file: z)  Using existing physical coverage file: zf  Will not calculate coverage (--fast or --no-html, or --no-icarus, or --space-efficient is specified)r%   c                s.   g | ]&\}}||  | |f	qS r/   r/   )r   r\   contigs_fpath)
bam_fpathsr}   r   max_threads_per_jobrj   
sam_fpathsr   r/   r0   r   s  s   z(run_processing_reads.<locals>.<listcomp>z)  Failed searching structural variations.rX   r   z"  Using existing sorted SAM-file: znot unmapped)rY   z%  Splitting SAM-file by references...r2   z@SQzSN:zLN:z	SN:r3   r   z	LN:Fz.samz,    Using existing split SAM-file for %s: %sr   r   c             s   s$   | ]}|j d rd|kr|V  qdS )z@SQzSN:N)r5   )r   hr/   r/   r0   	<genexpr>  s    z'run_processing_reads.<locals>.<genexpr>z  Structural variations are in z&  No structural variations were found.z9  Coverage distribution along the reference genome is in z+  Failed to calculate coverage distribution)NNN)NNNr   r   )NNNr   )5r   rf   r   bedr   r   phys_cov_fpathno_svr_   rk   r   rh   ri   r   forward_readsinterlaced_readsr   r   create_icarus_htmlZcheck_cov_fileno_read_statsrQ   rm   r'   rL   r   r   	enumerater   r   add_statistics_to_reportr   r   r   r   rl   rn   r   rq   r5   r6   r&   r   ref_sam_fpathsr   r   rz   r   r   r   rr   rw   	main_info)%contigs_fpathsr   r   r   r   rj   r   r}   r   r   r   physical_cov_fpathr   Zis_correct_filer   Zparallel_align_argsr   r   r~   r   r   r   headersr   r   r7   Zseq_nameZ
seq_lengthr   r   r{   Zcur_ref_nameZref_sam_fpathZref_sam_filer   r   Zgridss_sv_fpathr/   )r   r}   r   r   rj   r   r   r0   run_processing_readsC  s
   













 


 










r   c             C   s  t j| }|d k	rt j|nd}tj}| rH|rHt||d d% d }n(|pXt||d }t||d d& d }|dkrt||d | d }|jdd}|s|
r|	rtdd |	D r|	j	| t
|rt|| |||t|
}nt|| |||t|
}|d k	}| r| rd'S tt||d	 }|r|	 sDtd
d |	D r|st|rltjd| d |  nLt|rt jtdddt||gt|dt|dd t|| ||||t t|s|r|||fS tjd| d  t
|r |r tjd| d |  t|| |||t|
}nLt
|rl|rltjd| d |  t||tj|t t|| |||t|
}| st
| r|r|
rtjd ntjd| d  t| } t|}tj }tj| t| |t t| |||||}t|dkr
t||||| nDt|dkrNt j!|d | |d jdd}t
|rNt j!|| tjd| d  tj| t
|stj"d|  d | d  d(S t|| |||t|
}n| st
| rd)S |
rtjd ntjd| d  |rt
|rt#|rtjd| d |  n4t||d | d  }t$||}t||||td d! t j%|d" |st|r~tjd| d |  nLt|rt jtdddt||gt|dt|dd t|| ||||t |
rtjd# ntjd| d$  |||fS )*N r"   z.samz.bamr   .c             s   s   | ]}|j d V  qdS )r   N)endswith)r   r   r/   r/   r0   r     s    z$align_single_file.<locals>.<genexpr>z.statc             s   s   | ]}t |V  qd S )N)r   )r   fpathr/   r/   r0   r     s    z  z$Using existing flag statistics file r[   Zflagstatz-tr   r]   )stdoutr^   zPre-processing reads...zUsing existing SAM-file: zUsing existing BAM-file: z  Running BWA for reference...zRunning BWA...r%   r   zDone.z  Failed running BWA for z. See z for information.z#  Sorting SAM-file for reference...zSorting SAM-file...z.correct.sam)rY   zbam filez%  Analysis for reference is finished.zAnalysis is finished.r   )NNN)NNN)NNN)&r   rf   index_to_strr   reads_fpathsr   r   rg   anyr   r   Zget_correct_names_for_chromsr_   r   r   r   rk   ro   rp   rT   rq   analyse_coveragerl   rm   r	   rt   getcwdchdirry   align_readsr'   merge_sam_filesrr   moveerrorZall_read_names_correctZclean_read_namesassert_file_exists)r   Zmain_output_dirr|   r   r}   rm   r   r~   r\   r   r   r   r   filenameZ	index_strr   r   Z	can_reusestats_fpathprev_dirr   tmp_bam_fpathZcorrect_sam_fpathr/   r/   r0   r     s    


"









r   c          
   C   s   g }|dks|dkr.t tj| |||||dd |dks>|dkrXt tj| |||||dd |dksh|dkrt tj| |||||dd |dks|dkrt tj| |||||dd |dks|dkrt tj| |||||dd |S )Nr   r   )
reads_typempZsinglepacbionanopore)run_alignerr   paired_reads
mate_pairsunpaired_readspacbio_readsnanopore_reads)r   r   r   rj   r}   rm   out_sam_fpathsr/   r/   r0   r   F  s    r   c             C   s  t dd t| }g }	g }
xt| D ]\}}t|tr|dksL|dkr|dkrZd}nd}t d t| | | d | }q||d	krd
nd | d | }n$|\}}|d | d | d | }t||t|d  }|jdd}t|stj	t
j|t|dt|dtd t|st|s@t||||td d |d	krt|d}tj	tddddt|d|||g	t|dtd t|rtj|| |d	krt||tj|\}}}|d k	r|tjk r|	j| |
j| q(W t|
dkrBt||}|jdd}tj|
d | tj|
d jdd| |j| n
|j|
 |	rtj|}t||d }t|d}|jtt|	 W d Q R X d S )NZbwaz mem -t r   r   z -ax map-pb z -ax map-ont z -t  r   z -p r%   z.samz.bamr   r]   )r   r^   r_   )rY   Zdedupr[   Zmarkdupz-rz-tz--tmpdir)r^   r_   r   z.is.txt)Z	bwa_fpathrT   r   
isinstancer   r   rg   r   r   ro   shlexr6   rq   r_   rl   rp   r   rr   r   r   rf   r   optimal_assembly_max_ISr   r'   extendr   r   rL   )Zread_fpathsr   r   r   rj   r}   rm   r   Zbwa_cmdinsert_sizesZtemp_sam_fpathsidxreadspresetcmdlineread1read2output_fpathr~   Zbam_dedup_fpathr   r   Zfinal_sam_fpathZfinal_bam_fpathr   r   outr/   r/   r0   r   V  sX    
$"
$









r   c       	      C   s   g }xJ| D ]B}t |r
|jdd}t|d}t |sBt|||t |j| q
W tjtdddt	||g| t
|dtd t||||t |S )	Nz.samz.bamrX   r[   mergez-tr]   )r^   r_   )r   rg   r   rn   r_   r   r   ro   rp   rT   rq   rl   )	Ztmp_sam_fpathsr   r~   rm   r}   Ztmp_bam_fpathsZtmp_sam_fpathr   Ztmp_bam_sorted_fpathr/   r/   r0   r     s    

r   c             C   s  t t}g |d< t| }x|D ]z}|j d }d|krLt||d< q$d|krj|d  t|8  < q$d|kr|d  t|8  < q$d|kr|d  t|8  < q$d|kr||d< q$d	|kr||d
< q$d|krd|kr||d< t||d |d< q$d|kr||d< t||d |d< q$d|krB||d< t||d |d< q$d|krrd|krr||d< t||d |d< q$d|kr||d< q$d|kr$|d jt| q$W W d Q R X |S )Ncoverage_thresholdsr   totalZ	secondaryZsupplementary
duplicatesr  rightr  leftr   %mapped_pcntzproperly pairedpairedpaired_pcnt
singletonssingletons_pcntzdifferent chrZmapQmisjointmisjoint_pcntdepthcoverage)r   r&   rq   r6   get_pcnt_readsr   float)r   reads_statsr   r7   valuer/   r/   r0   parse_reads_stats  sD    





 r  c             C   s$   |dkr t dt| d |  S d S )Nr   z%.2fg      Y@)r  r&   )r   Ztotal_readsr/   r/   r0   r    s    r  c                sL  ddl m} d |rVtj|}t| |d }t|rVt|td dkrVtj	d xt
|D ]\}}|j|}tj|}	tj|}
t| |	d }r|j|jjd  |j|jjd  |j|jjd  |j|jjd  |j|jjd	  |j|jjd
  |j|jjd  |j|jjd  |j|jjd  d rtd ttjkr|j|jjfddt
tjD  |j|jjd d  t|sqbt| |j|jj d  |j|jj d  |j|jj d  |j|jj  d  |j|jj! d  |j|jj" d  |j|jj# d  t d dkrtj	dtj$| d d |
 d  |j|jj% d	  |j|jj& d
  |j|jj' d  |j|jj( d  |j|jj) d   d rbt d ttjkrb|j|jj* fddt
tjD  |j|jj+ d d  qbW d S )Nr   )	reportingz.statr   z%  BWA: nothing aligned for reference.r  r  r  r  r  r  r  r  r  c                s   g | ]\}} d  | qS )r  r/   )r   i	threshold)ref_reads_statsr/   r0   r     s    z,add_statistics_to_report.<locals>.<listcomp>r  r  r
  z  zBWA: nothing aligned for 'z'.c                s   g | ]\}} d  | qS )r  r/   )r   r  r  )r  r/   r0   r     s    ),
quast_libsr  r   rf   r   r   r  r&   r_   rk   r   getlabel_from_fpath	add_fieldFieldsREF_MAPPED_READSREF_MAPPED_READS_PCNTREF_PROPERLY_PAIRED_READSREF_PROPERLY_PAIRED_READS_PCNTREF_SINGLETONSREF_SINGLETONS_PCNTREF_MISJOINT_READSREF_MISJOINT_READS_PCNT	REF_DEPTHr'   r   r  REF_COVERAGE__FOR_THRESHOLDSREF_COVERAGE_1X_THRESHOLDTOTAL_READS
LEFT_READSRIGHT_READSMAPPED_READSMAPPED_READS_PCNTPROPERLY_PAIRED_READSPROPERLY_PAIRED_READS_PCNTr   
SINGLETONSSINGLETONS_PCNTMISJOINT_READSMISJOINT_READS_PCNTDEPTHCOVERAGE__FOR_THRESHOLDSCOVERAGE_1X_THRESHOLD)rj   r   r   r  r   r   r\   r   reportZassembly_nameassembly_labelr/   )r  r  r0   r     sb    




"

$
r   c             C   sF  t j|}t| ||||}t||}	t| |d }
t||
|	||dd d}dd tjD }t|
~}xv|D ]n}|j	 }t
|d t|d  }}|d d	krj||| 7 }x.ttjD ] \}}||kr||  |7  < qW qjW W d Q R X t|d
L}|jdt
|  x2ttjD ]$\}}|jd|| d |f  qW W d Q R X d S )Nz
.genomecovF)Zprint_all_positionsr   c             S   s   g | ]}d qS )r   r/   )r   r  r/   r/   r0   r     s    z$analyse_coverage.<locals>.<listcomp>r%   r"   Zgenomer]   z	%s depth
z%.2f coverage >= %sx
d   )r   rf   
bam_to_bedr   r   calculate_genome_covr   r  rq   r6   r&   r  r   r   )r|   r   	chr_namesr~   r   r}   r_   r   r   chr_len_fpathr   	avg_depthZcoverage_for_thresholdsr   r7   lr  Zgenome_fractionr  r  out_fr/   r/   r0   r     s(    



"r   c          	   C   s   t |d}t|stjd t| |d }t||tj|tdtj tjf d t| |d }	t	||	|tdd t
| |d	 |	|td
d}
t|
|||t |S )Nre   z"  Calculating physical coverage...z.physical.bamzeproper_pair and not supplementary and not duplicate and template_length > %d and template_length < %d)rY   z.physical.sorted.bamz-n)Z	sort_rulez	.physicalT)Zbedpe)r   r   r_   rk   r   rl   r   rm   	MAX_PE_ISrn   rA  rB  )r|   r   r~   r   r}   r   rD  raw_cov_fpathZbam_filtered_fpathZbam_filtered_sorted_fpathr   r/   r/   r0   get_physical_coverage  s    

rJ  Tc             C   s   |d }t ||}t|s~tjd t|s^t|sBt|||t t||||t tj|d |
rnt||
| |r~t	||| t|	 r|rt
| |||||	|}t	||	| ||	fS )NZ_rawz  Calculating reads coverage...zcoverage file)r   r   r_   rk   rn   rB  r   r   print_uncovered_regionsproceed_cov_filerJ  )r|   r   r   r~   r   r   r}   r   r   r   r   r   rI  rD  r/   r/   r0   r   %  s$    

r   c             C   s  t t}t }d}t| d}t|d~}xb|D ]X}t|j }	|	d }
tt|	d
 }|
|kr|d7 }t|||
< |r||
 n|
}|jd| d ||
  d  t	|	dkrt|	d t|	d	  }}||
 j
|g||   n||
 j| t	||
 tkr6t	||
 t	||
 t  }xRtd|tD ]B}t||
 ||t  t }|jdj||
 t|d g q2W ||
 |t d  ||
< q6W tjstj|  W d Q R X W d Q R X d S )Nr   rr   r%   #r   r   r!   r    r   )r   listdictrq   r6   r&   r  rT   r   r'   r   r   COVERAGE_FACTORrangesumr   r   debugrt   remove)rI  r   r   Z	chr_depthZused_chromosomes	chr_indexin_coverageZout_coverager7   fsnamer  correct_namer)   r,   Z	max_indexr\   Z	cur_depthr/   r/   r0   rL  ;  s4    &rL  c             C   s   t | d}t | d}||fS )Nr   Z   )r   )r   Zdecile_1Zdecile_9r/   r/   r0   get_max_min_isY  s    

r\  r   c             C   s  t |||rd| nd d }t|r~yJt|*}t|j }t|j }t|j }W d Q R X |rn|||fS W n   Y nX g }	ddddg}
t| h}x`t|D ]T\}}|dkrP |jd	rq|jd
}|d }||
krqtt|d }|	j	| qW W d Q R X |	r|	j
  t|	}|dkr(dS t|	\}}ttj|}t|d<}|jt|d  |jt|d  |jt|d  W d Q R X |||fS dS )Nr   r   z.is.txtZ99Z147Z83Z163i@B r2   r3   r%      r   r   r   )NNN)NNN)r   r   rq   r&   r   r   r5   r6   absr   sortr   r\  rL   r   optimal_assembly_min_ISr   rT   )r   rj   r   Zreads_suffixr   r   r   Zmin_insert_sizeZmax_insert_sizer   Zmapped_flagsZsam_inr  rF  rX  flagZ	median_isrG  r/   r/   r0   r   _  sL    





r   c             C   s   t t}t| v}xn|D ]f}t|j }|d }tt|d }|rL|| n|}	t|dkr|dkr||	 j|d |d f qW W d Q R X t|dH}
x@|j D ]4\}}x*|D ]"\}}|
j	dj
|||gd  qW qW W d Q R X d S )	Nr   r%   r!   r    r   r3   r   r   )r   rO  rq   r6   r&   r  r'   r   itemsr   r   )rI  r   r   Zuncovered_regionsrW  r7   rX  rY  r  rZ  rG  chromZregionsr)   r,   r/   r/   r0   rK    s    

(rK  c       
   	      s2  |r|a t j  t jd tt s0t jd dS t|sBtj| tt tj	 t
|d t sjtj  tjstjrt fddttjtjD  rt jd dS t
|d}t
|d}t|d	j  t|d	j  t jd
d||f   t|| |t |||\}}}	tjstj dd t jd |||	fS )NzRunning Reads analyzer...zFailed reads analysisr   c                s    g | ]\}}t ||g tqS r/   )Zpaired_reads_names_are_equalr_   )r   r  r  )r   r/   r0   r     s   zdo.<locals>.<listcomp>z5  Read names are discordant, skipping reads analysis!zreads_stats.logzreads_stats.errr   z  zLogging to files %s and %s...T)r`   zDone.)NNN)NNN)r_   print_timestampr   Zcompile_reads_analyzer_toolsr
   rt   ru   Zdownload_gridssr   r   r   mkdirno_checkr   r   zipreverse_readsr   rq   r   rk   r   r   rT  rr   rs   )
r   r   rj   r   external_loggerr   r}   r   r   r   r/   )r   r0   do  s<    









rj  )NN)r   F)NNNNFFr   )NNT)r   )NN)D
__future__r   r   rt   rerr   r   collectionsr   mathr   os.pathr   r   r   r	   r
   r   r   r   r   r   quast_libs.ca_utils.miscr   r   quast_libs.fastaparserr   Zquast_libs.ra_utils.miscquast_libs.qutilsr   r   r   r   r   r   r   r   quast_libs.logr   Zquast_libs.reportingr   LOGGER_DEFAULT_NAMEr_   r   rQ  objectr   r?   r   r   r   r   r   r   r   r   r   r  r  r   r   rJ  r   rL  r\  r   rK  rj  r/   r/   r/   r0   <module>   sT   $(
:
3L
<  
d5%7

*