3
b>                 @   s0  d dl mZ d dlmZmZmZmZmZ d dlm	Z	m
Z
mZmZ d dlmZ d dlmZmZmZmZmZmZmZ yd dlmZ W n  ek
r   d dlmZ Y nX d dlZd dlZd dlmZ d d	lmZmZm Z m!Z! d d
l"m#Z# d dl$j%j%Z%d dlm&Z& d dl'm(Z( e(ej)Z*dddZ+dd Z,dddZ-dS )    )with_statement)"prepare_alignment_data_for_one_refsave_alignment_data_for_one_refsave_contig_size_htmlget_assemblies_dataget_contigs_data)parse_contigs_fpathparse_features_dataparse_cov_fpathparse_genes_data)parse_aligner_contig_report)make_output_dirgroup_referencesformat_cov_dataformat_long_numbersget_info_by_chrget_assembliescheck_misassembled_blocks)OrderedDictN)defaultdict)qconfigqutilsfastaparsergenome_analyzer)ref_labels_by_chromosomes)	reporting)
get_loggerTc       -      C   s
  t | g }t }i }i }d}t }d }d }g }d }|rxDtj|D ]6\}}|j d }|j| t|}||7 }|||< qFW d}dg}trt}nt|j	 t
jkr8t }t|t
jkr d}d}t
jt| }xb|j D ]<\}}||7 }|||< |t
jkrd}|d7 }t
jt| }qW nx|D ]}|||< q&W xjt|D ]^\}} ||  }!|d |! }"|r|t|d k r||  |||d   krd}"|j|" qBW t|j	 |t|j	 d   }#x| D ]}$t
j|$ }%|st|$}&n|tj|$ }'t|'t|j |\}(})}&}*|&s t|$}&|(d kr.d S x|(D ]}+|%|+_q4W t|(|)}(|j|( |)||%< t
jdkrt|*||%< |&||%< qW |rt|	||}| rt
jrt|| |r|rt| ||#|}|s|rt
j rt!|| |||||||||||||
d},nd },|,S )Nr   d      all)
contig_names_by_refs	ref_fpathstdout_patternambiguity_alignments_by_labelscontigs_by_assembliesfeatures_datagc_fpath	cov_fpathphysical_cov_fpathjson_output_dir)"r   r   r   
read_fastasplitappendlenr   sumvaluesr   MAX_SIZE_FOR_COMB_PLOTdictICARUS_MAX_CHROMOSOMESalignment_viewer_part_namestritems	enumerateassembly_labels_by_fpathr   r   label_from_fpath_for_fnamer   listkeyslabelr   ambiguity_usager	   gene_findingr   r   create_icarus_htmljs_data_gen)-contigs_fpathscontig_report_fpath_patternoutput_dirpathr!   r'   r(   r&   r"   find_similarfeaturesr)   genes_by_labelslists_of_aligned_blocksr$   structures_by_labelsr#   Ztotal_genome_sizereference_chromosomesr    
assemblies	chr_namesr%   nameseqchr_namechr_lenZvirtual_genome_shiftZcumulative_ref_lengthsZsummary_lenZ	num_parts	html_nameichrZ
chr_lengthZlen_to_appendvirtual_genome_sizecontigs_fpathr<   contigsreport_fpathaligned_blocksmisassembled_id_to_structureZambiguity_alignmentsblockicarus_html_fpath r[   ;/home/psgendb/BIRCHDEV/pkg/quast-5.2.0/quast_libs/icarus.pydo%   s    


, 








r]   c             C   s   dd t jd| D S )Nc             S   s    g | ]}|j  rt|n|qS r[   )isdigitint).0sr[   r[   r\   
<listcomp>   s    z natural_sort.<locals>.<listcomp>z(\d+))rer,   )Zstring_r[   r[   r\   natural_sort   s    rd   c       L         s  g }r| rt  }tj }x| jD ]}tt||j< d}d}xB|jD ]8}||j |j j| |j	rH|j
rx|d7 }qH|d7 }qHW tj|j}|jtjj| |jtjj| q&W tjj|tj}tjj|tj}tjj|stj| t||\}t|||\}}t|||\}}t|||\}}i }i } tj }!tjjtjjtjj tjj!g}"t"|||	|"\}#}$}%i }&i }'d}(t#dd t$|D })x2|)j% D ]&\}*}+|(dt&|+ d |* d 7 }(qW xpt$|D ]d\}+rfd	d
|D },nt'|dkr
|},ng},|,|&< dgfdd
|,D  |'< qW tt(}-tt}.tt) xZt$|D ]L\}+|& },|' }/t*fdd
|,D }0|0|< t'|,| < g }1|1jd xF|,D ]>}2|2 }3|1jd|2 d t&|3 d  |. j+|!|2  qW |rt,|d|dnd }4|r(t,|d|dnd }5|rBt,|dddnd }6t-||)|,|1|||||4|5|6|d\}7}8}9}:};|-<  < t.j/|}<t0|,|<||7|8|;|(|
|#|9|:d q^W t1||"|%|||\}=}>|#|$ |= }?t2|||>|? tt}@t'|dkrtj}A|@d j|A |@d jtj3 t4j5tj6}Bt# }Cdd
 |D }D|D|Cd< t4j7|dj|Dd tjjtjtj8}E|E|Cd< t4j7||Ed |s|@d j|E |@d jtj3 |rt'|dkstj9rd g i|Cd!<  fd"d
|D }Ft't)|Fdk}G|Gr d#|Cd! d$< xt:|t;d%D ]t<|.||t'|dkd&\}A}H}I}0}Jt# }K|A|Kd'< |J|Kd(< tjj=|H|Kd)< t&|  |Kd*< t>|0|Kd+< |Grt&t'  |Kd,< d-|I |Kd.< t&|- |Kd/< |Cd! d  j|K qW t4j7||Cd! d!d0d1 n|r|d t<|.||d#d&\}A}H}I}0}Jt# |Cd2< |A|Cd2 d3< tjj=||Cd2 d4< t&|  |Cd2 d5< t>|0|Cd2 d6< d-|I |Cd2 d7< t&|- |Cd2 d/< |@d j|A |@d jtj3 t4j7||Cd2 d8d0d1 t4j?|B||C t4j@||@ |S )9Nr   r   zvar references_by_id = {};
c             s   s   | ]\}}|t |fV  qd S )N)r5   )r`   rQ   chromr[   r[   r\   	<genexpr>   s    zjs_data_gen.<locals>.<genexpr>zreferences_by_id["z"] = "z";
c                s   g | ]}|  kr|qS r[   r[   )r`   contig)rR   r    r[   r\   rb      s    zjs_data_gen.<locals>.<listcomp>c                s   g | ]} | qS r[   r[   )r`   rg   )chromosomes_lengthr[   r\   rb      s    c                s   g | ]} | qS r[   r[   )r`   rg   )rh   r[   r\   rb      s    zvar chromosomes_len = {};zchromosomes_len["z"] = ;Zcoverage_dataZreads_max_depthZphysical_coverage_dataphysical_max_depthgc_datar   max_gc)r#   cov_data_strphysical_cov_data_strgc_data_strr    output_dir_path)ref_datar%   assemblies_datacontigs_structure_stradditional_assemblies_datalinkslinks_namesc             S   s   g | ]}t j| qS r[   )r   r8   )r`   rT   r[   r[   r\   rb      s    rJ   z, Zcontig_size_html
referencesZtable_referencesc                s   g | ]}t  | qS r[   )r.   )r`   rR   )aligned_assembliesr[   r\   rb      s    TZth_assemblies)key)one_chromosomechr_linktooltiprN   num_contigschr_sizeZnum_assembliesz%.3fZchr_gfnum_misassembliesF)as_textZone_referenceZalignment_linkr!   Zref_fragmentsZref_sizeZref_gfZmenu_reference)Ar   r:   r;   rJ   r   r<   
alignmentsref_namer-   similarmisassembledr   getfpath	add_fieldFieldsSIMILAR_CONTIGSSIMILAR_MIS_BLOCKSospathjoinr   icarus_html_fnameicarus_dirnameexistsmkdirr   r
   r   Zget_ref_aligned_lengthsN50NxNG50NGxr   r2   r7   r6   r5   r.   r_   setr/   extendr   r   r   name_from_fpathr   r   r   icarus_link
html_saverget_real_pathicarus_menu_template_fnamesave_icarus_datacontig_size_viewer_fnameis_combined_refsortedrd   r   basenamer   save_icarus_htmlsave_icarus_links)LrJ   rA   rh   rC   rH   r$   r#   r    r!   r"   r%   r&   r'   r(   r)   rK   Zchr_to_aligned_blocksassemblyZsimilar_correctZsimilar_misassembledalignreportZmain_menu_fpathZoutput_all_files_dir_pathchr_full_namescov_data	max_depthZphysical_cov_datarj   rk   rl   	chr_sizesr}   Zaligned_basesZnx_marksrr   Zassemblies_contig_size_dataZassemblies_n50Zref_contigs_dictZchr_lengths_dictrq   Zchr_names_by_idre   rQ   ref_contigsr   aligned_bases_by_chrchr_lengthsr~   Zdata_strZ
ref_contiglrm   rn   ro   alignment_viewer_fpathZref_data_strrs   rt   Zms_selectorsr   Zcontigs_sizes_strZtoo_many_contigsZall_dataicarus_linksr{   Zmain_menu_template_fpathZmain_data_dictlabelsZcontig_size_browser_fpathZnum_aligned_assembliesZis_unaligned_asm_existsrN   
chr_genomer|   Zreference_dictr[   )rx   rR   rh   r    r\   r@      s    

""

 




r@   )NNNNTNNN)	NNNNNNNNN).
__future__r   Zquast_libs.icarus_builderr   r   r   r   r   Zquast_libs.icarus_parserr   r	   r
   r   r   quast_libs.icarus_utilsr   r   r   r   r   r   r   collectionsr   ImportErrorZ%quast_libs.site_packages.ordered_dictr   rc   r   
quast_libsr   r   r   r   quast_libs.ca_utils.miscr    quast_libs.html_saver.html_saverr   r   quast_libs.logr   LOGGER_DEFAULT_NAMEloggerr]   rd   r@   r[   r[   r[   r\   <module>
   s,   $
 
[ 