a
    b>                  	   @   s4  d dl mZ d dlmZmZmZmZmZ d dlm	Z	m
Z
mZmZ d dlmZ d dlmZmZmZmZmZmZmZ zd dlmZ W n ey   d dlmZ Y n0 d dlZd dlZd dlmZ d d	lmZmZm Z m!Z! d d
l"m#Z# d dl$m%  m%Z% d dlm&Z& d dl'm(Z( e(ej)Z*dddZ+dd Z,dddZ-dS )    )with_statement)"prepare_alignment_data_for_one_refsave_alignment_data_for_one_refsave_contig_size_htmlget_assemblies_dataget_contigs_data)parse_contigs_fpathparse_features_dataparse_cov_fpathparse_genes_data)parse_aligner_contig_report)make_output_dirgroup_referencesformat_cov_dataformat_long_numbersget_info_by_chrget_assembliescheck_misassembled_blocks)OrderedDictN)defaultdict)qconfigqutilsfastaparsergenome_analyzer)ref_labels_by_chromosomes)	reporting)
get_loggerTc           -      C   s  t | g }t }i }i }d}t }d }d }g }d }|rt|D ]6\}}| d }|| t|}||7 }|||< qDd}dg}trt}nt|	 t
jkr,t }t|t
jkrd}d}t
jt| }| D ]<\}}||7 }|||< |t
jkrd}|d7 }t
jt| }qn|D ]}|||< qt|D ]^\}} ||  }!|d |! }"|r|t|d k r||  |||d   krd}"||" q4t|	 |t|	 d   }#| D ]}$t
j|$ }%|st|$}&n|t|$ }'t|'t| |\}(})}&}*|&st|$}&|(d u r d S |(D ]}+|%|+_q"t|(|)}(||( |)||%< t
jdkr`|*||%< |&||%< q|r~t|	||}| rt
jrt|| |r|rt| ||#|}|s|rt
j rt!|| |||||||||||||
d},nd },|,S )Nr   d      all)
contig_names_by_refs	ref_fpathstdout_patternambiguity_alignments_by_labelscontigs_by_assembliesfeatures_datagc_fpath	cov_fpathphysical_cov_fpathjson_output_dir)"r   r   r   
read_fastasplitappendlenr   sumvaluesr   MAX_SIZE_FOR_COMB_PLOTdictICARUS_MAX_CHROMOSOMESalignment_viewer_part_namestritems	enumerateassembly_labels_by_fpathr   r   label_from_fpath_for_fnamer   listkeyslabelr   ambiguity_usager	   gene_findingr   r   create_icarus_htmljs_data_gen)-contigs_fpathscontig_report_fpath_patternoutput_dirpathr"   r(   r)   r'   r#   find_similarfeaturesr*   genes_by_labelslists_of_aligned_blocksr%   structures_by_labelsr$   Ztotal_genome_sizereference_chromosomesr!   
assemblies	chr_namesr&   nameseqchr_namechr_lenZvirtual_genome_shiftZcumulative_ref_lengthsZsummary_lenZ	num_parts	html_nameichrZ
chr_lengthZlen_to_appendvirtual_genome_sizecontigs_fpathr<   contigsreport_fpathaligned_blocksmisassembled_id_to_structureZambiguity_alignmentsblockicarus_html_fpath r[   ;/home/psgendb/BIRCHDEV/pkg/quast-5.2.0/quast_libs/icarus.pydo%   s    


, 







r]   c                 C   s   dd t d| D S )Nc                 S   s    g | ]}|  rt|n|qS r[   )isdigitint).0sr[   r[   r\   
<listcomp>       z natural_sort.<locals>.<listcomp>z(\d+))rer,   )string_r[   r[   r\   natural_sort   s    rf   c           L         s  g }r| rt  }t }| jD ]}tt||j< d}d}|jD ]8}||j |j | |j	rD|j
rt|d7 }qD|d7 }qDt|j}|tjj| |tjj| q$tj|tj}tj|tj}tj|st| t||\}t|||\}}t|||\}}t|||\}}i }i } t }!tjjtjjtjj tjj!g}"t"|||	|"\}#}$}%i }&i }'d}(t#dd t$|D })|)% D ]&\}*}+|(dt&|+ d |* d 7 }(qt$|D ]d\}+rfd	d
|D },nt'|dkr|},ng},|,|&< dgfdd
|,D  |'< qtt(}-tt}.tt) t$|D ]H\}+|& },|' }/t*fdd
|,D }0|0|< t'|,| < g }1|1d |,D ]>}2|2 }3|1d|2 d t&|3 d  |. +|!|2  q|rt,|d|dnd }4|rt,|d|dnd }5|r*t,|dddnd }6t-||)|,|1|||||4|5|6|d\}7}8}9}:};|-<  < t./|}<t0|,|<||7|8|;|(|
|#|9|:d qJt1||"|%|||\}=}>|#|$ |= }?t2|||>|? tt}@t'|dkrtj}A|@d |A |@d tj3 t45tj6}Bt# }Cdd
 |D }D|D|Cd< t47|d|Dd tjtjtj8}E|E|Cd< t47||Ed |s|@d |E |@d tj3 |rt'|dkstj9rd g i|Cd!<  fd"d
|D }Ft't)|Fdk}G|Grd#|Cd! d$< t:|t;d%D ]t<|.||t'|dkd&\}A}H}I}0}Jt# }K|A|Kd'< |J|Kd(< tj=|H|Kd)< t&|  |Kd*< t>|0|Kd+< |Grxt&t'  |Kd,< d-|I |Kd.< t&|- |Kd/< |Cd! d  |K qt4j7||Cd! d!d0d1 n|r|d t<|.||d#d&\}A}H}I}0}Jt# |Cd2< |A|Cd2 d3< tj=||Cd2 d4< t&|  |Cd2 d5< t>|0|Cd2 d6< d-|I |Cd2 d7< t&|- |Cd2 d/< |@d |A |@d tj3 t4j7||Cd2 d8d0d1 t4?|B||C t4@||@ |S )9Nr   r   zvar references_by_id = {};
c                 s   s   | ]\}}|t |fV  qd S )N)r5   )r`   rQ   chromr[   r[   r\   	<genexpr>   rc   zjs_data_gen.<locals>.<genexpr>zreferences_by_id["z"] = "z";
c                    s   g | ]}|  kr|qS r[   r[   r`   contig)rR   r!   r[   r\   rb      rc   zjs_data_gen.<locals>.<listcomp>c                    s   g | ]} | qS r[   r[   ri   chromosomes_lengthr[   r\   rb      rc   c                    s   g | ]} | qS r[   r[   ri   rk   r[   r\   rb      rc   zvar chromosomes_len = {};zchromosomes_len["z"] = ;Zcoverage_dataZreads_max_depthZphysical_coverage_dataphysical_max_depthgc_datar   max_gc)r$   cov_data_strphysical_cov_data_strgc_data_strr!   output_dir_path)ref_datar&   assemblies_datacontigs_structure_stradditional_assemblies_datalinkslinks_namesc                 S   s   g | ]}t j| qS r[   )r   r8   )r`   rT   r[   r[   r\   rb      rc   rJ   z, Zcontig_size_html
referencesZtable_referencesc                    s   g | ]}t  | qS r[   )r.   )r`   rR   )aligned_assembliesr[   r\   rb      rc   TZth_assemblies)key)one_chromosomechr_linktooltiprN   num_contigschr_sizeZnum_assembliesz%.3fZchr_gfnum_misassembliesF)as_textZone_referenceZalignment_linkr"   Zref_fragmentsZref_sizeZref_gfZmenu_reference)Ar   r:   r;   rJ   r   r<   
alignmentsref_namer-   similarmisassembledr   getfpath	add_fieldFieldsSIMILAR_CONTIGSSIMILAR_MIS_BLOCKSospathjoinr   icarus_html_fnameicarus_dirnameexistsmkdirr   r
   r   Zget_ref_aligned_lengthsN50NxNG50NGxr   r2   r7   r6   r5   r.   r_   setr/   extendr   r   r   name_from_fpathr   r   r   icarus_link
html_saverget_real_pathicarus_menu_template_fnamesave_icarus_datacontig_size_viewer_fnameis_combined_refsortedrf   r   basenamer   save_icarus_htmlsave_icarus_links)LrJ   rA   rl   rC   rH   r%   r$   r!   r"   r#   r&   r'   r(   r)   r*   rK   Zchr_to_aligned_blocksassemblyZsimilar_correctZsimilar_misassembledalignreportZmain_menu_fpathZoutput_all_files_dir_pathchr_full_namescov_data	max_depthZphysical_cov_datarn   ro   rp   	chr_sizesr   Zaligned_basesZnx_marksrv   Zassemblies_contig_size_dataZassemblies_n50Zref_contigs_dictZchr_lengths_dictru   Zchr_names_by_idrg   rQ   ref_contigsr   aligned_bases_by_chrchr_lengthsr   data_strZ
ref_contiglrq   rr   rs   alignment_viewer_fpathZref_data_strrw   rx   Zms_selectorsr   Zcontigs_sizes_strZtoo_many_contigsZall_dataicarus_linksr   Zmain_menu_template_fpathZmain_data_dictlabelsZcontig_size_browser_fpathZnum_aligned_assembliesZis_unaligned_asm_existsrN   
chr_genomer   Zreference_dictr[   )r|   rR   rl   r!   r\   r@      s   




  






r@   )NNNNTNNN)	NNNNNNNNN).
__future__r   Zquast_libs.icarus_builderr   r   r   r   r   Zquast_libs.icarus_parserr   r	   r
   r   r   quast_libs.icarus_utilsr   r   r   r   r   r   r   collectionsr   ImportErrorZ%quast_libs.site_packages.ordered_dictr   rd   r   
quast_libsr   r   r   r   quast_libs.ca_utils.miscr    quast_libs.html_saver.html_saverr   r   quast_libs.logr   LOGGER_DEFAULT_NAMEloggerr]   rf   r@   r[   r[   r[   r\   <module>
   s0   $
  
]  