a
    b_                     @   s   d dl mZ d dlmZ zd dlmZ W n eyF   d dlmZ Y n0 d dlmZm	Z	m
Z
 d dlmZ d dlmZmZmZmZmZ dd Zd	d
 Zdd ZdddZdddZdd ZdddZdd ZdS )    )join)defaultdict)OrderedDict)qconfigqutils	reporting)
html_saver)	Alignmentget_html_nameformat_long_numbersget_misassembly_for_alignmentparse_misassembly_infoc                 C   s6  t t}d}|d7 }|d7 }|d7 }|d7 }|d7 }d}| D ]}t|}t|}	|	tjj}
|	tjj	}|	tjj
}|r|t| d }t||}|d| d	 | d
 7 }|d| d t|
 d 7 }|d| d t| d 7 }|d| d	 t| d
 7 }|D ]}|	||| |< qq<|||fS )N zvar assemblies_links = {};
zvar assemblies_len = {};
zvar assemblies_contigs = {};
z#var assemblies_misassemblies = {};
zvar assemblies_n50 = {};
z.stdoutzassemblies_links[""] = "";
assemblies_len[""] = ;
assemblies_contigs["zassemblies_n50[")r   dictr   label_from_fpathr   get	get_fieldFieldsTOTALLENCONTIGSN50label_from_fpath_for_fnamerelpathstr)contigs_fpathsicarus_dirpathstdout_patternnx_marksassemblies_n50assemblies_dataassemblies_contig_size_datacontigs_fpathassembly_labelreportlcontigsn50Zcontig_stdout_fpathnx r.   C/home/psgendb/BIRCHDEV/pkg/quast-5.2.0/quast_libs/icarus_builder.pyget_assemblies_data   s0    

r0   c                 C   s*  |D ]}t |tr|j|v r,||j}	nF|j|vrr||j |rr||j }
|d|j d t|
| d  |j}|j}| dt| d t| d t|j	 d t|j
 d t|j d	 t|j d
 |j d ||j  d  qt|tkrt|\}}| d| d | d  q| S )Nlinks_to_chromosomes["r   ";z{corr_start: ,corr_end: ,start:,end:,start_in_contig:,end_in_contig:,IDY:,chr: ""},{contig_type: "M", mstype: "	", msg: ")
isinstancer	   ref_nameindexappendr
   startendr   unshifted_startunshifted_endstart_in_contigend_in_contigidytyper   )data_str	structureref_contigschr_full_namescontig_names_by_refsused_chromosomeslinks_to_chromosomeschr_names_by_idelZnum_chrZother_ref_nameZcorr_el_startZcorr_el_endms_descriptionms_typer.   r.   r/   add_contig_structure_data0   s`    




rT   c
                 C   s   g }
|
 d |
 d | D ]}|
 d| d  |
 d| d  | | }|| D ]}|j|vrhqX|
 d| d |j d t|j d  d| d |j d	 g}|| |j }t||||||||	}| d
 |
| qXq d|
}
|
S )Nzvar contig_lengths = {};zvar contig_structures = {};zcontig_lengths[""] = {};zcontig_structures[""]["r   ;"] = [ ];
)r@   keysnamer   sizerT   extendr   )assemblies_contigschr_to_aligned_blockscontigs_by_assembliesrK   rL   rM   structures_by_labelsrN   rO   rP   Zcontigs_data_strassemblyZused_contigscontigrI   contig_structurer.   r.   r/   get_contigs_structureK   s(    


*


rf   Nc           9         s   t | |}t|
|d }d}|d g }g }|	r<|d d}t }t|dk}|dt|  d  |d	 |d
|  d  tt}tt}t	 |
 D ]}|d
|  d | d  tt|< t	dd || D }t|D ]\}}||| v rtt}t|| | dd d}d}g }tjs|D ]R} || jkrz|D ]}!| j|!jkrP|!j| j dkrP||! d| j d t| j d t| j d t| j d t| j d t| j d t| j d || j  d  ||  d|!j d t|!j d t|!j d t|!j d t|!j d t|!j d t|!j d ||!j  d  qP||  n| g}t|| j}q:|D ]2} ||  t| j| j d 7  < || | j || j }"d}#| jrJ|d7 }t|"| j | \}$}%d|$v rd| _d}%d}#n&|$D ] }&|&r| |&  d7  < qd|$| _n|| j jd kr`d}#d}%g }'t | j| jt| j| j }(})|| j j!D ]}*|(|*j  k r|)k sn |(|*j  k r|)k rn nrt| j| j|*j|(  }+t | j| j|*j|)  },d!t|*j d t|*j d" t|+ d# t|, d$ }-|'|- q|d%| j d& t| j d# t| j d t| j d t| j d' | j d( |% d)  | j"r|d*  d+7  < | j#r|d*  d,7  < | j$r|d*  d-7  < |#r|d*  d.7  < || j ||  r@|d/ |d0||   |d1 tj%r`|d2d0|' d1  |rtj&d3kr|d4 t'||| j | j |||	|||}|d* d d* d5 |d*< |d* d6 |d*< qq|d* d d* d7 |d*< || }.t|| }/| d8 d9 }0t(| ) | d:  d9 |0 | d:  }1|d;| d< t|. d= 7 }|d>| d< t|/ d= 7 }|d?| d@ t|1 dA t|0 dB 7 }q|r|*| |r|*| |r|dCtjrttj+nttj, d  |*| g dD}2tj-s|2.d: t/ }3|2D ]:d:kr*dnd9 t( fdEd|
 D |3< qt(|3) |3d8  }4g }5|30 D ]>\}6}7|6dkrd8kr|7dF7 }7|5|7t|6f qtt1||||||	||||
}8|	r|d| dG|}|||8||5||fS )HNz.htmlr   var links_to_chromosomes;links_to_chromosomes = {};r      zvar oneHtml = rW   var contig_data = {};contig_data["rU   rV   rX   c                 s   s   | ]}|j |fV  qd S N)r\   ).0rd   r.   r.   r/   	<genexpr>}       z5prepare_alignment_data_for_one_ref.<locals>.<genexpr>c                 S   s   | j S rl   )rA   xr.   r.   r/   <lambda>   ro   z4prepare_alignment_data_for_one_ref.<locals>.<lambda>)keyd   	{contig:"",corr_start: r3   r4   r5   r6   r7   r9   z"}FunknownTZcorrect_unaligned{start:z,corr_start:z
,corr_end:}{name:"z",corr_start:z,misassemblies:"z",mis_ends:""z,similar:"True"z,ambiguous:"True"z,is_best:"True"z,more_unaligned:"True"z,overlaps:[ ,],genes:[allz,ambiguous_alignments:[ z],},rY   local   interspecies translocationr   r   r   r   zassemblies_misassemblies["r   +r   zvar gc_window_size = )
relocationtranslocation	inversionr   r   c                 3   s   | ]}|    V  qd S rl   r.   )rm   rc   factorrS   Zms_typesr.   r/   rn      ro   srZ   )2r
   r   r@   setlenr   lowerr   intr   r[   	enumeratelistsortedr   large_genomerA   r\   rB   rC   rD   rE   rF   r>   maxabsaddlabelmisassembledr   misassembliescontig_typemingenessimilar	ambiguousis_best_setgene_findingambiguity_usagerT   sumvaluesr^   GC_window_size_largeGC_window_sizeis_combined_refremover   itemsrf   )9chrrL   rP   rK   rI   r`   rb   ra   ambiguity_alignments_by_labelsrM   output_dir_pathcov_data_strphysical_cov_data_strgc_data_str	html_namealignment_viewer_fpathadditional_assemblies_datarO   rN   num_misassembliesaligned_assembliesZis_one_htmlZassemblies_lenr_   rc   r+   Z
num_contig
ref_contigZoverlapped_contigs
alignmentsprev_endZprev_alignments	alignment
prev_alignre   Zcontig_more_unalignedr   misassembled_endsmisassemblyr   rE   rF   geneZ
corr_startZcorr_endZ	gene_infoassembly_lenZassembly_contigsZlocal_misassembliesZext_misassembliesZmisassemblies_typesZms_counts_by_typeZtotal_ms_countms_selectorsms_countms_namecontigs_structure_strr.   r   r/   "prepare_alignment_data_for_one_refb   s   





 

 

:








&

r   FTc                    s  | |j  }g  d }|D ]D}|| | |j kr|d t|ksP||d  j |j kr | qd  dd |jD } r fdd|D } rd  d nd |	r rg }|r||v r|| }||j D ]}t|trl|d	|j d
 t|j	 d t|j
 d t|j d t|j d t|j d t|j d t|j  d |j d |j d  qt|tkrt|\}}|d| d | d  q|
r|jsd|_d|j d t|j    d |j d d| d tjrdd| d nd d }||||fS )Nri   z, c                 S   s,   g | ]$}d t |j d t |j d qS rx   r5   ry   )r   rA   rB   )rm   r   r.   r.   r/   
<listcomp>	  ro   zadd_contig.<locals>.<listcomp>c                    s   g | ]}| vr|qS r.   r.   rm   r-   marksr.   r/   r     ro   z
, marks: "r{   r   ru   rv   r3   r4   r5   r6   r7   z,size:r8   z,chr:"r:   r;   r<   Z	unalignedrz   z",size:z,contig_type: "z",structure:[r~   r   r}   r   )r]   r   r@   r   r   r\   r=   r	   r   rA   rB   rC   rD   rE   rF   rG   r>   rH   r   r   r   r   )
cum_lengthrd   not_used_nxr$   rc   r+   Zcontig_size_linesnumrb   only_nxhas_aligned_contigsZ
end_contigalignr-   r   rJ   Zassembly_structurerQ   rR   rS   r.   r   r/   
add_contig   s    


$r   c                 C   s  g }| d t|dk}|sn| d |D ]@}	|	}
|rH|	|v rH||	 }
t|
|}
| d|	 d |
 d  q,dg}| d g }d	}tj}d
}| }| D ]}| d| d  d	}t| | dd dd}tt|tj}||d  j	}dd |D }t|tjkrd}t
|D ]H\}}||kr. q`t||||||||||d
\}}}}| | qt|tjkr|}tdd ||d  D }||7 }dd ||d  D }tt|| d t| d t| d }| d| d t| d tjrdd| d nd   |rx|t|k rxt
||d  D ]>\}}|sL qxt||||||||| |dd!
\}}}}q8t||}|d" d d" d# |d"< qd$|}|d%d| d# 7 }|d&t| d' 7 }|d(t| d) 7 }|d$| }||fS )*Nrg   ri   rh   r1   r   r2   rj   zvar chromosome;r   Frk   rX   c                 S   s   | j S rl   r]   rp   r.   r.   r/   rr   8  ro   z"get_contigs_data.<locals>.<lambda>T)rs   reversec                 S   s   g | ]}|qS r.   r.   r   r.   r.   r/   r   ;  ro   z$get_contigs_data.<locals>.<listcomp>)r   c                 s   s   | ]}|j V  qd S rl   r   )rm   r   r.   r.   r/   rn   H  ro   z#get_contigs_data.<locals>.<genexpr>c                 S   s8   g | ]0}|j D ]$}d t|j d t|j d qqS r   )r   r   rA   rB   )rm   rd   r   r.   r.   r/   r   J  s   
z hidden contigs shorter than z bp (total length: z bp)rz   z", size:z, contig_type:"short_contigs"r   r}   z]},r   )r   r|   z];

rZ   zvar contigLines = [zvar contigs_total_len = r   zvar minContigSize = rW   )r@   r   r
   r   
min_contigr   r   r   max_contigs_num_for_size_viewerr]   r   r   r   r   r   r   r   r   )ra   r#   r$   rb   rM   	ref_namesrL   Zadditional_dataZone_htmlr>   chr_namecontigs_sizes_strZcontigs_sizes_lines	total_lenZmin_contig_sizetoo_many_contigsr   rc   r   r+   Zlast_contig_numZcontig_thresholdr   ir   r   r   Zremained_lenZremained_genesZremained_contigs_nameZcontig_viewer_datar.   r.   r/   get_contigs_data!  s    










r   r   c                 C   sB  t tj}t }d|  d }d|}|d| d 7 }tj| v rn|} | dtt| t|dkrfdnd	 7 } | 	d
d} ||	 | | | | |
 }d|d< | |d< d| d |d< g |d< |D ]$\}}}|||d}|d 
| qt ||| t || d t ||d d t ||d d t j||d ddd d S )Nzchromosome = "r   z","zvar chrContigs = ["z"];
z (ri   z	 entries)z entry)_ zContig alignment viewertitle	reference<script type="text/javascript">	</script>dataZmisassemblies_checkboxes)rS   r   r   r>   Zdata_alignmentsZms_selectorF)as_text)r   get_real_pathr   icarus_viewers_template_fnamer   r   alignment_viewer_part_namer   r   replacer@   save_icarus_htmlsave_icarus_data)r   rK   r>   json_output_dirr   rI   r   ref_datafeatures_datar%   r   r   Zalignment_viewer_template_fpath	data_dictZchr_datachromosomesall_datarS   r   r   Zcheckboxr.   r.   r/   save_alignment_data_for_one_refa  s,    

(r   c                 C   s   t tj}t| tj}t }d|d< d|d< |rRttj|d< t 	||d d d| d |d< t 	||d d	 t 
||| d S )
NzContig size viewerr   TZsize_viewerZnum_contigs_warningr   r   r   Z
data_sizes)r   r   r   r   r   contig_size_viewer_fnamer   r   r   r   r   )output_all_files_dir_pathr   r   r   Zcontig_size_template_fpathZcontig_size_viewer_fpathr   r.   r.   r/   save_contig_size_html{  s    r   )NNNNNN)FT)r   r   r   r   r   )os.pathr   collectionsr   r   ImportError%quast_libs.site_packages.ordered_dict
quast_libsr   r   r   quast_libs.html_saverr   quast_libs.icarus_utilsr	   r
   r   r   r   r0   rT   rf   r   r   r   r   r   r.   r.   r.   r/   <module>	   s,      
  
"A 
