a
    £žb‡h  ã                   @   s|  d dl mZ d dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZ zd dlmZ W n eyv   d dlmZ Y n0 d dlmZmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZmZmZ d dlmZmZm Z  d dl!m"Z" dZ#dZ$dZ%dZ&dZ'dd„ Z(dd„ Z)dd„ Z*dd„ Z+dd„ Z,dd„ Z-dd„ Z.dd „ Z/d!d"„ Z0d#d$„ Z1d%d&„ Z2d'd(„ Z3d)d*„ Z4d+d,„ Z5d-d.„ Z6d/d0„ Z7dS )1é    )Úwith_statementN)Údefaultdict)ÚjoinÚexistsÚdirnameÚrealpath)ÚOrderedDict)ÚqutilsÚqconfig)Úget_aux_out_fpaths)Úcreate_minimap_output_dirÚparse_cs_tag)Úget_chr_lengths_from_fastafile)Úget_assembliesÚcheck_misassembled_blocksÚ	Alignment)Úget_path_to_programÚis_non_empty_fileÚrelpath)ÚCOVERAGE_FACTORz
circos.pngg{®Gáz¤?g¸…ëQ¸®?éPÃ  c           	      C   sÈ  t |  ¡ ƒ}d}t|dƒ}t|dƒR}|  ¡ D ]8\}}| d dd||dt|ƒdg¡d	 ¡ t||ƒ}q.W d   ƒ n1 s|0    Y  t|d
ƒ}t|dƒ}| d¡ | d¡ tj	rÌ|dkrÌ| d¡ n2|dkrà| d¡ n|dkrô| d¡ n
| d¡ | d¡ | d¡ | d¡ | d¡ | d¡ | d¡ | d¡ | d¡ | d¡ | d¡ | d¡ | d¡ | d ¡ | d!¡ | d"¡ | d#¡ W d   ƒ n1 s´0    Y  |||fS )$Nr   zreference.karyotype.txtÚwú	Úchrú-Ú0ZlgreyÚ
zideogram.confz<ideogram>
z
<spacing>
é   zdefault = 0r
é   zdefault = 0.005r
éd   zdefault = 0.001r
zdefault = 0.0005r
zbreak = 0.005r
z</spacing>
zthickness = 30p
zstroke_thickness = 2
zstroke_color = black
zfill = yes
zradius = 0.85r
úshow_label = no
zlabel_font = default
z-label_radius = dims(ideogram,radius) + 0.05r
zlabel_size = 36
zlabel_parallel = yes
zband_stroke_thickness = 2
zshow_bands = yes
zfill_bands = yes
z</ideogram>)
ÚlenÚkeysr   ÚopenÚitemsÚwriteÚstrÚmaxr
   Ú
prokaryote)	Úchr_lengthsÚ
output_dirZnum_chromosomesÚmax_lenÚkaryotype_fpathÚout_fÚnameÚseq_lenÚideogram_fpath© r1   ú;/home/psgendb/BIRCHDEV/pkg/quast-5.2.0/quast_libs/circos.pyÚcreate_ideogram$   sF    
&*


















*r3   c                 C   sü  t |dƒ}t|dƒÐ}| d¡ | d¡ | d¡ | d¡ | d¡ | d¡ | d	¡ | d
¡ | d¡ | d¡ | d¡ | d¡ | d¡ | d dkr¸d| d  }d}nd|  }d}| dt|ƒ d ¡ | d¡ | d¡ | d¡ | d¡ | d¡ | d¡ | d¡ | d¡ | d¡ | d¡ | d ¡ | d!¡ | d"¡ | d¡ | d¡ | d¡ | d#¡ | d¡ | d$¡ | d!¡ | d%¡ | d&| ¡ | d¡ | d¡ | d'¡ W d   ƒ n1 sî0    Y  |S )(Nz
ticks.confr   zshow_ticks = yes
zshow_tick_labels = yes
zshow_grid = no
z<ticks>
zskip_first_label = yes
zskip_last_label = no
z%radius = dims(ideogram,radius_outer)
ztick_separation = 2p
z min_label_distance_to_edge = 0p
zlabel_separation = 5p
zlabel_offset = 5p
zlabel_size = 12p
zthickness = 3p
é
   é@B g      ð?ZMbpZkbpzlabel_multiplier = r   z<tick>
zspacing = 1u
zcolor = dgrey
zsize = 12p
r    zformat = %s
z</tick>
zspacing = 5u
zcolor = black
zsize = 18p
zshow_label = yes
zlabel_size = 24p
zspacing = 10u
zsize = 24p
zlabel_size = 32p
zsuffix = " %s"
z</ticks>)r   r#   r%   r&   )Úchrom_unitsr*   Úticks_fpathr-   Zlabel_multiplierÚsuffixr1   r1   r2   Úcreate_ticks_confL   s^    




































*r9   c                 C   sÜ   t |dƒ}g d¢}t|dƒ¬}tƒ }ddlm} |j ¡ D ]&\}}||vrRg ||< ||  |¡ q:t| ¡ ƒD ]J\}	\}}
|
D ]8}| 	d  |dt
| | ƒd||	t|ƒ   g¡d	 ¡ q~qnW d   ƒ n1 sÎ0    Y  |S )
Nzhighlights.txt)ÚorangeÚpurpleÚbluer   r   )Úcontigs_analyzerr   r   zfill_color=r   )r   r#   r   Ú
quast_libsr=   Úref_labels_by_chromosomesr$   ÚappendÚ	enumerater%   r&   r!   )r)   r*   Úhighlights_fpathÚcolorsr-   Zchrom_by_refsr=   ÚchromÚrefÚiZchromosomesr1   r1   r2   Úcreate_meta_highlights   s    
VrG   c              	   C   s  g }t tƒ}t| ƒ`}d }d }d }d }d }d }	d }
t|ƒD ](\}}| dd¡ d¡}|dkr¨| d¡}| d¡}| d¡}| d¡}| d	¡}| d
¡}	| d¡}
q<|r¼|d dkr¼q<q<|ràt|ƒdk rà||  | 	¡ ¡ q<|r<t|ƒdkr<t
|| ƒt
|| ƒ|| || ||	 ||
 f\}}}}}}t|||||dkd}||_|jr<| |¡ ||  |¡ q<W d   ƒ n1 s~0    Y  ||fS )Nr   Ú r   r   ÚS1ZE1Ú	ReferenceZContigZIDYZ	AmbiguousZ
Best_groupZCONTIGé   ÚTrue)r.   ÚstartÚendÚref_nameÚis_best_set)r   Úlistr#   rA   ÚreplaceÚsplitÚindexr!   r@   ÚstripÚintr   Ú	ambiguousrP   )Úreport_fpathÚaligned_blocksÚmisassembled_id_to_structureZreport_fileZ	contig_idÚ	start_colÚend_colZref_colZ
contig_colZ	ambig_colZbest_colrF   ÚlineÚ
split_lineZidy_colrM   rN   rO   Ú	ambiguityZis_bestÚblockr1   r1   r2   Úparse_aligner_contig_report   sF    





þ
0ra   c                 C   s|   g }| D ]D}|r|t  |¡ }t|ƒ\}}|d u r4qt||dd}| |¡ q|rttdd„ |D ƒƒ}t| |ƒj|fS dS d S )NT)Zfilter_localc                 S   s   g | ]}t |ƒ‘qS r1   )r!   )Ú.0rY   r1   r1   r2   Ú
<listcomp>Ä   ó    z$parse_alignments.<locals>.<listcomp>©NN)r	   Úlabel_from_fpath_for_fnamera   r   r@   r'   r   Ú
assemblies)Úcontigs_fpathsÚcontig_report_fpath_patternZlists_of_aligned_blocksÚcontigs_fpathrX   rY   rZ   Zmax_contigsr1   r1   r2   Úparse_alignments·   s    rk   c                 C   s@  t || jd ƒ}|d }t|dƒ}d }| jD ]º}d|_|jrHd|_n|jrTd|_|r²|j|jkr²|j|jkr²t|j	|j	ƒt
|j|jƒ |k r²t
|j	|j	ƒ|_	t|j|jƒ|_q0|ræ| d  |jt|j	ƒt|jƒd|j g¡d	 ¡ |}q0| d  |jt|j	ƒt|jƒd|j g¡d	 ¡ W d   ƒ n1 s20    Y  |S )
Nz.confr   r   ÚgreenÚredZppurpler   zcolor=r   )r   Úlabelr#   Z
alignmentsÚcolorZmisassembledrW   rO   r'   rM   ÚminrN   r%   r&   )ÚassemblyÚref_lenr*   Ú
conf_fpathZmax_gapr-   Z
prev_alignÚalignr1   r1   r2   Úcreate_alignment_plotsÊ   s(    
ÿ0Pru   c           	      C   s   g }t | ƒ0}|D ]}| t| ¡ d ƒ¡ qW d   ƒ n1 sB0    Y  t|ƒ}tt|ƒƒtt|ƒƒ }}t|dƒ}t	 
| |¡ ||||fS )Néÿÿÿÿzgc.txt)r#   r@   ÚfloatrS   r!   rV   rp   r'   r   ÚshutilÚcopy)	Úgc_fpathÚdata_dirZ	gc_valuesÚfr]   Ú
max_pointsÚmin_gcÚmax_gcZdst_gc_fpathr1   r1   r2   Úcreate_gc_plotá   s    
6
r€   c                 C   s¤  d}| sd |fS t ƒ }t|dƒ}t| ¡ ƒ}t| ƒ®}d}t|ƒD ]\}	}
|
 ¡ }|
 d¡r¨d}|d dd … }t|d ƒd }|| }dd„ t	|| d ƒD ƒ||< qBt|d ƒ}|| ||   
|¡ |t7 }qBW d   ƒ n1 sè0    Y  t|d	ƒŽ}| ¡ D ]t\}}t|ƒD ]`\}}|r4t|ƒt|ƒ nd}| d
 |t|| ƒt|d | ƒt|ƒg¡d ¡ |d7 }qqW d   ƒ n1 s’0    Y  ||fS )Nr   zcoverage.txtú#r   c                 S   s   g | ]}g ‘qS r1   r1   )rb   rF   r1   r1   r2   rc   þ   rd   z(create_coverage_plot.<locals>.<listcomp>é   rv   r   r   r   )Údictr   rQ   Úvaluesr#   rA   rS   Ú
startswithrV   Úranger@   r   r$   Úsumr!   r%   r&   )Ú	cov_fpathÚwindow_sizer)   r*   r}   Zcov_by_chromÚcov_data_fpathr|   ÚposrT   r]   ÚfsrD   Zchrom_orderZ	chrom_lenÚdepthr-   Z
depth_listrF   ÚdepthsZ	avg_depthr1   r1   r2   Úcreate_coverage_plotí   s4    


 (40r   c                    sJ  t  | j¡}t|dtjƒ}tt|ƒ|ƒ}t|ƒ\}}	}}t|	ƒrFtj	sJd S t||d ƒ}
t
‡ ‡fdd„ƒ}t|	ƒ¶}|D ] }t| d¡d  ¡ d ƒ}| ¡ d  ¡ }| ¡ d  ¡ }|}t|ƒD ]T}t|ƒd	 }| d
¡r|| t|ƒˆ   d	7  < |d	7 }qÂ| d¡sÂ||7 }qÂqxW d   ƒ n1 s00    Y  t|
dƒð}| ¡ D ]Ö\}}d\}}t|ƒD ]\}}|dkr†|d	 ˆ }nn|r°| d |t|ƒt|ƒdg¡d ¡ | d |t|ˆ ƒt|d	 ˆ ƒt|ƒg¡d ¡ |d	 ˆ }d }qf|rN| d |t|ƒt|ƒdg¡d ¡ qNW d   ƒ n1 s<0    Y  |
S )Nz..z.mismatches.txtc                      s   dgˆ ˆ d  S ©Nr   r   r1   r1   ©rr   r‰   r1   r2   Ú<lambda>  rd   z(create_mismatches_plot.<locals>.<lambda>ú|r   é   rv   r   Ú*ú+r   )r   r   r   r   r   )r	   rf   Úfpathr   r
   Ú detailed_contigs_reports_dirnamer   r   r   Ú	show_snpsr   r#   rV   rS   rU   r   r!   r…   r$   rA   r%   r&   )rq   r‰   rr   Úroot_dirr*   Úassembly_labelÚaligner_dirpathZcoords_basenameÚ_Zcoords_filtered_fpathZmismatches_fpathZmismatch_density_by_chromZcoords_filer]   Ús1rD   ÚcigarÚref_posÚopZn_basesr-   Zdensity_listrM   rN   rF   Údensityr1   r‘   r2   Úcreate_mismatches_plot  sF    


,
$4Hr£   c                    s¨  g }d}| s||fS | D ]„}t ||jd ƒ}t|jƒdkr>qd}t‡ ‡fdd„ƒ}	t|dƒ}
|jD ]’}|jr„|j|jv r„|jn|j}||jv rž|j| nd }|s¨qht	|j
ˆ t|jˆ d t|	| ƒƒƒD ](}|t|	| ƒk rÐ|	| |  d7  < qÐqh|	 ¡ D ]Z\}}t|ƒD ]F\}}|
 d  |t|ˆ ƒt|d ˆ ƒt|ƒg¡d ¡ |d7 }qqW d   ƒ n1 sv0    Y  t|ƒr”| |¡ t||ƒ}q||fS )	Nr   z.txtc                      s   dgˆ ˆ d  S r   r1   r1   r‘   r1   r2   r’   A  rd   z#create_genes_plot.<locals>.<lambda>r   r   r   r   )r   Úkindr!   Úregion_listr   r#   Ú
chromosomeZchr_names_dictÚseqnamer†   rM   rp   rN   r$   rA   r%   r&   r   r@   r'   )Úfeatures_containersr‰   rr   r*   Úfeature_fpathsr}   Úfeature_containerÚfeature_fpathZ
num_pointsZgene_density_by_chromr-   ÚregionrD   rF   Zgene_density_listr¢   r1   r‘   r2   Úcreate_genes_plot5  s:    

ÿ,40

r­   c              
   C   sh   t |dƒ}t|dƒ@}|  ¡ D ]&\}}| d  |dt|ƒg¡d ¡ qW d   ƒ n1 sZ0    Y  |S )Nz
genome.txtr   r   r   r   )r   r#   r$   r%   r&   )r)   r*   Zgenome_fpathr-   r.   r/   r1   r1   r2   Úcreate_genome_fileV  s
    
>r®   c                 C   s6  t |dƒ}g }d}t|ƒD ]*\}}	| dt|d ƒ |f¡ |d7 }q|D ]*}
t|
jƒdkrJ| |
j|f¡ |d7 }qJ|rˆ| d|f¡ t|dƒ>}| t	|  
¡ ƒd d d  d	d
„ |D ƒ¡ ¡ W d   ƒ n1 sÖ0    Y  t |dƒ}t|dƒ(}| dt||ƒ d ¡ W d   ƒ n1 s$0    Y  ||fS )Nz
labels.txtr   rq   r   Úcoverager   z
	0	0	null	ú,c                 S   s   g | ]\}}d ||f ‘qS )z
track%d=%sr1   )rb   rn   rF   r1   r1   r2   rc   m  rd   z!create_labels.<locals>.<listcomp>z
label.confzRz = 10
type = text
label_size = 30p
label_font = bold
label_parallel = yes
file = zÍ
r0 = eval(sprintf("%fr+5p", conf(conf(., track_idx)_pos)))
r1 = eval(sprintf("%fr+500p", conf(conf(., track_idx)_pos)))
<rules>
<rule>
condition = 1
value = eval(var(conf(., track_idx)))
</rule>
</rules>
)r   rA   r@   r&   r!   r¥   r¤   r#   r%   rQ   r"   r   )r)   rg   r¨   Úcoverage_fpathr*   Zlabels_txt_fpathÚtrack_labelsÚplot_idxrF   rq   rª   r-   Zlabels_conf_fpathr1   r1   r2   Úcreate_labels^  s,    


L
ûû&r´   c                 C   s@   | dkrd}n.| dkrd}n | dkr*d}n| dkr8d}nd	}|S )
Ni eÍi N  i £áé'  i áõiˆ  r5   éè  r   r1   r‘   r1   r1   r2   Úset_window_size  s    r·   c           
      C   sÌ   t |dƒ}t|dƒ¤}| dtjr&dnd||f ¡ tjrD| d¡ | d¡ t| ƒD ] \}}	| d|d	 |	jf ¡ qV| d
¡ |r| d¡ |rª| d|r¢dnd ¡ W d   ƒ n1 s¾0    Y  |S )Nz
legend.txtr   zn1) The outer circle represents reference sequence%s with GC (%%) heatmap [from %d%% (white) to %d%% (black)].
ÚsrH   z=Color bars help to distinguish between different references.
z
2) Assembly tracks:
z	assembly%d - %s
r   zjAssembly tracks are combined with mismatches visualization: higher columns indicate larger mismatch rate.
zK
3) User-provided genes. A darker color indicates higher density of genes.
z:
%d) The inner circle represents read coverage histogram.
é   é   )r   r#   r%   r
   Úis_combined_refrA   rn   )
rg   r~   r   r¨   r±   r*   Zlegend_fpathr-   rF   rq   r1   r1   r2   Úcreate_legend  s     
ÿ



4r¼   c           '         sf  t ˆdƒ‰ tˆ ƒst ˆ ¡ t| ƒ}t|ˆ ƒ\}	}
}|	dkrBd}n|	dkrPd}nd}t|ˆ ƒ}t| ¡ ƒ‰t	ˆƒ‰t
||ƒ\}}‡ ‡fdd„|D ƒ}|sœd S t|ˆ ƒ\}}}}t|ˆˆˆ ƒ\}}‡ ‡‡‡fdd„|D ƒ}t|ˆ|ˆ ƒ\}}tt||||gƒ}t||||ˆ ƒ\}}t ˆd	ƒ}d
}d}tgt|ƒ }|rPt|d< |tgt|ƒ 7 }|rht|d< | t¡ t|d< t|dƒ¾} |  d¡ |  dt|ˆƒ ¡ |  dt|ˆƒ ¡ |  dt|
ˆƒ ¡ |  d| ¡ |  d¡ |  dttƒ d ¡ tt|ƒƒD ],}!|  d|!|f ¡ |t8 }|||! 8 }qþ|  dt|ƒ|f ¡ |  d¡ |  dˆ ¡ |  dt ¡ |  d¡ |  d¡ |  d¡ |  d¡ |  d¡ |  d¡ |  d¡ |  d ¡ tjr|  d!¡ t|ˆ ƒ}"|  d"¡ |  dt|"ˆƒ ¡ |  d#¡ |  d$¡ |  d%¡ |  d&¡ |  dt d'd(ƒ ¡ |  d)| ¡ |  d*t| ¡ ƒ ¡ |  d+¡ |  d,¡ |D ]@\}#}!|  d-¡ |  d.|! ¡ |  dt|ˆƒ ¡ |  d/¡ qjt |ƒD ]\}!}$|  d-¡ |  d0¡ |  d1¡ |  d2¡ |  d3¡ |  dt|$ˆƒ ¡ |  d4t|ƒ d5 ¡ |  d6t|ƒ d7 ¡ |  d/¡ |rÀ||! rÀ|  d-¡ |  d8¡ |  d9¡ |  d:¡ |  dt||! ˆƒ ¡ |  d4t|ƒ d5 ¡ |  d6t|ƒ d7 ¡ |  d/¡ |d;7 }q´|D ]v}%|  d-¡ |  d<¡ |  dt|%ˆƒ ¡ |  d=¡ |  d4t|ƒ d5 ¡ |  d6t|ƒ d7 ¡ |  d/¡ |d;7 }qÐ|rÈ|  d-¡ |  d8¡ |  d9¡ |  dt|ˆƒ ¡ |  d>¡ |  d4t|ƒ d5 ¡ |  d6t|ƒ d7 ¡ |  d/¡ |d;7 }|  d-¡ |  d<¡ |  dt|ˆƒ ¡ |  d?¡ |  d@¡ |  dA¡ |  dB¡ |  d/¡ |  dC¡ W d   ƒ n1 sB0    Y  t!|||||ˆƒ}&||&fS )DNÚdatar5   i † rµ   r¶   c                    s   g | ]}t |ˆˆ ƒ‘qS r1   )ru   ©rb   rq   )r{   rr   r1   r2   rc   ´  rd   zcreate_conf.<locals>.<listcomp>c                    s   g | ]}t |ˆˆˆˆ ƒ‘qS r1   )r£   r¾   ©r{   r*   rr   r‰   r1   r2   rc   º  rd   zcircos.confgffffffî?r   rv   r   z+<<include etc/colors_fonts_patterns.conf>>
z<<include %s>>
zkaryotype = %s
zchromosomes_units = %d
z"chromosomes_display_default = yes
ztrack_width = r   ztrack%d_pos = %f
z<image>
z	dir = %s
z
file = %s
z
png = yes
z	svg = no
zradius = 1500p
zangle_offset = -90
zauto_alpha_colors = yes
zauto_alpha_steps = 5
zbackground = white
z	</image>
z<highlights>
z<highlight>
zr0 = 1r - 50p
zr1 = 1r - 30p
z</highlight>
z</highlights>
Úetczhousekeeping.confzmax_points_per_track* = %d
zmax_ideograms* = %d
z<plots>
zlayers_overflow = collapse
z<plot>
ztrack_idx = track%d
z</plot>
ztype = tile
zthickness = 50p
zstroke_thickness = 0
zlayers = 1
z$r0 = eval(sprintf("%.3fr",conf(trackz_pos) - conf(track_width)))
z$r1 = eval(sprintf("%.3fr",conf(trackz_pos)))
ztype = histogram
zthickness = 1
zfill_color = vlyellow
r   ztype = heatmap
zcolor = ylorbr-9
zfill_color = vlblue
zcolor = greys-6
zscale_log_base = 1.5
zr0 = 1r - 29p
zr1 = 1r - 1p
z	</plots>
)"r   r   ÚosÚmakedirsr   r3   r9   r‡   r„   r·   rk   r€   r­   r   r'   Ú
MAX_POINTSr´   ÚTRACK_INTERVALr!   ÚBIG_TRACK_INTERVALr@   r#   r%   r   r&   ÚTRACK_WIDTHr†   Úcircos_png_fnamer
   r»   rG   r"   rA   r¼   )'Ú	ref_fpathrh   ri   r*   rz   r¨   rˆ   Úloggerr)   r+   r,   r0   r6   r7   rg   Zcontig_pointsZalignments_fpathsr~   r   Z	gc_pointsr©   Zgene_pointsZmismatches_fpathsrŠ   Z
cov_pointsr}   Zlabels_fpathr²   rs   Úradiusr³   Ztrack_intervalsr-   rF   rB   rn   Zalignments_confr«   Úcircos_legend_fpathr1   r¿   r2   Úcreate_conf¢  sþ    




















































*rÌ   c              	   C   sÔ   t |ƒst |¡ t| |||||||ƒ\}}	tdƒ}
|
sV| d| d |	 d ¡ dS |
d|g}t|dƒ}t|dƒ}t|tƒ}tj	|t
|d	ƒt
|d	ƒd
}|dkr²t|ƒr²||	fS | d| d | d ¡ dS d S )NÚcircoszØCircos is not installed!
If you want to create Circos plots, install Circos as described at http://circos.ca/tutorials/lessons/configuration/distribution_and_installation and run the following command:
	circos -conf z
The plot legend is saved to r   re   z-confz
circos.logz
circos.errr   )ÚstdoutÚstderrr   z&  Circos diagram was not created. See z and z for details)r   rÁ   rÂ   rÌ   r   Úwarningr   rÇ   r	   Úcall_subprocessr#   r   )rÈ   rh   ri   rz   r¨   rˆ   r*   rÉ   rs   rË   Zcircos_execÚcmdlineÚ	log_fpathÚ	err_fpathÚcircos_png_fpathÚreturn_coder1   r1   r2   Údo-  s0    
þþýý



r×   )8Ú
__future__r   rÁ   Úrerx   Úcollectionsr   Úos.pathr   r   r   r   r   ÚImportErrorZ%quast_libs.site_packages.ordered_dictr>   r	   r
   Z!quast_libs.ca_utils.align_contigsr   Úquast_libs.ca_utils.miscr   r   Zquast_libs.fastaparserr   Zquast_libs.icarus_utilsr   r   r   Úquast_libs.qutilsr   r   r   Zquast_libs.reads_analyzerr   rÇ   rÆ   rÄ   rÅ   rÃ   r3   r9   rG   ra   rk   ru   r€   r   r£   r­   r®   r´   r·   r¼   rÌ   r×   r1   r1   r1   r2   Ú<module>   sL   (3( (!# 