ó
£žbc           @   s´   d  d l  m Z d  d l m Z m Z d  d l m Z m Z m Z m	 Z	 d  d l
 m Z m Z m Z d  d l m Z d „  Z d d „ Z d „  Z d	 „  Z d
 „  Z d d „ Z d S(   iÿÿÿÿ(   t   defaultdict(   t   fastaparsert   qconfig(   t   process_misassembled_contigt
   IndelsInfot   find_all_svt   Misassembly(   t   get_best_aligns_setst   get_used_indexest   score_single_align(   t   ref_labels_by_chromosomesc         C   s/   t  |  } | | j t j ƒ | j | ƒ d  S(   N(   R
   t   appendR   t   POSSIBLE_MISASSEMBLIESt   add(   t   reft   misassemblies_by_reft   refs_with_translocationst   cur_ref(    (    sQ   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/ca_utils/analyze_contigs.pyt   add_potential_misassembly   s    
c   	      C   s€   |  } t  | ƒ } | j d ƒ } d } | | t j k r| d } t | j | | ƒ | r| | d 7} t | j | | ƒ q| n  | S(   Nt   Ni    i   (   t   lent   countR   t   unaligned_part_sizeR   R   (	   t   seqt   alignR   R   t   second_alignt   unaligned_partt   unaligned_lent   count_nst   possible_misassemblies(    (    sQ   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/ca_utils/analyze_contigs.pyt   process_unaligned_part   s    
c         C   sM  d } d } t  ƒ  } x‘ t | ƒ D]ƒ \ }	 }
 |
 j ƒ  | d k r™ |	 d k r^ | |	 d n d  } t |  | |
 j ƒ  !|
 | | d | ƒ} | | 7} n  |
 j ƒ  } q" W| | k râ t |  | | !| d | | ƒ } | | 7} n  | sì d  S| j t j ƒ | j	 t j
 g | ƒ x" | D] } | | j t j ƒ qW| j d | ƒ d  S(   Ni    i   R   iÿÿÿÿsd   		It can contain up to %d interspecies translocations (will be reported as Possible Misassemblies).
(   t   sett	   enumeratet   startt   NoneR   t   endR   R   t   POTENTIALLY_MIS_CONTIGSt   extendR   t   write(   R   t   ctg_lent   sorted_alignst   region_misassembliesR   t	   log_out_ft   prev_endt   total_misassemblies_countR   t   iR   t
   prev_alignR   R   (    (    sQ   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/ca_utils/analyze_contigs.pyt!   check_for_potential_translocation#   s,    	 	c         C   s”   d } x[ | D]S } | j  ƒ  | d |  | | j  ƒ  !j d ƒ } | t j k rT t S| j ƒ  } q W| | d |  | j d ƒ t j k r t St S(   Ni    i   R   (   R!   R   R   R   t   TrueR#   t   False(   R   R(   R'   R+   R   R   (    (    sQ   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/ca_utils/analyze_contigs.pyt   check_partially_unaligned=   s    .(c         C   sŒ  | | k } | r d n d } g  } |  r-|  d j  ƒ  d rf | j d d |  d j  ƒ  d f ƒ n  |  d j ƒ  } t |  ƒ d k rî xc |  d D]T }	 |	 j  ƒ  | d d k rÛ | j d | d |	 j  ƒ  d f ƒ n  |	 j ƒ  } q“ Wn  | |  d j ƒ  rD| j d |  d j ƒ  d | f ƒ qDn | j d d | f ƒ d j | ƒ }
 | j d j | t | ƒ t | ƒ | |
 g ƒ d	 ƒ d  S(
   Nt   fullt   partiali    i   s   %d-%diÿÿÿÿt   ,s   	s   
(   R!   R   R#   R   t   joinR&   t   str(   R(   t   contigR'   R   t   unaligned_info_filet   is_fully_unalignedt   unaligned_typet   unaligned_partsR+   R   t   unaligned_parts_str(    (    sQ   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/ca_utils/analyze_contigs.pyt   save_unaligned_infoI   s"    ((+c   F      C   s:  d } d }	 d }
 d } d } d } d } d } d } d } d } t  ƒ  } g  } g  } g  } t  ƒ  } g  } t t j ƒ } t  ƒ  } t t ƒ } x4 t j ƒ  D]& } t  d „  t j ƒ  Dƒ ƒ | | <q Wt ƒ  } t	 | d ƒ } t	 | d ƒ } | j
 d j d d d	 d
 d g ƒ d ƒ xTt j | ƒ D]C\ }  }! t |! ƒ }" |  j j
 d |  |" f ƒ d }# | j d ƒ | j d ƒ g  }$ |  | k r½g  | |  D] }% |% j t j k r–|% ^ q–}$ n  |$ r¯d }# t |$ d d „  d t ƒ}& |& d j }' |& d j }( t |& d ƒ }) g  }* |  j j
 d |) |' |( t |& ƒ f ƒ |' |" |	 k sZ|" |' | k  rx|* j |& d ƒ |& d }& xB |& r¹t |& d ƒ t j |) k r¹|* j |& d ƒ |& d }& qxW|& r|  j j
 d t t j ƒ ƒ x/ |& D]$ }% |  j j
 d t |% ƒ d ƒ qäWn  t |* ƒ d k rÇ|  j j
 d t |* d ƒ ƒ |  j j
 |* d j ƒ  d ƒ | j |* d j g  ƒ j |* d ƒ |  j j
 |* d j ƒ  d ƒ | j |* d j ƒ |* d j | d <q¬|  j j
 d t |* ƒ ƒ | d 7} | |" 7} t j d k rF|  j j
 d ƒ x^|* D]$ }% |  j j
 d t |% ƒ d ƒ qWq¬t j d k rD|  j j
 d ƒ |  j j
 d t |* d ƒ ƒ |  j j
 |* d j ƒ  d ƒ | j |* d j g  ƒ j |* d ƒ | j |* d j ƒ |* d j | d <|  j j
 |* d j ƒ  d ƒ |* d }* x`|* D]$ }% |  j j
 d t |% ƒ d ƒ qWq¬t j d k r¬|  j j
 d  ƒ | |* d j 8} t }+ d! }# xò t |* ƒ rq|  j j
 d t |* d ƒ ƒ |  j j
 |* d j d" t ƒ d ƒ |  j j
 |* d j ƒ  |+ ród n d# ƒ | j |* d j g  ƒ j |* d ƒ |+ rSt  }+ | j |* d j ƒ |* d j | d <n  | |* d j 7} |* d }* qƒWq¬q+t! |& |" |  j |! | | | ƒ \ }, }- }& }. |. d }/ t |- rÊt" t |& ƒ ƒ n	 t# |. ƒ ƒ }0 t |0 ƒ t |& ƒ k  rh|  j j
 d$ ƒ xd t$ g  t" t |& ƒ ƒ D] }1 |1 |0 k r|1 ^ qƒ D]( }1 |  j j
 d% t |& |1 ƒ d ƒ q9Wn  |, r÷	|  j j
 d& ƒ | d 7} | |" 7} t j d k rû| |" |/ j% 8} |  j j
 d' ƒ x0 |0 D]( }1 |  j j
 d t |& |1 ƒ d ƒ qÉWq'q÷	t j d k rš| d 7} |  j j
 d( ƒ t |/ j& ƒ t |0 ƒ k  rô	|  j j
 d) ƒ xE |0 D]: }1 |1 |/ j& k rV|  j j
 d t |& |1 ƒ d ƒ qVqVWqô	q÷	t j d k r÷	|  j j
 d* ƒ |  j j
 d+ ƒ xQ t' |. d ƒ D]? \ }1 }2 |  j j
 d, |1 d- |2 j( t |2 j& ƒ |2 j% f ƒ qÚW|- r6	|  j j
 d. ƒ n  |  j j
 d/ ƒ x« t$ |0 ƒ t$ |/ j& ƒ D] }1 |& |1 }% |  j j
 d0 t |% ƒ ƒ | j |% j g  ƒ j |% ƒ | |% j 7} |  j j
 |% j ƒ  d# ƒ |  j j
 |% j d1 t  ƒ d ƒ q`	Wq÷	n  |  j j
 d2 |/ j( t |/ j& ƒ |/ j% f ƒ g  |/ j& D] }3 |& |3 ^ q-
}4 t |4 ƒ d k ru|4 d }5 |  j j
 |5 j ƒ  d ƒ | j |5 j ƒ |5 j | d <|5 j) ƒ  |5 j* ƒ  }6 }7 |6 d |" |7 }8 |! |6 d  j+ d3 ƒ |! |7 j+ d3 ƒ }9 |" |8 }: |" |! j+ d3 ƒ }; t, |! |4 |" ƒ }< |< r| d 7} | |8 |9 7} |: t j- |; k  rRd4 }# n  |  j j
 d5 |: |; d6 |: |; f ƒ t. |4 |  |" |8 | ƒ n  |  j j
 d0 t |5 ƒ ƒ |  j j
 |5 j ƒ  d ƒ |< rV|6 d rù|  j j
 d7 |6 d |6 d f ƒ n  |" |7 r+|  j j
 d8 |7 d |" |" |7 f ƒ n  t j/ rVt0 |! |" |4 | | |  j ƒ qVn  | j |5 j g  ƒ j |5 ƒ q+t |4 d d9 „  ƒ}& |  j j
 d: ƒ |/ j% }8 dT \ }9 }= x> |& D]6 }% |9 |! |= |% j) ƒ  d !j+ d3 ƒ 7}9 |% j* ƒ  }= q¶W|9 |! |= j+ d3 ƒ 7}9 |" |8 }: |! j+ d3 ƒ }> |" |> }; t, |! |& |" ƒ }< |< r˜| d 7} | |8 |9 7} |  j j
 d5 |: |; d6 |: |; f ƒ t. |& |  |" |8 | ƒ n  |: t j- |; k  rê|  j j
 d; d< |" |> |: f ƒ t1 d= „  |& Dƒ ƒ | d <x˜ |& D] }% |  j j
 d0 t |% ƒ ƒ |  j j
 |% j ƒ  d ƒ |  j j
 d> ƒ |  j j
 |% j ƒ  d ƒ | j |% j ƒ | j |% j g  ƒ j |% ƒ qíW| d 7} |  j j
 d? |8 ƒ d@ }# |  j j
 d j dA |  t |" ƒ |# d g ƒ ƒ |  j j
 d ƒ q'n  t2 |& | | | | | | |! | | | |  ƒ \ }? }@ }A }B }C |C | d <| |@ 7} | |A 7} |? rg|" | |  <dB }# |B | d <n  |< r+|  j j
 d? |8 ƒ t j/ r¬t0 |! |" |& | | |  j ƒ q¬q+n| |  j j
 dC |" ƒ | j
 |  d ƒ |
 d 7}
 |! j+ d3 ƒ }> | |" |> 7} |  j j
 dD |" |> f ƒ t. g  |  |" |" | ƒ |  j j
 d j dA |  t |" ƒ |# g ƒ d ƒ |  j j
 d ƒ q'W| j3 ƒ  | j3 ƒ  t1 | j ƒ  ƒ }D t j- dE k r¬d  } n  i | dF 6| dG 6|D dH 6| dI 6|
 d 6| dJ 6| dK 6| dL 6t1 | ƒ dM 6| dN 6| dO 6| dP 6| dQ 6| dR 6| dS 6}E |E | | | | | | f S(U   Ni
   g®Gáz®ï?i    c         s   s   |  ] } | d  f Vq d S(   i    N(    (   t   .0t   key(    (    sQ   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/ca_utils/analyze_contigs.pys	   <genexpr>y   s    t   ws   	t   Contigt   Total_lengtht   Unaligned_lengtht   Unaligned_typet   Unaligned_partss   
s   CONTIG: %s (%dbp)
t	   unalignedt   correctR@   c         S   s   t  |  ƒ |  j f S(   N(   R	   t   len2(   t   x(    (    sQ   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/ca_utils/analyze_contigs.pyt   <lambda>   s    t   reversesP   Best alignment score: %.1f (LEN: %d, IDY: %.2f), Total number of alignments: %d
i   sX   		Skipping these alignments as insignificant (option --ambiguity-score is set to "%s"):
s   			Skipping alignment s-   		One align captures most of this contig: %s
iÿÿÿÿsL   		This contig has %d significant alignments. [An ambiguously mapped contig]
t   nonesI   		Skipping these alignments (option --ambiguity-usage is set to "none"):
t   onesR   		Using only first of these alignment (option --ambiguity-usage is set to "one"):
s   			Alignment: %s
t   allsI   		Using all these alignments (option --ambiguity-usage is set to "all"):
t	   ambiguoust	   ambiguitys    ambiguous
sK   			Skipping redundant alignments after choosing the best set of alignments
s   		Skipping redundant alignment sY   		This contig has several significant sets of alignments. [An ambiguously mapped contig]
sU   		Skipping all alignments in these sets (option --ambiguity-usage is set to "none"):
sK   		Using only the very best set (option --ambiguity-usage is set to "one").
s+   		So, skipping alignments from other sets:
sQ   		Using all alignments in these sets (option --ambiguity-usage is set to "all"):
s>   			The very best set is shown in details below, the rest are:
sJ   				Group #%d. Score: %.1f, number of alignments: %d, unaligned bases: %d
i   s   				etc...
sO   			List of alignments used in the sets above but not in the best set (if any):
s   		Alignment: %s
t   is_bestsU   			The best set is below. Score: %.1f, number of alignments: %d, unaligned bases: %d
R   t   correct_unalignedsR   		This contig is partially unaligned. (Aligned %d out of %d non-N bases (%.2f%%))
g      Y@s    		Unaligned bases: 1 to %d (%d)
s!   		Unaligned bases: %d to %d (%d)
c         S   s   |  j  ƒ  |  j ƒ  f S(   N(   R#   R!   (   RJ   (    (    sQ   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/ca_utils/analyze_contigs.pyRK   8  s    s   		This contig is misassembled.
s=   			Warning! This contig is more unaligned than misassembled. sL   Contig length is %d (number of Ns: %d) and total length of all aligns is %d
c         s   s   |  ] } | j  Vq d  S(   N(   RI   (   R?   R   (    (    sQ   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/ca_utils/analyze_contigs.pys	   <genexpr>T  s    s   unknown
s   		Unaligned bases: %d
t   mis_unalignedt   CONTIGt   misassembleds$   		This contig is unaligned. (%d bp)
s)   		Unaligned bases: %d (number of Ns: %d)
g        R)   t   misassembled_contigst   misassembled_basest   misassembly_internal_overlapt   partially_unalignedt   partially_unaligned_basest   fully_unaligned_basest   aligned_assembly_basest   ambiguous_contigst   ambiguous_contigs_extra_basest   ambiguous_contigs_lent   half_unaligned_with_misassemblyR   t   istranslocations_by_refs(   i    i    (5   t   dictR   R   t   bedR    t   listR
   t   valuesR   t   openR&   R6   R   t
   read_fastaR   t   stdout_fR   RI   t   min_alignmentt   sortedR0   t   idyR	   t   ambiguity_scoreR7   t   icarus_out_ft   icarus_report_strt
   setdefaultR   t   coords_filtered_ft
   coords_strt   ambiguity_usageR1   R   t   rangeR   R   t	   uncoveredt   indexesR    t   scoreR!   R#   R   R2   t   unaligned_mis_thresholdR>   t   is_combined_refR/   t   sumR   t   closeR"   (F   t	   ca_outputt   contigs_fpatht   unaligned_fpatht   unaligned_info_fpatht   alignst   ref_featurest   ref_lenst	   is_cyclict   maxunt   epsilonRG   RZ   R\   R[   R^   R_   R`   Ra   RY   t
   ref_alignst   contigs_aligned_lengthst   aligned_lengthsR)   RW   t   misassemblies_in_contigst   region_struct_variationst   istranslocations_by_refR   R   t   total_indels_infot   unaligned_fileR9   R8   R   R'   t   contig_typet   filtered_alignsR   R(   t   top_lent   top_idt	   top_scoret
   top_alignst   first_alignmentt   is_ambiguoust   too_much_best_setst	   best_setst   the_best_sett   used_indexest   idxt   cur_setR-   t   real_alignst   the_only_alignt   beginR#   t   unaligned_basest   number_unaligned_nst   aligned_bases_in_contigt   acgt_ctg_lent   is_partially_unalignedt   prev_post	   number_nst   is_misassembledt   current_miot   indels_infot   cnt_misassembliest   contig_aligned_lengthRX   t   result(    (    sQ   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/ca_utils/analyze_contigs.pyt   analyze_contigs^   sô   			$	)2 
&%$

%$
%$*$*
*>)

&
/+ 
*  
(

	
%
(		$


 
/



	
/

	

N(   t   collectionsR    t
   quast_libsR   R   t)   quast_libs.ca_utils.analyze_misassembliesR   R   R   R   t&   quast_libs.ca_utils.best_set_selectionR   R   R	   t   quast_libs.ca_utils.miscR
   R   R"   R   R/   R2   R>   R¬   (    (    (    sQ   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/ca_utils/analyze_contigs.pyt   <module>   s   "				