ó
£žbc           @   sø   d  d l  m Z d  d l  m Z d  d l Z d  d l Z d  d l m Z d  d l m Z m	 Z	 m
 Z
 m Z m Z d  d l m Z d  d l m Z e e	 j ƒ Z d Z e	 j d	 Z e d
 „ Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d S(   iÿÿÿÿ(   t   with_statement(   t   divisionN(   t   join(   t   fastaparsert   qconfigt   qutilst	   reportingt   plotter(   t   set_window_size(   t
   get_loggeri   i   c            s+  d } d } t  d t j ƒ d } g  t d | ƒ D] } | t j ^ q3 } d g | } t  d t j ƒ d } g  t d | ƒ D] } | t j ^ q€ }	 d g | }
 d	 } | rË | |	 |
 f | | f f Sx!t j |  ƒ D]\ } ‰ t ˆ ƒ ˆ j	 d ƒ } | sqÛ n  ˆ j	 d ƒ ˆ j	 d ƒ } d | | } | t  | t j ƒ c d 7<t j
 ‰  xx ‡  ‡ f d †  t d t ˆ ƒ ˆ  ƒ Dƒ D]K } t | ƒ } | d	 k	 rˆ|
 t  t  | t j ƒ t j ƒ c d 7<qˆqˆW| | 7} | | 7} qÛ W| d k rd	 } n | d | } | |	 |
 f | | f f S(
   se   
       Returns percent of GC for assembly and GC distribution: (list of GC%, list of # windows)
    i    id   i   t   Nt   Gt   Cg      Y@c         3   s    |  ] } ˆ | | ˆ  !Vq d  S(   N(    (   t   .0t   i(   t   nt   seq_full(    sD   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/basic_stats.pys	   <genexpr>1   s    N(   t   intR   t   GC_contig_bin_sizet   ranget   GC_bin_sizet   NoneR   t
   read_fastat   lent   countt   GC_window_sizet   get_GC_percent(   t   contigs_fpatht   skipt   total_GC_amountt   total_contig_lengtht   GC_contigs_bin_numR   t   GC_contigs_distribution_xt   GC_contigs_distribution_yt
   GC_bin_numt   GC_distribution_xt   GC_distribution_yt   total_GCt   namet   contig_ACGT_lent   contig_GC_lent   contig_GC_percentt   seqt
   GC_percent(    (   R   R   sD   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/basic_stats.pyt
   GC_content   s:    ))	21
	c         C   sw   t  |  ƒ t k  r d  St  |  ƒ |  j d ƒ } | t  |  ƒ d k  rI d  S|  j d ƒ |  j d ƒ } d | | } | S(   NR
   i   R   R   id   (   R   t   MIN_GC_WINDOW_SIZER   R   (   R*   t   ACGT_lent   GC_lenR+   (    (    sD   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/basic_stats.pyR   @   s    c   
   
   C   sô   d } t  j r t  j n t  j } t | d ƒ ¾ } x´ t j |  ƒ D]£ \ } } | j d | d t | ƒ d ƒ xq t	 d t
 | ƒ | ƒ D]W } | | | | !} t | ƒ }	 |	 d  k	 r‹ | j t | ƒ d t |	 ƒ d ƒ q‹ q‹ WqC WWd  QXd  S(   Ni    t   wt   #t    s   
(   R   t   large_genomet   GC_window_size_largeR   t   openR   R   t   writet   strR   R   R   R   (
   t	   ref_fpatht   gc_fpatht	   chr_indext   window_sizet   out_fR&   R   R   R*   R+   (    (    sD   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/basic_stats.pyt   save_icarus_GCM   s    #c   
      C   sÙ   t  | ƒ } t | d ƒ ¸ } x® t j |  ƒ D] \ } } xŽ t d t | ƒ | ƒ D]t } | | | | !} t | ƒ }	 |	 d  k	 rS | j d j	 | t
 | ƒ t
 | t | ƒ ƒ t
 |	 ƒ d g ƒ ƒ qS qS Wq. WWd  QXd  S(   NR0   i    s   	s   
(   R   R5   R   R   R   R   R   R   R6   R   R7   (
   R8   t   reference_lengthR9   R;   R<   R&   R   R   R*   R+   (    (    sD   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/basic_stats.pyt   save_circos_GCZ   s    c         C   sú  d } g  } g  } g  } g  } t  d „  |  Dƒ ƒ } xí t |  | ƒ D]Ü \ } }	 t | ƒ }
 g  } x. t | ƒ D]  \ } } | j | g | ƒ qo W| |
 d } | |
 d } | |
 d d } | | } | j t | d | ƒ ƒ | j t | d | ƒ ƒ | j t d | |	 d
 ƒ ƒ qD Wt  t | ƒ d ƒ } t  t | ƒ d	 ƒ } t t  | ƒ | ƒ } | | | | k  r| d k rt  | | | d ƒ } n  | | | 8} | | | 8} | | d } | | t k  r| t d 8} | t d 7} | | d } n  d	 } | | k r1| | d } | | 8} n d	 } x­ t |  ƒ D]Ÿ \ } } | j d	 g t | ƒ ƒ xv t | ƒ D]h \ } } | | | } | | k  r¦d	 } n | | k r¿| d } n  | t | ƒ t | ƒ c | 7<qwWqDW| | | | | f S(   Ni   c         s   s   |  ] } t  | ƒ Vq d  S(   N(   R   (   R   t   v(    (    sD   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/basic_stats.pys	   <genexpr>k   s    i   i   i   g      ø?g      ð?i   i    gUUUUUUÕ?(	   t   maxt   zipt   sumt	   enumeratet   extendt   appendR   t   mint   MIN_HISTOGRAM_POINTS(   t
   cov_valuest   nums_contigst   min_bins_cntt	   bin_sizest   low_thresholdst   high_thresholdst   cov_by_binst   max_covt   valuest   num_contigst   assembly_lent   bases_by_covt   coveraget   basest   q1t   q2t   q3t   iqrt   bin_sizet   low_thresholdt   high_thresholdt
   max_pointst   offsett   indext   bin_idx(    (    sD   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/basic_stats.pyt   binning_coveragee   sZ    
# 	(c         C   sï  t  ƒ  } t  ƒ  } g  | D] } |  | r | ^ q } xR | D]J } t j | ƒ j t j j ƒ | | <t j | ƒ j t j j ƒ | | <q< Wg  | D] } |  | ^ q‘ } g  | D] } | | ^ q® } t | | ƒ \ }	 }
 } } } d t |
 ƒ d } t	 j
 | |	 | d | d |
 d | d | d | ƒxÀ | D]¸ } t |  | g | | g ƒ \ } }
 } } } t j | ƒ } t j | ƒ } | d t |
 ƒ d } t j j | | d	 ƒ } t	 j
 | g | | | d
 t d |
 d | d | d | ƒq/Wd  S(   Ns   Coverage histogram (bin size: s   x)s   /coverage_histogramR[   RP   R\   R]   s    coverage histogram (bin size: t   _coverage_histogramt	   draw_bars(   t   dictR   t   gett	   get_fieldt   Fieldst   TOTALLENt   CONTIGSRb   R7   R   t   coverage_histogramR   t   label_from_fpatht   label_from_fpath_for_fnamet   ost   pathR   t   True(   t   coverage_dictt   contigs_fpathst   output_dirpatht	   total_lent   contigs_dictR   t   contigs_with_coverageRI   RR   t   common_coverage_valuesR[   R\   R]   RP   t   histogram_titlet   coverage_valuest   labelt
   corr_labelt   histogram_fpath(    (    sD   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/basic_stats.pyt   draw_coverage_histograms—   s,    		#"&c   7         s›  t  j ƒ  t  j d ƒ t j j | ƒ s9 t j | ƒ n  d  } g  } d  } d  } d  } |  r”t t	 j
 |  ƒ j ƒ  d t ƒ} t | ƒ } t | ƒ } t |  ƒ \ }	 }
 } t j s½ t j rÜ t | d ƒ } t |  | ƒ n  t j rt | d ƒ } t |  | | ƒ n  t  j d ƒ t  j |	 d  k	 rdd t j j |  ƒ d t | ƒ d t | ƒ d	 d
 |	 n d ƒ | d k rÉt j rÉt  j d ƒ qÉn5 t j rÉt j } | g } t  j d t | ƒ ƒ n  t  j d ƒ g  } g  } t ƒ  ‰  t j d ƒ } xJt  | ƒ D]<\ } } g  ˆ  | <t! j" | ƒ } t  j d t! j# | ƒ | ƒ g  } d } xÐ t	 j$ | ƒ D]¿ \ } } | j% t | ƒ ƒ | | j& d ƒ 7} | j' | ƒ rft( t) | j' | ƒ d ƒ ƒ } t ˆ  | ƒ | k rˆ  | c d g | t ˆ  | ƒ d 7<n  ˆ  | | c t | ƒ 7<qfqfW| j% | ƒ | j% | ƒ qWg  | D] } t | d t ƒ^ qN} t* g  | D] } t | ƒ ^ qvƒ } d } | t j+ d k r!d d  l, } t( | t j+ ƒ } | | } g  | D]W } g  t- d | ƒ D]; } | | t | ƒ k  rðt | | d | | | !ƒ ^ qð^ qÚ} t | ƒ d k rÎg  t- d | ƒ D]S } | | t | ƒ k  r”t | | d | | | !ƒ n t | | d | ƒ ^ qYt | | d | ƒ g } n  xu t- t | ƒ ƒ D]9 } t | | ƒ } | | j% t | | | | ƒ ƒ qáWn% g  | D] } t | d t ƒ^ q(} | r{t j. r{d d l/ m0 }  |  j1 | | ƒ q{n  t j. rºd d l/ m0 }  |  j2 | | | ƒ |  j3 | | ƒ n  t  j d ƒ g  }! g  }" d }# d d l4 m5 }$ x t  t6 | | | ƒ ƒ D]†\ } \ } }% } t7 j8 | ƒ }& |$ j9 |% ƒ \ }' }( |$ j: |% ƒ }) d. \ }* }+ | r‰|$ j; |% | ƒ \ }* }+ |$ j: |% | ƒ }, n  |$ j9 |% t j< ƒ \ }- }. d/ \ }/ }0 | r×|$ j; |% | t j< ƒ \ }/ }0 n  t |% ƒ }1 t | d t j= ƒ\ }2 }3 }4 |! j% |3 ƒ |" j% |4 ƒ t  j |1 d k rÓd t! j# | ƒ t! j" | ƒ d t |' ƒ d t |( ƒ d |) d  k	 r€d |) n d  d t |1 ƒ d	 |2 d  k	 r¬d
 |2 n d d d  t) | ƒ d! t) |1 ƒ n d ƒ |& j> t7 j? j5 |' ƒ |& j> t7 j? j@ |( ƒ |& j> t7 j? jA |) d  k	 r+d |) n d  ƒ | rt jB r|& j> t7 j? jC |* ƒ |& j> t7 j? jD |+ ƒ |& j> t7 j? jE |, d  k	 r“d |, n d  ƒ n  |& j> t7 j? jF |- ƒ |& j> t7 j? jG |. ƒ | r	t jB r	|& j> t7 j? jH |/ ƒ |& j> t7 j? jI |0 ƒ n  |& j> t7 j? jJ t |% ƒ ƒ |% rð	|& j> t7 j? jK t* |% ƒ ƒ t* |# t* |% ƒ ƒ }# |& j> t7 j? jL |1 ƒ t jB s©	|& j> t7 j? jM |2 d  k	 rŸ	d
 |2 n d  ƒ n  |& j> t7 j? jN | ƒ |& j> t7 j? jO d
 t) | ƒ d! t) |1 ƒ ƒ n  |  rc
|& j> t7 j? jP t( | ƒ ƒ |& j> t7 j? jQ | ƒ t jB sˆ
|& j> t7 j? jR |	 d  k	 rV
d
 |	 n d  ƒ qˆ
q| r|& j> t7 j? jS t( | ƒ ƒ qqWd d  l, } | jT |# d" d# ƒ t _U |! }5 d  }6 |  rà
t |5 ƒ }6 |5 j% |
 ƒ n  t j. rt j= rd d l/ m0 }  |  jV | | |5 |" |6 ƒ n  tW jX | | t j+ k | | t | d$ ƒ d% g  ƒ | r³t jB r³tW jX | | t j+ k | | t | d& ƒ d' g  t- t | ƒ ƒ D] } | ^ qƒ n  t j r„tW jY |  | | t | d( ƒ d) ƒ t j= sRtW jZ |  | |5 t | d* ƒ ƒ xI t6 | |" ƒ D]5 \ } }3 tW j[ | |3 t | t! j" | ƒ d+ ƒ ƒ qWn  t\ ‡  f d, †  | Dƒ ƒ r„t] ˆ  | | ƒ q„n  t  j d- ƒ | | f S(0   Ns%   Running Basic statistics processor...t   reverses   gc.icarus.txts   gc.circos.txts     Reference genome:s       s   , length = s   , num fragments = s	   , GC % = s   %.2ft	   undefinedi   sÜ     Reference genome is fragmented. You may consider rerunning QUAST using --fragmented option. QUAST will try to detect misassemblies caused by the fragmentation and mark them fake (will be excluded from # misassemblies).s     Estimated reference length = s     Contig files: s   _cov_(\d+\.?\d*)i    R
   i   i   iÿÿÿÿ(   t
   html_savers     Calculating N50 and L50...(   t   N50R   s   , N50 = s   , L50 = s   , auN = s   %.1fs   , Total length = s   , # N's per 100 kbp = s    %.2fg     jø@iè  iX  t   Nx_plott   Nxt   NGx_plott   NGxt   cumulative_plots   Cumulative lengtht   GC_content_plott   _GC_content_plotc         3   s   |  ] } ˆ  | Vq d  S(   N(    (   R   R   (   Rq   (    sD   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/basic_stats.pys	   <genexpr>l  s    s   Done.(   NN(   NN(^   t   loggert   print_timestampt	   main_infoRn   Ro   t   isdirt   mkdirR   t   sortedR   t   get_chr_lengths_from_fastafileRQ   Rp   R   RC   R,   R   t   create_icarus_htmlt
   draw_plotsR   R=   t   draw_circosR?   t   infot   basenameR7   t   check_for_fragmented_reft   warningt   estimated_reference_sizeRe   t   ret   compileRD   R   Rl   t   index_to_strR   RF   R   t   findallR   t   floatRA   R^   t   mathR   t   html_reportt   quast_libs.html_saverR€   t   save_reference_lengthst   save_contigs_lengthst   save_tick_xt    R   RB   R   Rf   t   N50_and_L50t	   au_metrict   NG50_and_LG50t   x_for_additional_Nxt   no_gct	   add_fieldRh   t   L50t   auNt   is_combined_reft   NG50t   LG50t   auNGRƒ   t   LxR…   t   LGxRj   t
   LARGCONTIGRi   t   GCt   UNCALLEDt   UNCALLED_PERCENTt   REFLENt   REF_FRAGMENTSt   REFGCt	   ESTREFLENt   ceilt   min_differencet   save_GC_infoR   R‚   R†   R‡   t   contigs_GC_content_plott   anyR}   (7   R8   Rr   Rs   t   results_dirR>   t   reference_lengthst   reference_fragmentst   icarus_gc_fpatht   circos_gc_fpatht   reference_GCt   reference_GC_distributiont!   reference_GC_contigs_distributiont   lists_of_lengthst   numbers_of_Nst   cov_patternt   idR   t   assembly_labelt   list_of_lengtht   number_of_NsR&   R*   t   covt   listRR   t   multiplicatorR   R^   R   t   corr_lists_of_lengthst   num_listt
   last_indexR€   t   list_of_GC_distributionst    list_of_GC_contigs_distributionst   largest_contigR   t   lengths_listt   reportt   n50t   l50R«   t   ng50t   lg50R¯   t   nxt   lxt   ngxt   lgxt   total_lengthR%   t   GC_distributiont   GC_contigs_distributiont!   list_of_GC_distributions_with_reft   reference_index(    (   Rq   sD   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/basic_stats.pyt   do²   s,   
!	Q				
,!%%
a‚*%		.!¹,/	/1	2#1*)	"	$(   t
   __future__R    R   Rn   R˜   t   os.pathR   t
   quast_libsR   R   R   R   R   t   quast_libs.circosR   t   quast_libs.logR	   t   LOGGER_DEFAULT_NAMER‰   RH   R   R-   t   FalseR,   R   R=   R?   Rb   R}   Ræ   (    (    (    sD   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/basic_stats.pyt   <module>   s"   (*				2	