ó
£žbc           @€  sã   d  d l  m Z d  d l Z d  d l Z d  d l m Z d  d l m Z m Z m	 Z	 m
 Z
 m Z d  d l m Z d  d l m Z e e
 j ƒ Z i  Z d d d „  ƒ  YZ d	 „  Z d
 „  Z d „  Z d „  Z d d d „  ƒ  YZ d S(   iÿÿÿÿ(   t   with_statementN(   t   defaultdict(   t   fastaparsert   genes_parsert	   reportingt   qconfigt   qutils(   t
   get_logger(   t   run_parallelt   FeatureContainerc           B€  s   e  Z d  d „ Z RS(   t    c         C€  s(   | |  _  | |  _ g  |  _ i  |  _ d  S(   N(   t   kindt   fpathst   region_listt   chr_names_dict(   t   selfR   R   (    (    sH   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genome_analyzer.pyt   __init__   s    			(   t   __name__t
   __module__R   (    (    (    sH   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genome_analyzer.pyR	      s   c           C€  s   t  S(   N(   t   ref_lengths_by_contigs(    (    (    sH   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genome_analyzer.pyt   get_ref_aligned_lengths   s    c         C€  sD  i  } x= | D]5 } | j  | k r5 | j  | | j  <q d	 | | j  <q Wt | ƒ d k rÞ t | ƒ d k rÞ | | d j  d	 k rÞ | j ƒ  } t j d |  | d j  | f d d ƒx‰ | D] } | | _  | | | j  <q» Wnb t d „  | j ƒ  Dƒ ƒ rt j d |  d d ƒn, d	 | j ƒ  k r@t j d |  d d ƒn  | S(
   sŠ   
    returns dictionary to translate chromosome name in list of features (genes or operons) to
    chromosome name in reference file.
    i   i    s«   Reference name in file with genomic features of type "%s" (%s) does not match the name in the reference file (%s). QUAST will ignore this issue and count as if they match.t   indents     c         s€  s   |  ] } | d  k Vq d  S(   N(   t   None(   t   .0t   chr_name(    (    sH   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genome_analyzer.pys	   <genexpr>8   s    s{   Reference names in file with genomic features of type "%s" do not match any chromosome. Check your genomic feature file(s).s‰   Some of the reference names in file with genomic features of type "%s" does not match any chromosome. Check your genomic feature file(s).N(	   t   seqnameR   t   lent   popt   loggert   noticet   allt   valuest   warning(   t   featuret   regionst	   chr_namest   region_2_chr_namet   regionR   (    (    sH   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genome_analyzer.pyt   chromosomes_names_dict"   s(    ;		
	c   6      C€  sä	  t  j |  ƒ } t  j |  ƒ } t ƒ  }	 t t ƒ }
 t j d t  j | ƒ | ƒ t	 j
 j | | d ƒ } t j r| | } n
 | d } t	 j
 j | ƒ s· t j d | d d d ƒd2 Si  } x/ | j ƒ  D]! \ } } d g | d | | <qÊ Wt j |  ƒ } t t | ƒ d	 d
 „  d t ƒ} g  } g  } x4 | D], \ } \ } } | j | ƒ | j | ƒ q2Wd g t | ƒ } d g t | ƒ } i  } t | ƒ } t j r¹| r¹t j d ƒ n  | rÝx | D] } g  | | <qÆWn  t | ƒ b} xX| D]P} t | j d ƒ d j ƒ  d ƒ } t | j d ƒ d j ƒ  d ƒ } t | j d ƒ d j ƒ  d ƒ } t | j d ƒ d j ƒ  d ƒ }  | j ƒ  d j ƒ  }! | j ƒ  d j ƒ  } | | k rÖt j d | d ƒ d  S| r| |! j t d | d | d | d |! d | d |  ƒ ƒ n  x) t | | d ƒ D] }" d | | |" <q+WqóWWd  QXxK | j  ƒ  D]= } x  | | D] }" d | | |" <qkWt! | | ƒ |
 | <qZWt j" rÃ| j# d ƒ rÃt	 j$ | ƒ n  d }# t j% r0t j" sôt	 j
 j | | d ƒ n d }$ t |$ d ƒ "}% x| j ƒ  D]
\ } } |% j& | d ƒ d }& x› t d | d ƒ D]† }" | | |" d k sz|" | | k rÌ|& t j' k rÃ|# d 7}# |% j& t( |" |& ƒ d t( |" d ƒ d ƒ n  d }& qP|& d 7}& qPW|& t j' k r|# d 7}# |% j& t( | |& d ƒ d t( | ƒ d ƒ qqWWd  QXn  |# |	 d <d  |	 t) j* j+ d <d  |	 t) j* j+ d <d  |	 t) j* j, d <d  |	 t) j* j, d <xæ| D]Þ}' |' j- s¦q‘n  d }( d }) t	 j
 j | | d  |' j. j/ ƒ  d! ƒ }* t |* d ƒ }+ |+ j& d" d3 ƒ |+ j& d( d) d ƒ d g t |' j- ƒ }, xˆt |' j- ƒ D]w\ }" }- d |, |" <g  }. |- j0 d  k rd* t( |- j1 d ƒ |- _0 n  x¹t | ƒ D]«\ }/ } t2 }0 xŒ| | D]€}1 |1 j3 |- j3 k rÇq©n  |- j4 |1 j5 k s©|1 j4 |- j5 k rñq©n.|1 j5 |- j5 k r·|- j4 |1 j4 k r·|, |" d+ k r2|) d 8}) n  d |, |" <|( d 7}( |1 j6 |- ƒ }2 |+ j& d, |- j0 |- j5 |- j4 |2 f ƒ |' j. d- k r| |/ c d 7<n | |/ c d 7<t }0 Pnh t7 |- j4 |1 j4 ƒ t8 |- j5 |1 j5 ƒ t j9 k r|, |" d k rd+ |, |" <|) d 7}) n  |. j |1 ƒ n  |0 r©Pq©q©W|0 rŒPqŒqŒW|, |" d+ k r7d. j g  t |. d	 d/ „  ƒD] }3 |3 j6 |- ƒ ^ qgƒ }2 |+ j& d0 |- j0 |- j5 |- j4 |2 f ƒ q7q7W|' j. d- k rì|( |	 t) j* j, d <|) |	 t) j* j, d <ny |	 t) j* j+ d d  k r1	d |	 t) j* j+ d <d |	 t) j* j+ d <n  |	 t) j* j+ d c |( 7<|	 t) j* j+ d c |) 7<|+ j: ƒ  q‘Wt j d t  j | ƒ d1 ƒ g  | D] } | | ^ q˜	}4 g  | D] } | | ^ qµ	}5 |
 |	 |4 | |5 | f f S(4   Ns     s   .coordss	   .filtereds   File with alignment coords (s"   ) not found! Try to restart QUAST.R   i    i   t   keyc         S€  s   t  |  d d ƒ S(   Ni   (   R   (   t   x(    (    sH   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genome_analyzer.pyt   <lambda>`   s    t   reverses¥   Analysis of genes and/or operons files (provided with -g and -O) requires extensive RAM usage, consider running QUAST without them if memory consumption is critical.t   |i   i   s?   Something went wrong and chromosome names in your coords file (sS   ) differ from the names in the reference. Try to remove the file and restart QUAST.R   t   startt   endt   contigt   start_in_contigt   end_in_contigs	   _gaps.txts	   /dev/nullt   ws   
t    t
   gaps_countt   _fullt   _partialt   _genomic_features_s   .txts   %s		%s	%s	%s	%s
s   ID or #t   Startt   Endt   Typet   Contigt   =i2   s   # i   s   %s		%d	%d	complete	%s
t   operont   ,c         S€  s   |  j  S(   N(   R,   (   t   block(    (    sH   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genome_analyzer.pyR)   Ü   s    s   %s		%d	%d	partial	%s
s   Analysis is finished.(   NN(   s   ID or #R7   R8   s   Types   Contig(;   R   t   label_from_fpatht   label_from_fpath_for_fnamet   dictR   t   intR   t   infot   index_to_strt   ost   patht   joinR   t   use_all_alignmentst   isfilet   errorR   t   itemsR   t
   read_fastat   sortedt	   enumeratet   Truet   appendR   t   memory_efficientR    t   opent   splitt   stript   AlignedBlockt   ranget   keyst   sumt   space_efficientt   endswitht   removet   analyze_gapst   writet   min_gap_sizet   strR   t   Fieldst   GENESt   OPERONSR   R   t   lowert   idt   numbert   FalseR   R-   R,   t   format_gene_infot   mint   maxt   min_gene_overlapt   close(6   t   contigs_fpatht   indext   coords_dirpatht   genome_stats_dirpatht   reference_chromosomest   ns_by_chromosomest
   containerst   assembly_labelt   corr_assembly_labelt   resultst   ref_lengthst   coords_base_fpatht   coords_fpatht   genome_mappingR   t   chr_lent   contig_tuplest   sorted_contig_tuplest   sorted_contigs_namest   contigs_ordert   idxt   namet   _t   features_in_contigst   operons_in_contigst   aligned_blocks_by_contig_namet   gene_searching_enabledt	   coordfilet   linet   s1t   e1t   s2t   e2t   contig_namet   iR3   t
   gaps_fpatht	   gaps_filet   cur_gap_sizet	   containert
   total_fullt   total_partialt   found_fpatht
   found_filet
   found_listR%   t   gene_blockst	   contig_idt   cur_feature_is_foundt	   cur_blockt   contig_infoR>   t   unsorted_features_in_contigst   unsorted_operons_in_contigs(    (    sH   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genome_analyzer.pyt   process_single_fileB   s   			
!####"	($
0	
=
	*
$$

&1
:-c   ;   
   C€  s	  t  j j | t j ƒ } d d l m } | j rI t  j j | d ƒ } n  t j	 ƒ  t j
 d ƒ t  j j | ƒ s‚ t  j | ƒ n  t j |  ƒ \ }	 }
 } t  j j | d ƒ } t | d ƒ } g  } x3 | j ƒ  D]% \ } } | j t | g | ƒ ƒ qÑ W| st j d d d	 ƒn  | r5| j t | d
 ƒ ƒ n t j d d d	 ƒx*| D]"} | j sdqOn  x/ | j D]$ } | j t j | | j ƒ 7_ qnWt | j ƒ d k rìt j d | j d d d	 ƒ| j d | j d d d ƒ qOt j d t t | j ƒ ƒ d | j d ƒ | j d | j d t t | j ƒ ƒ d ƒ t | j | j t |
 j  ƒ  ƒ ƒ | _! qOWdL \ } } x¬ | D]¤ } t# j$ | ƒ } d } xa | D]Y } | j d
 k rðt | j ƒ } | j% t# j& j' t | j ƒ ƒ qª| t | j ƒ 7} qªW| rˆ| } | j% t# j& j( | ƒ qˆqˆWi  } i  } i  } i  } g  } g  } g  } t j) } t* t | ƒ t j+ ƒ }  g  t, | ƒ D]' \ }! } | |! | | |
 | | f ^ qˆ}" t- t. |" |  d t/ ƒ\ }# }$ | t | ƒ t |# ƒ 7} | t _) |# st j
 d ƒ | j0 ƒ  d  Sx? |
 D]7 }% g  t1 t |# ƒ ƒ D] }& |# |& |% ^ q7t2 |% <qW| j d ƒ x{ |
 j ƒ  D]m \ }' }( t3 t2 |' ƒ }) | j d |' d t |( ƒ d d t |( t | |' ƒ ƒ d t |) ƒ d ƒ qsW| j d ƒ | j d t |	 ƒ d  ƒ | j d! t t j4 ƒ d ƒ | j d" t t j5 ƒ d  ƒ | j d  ƒ | j d# dM ƒ | j d# dN ƒ | j d/ d0 d ƒ xût6 | |$ ƒ D]ê\ } \ }* }+ }, }- }. t7 j8 | ƒ }/ |, | | <|+ | | <|. | | <|- | | <| j t9 |, ƒ ƒ | j t9 |. ƒ ƒ |* d1 }0 |* t# j& j: d2 }1 |* t# j& j: d3 }2 |* t# j& j; d2 }3 |* t# j& j; d3 }4 t# j$ | ƒ } | j d4 |/ d5  | j< t# j& j= ƒ | j< t# j& j> ƒ |0 f ƒ | j t? | j< t# j& j= ƒ ƒ ƒ x˜ t# j& j: |1 |2 f t# j& j; |3 |4 f g D]l \ }5 }6 }7 |6 d  k rD|7 d  k rD| j d6 dO ƒ q	| j d6 |6 |7 f ƒ | j% |5 d8 |6 |7 f ƒ q	W| j d ƒ qœW| j0 ƒ  t j@ rôd d9 lA mB }8 | rÏ|8 jC | | d: | | ƒ n  | rô|8 jC | | d* | | ƒ qôn  t jD r	d; d< lE mF }9 d d= lG mH }: | r€|9 jI | | | | d> d? ƒ |9 jJ | |  | |: | | d@ d? ƒ |9 jK | | | dA dB ƒ n  | rã|9 jI | | | | dC d* ƒ |9 jJ | |  | |: | | dD d* ƒ |9 jK | | | dE dF ƒ n  |9 jK | | | dG dH dI dJ ƒn  t j
 dK ƒ | S(P   Niÿÿÿÿ(   t   search_references_metat   raws   Running Genome analyzer...s   genome_info.txtR1   sb   No file with genomic features were provided. Use the --features option if you want to specify it.
R   s     R<   sP   No file with operons were provided. Use the -O option if you want to specify it.i    s   No genomic features of type "s   " were loaded.s   Genomic features of type "s
   " loaded: R   s   
s	     Loaded s    genomic features of type "t   "t   filter_resultss.   Genome analyzer failed for all the assemblies.s   reference chromosomes:
s   	s    (total length: s    bp, s   total length without N's: s    bp, maximal covered length: s    bp)
s   total genome size: s   

s   gap min size: s   partial gene/operon min size: s8   %-25s| %-10s| %-12s| %-10s| %-10s| %-10s| %-10s| %-10s|
t   assemblyt   genomet   duplicationt   gapst   genest   partialt   operonsR
   t   fractiont   ratioRe   R;   ix   R3   R4   R5   s   %-25s| %-10s| %-12s| %-10s|i   s    %-10s| %-10s|t   -s   %s + %s part(   t
   html_savert   featuresi   (   t   plotter(   t   contigs_aligned_lengthss   /features_cumulative_plots   genomic featuress   /features_frcurve_plots   /complete_features_histograms   # complete genomic featuress   /operons_cumulative_plots   /operons_frcurve_plots   /complete_operons_histograms   # complete operonss   /genome_fraction_histograms   Genome fraction, %t	   top_valueid   s   Done.(   NN(   s   assemblys   genomeR¥   R¦   s   geness   partials   operonss   partial(   R
   s   fractions   ratios   numberR
   s   genesR
   s   operons(   R¬   R¬   (L   RE   RF   RG   R   t   aligner_output_dirnamet
   quast_libsRŸ   t   is_quast_first_runR   t   print_timestampt	   main_infot   isdirt   mkdirR   t   get_genome_statsRR   RK   RP   R	   R   R   R   R   t   get_genes_from_fileR   R   R    R]   RC   R_   R&   t   listRW   R   R   R   t   gett	   add_fieldR`   t   REF_OPERONSt	   REF_GENESt   _num_nf_errorsRh   t   max_threadsRN   R   Rž   RO   Rk   RV   R   Ri   R^   Rj   t   zipR   t   name_from_fpathRX   Ra   Rb   t	   get_fieldt   MAPPEDGENOMEt   DUPLICATION_RATIOt   floatt   html_reportt   quast_libs.html_saverR­   t   save_features_in_contigst
   draw_plotsR
   R¯   t   quast_libs.ca_utils.miscR°   t   genes_operons_plott   frc_plott	   histogram(;   t	   ref_fpatht   aligned_contigs_fpathst   output_dirpatht   features_dictt   operons_fpathst    detailed_contigs_reports_dirpathRo   Rn   RŸ   t   genome_sizeRp   Rq   t   result_fpatht   res_fileRr   R!   t   feature_fpathR‘   t   fpatht   ref_genes_numt   ref_operons_numRl   t   reportt   genomic_featurest   files_features_in_contigst"   files_unsorted_features_in_contigst   files_operons_in_contigst!   files_unsorted_operons_in_contigst   genome_mappedt   full_found_genest   full_found_operonst   num_nf_errorst   n_jobsRm   t   parallel_run_argsRv   t   results_genes_operons_tuplest   refR   R   Rz   t   aligned_lenRu   Rœ   R‚   R   Rƒ   t   assembly_nameR3   t
   genes_fullt
   genes_partt   operons_fullt   operons_partt   fieldt   fullt   partR­   R¯   R°   (    (    sH   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genome_analyzer.pyt   doñ   s   	

	"#//+"	7	
5O		(




	6"%
		

RU   c           B€  s,   e  Z d d d d d d d  „ Z d „  Z RS(   c         C€  s:   | |  _  | |  _ | |  _ | |  _ | |  _ | |  _ d  S(   N(   R   R,   R-   R.   R/   R0   (   R   R   R,   R-   R.   R/   R0   (    (    sH   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genome_analyzer.pyR   §  s    					c         C€  sÒ   |  j  |  j } } |  j | j k  r[ | j |  j } | | k  rN | | 7} q[ | | 8} n  | j |  j k  r¯ | j t | j |  j ƒ } | | k  r¢ | | } q¯ | | } n  |  j d t | ƒ d t | ƒ S(   Nt   :R¬   (   R/   R0   R,   R-   Ri   R.   R_   (   R   R%   R,   R-   t   region_shiftt   region_size(    (    sH   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genome_analyzer.pyRg   ¯  s    N(   R   R   R   R   Rg   (    (    (    sH   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genome_analyzer.pyRU   ¦  s   (    (    (   t
   __future__R    t   loggingRE   t   collectionsR   R³   R   R   R   R   R   t   quast_libs.logR   t   quast_libs.qutilsR   t   LOGGER_DEFAULT_NAMER   R   R	   R   R&   Rž   Rô   RU   (    (    (    sH   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genome_analyzer.pyt   <module>   s   (		 	¯	µ