ó
£žbc           @   sÀ  d  d l  m Z d  d l  m Z d  d l Z d  d l Z d  d l m Z d  d l m Z m	 Z	 m
 Z
 m Z m Z d  d l m Z m Z m Z d  d l m Z m Z d  d l m Z d  d	 l m Z m Z m Z m Z d  d
 l m Z d Z d Z d Z d Z  d Z! d Z" d Z# e	 e e j$ e# e j% ƒ ƒ Z& e e& d ƒ a' e e& d ƒ a( d „  Z) d „  Z* d „  Z+ d „  Z, d „  Z- d „  Z. d „  Z/ d „  Z0 d d „ Z1 e2 d „ Z3 d „  Z4 d „  Z5 d S(   iÿÿÿÿ(   t   with_statement(   t   divisionN(   t   defaultdict(   t   joint   abspatht   existst   basenamet   isdir(   t   qconfigt	   reportingt   qutils(   t   compile_minimapt   minimap_fpath(   t
   read_fasta(   t   get_free_memoryt   md5t   download_external_toolt   get_dir_for_download(   t
   save_kmersgü©ñÒMbP?iè  i'  iÈ   i † t   kmct	   kmc_toolsc
      	   C   s  t  j | ƒ }
 t |  |
 d ƒ } t |  |
 d ƒ } t | d ƒ 5 } | j d t | ƒ ƒ | j d t | ƒ ƒ Wd  QXt | d ƒ  } | j d | ƒ | sª | r| j d | ƒ | j d | ƒ | j d	 | ƒ | j d
 | ƒ | j d | ƒ | j d |	 ƒ n  Wd  QXd  S(   Ns   .sfs   .statt   ws   Assembly md5 checksum: %s
s   Reference md5 checksum: %s
s   Completeness: %s
s   K-mer-based correct length: %d
s!   K-mer-based misjoined length: %d
s!   K-mer-based undefined length: %d
s   Total length: %d
s   # translocations: %d
s   # 100 kbp relocations: %d
(   R
   t   label_from_fpath_for_fnameR   t   opent   writeR   (   t
   output_dirt   contigs_fpatht	   ref_fpatht   completenesst   corr_lent   mis_lent	   undef_lent	   total_lent   translocationst   relocationst   labelt   kmc_check_fpatht   kmc_stats_fpatht   check_ft   stats_f(    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/unique_kmers.pyt   create_kmc_stats_file$   s    c         C   sÇ   t  j | ƒ } t |  | d ƒ } t | ƒ s2 t St | ƒ j ƒ  j d ƒ } t | ƒ d k  rc t S| d j	 ƒ  j ƒ  d t
 t | ƒ ƒ k r“ t S| d j	 ƒ  j ƒ  d t
 t | ƒ ƒ k rÃ t St S(   Ns   .sfs   
i   i    iÿÿÿÿi   (   R
   R   R   R   t   FalseR   t   readt   splitt   lent   stript   strR   t   True(   R   R   t   contigs_fpathsR   R#   R$   t   successful_check_content(    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/unique_kmers.pyt   check_kmc_successful_check7   s    ,,c         C   sm   t  |  t | ƒ d ƒ } t d | | g | | ƒ d } t | ƒ ri t t | ƒ j ƒ  j ƒ  d ƒ } n  | S(   Ns
   .histo.txtt	   histogrami    iÿÿÿÿ(   R   R   t   run_kmcR   t   intR   R*   R+   (   t   tmp_dirpatht   kmc_db_fpatht	   log_fpatht	   err_fpatht   histo_fpatht	   kmers_cnt(    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/unique_kmers.pyt   get_kmers_cntF   s    %c      
   C   st   t  |  t | ƒ d ƒ } t d t ƒ  ƒ } t d t | ƒ d d t | ƒ d d d | | |  g	 | | d	 t ƒ| S(
   Ns   .kmci   s   -ms   -n128s   -ks   -fms   -cx1s   -ci1t   use_kmc_tools(   R   R   t   maxR   R4   R.   R)   (   R6   t   fpatht   kmer_lenR8   R9   t   kmc_out_fpatht   max_mem(    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/unique_kmers.pyt   count_kmersO   s
    5c      	   C   s8  t  |  d ƒ } t ƒ  d d d t t j d ƒ d d t | ƒ | | g	 } t j | d t | d	 ƒ d
 t | d ƒ d d ƒt t	 ƒ } t t	 ƒ } t | ƒ  }	 xƒ |	 D]{ }
 |
 j
 d ƒ } t | ƒ d k  rÖ q© n  | d | d | d } } } | | j t | ƒ ƒ | | j t | ƒ ƒ q© WWd  QX| | f S(   Ns   kmers.coordss   -cxt   srs   -si   s	   --frag=nos   -tt   stdoutR   t   stderrt   at   indents     s   	i
   i    i   i   (   R   R   R.   R   t   unique_kmer_lenR
   t   call_subprocessR   R   t   listR+   R,   t   appendR5   (   R   R   t   kmers_fpatht   log_err_fpatht   max_threadst	   out_fpatht   cmdlinet   kmers_pos_by_chromt   kmers_by_chromt   ft   linet   fst   contigt   chromt   pos(    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/unique_kmers.pyt   align_kmersW   s    #1 !c         C   s,  t  |  d ƒ } t | d ƒ j ƒ  t ƒ  } d } xît | ƒ D]à\ }	 }
 t  |  d |	 d ƒ } t |
 ƒ | d } t | d ƒ U } xK t | ƒ D]= } | j d t | ƒ d ƒ | j |
 | | | !d ƒ q” WWd  QXt  |  d |	 d	 ƒ } t	 | | | | | d
 d ƒt
 ƒ  } x' t | ƒ D] \ } } | j | ƒ q$Wt | d ƒ © } d } x™ t | ƒ D]‹ \ } }
 | | k rf| s›t | ƒ | t k rñt | ƒ } | j d t | | ƒ d ƒ | j |
 d ƒ |	 | f | | | <qñqfqfWWd  QX| | 7} t j r> t j | ƒ q> q> W| | f S(   Ns   kmc.downsampled.txtR   i    t   kmers_s   .fastai   t   >s   
s   .filtered.fastat	   min_kmersRG   (   R   R   t   closet   dictR   R,   t   rangeR   R.   t   filter_contigst   sett   addR5   t   KMERS_INTERVALR   t   space_efficientt   ost   remove(   R6   R   R7   R@   R8   R9   t   downsampled_txt_fpatht	   ref_kmerst   prev_kmer_idxRX   t   seqt   kmc_fasta_fpatht   num_kmers_in_seqt   out_ft   it   filtered_fpatht   filtered_kmerst   idxt   _t   kmer_i(    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/unique_kmers.pyt   downsample_kmersi   s:    	&	$
	c         C   s   t  |  ƒ j d d ƒ S(   Ns   .kmct    (   R   t   replace(   R?   (    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/unique_kmers.pyt   get_clear_name‰   s    c   	      C   só   t  |  d j  g  | D] } t | ƒ d  ^ q ƒ d ƒ } t | ƒ d k ro t d g | d | g | | ƒ n€ | d } xm t d t | ƒ ƒ D]V } t  |  t | ƒ d t | ƒ d ƒ } t d | | | d | g | | ƒ | } q W| } | S(	   NRs   i   s   .kmci   t   simplet	   intersecti    i   (   R   Rx   R,   R4   R`   R.   (	   R6   t   kmc_out_fpathsR8   R9   RA   t   intersect_out_fpatht   prev_kmc_out_fpathRo   t   tmp_out_fpath(    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/unique_kmers.pyt   intersect_kmers   s    9$
'#
i   c         C   sL   |  j  d ƒ r d |  }  n  t d | |  d t | ƒ d | g | | ƒ d  S(   Ns   .txtt   @t   filters   -cis   -fa(   t   endswithR4   R.   (   t   input_fpatht   output_fpatht   db_fpathR8   R9   R]   (    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/unique_kmers.pyRa   ›   s    c         C   s[   | r t  n t } t j | d t t j ƒ d g |  d t | d ƒ d t | d ƒ ƒd  S(   Ns   -ts   -hpRE   RG   RF   (   t   kmc_tools_fpatht   kmc_bin_fpathR
   RJ   R.   R   RO   R   (   t   paramsR8   R9   R=   t
   tool_fpath(    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/unique_kmers.pyR4   ¡   s    &c         C   su   t  t  |  | ƒ t  | | ƒ ƒ } | rq | | k  rq | | | k  rq t  t  |  | ƒ t  | | | ƒ ƒ } n  | S(   N(   t   abs(   RY   t   prev_post   ref_post   prev_ref_post   cyclic_ref_lenst   dist(    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/unique_kmers.pyt   _get_dist_inconstistency§   s    $"+c   7      C   sÙ
  | j  ƒ  t j } | j d t | ƒ d ƒ g  } xv| D]n} t j | ƒ } t |  | | | ƒ r; t |  | d ƒ } t	 | ƒ j
 ƒ  j d ƒ }	 t |	 ƒ d k  r« q; n  | j d | d ƒ t j | ƒ }
 |
 j t j j d t |	 d	 j ƒ  j d
 ƒ d ƒ ƒ t |	 ƒ d k r™t |	 d j ƒ  j d
 ƒ d ƒ } t |	 d j ƒ  j d
 ƒ d ƒ } t |	 d j ƒ  j d
 ƒ d ƒ } t |	 d j ƒ  j d
 ƒ d ƒ } t |	 d j ƒ  j d
 ƒ d ƒ } t |	 d j ƒ  j d
 ƒ d ƒ } |
 j t j j d | d | ƒ |
 j t j j d | d | ƒ |
 j t j j d | d | ƒ |
 j t j j | ƒ |
 j t j j | ƒ |
 j t j j | | ƒ n  | j | ƒ q; q; Wg  | D] } | | k r´| ^ q´} t | ƒ d	 k rÿt |  ƒ | j d ƒ d  St j d k r| j d ƒ d  St! t" d d d g | ƒ } t# d | d d t$ d t$ ƒa% t# d | d d t$ d t$ ƒa& t' t% ƒ st' t& ƒ st( | ƒ r®| j d ƒ d  S| j d ƒ t) |  ƒ s×t* j+ |  ƒ n  t |  d ƒ } t |  d ƒ } t	 | d ƒ j, ƒ  t	 | d ƒ j, ƒ  t |  d  ƒ } t) | ƒ sFt* j+ | ƒ n  t- | | | | | ƒ } t. | | | | ƒ } | sš| j d! | d" | d# ƒ d  S| j d$ ƒ g  } xÓ t/ | ƒ D]Å \ } } t j0 | ƒ } | j d% t j1 | ƒ | ƒ t j | ƒ }
 t- | | | | | ƒ } t2 | | | g | | ƒ } t. | | | | ƒ } | d | } |
 j t j j d | ƒ | j | ƒ qºW| j d& ƒ g  t3 | ƒ D] \ } }  | ^ q}! | j d' ƒ t4 | | | | | | ƒ \ }" }# x¿t/ t5 | | ƒ ƒ D]¨\ } \ } }$ t j0 | ƒ } | j d% t j1 | ƒ | ƒ t j | ƒ }
 d  } d  } d  } d/ \ } } d	 } t6 ƒ  }% x: t3 | ƒ D], \ } }& | t |& ƒ 7} t |& ƒ |% | <qWt |! ƒ t7 k rÓ| j d( ƒ nšd	 } d	 } t8 | | |# | t j9 ƒ \ }' }( t j: ot j; }) |) r.|
 j< t j j= ƒ n d  }* d	 } d	 } t	 t | t j | ƒ d) ƒ d ƒ L}+ xB|' j> ƒ  D]4}, g  }- d0 \ }. }/ }0 }1 xÛ t? t5 |( |, |' |, ƒ d* d+ „  ƒD]³ \ }2 }3 |" |3 \ }4 }5 |. rY|0 rY|0 |4 k r.t@ t@ |2 |. ƒ t@ |5 |/ ƒ d ƒ d, k r.|2 |5 |4 f }1 qY|1 rY|- j |1 ƒ d1 \ }2 }5 }4 }1 qYn  |2 |5 |4 }. }/ }0 qºW|1 r‡|- j |1 ƒ n  d2 \ }. }/ }0 tA }6 xÐ |- D]È }1 |1 \ }2 }5 }4 |. rW	|0 rW	|4 |0 k r	| d 7} |+ jB d- |, |0 |. |4 |2 f ƒ t$ }6 qW	tC |2 |. |5 |/ |* ƒ tD k rW	| d 7} |+ jB d. |, |. |/ |2 |5 f ƒ t$ }6 qW	n  |2 |5 |4 }. }/ }0 q£W|6 r†	| |% |, 7} qut |- ƒ d	 k ru| |% |, 7} ququWWd  QX| | | } |
 j t j j d | d | ƒ |
 j t j j d | d | ƒ |
 j t j j d | d | ƒ |
 j t j j | ƒ |
 j t j j | ƒ |
 j t j j | | ƒ tE |  | | |
 j< t j j ƒ | | | | | | ƒ
 qùWt |  ƒ t jF sÈ
tG jH | ƒ n  | j d ƒ d  S(3   Ns!   Running analysis based on unique s   -mers...s   .stats   
i   s     Using existing results for s   ... s   %.2fi    s   : iÿÿÿÿi   i   i   i   i   i   g      Y@s   Done.t   linux_32s4     Sorry, can't run KMC on this platform, skipping...t   KMCR   R   t   platform_specifict   is_executables#     Sorry, can't run KMC, skipping...s     Running KMC on reference...s   kmc.logs   kmc.errR   t   tmps   KMC failed, check s    and s   . Skipping...s&     Analyzing assemblies completeness...s       s%     Analyzing assemblies correctness...s       Downsampling k-mers...sG   Reference is too fragmented. Scaffolding accuracy will not be assessed.s   .misjoins.txtt   keyc         S   s   |  d S(   Ni    (    (   t   x(    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/unique_kmers.pyt   <lambda>!  s    gš™™™™™©?s#   Translocation in %s: %s %d | %s %d
s$   Relocation in %s: %d (%d) | %d (%d)
(   NN(   NNNN(   NNNN(   NNN(I   t   print_timestampR   RI   t	   main_infoR.   R
   R   R2   R   R   R*   R+   R,   t   infoR	   t   gett	   add_fieldt   Fieldst   KMER_COMPLETENESSt   floatR-   R5   t   KMER_CORR_LENGTHt   KMER_MIS_LENGTHt   KMER_UNDEF_LENGTHt   KMER_TRANSLOCATIONSt   KMER_RELOCATIONSt   KMER_MISASSEMBLIESRL   R   t   platform_namet   warningt   NoneR   t   kmc_dirnameR   R/   R‡   R†   R   R   R   Rf   t   makedirsR^   RC   R<   t	   enumeratet   label_from_fpatht   index_to_strR   R   Ru   t   zipR_   t   MAX_REF_CONTIGS_NUMRZ   RO   t
   prokaryotet   check_for_fragmented_reft	   get_fieldt   REFLENt   keyst   sortedRŠ   R)   R   R   t   EXT_RELOCATION_SIZER(   t   debugt   shutilt   rmtree(7   R   R   R0   t   loggerR@   t   checked_assembliesR   R#   R%   t   stats_contentt   reportR   R   R   R    R!   R"   R?   t   kmc_dirpathR8   R9   R6   t   ref_kmc_out_fpatht   unique_kmersR{   t   idt   assembly_labelRA   R|   t   matched_kmersR   t   nameRs   t   ref_contigsRi   t   downsampled_kmers_fpathR7   t   contig_lensRk   t   kmers_by_contigt   kmers_pos_by_contigt	   is_cyclicRŽ   t   outRW   t   contig_markersR‹   R   t
   prev_chromt   markerRY   t   kmert	   ref_chromRŒ   t   is_misassembled(    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/unique_kmers.pyt   do®   s"   
	7######"""%
'%!(	!(3:
		
	"""
	(6   t
   __future__R    R   Rf   R¹   t   collectionsR   t   os.pathR   R   R   R   R   t
   quast_libsR   R	   R
   t   quast_libs.ca_utils.miscR   R   t   quast_libs.fastaparserR   t   quast_libs.qutilsR   R   R   R   t   quast_libs.reportingR   t   KMER_FRACTIONRd   t   MAX_CONTIGS_NUMR°   t   MIN_CONTIGS_LENR·   Rª   t   LIBS_LOCATIONR§   R¿   R‡   R†   R(   R2   R<   RC   RZ   Ru   Rx   R   Ra   R/   R4   R   RÓ   (    (    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/unique_kmers.pyt   <module>   s@   ("							 			