
yZc           @  s  d  d l  m Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l m	 Z	 m
 Z
 m Z d  d l m Z m Z d  d l m Z d  d l m Z d  d l m Z m Z m Z m Z m Z m Z e e j  Z y  d  d l m Z d  d l Z Wn& d  d l m Z d  d l j  Z n Xd  d l! j" j# Z$ d  d l% Z% e% j& d	  e j' d
 e j(  Z) e j' d e j(  Z* d Z+ d Z, d Z- d e- d Z. e e j/ d  Z0 e e0 d e j1  Z2 d d g Z3 e j4 e j5 e2 e j/  Z6 d a8 d a9 d a: d Z; e< Z= i  Z> d a? d   Z@ e j' d  d  ZA d   ZB d   ZC d   ZD e e< d  ZE e d  ZF e e< d  ZG d    ZH d!   ZI d"   ZJ d d#  ZK d$   ZL d%   ZM d&   ZN d d'  ZO d S((   i(   t   with_statementN(   t   isdirt   isfilet   join(   t   qconfigt   qutils(   t   _get_fasta_file_handler(   t
   get_logger(   t   is_non_empty_filet
   is_python2t   slugifyt   correct_namet   get_dir_for_downloadt   md5(   t   urlopenix   s%   \S+\_(?P<taxons>\S+);(?P<seqname>\S+)s)   (?P<id>\S+\_[0-9.]+)[_ |](?P<seqname>\S+)sF   http://www.arb-silva.de/fileadmin/silva_databases/release_123/Exports/s%   SILVA_123_SSURef_Nr99_tax_silva.fastat   123s   silva.s   .dbt   external_toolst   blastt   makeblastdbt   blastnii    c         C  sG   t  r4 t j j t  |   } t j j |  r4 | Sn  t j |   } | S(   N(   t   blast_dirpatht   ost   pathR   t   existsR   t   get_path_to_program(   t   fnamet
   blast_path(    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyt   get_blast_fpath:   s    s   ([0-9]+)c         C  sE   g  t  j | |  d  D]* } | j   r5 t |  n	 | j   ^ q S(   Ni    (   t   ret   splitt   isdigitt   intt   lower(   t   st   _nsret   text(    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyt   natural_sort_keyD   s    c         C  s   d } d  } x | d k  r yo t |   } d a | j   } t | t  s] | j d  } n  | d  k su d | k r | j   t  n  PWq t k
 r | d 7} | d k r t d 7a t d k r t	 j
 d d d n  d  Sq Xq W| S(	   Ni    i   s   utf-8t   ERRORi   s   Cannot established internet connection to download reference genomes! Check internet connection or run MetaQUAST with option "--max-ref-number 0".t   exit_with_codei  (   t   NoneR   t   connection_errorst   readt
   isinstancet   strt   decodet   closet	   Exceptiont   loggert   error(   t   urlt   attemptst   responset   request(    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyt   try_send_requestI   s,    
	


c         C  s  d } d } |  j  d d  }  t | d |  |  } | s@ d  St j |  } | j d  j d k rk d  S| j d  j d	  } g  } x| D]	} d
 d g }	 x |	 D] }
 t | d | j |
 f |  } | s q n  t j |  } | j d  } | d  k rq n  | j d  j d  } | d  k r6q n  | j d  } | rit |  t |  k riq n  | } | r Pq q W| r t |  d k  r Pq q W| sd  St |  d k rt	 j
 d |  j  d d   d  St d   | D  } t } g  } xH | D]@ } t | d |  } | r| d d k r| j |  qqWg  | D] } | j d  d ^ qN} t | d  e } x[ t t | |  d t D]> \ } } | st } n d | j   } | j | j    qWWd  QXt j j |  sd  St |  st j |  d  S| S(   Ns.   https://eutils.ncbi.nlm.nih.gov/entrez/eutils/s/   &tool=quast&email=quast.support@bioinf.spbau.rut   _t   +s6   esearch.fcgi?db=assembly&term=%s+[Organism]&retmax=100t   Countt   0t   IdListt   Idt   assembly_nuccore_refseqt   assembly_nuccore_insdcs9   elink.fcgi?dbfrom=assembly&db=nuccore&id=%s&linkname="%s"t   LinkSett	   LinkSetDbt   Linki   i  sB   %s has too fragmented reference genome! It will not be downloaded.t    c         s  s!   |  ] } | j  d   j Vq d S(   R;   N(   t   findR#   (   t   .0t   link(    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pys	   <genexpr>   s    s9   efetch.fcgi?db=sequences&id=%s&rettype=fasta&retmode=texti    t   >t   |it   wt   keys   
(   t   replaceR5   R'   t   ETt
   fromstringRB   R#   t   findallt   lenR/   t   infot   sortedt   Falset   appendR   t   opent   zipR$   t   Truet   rstript   writeR   R   R   R   t   remove(   t   organismt	   ref_fpatht   ncbi_urlt   quast_fieldsR3   t   xml_treet   ref_id_listt   best_ref_linkst   idt	   databasest   dbt   link_sett   link_dbt	   ref_linkst   ref_idst   is_first_piecet   fasta_filest   ref_idt   fastat   ft   fasta_namest
   fasta_filet   name(    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyt   download_refse   st    &(	c         C  sm   |  d k ri |  t  | | d  d k ri d t d t  t |  |  | d   | f Gt j j   n  d  S(   Ni    id   s   % 3.1f%% of %d bytes(   R   t   mint   floatt   syst   stdoutt   flush(   t   at   bt   c(    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyt   show_progress   s    */c         C  s
  g  t  D] } t |  s | ^ q } | r7 | r7 t St d d t  |  d | a t s\ t S| r t j j t  r t	 j
 t d t n  t Sxu t |  D]g \ } } t | d |  } |  j   | d k r t St |  } t j | t j |  j t j B q Wt S(   NR   t   BLASTt
   only_cleant   ignore_errorsR/   i    (   t   blast_filenamesR   RT   R   R   RP   R   R   R   t   shutilt   rmtreet	   enumeratet   download_blast_binaryRN   t   chmodt   statt   st_modet   S_IEXEC(   R/   Ry   t   cmdt   required_filest   it   return_codet
   blast_file(    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyt   download_all_blast_binaries   s$    %
'c         C  sH  t  j j t  s" t  j t  n  t  j j t |   } t  j j t |   } t  j j |  sDt  j j |  r | j	 d |  t
 j | t  qDt j   } t  j j t |   } t  j j |  sD| j	 d |   y | j | | d t  Wn# t k
 r| j d |   d SXt
 j | d |  | j	 d |   qDn  d S(   Ns   Copying blast files from s   Downloading %s...s	   .downloads   Failed downloading %s! The search for reference genomes cannot be performed. Please install it and ensure it is in your PATH, then restart your command.i   s   %s successfully downloaded!i    (   R   R   R   R   t   makedirsR   t   blast_external_tools_dirpathR   R   RN   R|   t   copyt   urllibt	   URLopenert   blast_dirpath_urlt   retrieveRw   R.   R0   t   move(   t   blast_filenameR/   t   blast_libs_fpatht   blast_external_fpatht   blast_downloadt   blast_webpath(    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyR      s,    c         C  s  t  d d t d g |  d | a t s, t S| ri t j j t  re |  j d t  t j	 t  n  t
 St t t  a t j j t d  r t j j t d  t k r t
 St j j t d  } t j j t t d  } t j j t t  } |  j   t j j |  r|  j d  n |  j d	  t j j t  sLt j t  n  t j   } t t d } y | j | | d
 t  Wn) t k
 r|  j d | t f  t SXt j | d
 |  |  j d |  t j j |  s|  j d  | d } d | } t j t j |  d t | d  d t | d  d |  | d }	 t |  I }
 t |	 d  1 } x' |
 D] } | j | j  d d   qqWWd  QXWd  QXt j! |  t j |	 |  n  |  j d  t" d  d | t f } t j t j |  d t | d  d t | d  d |  t j j# t d  sTt j j t d  t k  rq|  j d t d |  t St$ j% st j! |  t j! |  n  t
 S(   Nt   silvat   Silvas   .nsqRy   s	   Removing s   blastdb.logs   .gzsB   SILVA 16S ribosomal RNA gene database has already been downloaded.s4   Downloading SILVA 16S ribosomal RNA gene database...s	   .downloads   Failed downloading SILVA 16S rRNA gene database (%s)! The search for reference genomes cannot be performed. Try to download it manually in %s and restart your command.s,   Processing downloaded file. Logging to %s...s'   Unpacking and replacing " " with "_"...s	   .unpackeds   gunzip -c %sRr   RG   t   stderrRt   R/   s   .substitutedRA   R6   s   Making BLAST database...R   s    -in %s -dbtype nucl -out %ss    Failed to make BLAST database ("s3   "). See details in log. Try to make it manually: %s(&   R   t   silva_downloaded_fnamet   blastdb_dirpathRP   R   R   R   RN   R|   R}   RT   R   t   db_fpathR   t   getsizet   db_nsq_fsizet   silva_fnameR   R   t   FancyURLopenert   silva_db_urlR   Rw   R.   R0   R   R   t   call_subprocesst   shlexR   RR   RV   RI   RW   R   R   R   t   debug(   R/   Ry   t	   log_fpatht   db_gz_fpatht   silva_fpatht   silva_downloadt   silva_remote_fpatht   unpacked_fpathR   t   substituted_fpatht   in_filet   out_filet   line(    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyt   download_blastdb   sl    "2


:
):3	c           s  t  j d |    } d d d d d g } t   f d   | D  r t  j d |  t j j | t j j    d
  }	 t    = }
 t |	 d  % } x |
 D] } | j	 |  q WWd  QXWd  QX|	 } n  t
 | |  } t
 | |  } t d  d | t | f } t j t j |  d t | d  d t | d  d t  t  j d d | | f  t | d  $ } | j d   t    f  Wd  QXd  S(   Ns     s   processing s   .gzs   .gzips   .bz2s   .bzip2s   .zipc         3  s   |  ] }   j  |  Vq d  S(   N(   t   endswith(   RC   t   ext(   t   contigs_fpath(    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pys	   <genexpr>.  s    s
   unpacking s	   .unpackedRG   R   s+    -query %s -db %s -outfmt 7 -num_threads %sRr   R   Rt   R/   s'   BLAST results for %s are saved to %s...s   Assembly: %s md5 checksum: %s
s     processing s     unpacking (   R/   RN   t   anyR   R   R   t   basenameR   RR   RV   t   get_blast_output_fpathR   R   R   R   R   R   t
   writelinesR   (   R   t   labelt   corrected_dirpatht	   err_fpatht   blast_res_fpatht   blast_check_fpatht   blast_threadst   blast_query_fpatht   compress_extR   t   f_int   f_outt   lt	   res_fpatht   check_fpathR   t
   check_file(    (   R   sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyt   parallel_blast*  s&    %	:c         C  s   |  d t  |  S(   NR6   (   R
   (   t   blast_output_fpathR   (    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyR   @  s    c      
   C  s  g  } g  } g  | D] } | ^ q }	 xt  |  D]\ }
 } t |  | |
  } t | | |
  } d  } t } t j j |  r2 t |  r2 t |  A} x7| D]/} d | k r t	 } n  | r@| r@| j
   d | j
   d } } | | j   k r| | | k r| | } t j d | |
  |	 j |  qq | r | r | j
 d  } t |  d k r| d d k r| | d j   j
 d  7} q| d d	 k r| | d j   j
 d  7} qqq q WWd  QXq2 q2 W|	 t |  t |  f S(
   Ns   ---i   is,     Using existing BLAST alignments for %s... RA   i    s   Downloaded:t   ,s   Not_founded:(   R~   R   R'   RT   R   R   R   R   RR   RP   R   t   keysR/   t	   main_infoRW   RM   RU   t   set(   R   R   t	   files_md5t   assemblies_fpathst
   assembliest   labelst   downloaded_organismst   not_founded_organismst   assemblyt   blast_assembliesR   t   assembly_fpathR   R   t   existing_assemblyt   assembly_infoR   R   R   (    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyt   check_blastD  s6    	!"
 7c      
   C  s  t  j   t j j | d  } t j j | d  } t j j | d  } t d   |  D  } t d   |  D  }	 t | | | |	 |  |  \ }
 } } g  } | r t |  } d  } n[ t	 |
 | | | | |  \ } } | rt
 | d t } g  | D] \ } } | ^ q } n  g  t j |  D]@ \ } } } | D]* } t j |  r8t j j | |  ^ q8q%} t | |  | | | | | | | 	 } | st  j d  n  t j rt j j |  rt j |  n  | j   | S(   Ns	   blast.errs   blast.checks	   blast.resc         s  s'   |  ] } | j  t | j   f Vq d  S(   N(   t   fpathR   (   RC   R   (    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pys	   <genexpr>g  s    c         s  s   |  ] } | j  | f Vq d  S(   N(   R   (   RC   R   (    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pys	   <genexpr>h  s    t   reverses    Reference genomes are not found.(   R/   t   print_timestampR   R   R   t   dictR   t   parse_refs_listR'   t   process_blastRO   RT   t   walkR   t   check_is_fasta_filet   process_refsR   R   R   R   RW   t   sort(   R   R   t   downloaded_dirpathR   t   ref_txt_fpathR   R   R   R   R   R   R   R   t	   organismst   organisms_assembliest   scores_organismst   scoreRX   R   t   dirst   filest   filet   downloaded_ref_fpathst
   ref_fpaths(    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyt   dob  s4    
$	"7
c         C  s7  d  } d  } t j |   r t j |   } | r | j d  } | j d d  } | j   d } | r | d k r d | k r d | k r | j d	  } | d | 7} q q nK t j |   r t j |   } | r | j d	  } q n |  j d
 d  } | r-t j d d |  } | j d  d } n  | | f S(   Nt   taxonst   ;s   	i    t   Bacteriat   Archaeat   Chloroplastt   mitochondriat   seqnameRA   R6   s   [\[,/\]](   R   R   (	   R'   t   silva_patternt   matcht   groupRI   R   t   ncbi_patternR   t   sub(   t   organism_idR   R   t   mt   domain(    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyt   parse_organism_id  s*    	!c           s"  t    s d St j r t j a t t  r g  t j t  D] } | j d  r; | ^ q; } | r t	 t j | d j
 d d   a q n/ t t  r t j d  r t t d   a n  t j j t d  s t j d d d q n t   s d St j j	 | d   t |   d k rt j d  t t j t |    } t d	 t j |   t   ryd
 d l m }	 m  n d
 d l m }	 m  |	 d |         f d   t |   D  n  t j d  g  }
 i  } x(| D] } g  } g  } t  |  } t j j |  rfd } t |  2} x(| D] } | d k rA| j d  rAt | j    d k rA| j   } | d	 } t  | d  } t! | d  } t  | d  } | t j" k rY| t j# k rY| t j$ k rYt% |  \ } } | sq9n  | j d  } t |  d	 k r>d | k r>| d d | d	 } | | k r| j& | | f  | r| t' t( |  <n  | j& |  | d	 7} q;g  | D] } | | d	 k r| ^ q} | r;| | d d k r;| j) | d d | d d	 f  | j& | | f  | r+| t' t( |  <n  | d	 7} q;q>qYq9| j d  r9d } q9q9WWd  QXn  t* | d t+ } | t j,  } xa | D]Y } | s| j-   rg  | j-   D] } | d	 | k rd	 ^ qr|
 j& |  qqWg  | D] } | d	 ^ q| | <qW|
 sd S|
 | f S(   Ns   .nsqi    t    s  You should specify path to BLAST database obtained by running makeblastdb command: either path to directory containing <dbname>.nsq file or path to <dbname>.nsq file itself. Also you can rerun MetaQUAST without --blast-db option. MetaQUAST uses SILVA 16S RNA database by default.R&   i   s	   blast.ress   Running BlastN..i   i(   t   Parallelt   delayedt   n_jobsc      	   3  s?   |  ]5 \ } }  t   | j | j        Vq d  S(   N(   R   R   R   (   RC   R   R   (   R   R   R   R   R   R   (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pys	   <genexpr>  s   t   #i
   i   i   R6   t
   unculturedR   (   NN(   NN(   NN(.   R   R'   R   t   custom_blast_db_fpathR   R   R   t   listdirR   R   RI   R   RM   R   R/   R0   R   R   Ro   t   max_threadst   maxR	   t   joblibR   R   t   joblib3R~   R   R   RR   t
   startswithR   Rp   R   t   identity_thresholdt
   min_lengtht   min_bitscoreR   RQ   t   taxons_for_kronaR   RW   RO   RT   t   max_referencest   values(   R   R   R   R   R   R   Rj   t   db_aux_filesR   R   R   R   R   t
   all_scoresR   R   t   refs_for_queryt   res_fileR   R   t   idyt   lengthR   R   R   t   speciet   xt   tuple_scorest   list(    (   R   R   R   R   R   R   sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyR     s    			1(		'4
-)#C%c         C  sm   g  } t  |   U } xK | j   j d  D]4 } | r+ | j   j d d  } | j |  q+ q+ WWd  QX| S(   Ns   
RA   R6   (   RR   R)   R   t   stripRI   RQ   (   R   R   Rj   R   RX   (    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyR     s    c	         C  s  g  }	 g  }
 d } t  |   } | d k rZ t j rV t j j |  rV t j |  n  |	 Sd } x# |  D] } t t  |  |  } qg Wx# |
 D] } t t  |  |  } q Wt j	   t j
 d t |  d  t  |  d k r t j
 d  n  x|  D]} t j j | t |  d  } | t  |  d } d  } t } t j j |  rp| | k rpt | |  } n! t j j |  rt } | } n  | r?| d 8} | d 7} | rt j
 d | j d	 d  | | | f  | |	 k r/|	 j |  q/n6 t j
 d
 | j d	 d  | | | f  |	 j |  |
 j |  q | d 8} t j
 d | j d	 d  | f  | j |  q Wxt | |  D]v\ } } t | |  } t j j |  rt |  & } | j   } | | j d   } Wd  QXn d | j t | j  f } t | d   } | j |  | j d  | sXg  |
 D]5 } | ^ qFn& g  |
 D] } | | | k r_| ^ q_} | sg  | D]5 } | ^ qn& g  | D] } | | | k r| ^ q} | j d d j |   | j d d j |   Wd  QXqW|	 S(   Ni    s7   Trying to download found references from NCBI. Totally s    organisms to try.sA   MetaQUAST will attempt to use previously downloaded references...s   .fastaRA   i   s<     %s%s | was downloaded previously (total %d, %d more to go)R7   s:     %s%s | successfully downloaded (total %d, %d more to go)s'     %s%s | not found in the NCBI databases   
s   Assembly: %s md5 checksum: %s
RG   s   
---
s   Downloaded: %s
R   s   Not_founded: %s
(   RM   R   R   R   R   R   RW   R  R/   R   R   R+   R   R   R'   RP   Rn   RT   RI   RQ   t   addRS   R   RR   R)   RB   R   R   R   (   R   R   R   R   R   R   R   R   R   R   R   t   total_downloadedt   total_scored_leftt   max_organism_name_lenRX   RY   t   spacest   new_ref_fpatht   was_downloadedR   R   R   R   R#   t   cur_downloaded_organismst   cur_not_founded_organisms(    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyR     sx    
	

	 	 
#))$(P   t
   __future__R    R   R   R|   R   Rq   R   t   os.pathR   R   R   t
   quast_libsR   R   t   quast_libs.fastaparserR   t   quast_libs.logR   t   quast_libs.qutilsR   R	   R
   R   R   R   t   LOGGER_META_NAMER/   t   urllib2R   R   t   urllib.requestR4   t   xml.etree.ElementTreet   etreet   ElementTreeRJ   t   sockett   setdefaulttimeoutt   compilet   IR   R   R   R   t   silva_idR   t
   QUAST_HOMEt   external_tools_dirpatht   platform_nameR   R{   t   GIT_ROOT_URLt   relpathR   R'   R   R   R   R   RP   t   is_quast_first_runR  R(   R   R$   R5   Rn   Rw   R   R   R   R   R   R   R   R   R   R   R   (    (    (    sP   /home/birch/BIRCH/local/install/quast-4.6.3/quast_libs/search_references_meta.pyt   <module>   sl   .	
		F	D			#		Z	