ó
ólÀ[c           @   s	  d  d l  Z  d  d l Z d  d l Z d  d l Z d  d l Z d  d l m Z d  d l m Z d  d l m	 Z	 d  d l
 Z
 d Z d d d g Z d d d	 d
 g Z d „  Z d „  Z d „  Z d „  Z e d „ Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d S(   iÿÿÿÿN(   t   bool_to_str(   t
   addsitedir(   t   dir_utilt   constructions
   paired-endt   singles   hq-mate-pairss   trusted-contigss   untrusted-contigsc	   
      C   sc  t  ƒ  }	 t | ƒ |	 d <t j | j ƒ |	 d <t j | j ƒ |	 d <t j | j ƒ |	 d <| r‡ t j | ƒ |	 d <t t ƒ |	 d <n t t	 ƒ |	 d <t | ƒ |	 d <| |	 d <| |	 d	 <t | j
 ƒ |	 d
 <t | pà | d k ƒ |	 d <t | où | j ƒ |	 d <| j |	 d <| j |	 d <t | j ƒ |	 d <| sIt t	 ƒ |	 d <n  d | j k rh| j |	 d <n  d | j k r£t | j ƒ |	 d <t j | j ƒ |	 d <n  | j d k rÅt t	 ƒ |	 d <n9 t t ƒ |	 d <| j d k rñd |	 d <n | j |	 d <| j d  k	 r-t t ƒ |	 d <| j |	 d <n  d | j k rL| j |	 d <n  t j |  |	 | ƒ d  S(   Nt   Kt   datasett   output_baset   tmp_dirt   additional_contigst   use_additional_contigst   main_iterationt   entry_pointt	   load_fromt   developer_modei7   t   gap_closer_enablet	   rr_enablet   max_threadst
   max_memoryt   save_gpt   correct_mismatchest   resolving_modet   pacbio_modet   pacbio_test_ont   pacbio_readst   offt   use_coverage_thresholdt   autog        t   coverage_thresholdt   lcer_enabledt   lcer_coverage_thresholdt   series_analysis(   t   dictt   strt   process_cfgt   process_spacesR   t
   output_dirR   R    t   Truet   FalseR   R   R   R   R   t   __dict__R   R   R   t
   cov_cutofft   lcer_cutofft   NoneR   t   substitute_params(
   t   filenamet   cfgt   logt   additional_contigs_fnameR   t   staget	   saves_dirt   last_onet   execution_homet
   subst_dict(    (    sb   /home/psgendb/BIRCHDEV/pkg/SPAdes-3.13.0/linux-x86_64/share/spades/spades_pipeline/spades_logic.pyt   prepare_config_spades   sJ    	

c         C   s_   t  j s d  St ƒ  } t t  j d  k	 ƒ | d <t t  j d k ƒ | d <t j |  | | ƒ d  S(   Nt
   ss_enabledt   rft	   antisense(   t   options_storaget   rnaR    R    t   strand_specificityR*   R"   R+   (   R,   R.   R4   (    (    sb   /home/psgendb/BIRCHDEV/pkg/SPAdes-3.13.0/linux-x86_64/share/spades/spades_pipeline/spades_logic.pyt   prepare_config_rnaspadesG   s    		c         C   s@   t  j d  k r d  St ƒ  } t  j | d <t j |  | | ƒ d  S(   Nt   read_cov_threshold(   R9   R=   R*   R    R"   R+   (   R,   R.   R4   (    (    sb   /home/psgendb/BIRCHDEV/pkg/SPAdes-3.13.0/linux-x86_64/share/spades/spades_pipeline/spades_logic.pyt   prepare_config_constructionP   s
    	c         C   sã   t  j j |  d | d ƒ } d } t  j j | ƒ r¼ t | ƒ t j j d ƒ r_ d d  l } n! t j j d ƒ r€ d d  l	 } n  | j
 t | d ƒ ƒ } t | d ƒ } | j d	 | ƒ n  | d k rß t j d
 | | ƒ n  | S(   Ns   K%ds   final.lib_datai    s   2.iÿÿÿÿs   3.t   rs   nomerge max read lengths   Max read length detected as %dsD   Failed to estimate maximum read length! File with estimated params: (   t   ost   patht   joint   isfileR   t   syst   versiont
   startswitht   pyyaml2t   pyyaml3t   loadt   opent   intt   infot   supportt   error(   R$   R   t   ext_python_modules_homeR.   t   est_params_filenamet   max_read_lengtht   pyyamlt   est_params_data(    (    sb   /home/psgendb/BIRCHDEV/pkg/SPAdes-3.13.0/linux-x86_64/share/spades/spades_pipeline/spades_logic.pyt   get_read_lengthX   s    
c         C   sõ   t  j ƒ  r‡ | d k rH | sA | j d t t  j ƒ | f ƒ n  t  j S| d k r‡ | s} | j d t t  j ƒ | f ƒ n  t  j Sn  | t |  ƒ k rñ g  |  D] } | | k  r  | ^ q  } | sí | j d t | ƒ | t |  ƒ f ƒ n  | S|  S(   Niú   se   Default k-mer sizes were set to %s because estimated read length (%d) is equal to or greater than 250i–   se   Default k-mer sizes were set to %s because estimated read length (%d) is equal to or greater than 150sM   K-mer sizes were set to %s because estimated read length (%d) is less than %d(   R9   t   auto_K_allowedRL   R!   t
   K_MERS_250t
   K_MERS_150t   max(   t
   cur_k_merst   RLR.   t   silentt   kt
   new_k_mers(    (    sb   /home/psgendb/BIRCHDEV/pkg/SPAdes-3.13.0/linux-x86_64/share/spades/spades_pipeline/spades_logic.pyt   update_k_mers_in_special_casesi   s$    		
%	 c         C   sŒ   t  j d  k s t  j d k rZ t  j } t  j t  _ t t  j |  d  d t ƒ} | t  _ n	 t  j } g  | D] } | |  k  rj | ^ qj } | S(   NR   R[   (   R9   t   original_k_mersR*   t   k_mersR^   t   K_MERS_SHORTR%   (   RZ   RY   R_   R\   (    (    sb   /home/psgendb/BIRCHDEV/pkg/SPAdes-3.13.0/linux-x86_64/share/spades/spades_pipeline/spades_logic.pyt   reveal_original_k_mers~   s    		%c         C   s#  d d d d d d g } x{ | D]s \ } } t  j | r | d k sP | d k ro |  j t j j | d ƒ ƒ n  |  j t j j | | d ƒ ƒ q q Wt  j d rî t  j d rÏ |  j t j j | d ƒ ƒ qî |  j t j j | d ƒ ƒ n  t  j rt  j r|  j t j j | d ƒ ƒ n  d  S(   Nt   single_cellt   mda_modet   metat	   meta_modet   truseq_modet   moleculo_modeR:   t   rna_modet   large_genomet   large_genome_modet   plasmidt   plasmid_modes   mda_mode.infos   .infot   carefuls   careful_mda_mode.infos   careful_mode.infos   rna_fast_mode.info(   Rc   Rd   (   Re   Rf   (   Rg   Rh   (   R:   Ri   (   Rj   Rk   (   Rl   Rm   (   R9   R'   t   appendR@   RA   RB   R:   t   fast(   t   commandt   configs_dirt   mode_config_mappingt   modet   config(    (    sb   /home/psgendb/BIRCHDEV/pkg/SPAdes-3.13.0/linux-x86_64/share/spades/spades_pipeline/spades_logic.pyt   add_configsŠ   s"    	'c      
   C   sh  t  j j | j d | ƒ } t } t  j j | d ƒ }	 t  j j | d ƒ }
 t j r-t  j j t  j j | d ƒ ƒ rÆ t j o¥ t j d | k p¥ t j j	 d | ƒ rÆ | j
 d d | d ƒ d  St j rt j j d	 ƒ d
 k rt j j	 d | ƒ rt j t j j d	 ƒ d } n  t j | ƒ n  | t k rht  j j |	 ƒ sÅt j d | |	 f ƒ qÅn] t  j j | ƒ rŠt j | ƒ n  t  j | ƒ i  t _ t j t  j j |  d ƒ |
 d t ƒ| j
 d d | d ƒ | r;t  j j | j d | d ƒ } t  j j | ƒ sAt j d | | f | ƒ d  } qAn d  } d | j k rt j t  j j |
 d ƒ i | j d 6| ƒ n  d | j k r½t j t  j j |
 d ƒ i | j d 6| ƒ n  t t  j j |
 d ƒ | ƒ t  t  j j |
 d ƒ | ƒ t  j j |
 d ƒ } t! | | | | | | |	 | | ƒ	 t  j j | d ƒ | g } t" | |
 ƒ t j# | | ƒ d  S(   Ns   K%dt   savest   configss   final_contigs.fastas   k%ds   k%d:s   
== Skipping assembler: s    (already processed)t   :iÿÿÿÿi   s8   Cannot restart from stage %s: saves were not found (%s)!t   debruijnt   preserve_timess   
== Running assembler: s   
s   simplified_contigs.fastas0   additional contigs for K=%d were not found (%s)!t   read_buffer_sizes   construction.infot   scaffolding_modes   pe_params.infos   rna_mode.infos   config.infos   spades-core($   R@   RA   RB   R$   t
   BASE_STAGER9   t   continue_modeRC   t   restart_fromRF   RL   t   findRM   t   continue_from_heret   isdirRN   t   existst   shutilt   rmtreet   makedirsR   t   _path_createdt	   copy_treeR&   t   warningR*   R'   R"   R+   R|   R}   R<   R>   R5   Rv   t   sys_call(   Rr   R3   R-   R.   R   t   prev_KR2   t   data_dirR0   R1   t   dst_configsR/   t   cfg_fnRq   (    (    sb   /home/psgendb/BIRCHDEV/pkg/SPAdes-3.13.0/linux-x86_64/share/spades/spades_pipeline/spades_logic.pyt   run_iteration£   sN    	**! 	%//"c         C   sâ   t  ƒ  } t | ƒ | d <t j | j ƒ | d <t j t j j | j d ƒ ƒ | d <t j | j	 ƒ | d <t
 t ƒ | d <t
 t ƒ | d <t | d <| | d	 <t
 | j ƒ | d
 <| j | d <| j | d <t j |  | | ƒ d  S(   NR   R   t   SCCR   R   R
   R   R   R   R   R   R   (   R    R!   R"   R#   R   R@   RA   RB   R$   R   R    R&   R~   R   R   R   R+   (   R,   R-   R.   R1   R   R4   (    (    sb   /home/psgendb/BIRCHDEV/pkg/SPAdes-3.13.0/linux-x86_64/share/spades/spades_pipeline/spades_logic.pyt"   prepare_config_scaffold_correctionÚ   s    	%

c         C   sÏ  t  j j | j d d | ƒ } t  j j | d ƒ } t  j j | d ƒ } t  j j | d ƒ }	 t  j j | ƒ r€ t j | ƒ n  t  j | ƒ t j	 t  j j |  d ƒ | d t
 ƒ| j d ƒ t  j j | d	 ƒ }
 t  j j |
 ƒ sý t j d
 |
 | ƒ n  d | j k rAt  j j | d ƒ } t j | i | j d 6| ƒ n  t j t  j j | d ƒ i |
 d 6| ƒ t |	 | | | | ƒ t  j j | d ƒ |	 g } t | | ƒ | j t | ƒ ƒ t j | | ƒ d  S(   NR‘   s   K%dRw   Rx   s   config.infoRz   R{   s!   
== Running scaffold correction 
s   scaffolds.fastas   Scaffodls were not found in R|   s   construction.infos   moleculo_mode.infot   scaffolds_files   spades-truseq-scfcorrection(   R@   RA   RB   R$   R„   R…   R†   R‡   R   R‰   R&   RL   RC   RM   RN   R'   R"   R+   R|   R’   Rv   R!   R‹   (   Rr   R3   R-   R.   t   latestR   R   R1   RŽ   t   cfg_file_nameR“   t   construction_cfg_file_nameRq   (    (    sb   /home/psgendb/BIRCHDEV/pkg/SPAdes-3.13.0/linux-x86_64/share/spades/spades_pipeline/spades_logic.pyt   run_scaffold_correctioní   s*    % )c      	   C   s“  t  | j t ƒ s$ | j g | _ n  t | j ƒ | _ g  } t j rXt j t j k rXg  } x t t j	 t j
 d ƒ D]e } t j j | j d | ƒ }	 t j j |	 ƒ rv t j j t j j |	 d ƒ ƒ rv | j | ƒ qv qv W| rXt | j | d | | ƒ }
 t | j |
 | d t ƒ} g  | D] } | |
 k  r#| ^ q#} t |
 ƒ } g  } xr t | ƒ D]d \ } } t | ƒ | k r¦| d | d k r¢| d g } n  Pn  | | | k r`| | } Pq`q`W| rþt | ƒ t | ƒ k rþ| t | ƒ d } n  | rU| j d t | ƒ ƒ x4 | D]) } t j t j j | j d | ƒ ƒ q"WqUqXn  t j j | j d	 ƒ } t j j | ƒ rœt j rœt j | ƒ n  t j d
 d ƒ | _ t  } | j d } t | j ƒ d k rt! |  | | | | d  t ƒ | j | ƒ nKt! |  | | | | d  t  ƒ | j | ƒ t j# d | k rJt } n| } t | j | | | ƒ }
 t | j |
 | ƒ | _ t | j ƒ d k  s¬| j d d |
 k rZ| j$ rPt | j ƒ d k  rå| j d | j d ƒ n+ t j% d | j d |
 | j d f | ƒ t! |  | | | | j d d  t ƒ | j | j d ƒ | j d } qPnö | j } | j& d ƒ d } x— | D] } | d 7} | t | j ƒ k p³| | d |
 k } t! |  | | | | | | ƒ | j | ƒ | } | rïPn  t j# d | k r}t } Pq}q}W| t | j ƒ k  rP| rPt j% d | j | |
 f | ƒ n  t j# r{t j# j' d ƒ r{t j( | ƒ n  t j j | j d | ƒ } | j) ryt j* ryt j rþt j j t j j | j d d ƒ ƒ rþt j d k rþ| j d d ƒ n2 t j rt j+ | ƒ n  t, |  | | | | d ƒ t j j t j j | j d ƒ d ƒ } t j# d k ryt j( | ƒ qyn  | j) rÂt j j | d ƒ } t j j | ƒ rÔt j- | | j. ƒ qÔn| sÔt j j t j j | d ƒ ƒ rOt j j t j j/ | j0 ƒ d ƒ } t j j | ƒ s*t j rOt j- t j j | d ƒ | ƒ qOn  t j1 rÈt j j t j j | d ƒ ƒ rÁt j j | j2 ƒ s™t j rÁt j- t j j | d ƒ | j2 ƒ qÁn  t j j t j j | d ƒ ƒ r*t j j | j3 ƒ st j r*t j- t j j | d ƒ | j3 ƒ q*n  x¤t j4 D] } | d } t j j | j | t j5 ƒ } t j j | | d ƒ } t j j | ƒ r4t j j | ƒ s«t j rÁt j- | | ƒ qÁq4q4WqÔt j j t j j | d ƒ ƒ r1	t j j | j0 ƒ s		t j r1	t j- t j j | d ƒ | j0 ƒ q1	n  t j j t j j | d ƒ ƒ r¸	t j j t j j/ | j0 ƒ d ƒ } t j j | ƒ s“	t j r¸	t j- t j j | d ƒ | ƒ q¸	n  | j$ r–
t j j t j j | d ƒ ƒ r*
t j j | j. ƒ s
t j r*
t j- t j j | d ƒ | j. ƒ q*
n  t j j t j j | d ƒ ƒ r–
t j j | j6 ƒ sk
t j r“
t j- t j j | d ƒ | j6 ƒ q“
q–
n  t j j t j j | d  ƒ ƒ rÿ
t j j | j7 ƒ s×
t j rÿ
t j- t j j | d  ƒ | j7 ƒ qÿ
n  t j j t j j | d! ƒ ƒ rht j j | j8 ƒ s@t j rht j- t j j | d! ƒ | j8 ƒ qhn  t j j t j j | d" ƒ ƒ rÔt j j | j9 ƒ s©t j rÑt j- t j j | d" ƒ | j9 ƒ qÑqÔn  | j: rEt j j t j j/ | j0 ƒ d# ƒ } t j j; | ƒ r#t j< | ƒ n  t j= t j j | d# ƒ | ƒ n  t j j | ƒ rgt j | ƒ n  t j j | j ƒ rt j | j ƒ n  | S($   Ni   s   K%ds   final_contigs.fastai    R[   iÿÿÿÿi   sx   Restart mode: removing previously processed directories for K=%s to avoid conflicts with K specified with --restart-froms
   .bin_readst   prefixt   spades_s   k%ds@   == Rerunning for the first value of K (%d) with Repeat Resolvingsƒ   Second value of iterative K (%d) exceeded estimated read length (%d). Rerunning for the first value of K (%d) with Repeat ResolvingsG   Iterations stopped. Value of K (%d) exceeded estimated read length (%d)R\   R‘   s   corrected_scaffolds.fastat   sccs)   
===== Skipping %s (already processed). 
s   scaffold correctioni   t   K21s   before_rr.fastas   transcripts.fastas   transcripts.pathst
   _filtered_s   final_paths.fastas   first_pe_contigs.fastas   scaffolds.fastas   scaffolds.pathss!   assembly_graph_with_scaffolds.gfas   assembly_graph.fastgs   final_contigs.pathsRw   (>   t
   isinstancet   iterative_Kt   listt   sortedR9   R€   t   restart_k_mersR_   t   ranget   MIN_Kt   MAX_KR@   RA   RB   R$   Rƒ   RC   Ro   RT   R^   R%   Rb   t	   enumeratet   lenRL   R!   R…   R†   R   RM   t   get_tmp_dirR   R&   R   R*   t
   stop_afterR   RŠ   t   popRF   t   finish_heret   correct_scaffoldst   run_completedR‚   R—   t   copyfilet   result_scaffoldst   dirnamet   result_contigsR:   t   result_transcriptst   result_transcripts_pathst   filtering_typest   transcripts_namet   result_scaffolds_pathst   result_graph_gfat   result_grapht   result_contigs_pathsR   t   lexistst   removet   symlink(   Rr   R3   R-   t   dataset_dataRO   R.   t   used_Kt   processed_KR\   t	   cur_K_dirRZ   t   needed_Kt
   original_Kt   k_to_deletet   idt   bin_reads_dirt   finished_on_stop_afterR   RŒ   t   rest_of_iterative_Kt   countR2   R”   t   correct_scaffolds_fpatht   result_before_rr_contigst   filtering_typeR˜   t   result_filtered_transcriptst   latest_filtered_transcriptst   result_first_pe_contigst
   saves_link(    (    sb   /home/psgendb/BIRCHDEV/pkg/SPAdes-3.13.0/linux-x86_64/share/spades/spades_pipeline/spades_logic.pyt
   run_spades  s   3%
	0	,			"#	
)	@	'	!$%	! (! (
! (!$%	! (! +! (! (! +	$"(   R@   RD   R…   RM   R"   R    t   siteR   t	   distutilsR   R9   R~   t    READS_TYPES_USED_IN_CONSTRUCTIONt   READS_TYPES_USED_IN_RNA_SEQR5   R<   R>   RT   R&   R^   Rb   Rv   R   R’   R—   RÏ   (    (    (    sb   /home/psgendb/BIRCHDEV/pkg/SPAdes-3.13.0/linux-x86_64/share/spades/spades_pipeline/spades_logic.pyt   <module>
   s,   	.							7		