a
    hQ                    @   s  d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZmZ d dlZd dl	Z	d dl
mZ dd Zdd	 Zd
d ZG dd dejZdd ZG dd dejZG dd dejZG dd dejZdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) ZdQd+d,Z d-d. Z!d/d0 Z"d1d2 Z#d3d4 Z$d5d6 Z%d7d8 Z&d9d: Z'd;d< Z(d=d> Z)dRd?d@Z*dSdAdBZ+dTdCdDZ,dEdF Z-dGdH Z.dIdJ Z/dKdL Z0dMdN Z1dOdP Z2dS )U    N)gettext)basename)abspath
expanduser)empty_configc                  C   sx  d } t tjd }tj}tjdd}|jdddd |jdd	dd |jd
ddd |jdddd |jdddd |jdddd |jdddd |jdddd |jdddd ||\}}|dks|jrd	} nL|dks|jrd} n8|dks|j	rd} n$|j
rd} n|dks|jr"d} |dksD|j	r<|jsD|jrHd} |dksZ|jr^d} |dksp|jrtd} | S ) Nr   Fadd_help	--isolateisolate
store_truedestaction--rnarna	--plasmidplasmid--metameta--biobio--metaviral	metaviral--metaplasmidmetaplasmid
--rnaviralrnaviral--coronacoronazrnaspades.pyzrnaviralspades.pyzplasmidspades.pybgczmetaspades.pyzmetaplasmidspades.pyzmetaviralspades.pyzcoronaspades.py)r   options_storagefirst_command_lineargparseArgumentParseradd_argumentparse_known_argsr   r   r   r   r   r   r   r   )modeZscript_basenameoptionsZmode_parsernargsunknown_args r*   m/home/psgendb/BIRCHDEV/install/SPAdes-4.2.0-Linux/linux-x86_64/share/spades/spades_pipeline/options_parser.pyget_mode   s>    "r,   c                 C   s   t  }|dkrd| _n|dkr&d| _n|dkr<d| _d| _nx|dkrLd| _nh|dkrhd| _d| _d| _nL|dkrd| _d| _d| _n0|dkrd| _d| _n|d	krd| _d| _d| _d S )
Nr   Tr   r   r   r   r   r   r   )	r,   r   r   r   r   r   r   r   r   )argsr&   r*   r*   r+   add_mode_to_args9   s2    r.   c                  C   s(   t  } dtj }| d ur$|d|  7 }|S )NzSPAdes genome assembler v%sz [%sSPAdes mode])r,   r    spades_version)r&   verr*   r*   r+   versionV   s
    
r1   c                       s0   e Zd Zd fdd	Zdd Zdd	d
Z  ZS )SpadesHelpFormatter      d   c                    s   t t| |||| d S N)superr2   __init__)selfprogZindent_incrementZmax_help_positionwidth	__class__r*   r+   r8   _   s    zSpadesHelpFormatter.__init__c                 C   s   |  S r6   )
splitlines)r9   textr;   r*   r*   r+   _split_linesb   s    z SpadesHelpFormatter._split_linesNc                 C   s*   |d u rt t d }tj| ||||S )Nz	

Usage: )r   r1   r"   HelpFormatter_format_usage)r9   usageZactionsgroupprefixr*   r*   r+   rB   e   s    z!SpadesHelpFormatter._format_usage)r3   r4   r5   )N)__name__
__module____qualname__r8   r@   rB   __classcell__r*   r*   r<   r+   r2   ^   s   r2   c                   C   s   t  S r6   )dictr*   r*   r*   r+   init_dataset_datak   s    rK   c                       s(   e Zd Zd fdd	ZdddZ  ZS )	AddToDatasetActionNFc                    s&   t t| |||||||||	|

 d S r6   )r7   rL   r8   r9   option_stringsr   r(   constdefaulttypechoicesrequiredhelpmetavarr<   r*   r+   r8   p   s    zAddToDatasetAction.__init__c                 C   s   |dkrdt _|tjvrdt _d|vs0|jd u rBt }t|d| d}t|dkrd|	dd  |d	  }t|	dd
kr|	dd dkr|d|d  7 }q|d|	dd  7 }|d }q|d }n|}t|d	kr|d	 }t 
|||j d S )N-sTFdataset_data r3   z---r      or   )supportold_style_single_readsr    OLD_STYLE_READS_OPTIONSonly_old_style_optionsrW   rK   setattrlensplitadd_to_dataset)r9   parser	namespacevaluesoption_stringrW   argoptr*   r*   r+   __call__u   s(    


zAddToDatasetAction.__call__)NNNNNFNN)NrF   rG   rH   r8   rl   rI   r*   r*   r<   r+   rL   o   s     rL   c                       s(   e Zd Zd fdd	ZdddZ  ZS )	StoreUniqueActionNFc                    s(   t t| j|||||||||	|
d
 d S N)
rN   r   r(   rO   rP   rQ   rR   rS   rT   rU   )r7   rn   r8   rM   r<   r*   r+   r8      s    zStoreUniqueAction.__init__c                 C   s.   |j | j d urt| dt|| j| d S )Nz#option was specified at least twice)__dict__r   r"   ArgumentErrorrb   r9   rf   rg   rh   ri   r*   r*   r+   rl      s    zStoreUniqueAction.__call__)NNNNNFNN)Nrm   r*   r*   r<   r+   rn      s     rn   c                       s(   e Zd Zd fdd	ZdddZ  ZS )	ConcatenationActionNFc                    s(   t t| j|||||||||	|
d
 d S ro   )r7   rs   r8   rM   r<   r*   r+   r8      s    zConcatenationAction.__init__c                 C   sb   dd |D }t |dkr0|d dkr0|d }n t |dkrPd|v rPt| dt|| j| d S )Nc                 S   s   g | ]}|D ]}|qqS r*   r*   ).0Zouterxr*   r*   r+   
<listcomp>       z0ConcatenationAction.__call__.<locals>.<listcomp>r]   r   autoz2cann't set 'auto' and kmers' size at the same time)rc   r"   rq   rb   r   rr   r*   r*   r+   rl      s    
zConcatenationAction.__call__)NNNNNFNN)Nrm   r*   r*   r<   r+   rs      s     rs   c                 C   sf   | dkr| gS t | }|tjk s*|tjkrBtd|tjtjf |d dkr\td| |gS d S )Nrx   z:wrong k value %d: all k values should be between %d and %dr3   r   z,wrong k value %d: all k values should be odd)intr    MIN_KMAX_Kr"   ArgumentTypeError)rj   kr*   r*   r+   kmer   s    r~   c                 C   sP   | }|d dkr|d d }| d}tt|D ]}t|| d ||< q2|S )Nr[   ,r   )rd   rangerc   r~   )rj   k_mersir*   r*   r+   kmers   s    
r   c                 C   s   | dkr| S t | S d S )Nrx   )ry   rj   r*   r*   r+   qvoffset   s    r   c                 C   sD   | dks| dkr| S t | r2t| dkr2t| S td|  d S )Nrx   off        zGwrong value %s (should be a positive float number, or 'auto', or 'off')r^   is_floatfloatr"   r|   r   r*   r*   r+   
cov_cutoff   s
    r   c                 C   s0   t | rt| dkrt| S td|  d S )Nr   z2wrong value %s (should be a positive float number)r   r   r*   r*   r+   lcer_cutoff   s    r   c                 C   s6   | t jvr2| t jkr2| ds2td| t jf | S )Nr}   z>wrong value %s (should be 'ec', 'as', 'k<int>', 'mc', or '%s'))r    SHORT_STAGES_NAME
LAST_STAGE
startswithr"   r|   r   r*   r*   r+   restart_from   s    r   c                 C   s&   | t jvr"| ds"td|  | S )Nr}   z8wrong value %s (should be 'ec', 'as', 'k<int>', or 'mc'))r    r   r   r"   r|   r   r*   r*   r+   
stop_after   s    r   c                 C   s0   t | rt| dkrt| S td|  d S )Nr   z8wrong value %s (should be a non-negative integer number))r^   is_intry   r"   r|   r   r*   r*   r+   read_cov_threshold   s    r   c              	   C   s
  t ddD ]}dD ]^}| jd||f ddtjtd | jd||f ddtjtd | jd||f ddtjtd qd	D ]X}| jd||f d
tjtd | jd||f d
tjtd | jd||f d
tjtd qv| jd| ddtjtd | jd| ddtjtd q
d S )Nr]   
   )z-12-1-2rV   z--pe%d%s
<filename>rU   r(   rT   r   z--mp%d%sz
--hqmp%d%s)z-frz-rfz-ffr   )r(   rT   r   z--s%dz--pe%d-m)r   r$   r"   SUPPRESSrL   )pgroup_input_datanumZsufixorientationr*   r*   r+   add_deprecated_input_data_args   s`    

r   c              	   C   s  t  }| jdddtd dtd |d u}| jdd|s6dntjd	d
 | jdd|sRdntjd	d
 | jdd|sndntjd	d
 | jdd|sdntjd	d
 | jdd|sdntjd	d
 | jdd|sdntjd	d
 | jdd|sdntjd	d
 | jdd|sdntjd	d
 | jd d!|sd"ntjd	d
 | jd#d$|s6d%ntjd	d
 | jd&d'|sTd(ntjd	d
 | jd)d*d+d	d
 | jd,d-d.d	d
 | jd/d0d1d2d3 | jd4d5d6d7t d8 d S )9N-oz<output_dir>z5directory to store all the resulting files (required)
output_dir)rU   rT   rQ   rP   r   r   r	   r
   zMthis flag is highly recommended for high-coverage isolate and multi-cell datar   r   rT   r   z--scsingle_cellz0this flag is required for MDA (single-cell) datar   r   z*this flag is required for metagenomic datar   r   z1this flag is required for biosyntheticSPAdes modez--sewagesewagez%this flag is required for sewage moder   r   z+this flag is required for coronaSPAdes moder   r   z&this flag is required for RNA-Seq datar   r   z1runs plasmidSPAdes pipeline for plasmid detectionr   r   z1runs metaviralSPAdes pipeline for virus detectionr   r   zoruns metaplasmidSPAdes pipeline for plasmid detection in metagenomic datasets (equivalent for --meta --plasmid)r   r   z9this flag enables virus assembly module from RNA-Seq dataz--iontorrent
iontorrentz)this flag is required for IonTorrent dataz--test	test_modezruns SPAdes on toy datasetz-hz--helpzprints this usage messagerT   rT   r   z-vz	--versionzprints versionr1   )rT   r   r1   )r,   r$   strrn   r"   r   r1   )pgroup_basicr&   help_hiddenr*   r*   r+   add_basic_args   s    r   Fc                 C   s4  d|v r2|j d|  dd|s&d|| f ntjtd d|v rd|j d|  dd|sXd	|| f ntjtd d
|v r|j d|  dd|sd|| f ntjtd d|v r|j d|  dd|sd|| f ntjtd d|v r|j d|  dd|sd|| f ntjtd d|v r0|j d|  dd|s$d|| f ntjtd d S )N12z--%s-12<#>r   r3   zefile with interlaced reads for %s library number <#>.
Older deprecated syntax is -%s<#>-12 <filename>r   1z--%s-1zafile with forward reads for %s library number <#>.
Older deprecated syntax is -%s<#>-1 <filename>2z--%s-2zafile with reverse reads for %s library number <#>.
Older deprecated syntax is -%s<#>-2 <filename>sz--%s-szbfile with unpaired reads for %s library number <#>.
Older deprecated syntax is -%s<#>-s <filename>mz--%s-mz`file with merged reads for %s library number <#>.
Older deprecated syntax is -%s<#>-m <filename>r\   z--%s-or)r   z<or>zkorientation of reads for %s library number <#> 
(<or> = fr, rf, ff).
Older deprecated syntax is -%s<#>-<or>)r$   r"   r   rL   )Zlibidnamesuffixesr   r   r*   r*   r+   add_library_argst  s    





r   c                 C   s$  t  }| jddddtd | jddddtd | jdddd	td | jd
dddtd | jddddtd t|  |dv }tddg d|  | jddddtd tddg d| | tddg d| | | jddd|sdntjtd |dk}| jddddtd | jd ddd!td |d"k}| jd#dd|s2d$ntjtd | jd%dd|sRd&ntjtd |dk}| jd'dd|szd(ntjtd | jd)d*d+d,d-g|sd.ntjd/d0 | jd1d*d+d,tjd2d3 | jd4d+d-tjd2d5 |d6ko|d7ko|d8ko|d9k}| jd:dd|sd;ntjtd d S )<Nz--12r   r]   z9file with interlaced forward and reverse paired-end readsr   r   z"file with forward paired-end readsr   z"file with reverse paired-end readsrV   zfile with unpaired readsz--mergedz5file with merged forward and reverse paired-end reads)r   r   pe
paired-end)r   r   r   r   r   r\   z--sr   r3   zjfile with unpaired reads for single reads library number <#>.
Older deprecated syntax is --s<#> <filename>mpz	mate-pair)r   r   r   r   r\   hqmpzhigh-quality mate-pairz--sangerzfile with Sanger readsr   z--pacbiozfile with PacBio readsz
--nanoporezfile with Nanopore readsr   z--trusted-contigszfile with trusted contigsz--untrusted-contigszfile with untrusted contigsz--fl-rnazFfile with PacBio/Nanopore/contigs that capture full-length transcriptsz--ssz<type>strand_specificityfrrfzestrand specific data, <type> = fr (normal) and rf (antisense).
Older deprecated syntax is --ss-<type>store)rU   r   rR   rT   r   z--ss-frZstore_const)rU   r   rO   rT   r   z--ss-rf)r   rO   rT   r   r   r   r   r   z--assembly-graphzfile with assembly graph)r,   r$   rL   r   r   r"   r   )r   r&   r   r*   r*   r+   add_input_data_args  s    	&r   c              	   C   sX  t  }|dkp|dk}| jddd |s(dntjdd | jdd	d |sFd
ntjdd |dv }|  }|jddd |stdntjdd |jddd tjdd | jdddddd | jddddd dtj }|dv rdtj }| jddd d t|dd! |  }|jd"d#d d$dd |jd%d#d tjdd |  }|jd&d'd d(dd |jd)d'd tjdd d S )*Nr   r   z--only-error-correctiononly_error_correctionz4runs only read error correction (without assembling)r   r   rP   rT   r   z--only-assembleronly_assemblerz4runs only assembling (without read error correction)r   r   r   z	--carefulcarefulz5tries to reduce number of mismatches and short indelsz--careful:falsestore_falsez--checkpointsz<last or all>checkpointsz.save intermediate check-points ('last', 'all')r   rU   r   rT   r   
--continuecontinue_modezNcontinue run from the last available check-point (only -o should be specified)r   zfrestart run with updated options and from the specified check-point
('ec', 'as', 'k<int>', 'mc', '%s'))r   r   zZrestart run with updated options and from the specified check-point
('as', 'k<int>', '%s')--restart-from<cp>r   )rU   r   rP   rQ   rT   r   z--disable-gzip-outputdisable_gzip_outputz;forces error correction not to compress the corrected readsz--disable-gzip-output:falsez--disable-rr
disable_rrz.disables repeat resolution stage of assemblingz--disable-rr:false)r,   r$   r"   r   add_mutually_exclusive_groupr    r   r   )pgroup_pipeliner&   r   Zcareful_groupZrestart_from_helpZdisable_gzip_output_groupr   r*   r*   r+   add_pipeline_args  s    r   c              	   C   s  t  }| jddtjdddd | jddd	d
tdtj dd | jddd	tddtj dd | jdddddd | jdd	ddtdtj	d  t
d |dv }| jddtd d|sd ntjdd! | jd"d#d$td%dd | jd&dd'd(dd) |  }|jd*d+d d,d-d. |jd/d+d tjd0d. d S )1Nz	--datasetr   dataset_yaml_filenamez,file with dataset description in YAML formatr   rU   rQ   r   rT   r   z-tz	--threads<int>threadsz!number of threads. [default: %s]
rU   r   rQ   rT   r   z-mz--memorymemoryzCRAM limit for SPAdes in Gb (terminates if exceeded). [default: %s]
z	--tmp-dirz	<dirname>z:directory for temporary files. [default: <output_dir>/tmp]tmp_dir)rU   rT   r   r   z-kr   +zDlist of k-mer sizes (must be odd and less than %d)
[default: 'auto']r]   )rU   r   r(   rQ   rT   r   r   z--cov-cutoff<float>r   zUcoverage cutoff value (a positive float number, or 'auto', or 'off')
[default: 'off'])rU   rQ   rP   r   rT   r   z--phred-offsetz
<33 or 64>r   zJPHRED quality offset in the input reads (33 or 64),
[default: auto-detect]z--custom-hmmscustom_hmmszEdirectory with custom hmms that replace default ones,
[default: None]r   z--gfa11gfa11z&use GFA v1.1 format for assembly graphr   r   z--gfa11:falser   )r,   r$   r^   check_file_existencery   r    THREADSMEMORYr   r{   rs   r   r"   r   r   r   )pgroup_advancedr&   r   r   r*   r*   r+   add_advanced_argsf  s    	
r   c                 C   s*  dt jv }|  }|jddd |r$dntjdd |jddd tjdd |jd	d
d |rXdntjdd | jdddt|rxdntjdd |  }|jddd |rdntjdd |jddd tjdd | jdddtj|rdntjdd | jdddtj|rdntjdd | jdddtj	|rd ntjdd | jd!d"d#t
|r>d$ntjdd | jd%d&d'|r^d(ntjdd | jd)d*d |r~d+ntjdd | jd,d-td.|rd/ntjdd0 | jd1d"d2t|rd3ntjdd | jd4d5|rd6ntjdd7 | jd8d9|rd:ntjdd7 | jd|rd;ntjd<d= d S )>N--help-hiddenz--debugdeveloper_modezruns SPAdes in debug moder   r   z--debug:falser   z--trace-timetime_tracerzenable time trackerz--stop-afterr   r   zRruns SPAdes until the specified check-point ('ec', 'as', 'k<int>', 'mc') inclusiver   r   z--mismatch-correctionmismatch_correctorz>runs post processing correction of mismatches and short indelsz--mismatch-correction:falsez--referencer   	referencez:file with reference for deep analysis (only in debug mode)z--series-analysisseries_analysisz3config for metagenomics-series-augmented reassemblyz--configs-dirz<config_dir>Zconfigs_dirzdirectory with configsz--read-buffer-sizer   read_buffer_sizez/sets size of read buffer for graph constructionz--large-genomelarge_genomeFz'Enables optimizations for large genomesz	--save-gpsave_gpzIEnables saving graph pack before repeat resolution (even without --debug)z--hidden-cov-cutoffr   r   zcoverage cutoff value deeply integrated in simplification (a positive float number). Base coverage! Will be adjusted depending on K and RL!r   z--read-cov-thresholdr   z5read median coverage threshold (non-negative integer)z--only-generate-configonly_generate_configz2generate configs and print script to run_spades.shr   z--no-clear-afterno_clear_afterz5don't delete tmp files after SPAdes pipeline finishedz1prints this usage message with all hidden optionsrT   r   )sysargvr   r$   r"   r   r   r^   r   check_dir_existencery   r   r   )pgroup_hiddenshow_help_hiddenZdebug_groupZmismatch_correction_groupr*   r*   r+   add_hidden_args  s   
r   c              	   C   s   | j ddddddd | j dd	d
dddd | j dddtdddd | j ddddd | j ddddd	dd | j dddd d	dd d!tjv }| j d"d#d|rd$ntjd% | j d&d'd|rd(ntjd% | j d)d*d|rd+ntjd% d S ),Nz--grid-enginez<ge>grid_enginelocalzOrun under grid control
('slurm', 'local', 'mpi', save_yaml') [default: 'local']r   )rU   r   rP   rT   r   z--grid-queuez<string>Z
grid_queueZstandardz/submits the jobs to one of the specified queuesz--grid-nnodesr   Zgrid_nnodesr3   z"specifies the number of processors)rU   r   rQ   rP   rT   r   z--grid-wait	grid_waitzwait for job finishr   r   z--grid-extraZ
grid_extrarX   zany extra commands)r   rP   rT   rU   r   z--grid-timeZ	grid_timez1:00:00z
time limitr   z--grid-profileZgrid_profilezFenable mpi task profiling (for SLURM grid engine, for developers only))r   r   rT   z--grid-valgrindZgrid_valgrindzGrun mpi tasks with valgrind (for SLURM grid engine, for developers onlyz--grid-coredumpZgrid_coredumpzGenable core dumps for mpi tasks (for SLURM cluster, for developers only)r$   ry   r   r   r"   r   )pgroup_clusterr   r*   r*   r+   add_cluster_args"  sr    
r   c                  C   s   t jdtddd} | d}| d}| d}| d}| d	}| d
}t| t| t| t| t| t	| | S )Nz	spades.pyz"%(prog)s [options] -o <output_dir>F)r:   Zformatter_classrC   r   zBasic optionsz
Input datazPipeline optionszAdvanced optionszCluster execution optionszHidden options)
r"   r#   r2   Zadd_argument_groupr   r   r   r   r   r   )rf   r   r   r   r   r   r   r*   r*   r+   create_parserY  s     





r   c                 C   s   dt jv r"t jjd ur"td|  t jjr6td|  t jjrJtd|  t jjr^td|  t jjrrtd|  t jj	rtd|  t jj
rtd|  t jjrtd	|  t jjrtd
|  t jjrtd|  t jjd urtd|  d S )NrW   z[you cannot specify input data (-1, -2, -12, --pe-1, --pe-2 ...) with --restart-from option!z8you cannot specify --dataset with --restart-from option!z3you cannot specify --sc with --restart-from option!z5you cannot specify --meta with --restart-from option!z8you cannot specify --plasmid with --restart-from option!z4you cannot specify --rna with --restart-from option!z8you cannot specify --isolate with --restart-from option!z;you cannot specify --iontorrent with --restart-from option!z?you cannot specify --only-assembler with --restart-from option!zFyou cannot specify --only-error-correction with --restart-from option!zVyou cannot specify strand specificity (--ss-rf or --ss-fr) with --restart-from option!)r    r-   rW   r^   errorr   r   r   r   r   r
   r   r   r   r   )logr*   r*   r+   check_options_for_restart_fromo  s,    r   c                 C   s  | j rt| ttj | jrT|stdtjv rBtjjd urBtd t	j
dtj_n dtjvsjtjjd u rttd |stt| j}|tj| j< t|d || _| jd urtt| j}|tj| j< t|d || _| jd urtt| j}t|d || _d| v r"| jd ur"d	| _| jr0d	| _| jrJ| jrJtd
 | jsZ| jrn| jrntd| | jr| jrtd| | jdkr| jd	krtd | jd	kr| jdkrtd | jr| js| jrtd| | jr| js
| jrtd| | jr0| jr0td | jrJ| jrJtd | j d ur\d	| _| jd urp| j| _| js| js| jr| jsd	| _d S )Nr   z/you cannot specify -o and --test simultaneouslyZspades_testGthe output_dir is not set! It is a mandatory parameter (-o output_dir).zoutput directoryzdirectory for temporary fileszdirectory with custom hmmsr   TzNyou cannot specify --only-error-correction and --only-assembler simultaneouslyz;you cannot specify --only-error-correction in RNA-seq mode!z;you cannot specify --only-error-correction in isolate mode!FzKyou cannot specify --mismatch-correction and --careful:false simultaneouslyzKyou cannot specify --mismatch-correction:false and --careful simultaneouslyzFyou cannot specify --mismatch-correction or --careful in RNA-seq mode!zFyou cannot specify --mismatch-correction or --careful in isolate mode!zLIsolate mode already implies --only-assembler, so this option has no effect.zHRNA mode already implies --only-assembler, so this option has no effect.)r   r   r.   r    r-   r   r   r^   r   ospathr   r   dict_of_rel2abscheck_path_is_asciir   r   r   r   r   r   r   r   r   r
   r   r   warningr   r   )r-   r   skip_output_dirr   r   r   r*   r*   r+   add_to_option  sh    








 r   c                    s  t  | d< t  | d< |js$t  | d< |js4t  | d< |j| d jd< |j| d jd< |j| d jd< |j| d jd< |j| d jd	< |j	| d jd
< |j
| d jd< tj|d| d jd< |j| d jd< |jr|j| d jd< d  |jd ur|j n2|jrtj|tj n|jr*tj|tj  d urd}dd tj rpd fddt D }n r~ }|dkrtd| || d jd< |j| d jd< |j	r|jr|j| d jd< |jsxtj| d jd| d jd< |j | d jd< |jr"|j| d jd< |j| d jd< tj| d jd< |j sR|j!r`d| d jd < |j"rx|j"| d jd!< |jsP|j#d"kr|j$d u rd |_#|j#r|j#| d jd#< n0|j%s|j&rd"| d jd#< ntj'| d jd#< |j(| d jd$< |j)| d jd%< |j*| d jd&< |j+| d jd'< |j"r@|j"| d jd!< |j,| d jd(< |js|j-rt  | d)< d | d) jd*< tj|d+| d) jd,< |j| d) jd-< |j| d) jd.< d S )/Ncommondataseterror_correctionassemblyr   r   r   max_threads
max_memoryr   r   zsewage/usher_barcodes.csvZsewage_matrixr   r   rX   c                 S   s\   t j| oZ| dpZ| dpZ| dpZ| dpZ| dpZ| dpZ| dpZ| dS )	NZhmmzhmm.gzZaazaa.gzZfazfa.gzZfnazfna.gz)r   r   isfileendswith)hmmfiler*   r*   r+   <lambda>  s   zadd_to_cfg.<locals>.<lambda>r   c                    s,   g | ]$}t j |rt j |qS r*   )r   r   join)rt   r  Z	hmms_pathZ
is_hmmfiler*   r+   rv     s   zadd_to_cfg.<locals>.<listcomp>zXCustom HMM folder does not contain any HMMs. They should have .hmm or .hmm.gz extension.Zset_of_hmmsyaml_filenamer   Z	correctedgzip_outputr   r   max_iterationsr]   Zcount_filter_singletonsr   rx   iterative_Kr   r   r   r   r   r   zskip-maskedz
spades-bwaZbwar   z
output-dir).r   r   r   r   rp   r   r   r   r   r   r   r   r   r
  r   r   r   r   r    biosyntheticspades_hmmsr   coronaspades_hmmsisdirlistdirr^   r   r   r   r   r   r   
ITERATIONSr   r   r   r   r   r   r   K_MERS_SHORTr   r   r   r   r   r   )cfgr   bin_homespades_homer-   Zhmmsr*   r  r+   
add_to_cfg  s    









r  c                 C   s~  dd l }| jrv| jrFtdtj|d| tdtj|d| n0tdtj|d| tdtj|d| | js| j	rd| _
| jstd	| tj| js| jrtd
| t| j | js|r| jrt|std| t  nt  nZ| jrVtjdd}	|	jdddd |	jdtdtd |	|\}
}|rVtd| | | j
r| js| js| jdkr| jd urtd| | js| j	r| jrtd| | j| j| j| j
| j| j | j	| j!| j"| j#| j| j$g%d}|dko| j
| j| j#g%ddk}|dko2| j
| jg%ddk}|dkoV| j
| j| j"g%ddk}|dkov| j
| j	g%ddk}|dko| j
| j	| j!g%ddk}|dko| j
| j	| j!| j$g%ddk}|dks |s |s |s |s |s |s td| n|dkrt&d | jr d S d }tj| jd}|rvtj'|rvz|(t)|}W n |j*yt   d }Y n0 |t+_,| j-rz|(t)| j-t+_,W n< |j*y   t./ \}}}td | j- t|  Y n0 t0t+j,tj1| j-t+_,n"t2t+j,t+_,t0t+j,t3 t+_,|d ur&|}nt+j,}|| _-t4|| j5pD| j| j6| t7|t+j8shtd! | jrt9|t9t7|t+j:krtd"d#t+j: d$  | j
r$| j;s$| j	s$t<dt9t7|d%}t<dt9t7|d&}t<dt9t7|d'd(g}t9||| | kr$td) |d u rtt)| j-d*(}|j=||dd+t>d,d- W d    n1 sj0    Y  t?  |S ).Nr   r   ztest_dataset_plasmid/pl1.fq.gzr   ztest_dataset_plasmid/pl2.fq.gzztest_dataset/ecoli_1K_1.fq.gzztest_dataset/ecoli_1K_2.fq.gzTr   zBthe output_dir should exist for --continue and for --restart-from!z4you cannot specify reads with --restart-from option!Fr   r   r   r   r   r   r   rQ   r   r   zyou cannot specify any option except -o with --continue option! Please use '--restart-from last' if you need to change some of the options from the initial run and continue from the last available checkpoint.
Extra options: %sr   zXyou cannot specify --careful, --mismatch-correction or --cov-cutoff in metagenomic mode!z-you cannot specify --careful in RNA-Seq mode!rZ   r3      r]   zWSpecified mode combination is not supported! Check out user manual for available modes.zxNo assembly mode was specified! If you intend to assemble high-coverage multi-cell/isolate data, use '--isolate' option.zinput_dataset.yamlz/exception caught while parsing YAML file (%s):
zYyou should specify at least one unpaired, paired-end, or high-quality mate-pairs library!z)you cannot specify any data types except , z in RNA-Seq mode!r   zassembly-graphpacbionanoporezyou cannot specify any data types except a single paired-end library (optionally accompanied by a single library of PacBio reads or Nanopore reads) in metaSPAdes mode!w"inf)default_flow_styledefault_styler;   )@pyyaml3r   r   r^   re   r   r   r
  r   r   r   r   r   r  r   makedirsr   dataset_is_emptysave_restart_optionsload_restart_optionsr"   r#   r$   r   rn   r%   r   r   r   r   r   r   r
   r   r   r   r   countr   r  loadopen	YAMLErrorr    original_dataset_datar   r   exc_inforelative2abs_pathsdirnamecorrect_datasetgetcwdcheck_dataset_readsr   r   get_lib_ids_by_type READS_TYPES_USED_IN_CONSTRUCTIONrc   READS_TYPES_USED_IN_RNA_SEQr   maxdumpr   set_default_values)r-   r  rW   r   r  load_processed_datasetr   r'   pyyamlcontinue_parserr(   r)   Zmodes_countZis_metaplasmidZis_bgcZis_metaviralZis_rnaviralZ	is_coronaZ	is_sewageZexisting_dataset_dataZprocessed_dataset_fpath_excZpaired_end_libsZ
graph_libsZlong_read_libsfr*   r*   r+   postprocessing.  s    

($ $ $(.









&r@  c              	   C   s  t  }t }|r tjj}tjj}	|}
|}||\t_}tjjd urz|sztjjD ]}|j	f i |d i qP||\t_}|rd}|
|d|  |r|tj_|	tj_ttj| |
 dtjv rtjj}nt }ttj||| ||||}tjjrtjd d fS t|| ||tj tj||fS )NzIPlease specify option (e.g. -1, -2, -s, etc)) for the following paths: %sr  rW   )rJ   r   r    r-   r   r   r%   r   rp   Zset_defaultsr   r
  r   rW   rK   r@  r   r  )r   r  r  secondary_fillingr   r'   r  rf   Zold_output_dirZold_stop_afterr   r:  r   rj   msgrW   r*   r*   r+   
parse_args  s:    

rC  c                 C   s   t  }|  d S r6   )r   Z
print_help)r/   Zshow_hiddenr&   rf   r*   r*   r+   rC     s    rC   c                   C   s  t jjd u rt jt j_t jjd u rNt rDttt j	t t j_n
t j	t j_t jj
d u rbdt j_
t jjd u rvdt j_t jjd u rdt j_t jjd u rdt j_t jjd u rdt j_t jjd u rdt j_t jjd u rdt j_t jjdkrd t j_t jjd u rdt j_t jjd u r*tjt jjt jt j_t jjd u r@dt j_t jjd u rVdt j_t jjd u rldt j_t jjd u rdt j_d S )NFZnonerx   r   )r    r-   r   r   r   r^   get_available_memoryry   minr   r   r   r   r   r   r   r   r   r   r   r   r   r
  r   TMP_DIRr   r   r   r   r*   r*   r*   r+   r9    sD    

r9  c                   C   s4   t jf i ttjt_d tj_d tj_d tj_d S r6   )	r"   Z	Namespacevarsr    r-   restartr   r   r   r*   r*   r*   r+   r'    s    r'  c                  C   s|   dt jv rFt jjrFt jjt _t jjdkr2d t j_nt jjt j_d t j_t jjD ](} t jj|  d urNt jj|  t jj| < qNd S )Nr   rx   )r    rH  r   r-   original_k_mersrp   )optionr*   r*   r+   r(    s    

r(  c                 C   s*   | D ] \}}|dks| dr dS qdS )Nr   r   TF)r   )r'   rk   rj   r*   r*   r+   
will_rerun  s    rK  c                  C   sN   t jdd} | jdddd | jddd td	d
 |  \}}|jpJ|jd u S )NFr   r   r   r   r   r   r   r   )r   rP   rQ   r   )r"   r#   r$   r   r%   r   )r<  r(   r)   r*   r*   r+   is_first_run  s
    rL  c                  C   sF   t jdd} | jdtdtd |  \}}|jd u r8d S tt|jS )NFr   r   r   r  )	r"   r#   r$   r   rn   r%   r   r   r   )Zoutput_parserr(   r)   r*   r*   r+   get_output_dir_from_args#  s    
rM  )F)N)FN)FN)3r   r   r"   r   Zos.pathr   r   r   r^   r    Zprocess_cfgr   r,   r.   r1   rA   r2   rK   ZActionrL   rn   rs   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r@  rC  rC   r9  r'  r(  rK  rL  rM  r*   r*   r*   r+   <module>
   sZ   $&
	-T
6pLKq7I^
{
*
&