3
Pzb                 @   s  d dl mZ d dlZd dlmZ d dlmZmZ d dlmZm	Z	m
Z
mZ d dlZd dlmZmZ d dlmZmZmZmZ d dlmZ d	ZeejeZee reere	eZeed
ZeedgZeedgZedeedfgZeedgZeedeedgZ eedeedeedgZ!eedeedgZ"G dd deZ#dd Z$dd Z%dd Z&dd  Z'd!d" Z(d#d$ Z)dAd%d&Z*d'd( Z+dd e,d)fd*d+Z-dBd,d-Z.dCd/d0Z/d1d2 Z0d3d4 Z1d5d6 Z2d7d8 Z3d9d: Z4d;d< Z5dDd=d>Z6d?d@ Z7dS )E    )with_statementN)copy)OptionParserOption)joinabspathisfileisdir)qconfigqutils)assert_file_existsset_up_output_dircheck_dirpathis_non_empty_file)get_modeZ	test_datazreference.fasta.gzzreads1.fastq.gzzreads2.fastq.gzgenez	genes.gffzoperons.gffzcontigs_1.fastazcontigs_2.fastazmeta_ref_1.fastazmeta_ref_2.fastazmeta_ref_3.fastazmeta_contigs_1.fastazmeta_contigs_2.fastac               @   s`   e Zd Zdd Zejd ZeejZeed< ejd	 Zej	d
 Z	ej
d Z
ejd Zdd ZdS )QuastOptionc             C   s   t || j t|S )N)r   destr   )optionoptvalue r   C/home/psgendb/BIRCHDEV/pkg/quast-5.2.0/quast_libs/options_parser.py
check_file)   s    zQuastOption.check_filefileextendc             C   s@   |dkr&|j d}tt|g j| ntj| ||||t| d S )Nr   ,)splitensure_valuer
   r   r   take_action)selfactionr   r   r   valuesparserZsplit_valuer   r   r   r   5   s
    
zQuastOption.take_actionN)r   )r   )r   )r   )r   )__name__
__module____qualname__r   r   ZTYPESr   ZTYPE_CHECKERZACTIONSZSTORE_ACTIONSZTYPED_ACTIONSZALWAYS_TYPED_ACTIONSr   r   r   r   r   r   (   s   





r   c             C   s0   t | | st| |d kr&t| || t| |S )N)hasattrgetattrsetattr)r"   attrr   r   r   r   r   >   s    r   c             C   s6   t jj|}tt| j| ttjdt| d  d S )NzYou have specified z6 as an output path.
Please, use a different directory.)	ospathr   r)   r
   r   r   output_dirpathstr)r   opt_strr   r#   loggerr-   r   r   r   check_output_dirD   s    r1   c             C   s2   |t jk r |jdt j ddd tt | j| d S )Nz^--extensive-mis-size should be equal to or greater than minimal local misassembly length (%d)!T   )	to_stderrexit_with_code)r
   local_misassembly_min_lengtherrorr)   r   )r   r/   r   r#   r0   r   r   r   set_extensive_mis_sizeK   s    
r7   c               C   s"   t jd krt jrt jS t jS t jS )N)r
   extensive_misassembly_thresholdlarge_genomeLARGE_EXTENSIVE_MIS_THRESHOLDDEFAULT_EXT_MIS_SIZEr   r   r   r   +get_current_extensive_misassembly_thresholdR   s    
r<   c             C   sB   |t jks|t kr0|jdt jt f ddd tt | j| d S )NzZ--local-mis-size should be between short indel size (>%d) and --extensive-mis-size (<=%d)!Tr2   )r3   r4   )r
   SHORT_INDEL_THRESHOLDr<   r6   r)   r   )r   r/   r   r#   r0   r   r   r   set_local_mis_sizeX   s
    
r>   c             C   s0   t jdk st jt jkr,| jdt j ddd d S )Nr   zJ--fragmented-max-indent should be between 0 and --extensive-mis-size (%d)!Tr2   )r3   r4   )r
   fragmented_max_indentr8   r6   )r0   r   r   r   check_fragmented_max_indent`   s    r@   c             C   sH   |d k	r"x|D ]}t t|d qW |d k	rDx|D ]}t t|d q0W d S )NTF)r)   r
   )r   r/   r   r#   store_true_valuesstore_false_valuesvr   r   r   set_multiple_variablesf   s    

rD   c             C   sR   |j  |kr tt| j|j   n.|jd| d t| d dj| ddd d S )Nzincorrect value for z (z+)! Please use one of the following values: z, Tr2   )r3   r4   )lowerr)   r
   r   r6   r.   r   )r   r/   r   r#   r0   available_valuesr   r   r   check_str_arg_valueo   s
     rG   ZInfc             C   s   |t |  ko|kn  r<tt| j| t|j| j| n~|rPtt| j| nj|r|jd| d t| d t| d t| ddd n,|jd| d t| d t| ddd d S )	Nzincorrect value for z (z#)! Please specify a number between z and Tr2   )r3   r4   z()! Please specify a number greater than )floatr)   r
   r   r"   r6   r.   )r   r/   r   r#   r0   default_value	min_value	max_valuer   r   r   check_arg_valuex   s    *rL   c             C   s   g }d}xPt | D ]D\}}||kr.|j| q|j|rt||kr|j| t|}qW x(t|ddD ]}|rz| |d = | |= qhW | S )N   T)reverse)	enumerateappend
startswithlensorted)quast_py_argsr   argZopt_idxsZcommon_lengthidxoZopt_idxr   r   r   remove_from_quast_py_args   s    



rY   Fc       	      C   s   |r,|}t |d td|fg}|jd n<d|krD|jd\}}ntj| }}t |d t||fg}ttdt j| d S )Nzgenomic featurer   a{  Option -G is deprecated! Please use --features (or -g) to specify a file with genomic features.
If you want QUAST to extract only a specific genomic feature from the file, 
you should prepend the filepath with the feature name and a colon, for example:
--features CDS:genes.gff --features transcript:transcripts.bed
Otherwise, all features would be counted:
--features genes.gff
:features)r   dictwarningr   r
   ALL_FEATURES_TYPEr   update)	r   r/   r   r#   r0   Zis_old_formatfpathr[   featurer   r   r   parse_features   s    

rb   c       
         s   g }|j d}x\t|D ]P\}}tjj|rT fddtj|D }	|jt|	 qt|d |j	| qW t
t| jg j| d S )Nr   c                s4   g | ],\}}}|D ]}t j| d rt||qqS ))r0   )r   check_is_fasta_filer   ).0r,   dirsfilesr   )r0   r   r   
<listcomp>   s    z)parse_meta_references.<locals>.<listcomp>	reference)r   rP   r+   r,   r	   walkr   rT   r   rQ   r   r
   r   )
r   r/   r   r#   r0   Z
ref_fpathsZ
ref_valuesiZ	ref_valueZ
referencesr   )r0   r   parse_meta_references   s    

rk   c       	      C   s   g }|j d}xdt|D ]X\}}|j|rHt||j d  |j| q|jd|j  d t| d ddd qW tt	| j
g j| d S )	Nr   z filezincorrect extension for z file (z)! Tr2   )r3   r4   )r   rP   endswithr   upperrQ   r6   r.   r   r
   r   r   )	r   r/   r   r#   	extensionr0   fpathsr"   rj   r   r   r   parse_files_list   s    

rp   c             C   sL   |r$t | t |kr$|jdddd |rHt | t |krH|jdddd d S )NzCNumber of SAM files does not match the number of files with contigsT   )r3   r4   zCNumber of BAM files does not match the number of files with contigs)rS   r6   )contigs_fpaths
sam_fpaths
bam_fpathsr0   r   r   r   check_sam_bam_files   s    ru   c               C   s   dt _dt _dt _d S )NF)r
   
prokaryoteanalyze_gaps	show_snpsr   r   r   r   set_large_genome_parameters   s    ry   c             C   s&   | j | tjtjd tjd d S )N)streamr2   )r6   r
   usagesysstderrexit)r0   msgr   r   r   wrong_test_option   s    
r   c          
   C   s~   ddddddddd	d
g
}dddddg}x|D ]}|| kr,| j | q,W x|D ]}t| |dd qLW x|D ]}t| | qhW | S )Nz-oz--output-dirz-rz-Rz--referencez--max-ref-numberz-lz--labelsz--references-listz
--blast-dbz-Lz--testz--test-no-refz--unique-mappingz--reuse-combined-alignmentsT)rV   )removerY   )rU   rr   opts_with_args_to_removeopts_to_removecontigs_fpathr   r   r   r   clean_metaquast_args   s    


r   c       	      C   s  ddg}dddg}x|D ]}t | |dd qW x|D ]}t | | q4W | dg7 } d	jd
d tjD t_tjstdt_| dg7 } | tjg7 } tjj|tj}tjtj	}tj
ptjj||d t_
tjptjj||d t_tjptjj||d t_tj
rttj
r| dg7 } | tj
g7 } tjrHttjrH| dg7 } | tjg7 } tjrrttjrr| dg7 } | tjg7 } tjj|tjtj}|rtjj|r| d|g7 } d S )Nz--contig-thresholdsz--sv-bedz-sz--split-scaffoldsz--combined-refT)rV   z--no-check-metar   c             S   s   g | ]}|t jkrt|qS r   )r
   
min_contigr.   )rd   	thresholdr   r   r   rg      s    z.prepare_regular_quast_args.<locals>.<listcomp>Nonez.bedz.covz.physical.covz--covz
--phys-covz--aligns-for-reuse)rY   r   r
   contig_thresholdsr+   r,   reads_stats_dirnamer   name_from_fpathcombined_ref_namebed	cov_fpathphys_cov_fpathr    detailed_contigs_reports_dirnamealigner_output_dirnamer	   )	rU   Zcombined_output_dirpathreuse_combined_alignmentsr   r   r   Zreads_stats_dirpathZreference_namealignments_for_reuse_dirpathr   r   r   prepare_regular_quast_args   s<    







r   c          ^   C   s  t |d }|dkrdnd}|dkr(dndt_d|ksFd|ksFd|krdtjd|k|dd	 tjd d
|kstd|krtj| tjd |dd  }dgtdddfdgtdddfdgtdddfdgtdddfdgtdddfddgtdddt| fdfd d!gtd"d#dt	| fddd$d%fd&d'd(gtd)|r6dnd*|rBdnd+|rP| fnd |r\t
nd d,fd-d.gtd/d*d0d1fd2d3gtd4dd| dftd,fd5d6gtd7dd| ftd,fd8d9gtd:d*d0d1fd;d<gtd=d*d0d1fd>gtd:d*d0d1fd?gtd=d*d0d1fd@gtdAd*d0d1fdBgtdCd*d0d1fdDgtdEd*d0d1fdFgtdEd*d0d1fdGgtdHd*d0d1fdIgtdJd*d0d1fdKgtdLd*d0d1fdMgtdNd*d0d1fdOgtdPd*dQfdRgtdSd*dQfdTgtdUdddV| ftd,fdWgtdXdddY| ftd,fdZgtd[d*dQfd\gtd]d*dQfd^gtd_d*dQfd`gtdaddQfdbdcgtddddQfdegtdfddfdggtdhdtdidjdhgiddkfdldmgtdnddfdodpgtdqddfdrgtdsdtdsgdqgdtdufdvgtdwddfdxdygtdjddfdzgtd{ddfd|gtd}ddfd~gtdd#dQfddgtddtjdt| fddddgidfdgtdddt	| fdddd%fddgtdddfdgtdddfdgtdtdQfdgtddddfddgtdd#tjdt| fdfdgtdd#tjdt| fdfdgtdtdQfddgtdd#dQfddgtdd#dQfdgtddtjdt	| fddddfdgtdd#dQfdgtddfdgtdd#tjdt	| fddddfdgtddfdgtddddfddgtddddfddgtddddfdgtdd#dQfdgtdddfdgtdd#dt	| fddid%fdgtdd#dt	| ftjtjdd%fdgtdddfdgtdddt| fdtjid%fdgtdddfdgtdddfdgtdddfdgtddtdddgdddddgdtddkfdgtdddfdgtddtdiddgidufdgtdddfdgtdddfdgtddtdddgidufdgtdddfdgtdddfdgtdddfdgtdddfdgtddtdgddgdtdufdgtdddfdgtdddfdgtdd0dfdgtdd0dfddgtdddfddgtddfdgtddfdgtdddfdgtdddfgZ}|rb|dgtdddfd gtdddfdgtdd#dt	| ftjdd$d%fdgtddfdgtddfg7 }ttd}x|D ]\}}|j|| qtW |j|dd  \}	}
tjr|rd	}t | | tj!r| rd
}t | | tj"	rtj#	r| j$dtj%	rdgndy d dd tj&	r@tj'	r@| j$d-dd t(tj)t(tj*k	rj| j$ddd t(tj+t(tj,k	r| j$ddd tj-	rtj.	s| j$ddd tj/ 	rtj0 	rtj+	ptj1 	r| j$ddd tj2
s
tj!
s
tjrt3tj4t_5t6tj5dt7t8j9j:tj5 d d  tj2
sRtj
r|
r\t;nt<t_.|
st=t_>t?t_@dt_"tj
sdt_#tj
rtAt_)tBt_*|
|
rtCntD7 }
dt_2tEdd |
D s tEdd tj)D s tEdd tj*D r| jFd tjd |
sL| j$ddd tjtjGd tjd tjrZtH  tI t_tjJrrtK|  tjLd krtjrtjMntjNt_LtjOd krtjrtjPntjQt_Otjd kr| rtjRt_x|
D ]}tS|d  qW tjTrdt_Utj5s,t6t8jV d!t7t8jV  d d"  tWtj5tjTtj5 |sHtjUnd \t_5t_T}| jXtj5tjY | jZtj[d# | j\|d dd$ | j]  |r| j^d% t_j`tj5 tjart_jbtja|
t_at_jc|
tjatjdt_atjed&krg t_end'd( tjejfd)D t_etjgd&kr&g t_gnd*d( tjgjfd)D t_gtjh|  |jijjrvtjdkrvdt_| jkd+ tjlrtjmstj@stj>r| jkd, |rtn||
}tjostjprtq|
tjotjp|  ||
fS (.  Nr   metaTFlargez-hz--helpz--help-hidden)modeshortz-vz	--versionrM   z--debugdebug
store_true)r   r!   z--no-portable-htmlportable_htmlZstore_falsez--testtestz	--test-svtest_svz--test-no-reftest_no_refz-oz--output-dirr-   stringcallback)r   typer!   r   callback_argsz-tz	--threadsmax_threadsint)rI   rJ   )r   r   r!   r   r   callback_kwargsz-rz-Rz--referencerh   r   store)r   r   r!   r   r   z-Oz	--operonsoperonsr   )r   r   r!   z-Gz--genesgenesz-gz
--featuresr[   z-1z--reads1forward_readsz-2z--reads2reverse_readsz--pe1z--pe2z--mp1mp_forward_readsz--mp2mp_reverse_readsz--12interlaced_readsz--pe12z--mp12mp_interlaced_readsz--singleunpaired_readsz--pacbiopacbio_readsz
--nanoporenanopore_readsz	--ref-samreference_sam)r   r   z	--ref-bamreference_bamz--samrs   z.samz--bamrt   z.bamz
--sv-bedper   z--covr   z
--phys-covr   z--aligns-for-reuser   z-lz--labelslabelsz-Lall_labels_from_dirsz--mgmmetagenemarkrA   gene_finding)r   r!   r   r   defaultz-sz--split-scaffoldssplit_scaffoldsz-ez--eukaryoterv   z--fungus	is_fungus)rA   rB   )r   r!   r   r   z--larger9   z-fz--gene-findingz--rna-findingrna_gene_findingz--fragmentedcheck_for_fragmented_refz--fragmented-max-indentr?   z-az--ambiguity-usageambiguity_usagerF   Znoneoneall)r   r   r   r!   r   r   r   z--ambiguity-scoreambiguity_scorerH   g?g      ?)rJ   rK   z-uz--use-all-alignmentsuse_all_alignmentsz--strict-NA	strict_NAz--unaligned-part-sizeunaligned_part_sizez--skip-unaligned-mis-contigsunaligned_mis_thresholdZstore_constg        )r   r!   Zconstz-xz--extensive-mis-sizer8   )r   r   r   r!   r   r   z--local-mis-sizer5   z--scaffold-gap-max-sizescaffolds_gap_thresholdz-mz--min-contigr   z-iz--min-alignmentmin_alignmentz--min-identitymin_IDYg      T@g      Y@z--est-ref-sizeestimated_reference_sizez--contig-thresholdsr   )r   z
--x-for-Nxx_for_additional_Nxd   z--gene-thresholdsgenes_lengthsz	--glimmerglimmer)r   r!   r   z-bz--conserved-genes-finding	run_buscoz-kz--k-mer-statsuse_kmcz--k-mer-sizeunique_kmer_lenz--upper-bound-assemblyoptimal_assemblyz--upper-bound-min-conupperbound_min_connectionsrJ   z--est-insert-sizeoptimal_assembly_insert_sizez--report-all-metricsreport_all_metricsz--plots-formatplot_extensionz--use-input-ref-orderuse_input_ref_orderz--circosdraw_circosz--no-read-statsno_read_statsz--fastZfastno_gcno_svrx   
draw_plotshtml_reportcreate_icarus_htmlrw   z
--no-checkno_checkz--no-check-metano_check_metaz	--no-snpsz
--no-plotsz	--no-htmlrB   z--no-icarusz--no-gcz--no-svz--memory-efficientmemory_efficientz--space-efficientspace_efficientz--silentsilentz--combined-refis_combined_refz--colorsused_colorsz--lsused_lsz-jz--save-json	save_jsonz-Jz--save-json-tojson_output_dirpathz--err-fpatherror_log_fpathz--read-supportcalculate_read_supportz--agbis_agb_modez--unique-mappingunique_mappingz--reuse-combined-alignmentsr   z--max-ref-numbermax_referencesz--references-listreferences_txtz
--blast-dbcustom_blast_db_fpath)Zoption_classz,Option --test-sv can be used for QUAST only
z4Option --test-no-ref can be used for MetaQUAST only
zYou cannot use --glimmer and z simultaneously!   )r4   zTYou cannot use --use-all-alignments and --reuse-combined-alignments simultaneously! zGReused alignments are always filtered, i.e. a subset of all alignments.a  Use the SAME number of files with forward and reverse reads for paired-end libraries (-1 <filepath> -2 <filepath>).
Use --pe12 option to specify a file with interlaced forward and reverse paired-end reads.
Use --single option to specify a file with unpaired (single-end) reads.zUse the SAME number of files with forward and reverse reads for mate-pair libraries (--mp1 <filepath> --mp2 <filepath>).
Use --mp12 option to specify a file with interlaced forward and reverse mate-pair reads.zUpperBound assembly is reference-based by design, so you cannot use --upper-bound-assembly option without specifying a reference (-r)!zUpperBound assembly construction requires mate-pairs or long reads (Pacbio SMRT or Oxford Nanopore), so you cannot use --upper-bound-assembly without specifying them!z!You are trying to run QUAST from z.
z/Please, rerun QUAST from a different directory.c             s   s   | ]}t | V  qd S )N)r   )rd   r`   r   r   r   	<genexpr><  s    z parse_options.<locals>.<genexpr>c             s   s   | ]}t | V  qd S )N)r   )rd   r`   r   r   r   r   =  s    a  
You are probably running QUAST installed via pip, which does not include test data.
This is fine, just start using QUAST on your own data!

If you still want to run tests, please download and unpack test data to CWD:
  wget quast.sf.net/test_data.tar.gz && tar xzf test_data.tar.gz
r2   z3You should specify at least one file with contigs!
)r3   )rz   ZcontigszLAn output path was not specified manually. You are trying to run QUAST from z6Please, specify a different directory using -o option.)r   )
wrap_afteris_mainzOutput directory already exists and looks like a QUAST output dir. Existing results can be reused (e.g. previously generated alignments)!r   c             S   s   g | ]}t |qS r   )r   )rd   xr   r   r   rg   z  s    z!parse_options.<locals>.<listcomp>r   c             S   s   g | ]}t |qS r   )r   )rd   r   r   r   r   rg   ~  s    zV--ambiguity-usage was set to 'all' because not default --ambiguity-score was specifiedzAnalysis of genes and/or operons files (provided with -g and -O) requires extensive RAM usage, consider running QUAST without them if memory consumption is critical.zYou cannot use --use-all-alignments and --reuse-combined-alignments simultaneously! Reused alignments are always filtered, i.e. a subset of all alignments.)rr   r
   r9   r{   r|   r~   print_versionr\   r1   rL   rk   rb   rp   rD   r   rG   r   r8   r7   r5   r>   r   r   optimal_assembly_min_ISoptimal_assembly_max_ISsupported_plot_extensionsr   r   r   
add_option
parse_argsr   r   r   r   r   r6   r   r   r   rS   r   r   r   r   r   rh   r   r   r   r   r   test_output_dirnamer-   r   r.   r+   r,   dirnamemeta_test_referencestest_referencetest_featuresr[   test_operonsr   test_forward_readstest_reverse_readsmeta_test_contigs_fpathstest_contigs_fpathsanyinfor}   ry   r<   r?   r@   r   LARGE_MIN_CONTIGDEFAULT_MIN_CONTIGr   LARGE_MIN_ALIGNMENTDEFAULT_MIN_ALIGNMENTDEFAULT_MIN_IDYr   r   r   getcwdr   set_up_file_handlerr   set_up_console_handlerr   print_command_linestartnoticer   remove_reportsr   parse_labelsprocess_labelsr   r   r   r   set_max_threadsr"   r   r]   r   r   r   rs   rt   ru   )r0   Z
quast_argsr   Zis_metaquastrU   optionsr#   argskwargsoptsrr   r   Zc_fpathexisting_quast_dirr   r   r   parse_options  s   










"
4



 
r  )NN)N)F)F)8
__future__r   r+   r   optparser   r   Zos.pathr   r   r   r	   r|   
quast_libsr
   r   quast_libs.qutilsr   r   r   r   Zquast_libs.qconfigr   Ztest_data_dir_basename
QUAST_HOMEZtest_data_dirr   r   r   r\   r   r   r  r   r  r   r   r1   r7   r<   r>   r@   rD   rG   rH   rL   rY   rb   rk   rp   ru   ry   r   r   r   r  r   r   r   r   <module>   sX   

		


$