
bc           @  sv  d  d l  m Z d  d l  m Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l m Z d  d l	 m
 Z
 d  d l m Z m Z m Z m Z m Z m Z m Z d  d l m Z m Z d  d l m Z m Z d  d	 l m Z d  d
 l Td  d l m Z m Z m Z m  Z  m! Z! m" Z" m# Z# m$ Z$ d  d l% m& Z& d  d l' m( Z( e& e j)  a* i  a+ d Z, d e- f d     YZ. d e- f d     YZ/ d d d  Z1 d   Z2 d   Z3 d e4 d  Z5 d   Z6 d d d d e4 e4 d d  Z7 d   Z8 d   Z9 d   Z: d   Z; d   Z< d   Z= d    Z> d!   Z? d d e@ d"  ZA d#   ZB d$   ZC d% d&  ZD d'   ZE d d d(  ZF d S()   i(   t   with_statement(   t   divisionN(   t   defaultdict(   t   sqrt(   t   isfilet   joint   basenamet   abspatht   isdirt   dirnamet   exists(   t   qconfigt   qutils(   t   minimap_fpatht   ref_labels_by_chromosomes(   t   create_fai_file(   t   *(   t   is_non_empty_filet
   add_suffixt   get_chr_len_fpatht   run_parallelt   get_path_to_programt   check_java_versiont
   percentilet   calc_median(   t
   get_logger(   t
   save_readsi
   t   Mappingc           B  s&   e  Z d  Z d   Z e d    Z RS(   i   c         C  st   | d t  | d  t  | d  | d t | d  f \ |  _ |  _ |  _ |  _ |  _ |  j |  j d |  _ d  S(   Ni   i   i   i   i	   i   (   t   intt   lent   reft   startt   mapqt   ref_nextt   end(   t   selft   fields(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   __init__$   s    Yc         C  sK   |  j  d  r d  St |  j d   d k  r2 d  St |  j d   } | S(   Nt   @s   	i   (   t
   startswitht   NoneR   t   splitR   (   t   linet   mapping(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   parse)   s    (   t   __name__t
   __module__t   MIN_MAP_QUALITYR%   t   staticmethodR,   (    (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyR   !   s   	t   QuastDeletionc           B  s   e  Z d  Z d Z e j d e Z d d d d d d  Z d   Z	 d   Z
 d d d  Z d d d  Z d   Z d	   Z d
   Z RS(   sy   describes situtations: GGGGBBBBBNNNNNNNNNNNNBBBBBBGGGGGG, where
    G -- "good" read (high mapping quality)
    B -- "bad" read (low mapping quality)
    N -- no mapped reads
    size of Ns fragment -- "deletion" (not less than MIN_GAP)
    size of Bs fragment -- confidence interval (not more than MAX_CONFIDENCE_INTERVAL,
        fixing last/first G position otherwise)
    i   i   c         C  sI   | | | | | | f \ |  _  |  _ |  _ |  _ |  _ |  _ d |  _ d  S(   Nt   QuastDEL(   R   t	   prev_goodt   prev_badt   next_badt	   next_goodt   next_bad_endt   id(   R#   R   R3   R4   R5   R6   R7   (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyR%   @   s    <c         C  sS   |  j  d  k	 oR |  j d  k	 oR |  j d  k	 oR |  j d  k	 oR |  j |  j t j k S(   N(   R3   R(   R4   R5   R6   R1   t   MIN_GAP(   R#   (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   is_validE   s    c         C  s   | j  |  _ |  j |  _ |  S(   N(   R"   R3   R4   (   R#   R+   (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   set_prev_goodJ   s    c         C  sc   | r | n | j  |  _ |  j d  k s@ |  j t j |  j k  r_ t d |  j t j  |  _ n  |  S(   Ni   (   R"   R4   R3   R(   R1   t   MAX_CONFIDENCE_INTERVALt   max(   R#   R+   t   position(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   set_prev_badO   s    (c         C  si   | r | n | j  |  _ |  j d  k r6 |  j |  _ n/ |  j t j |  j k re |  j t j |  _ n  d  S(   N(   R   R6   R5   R(   R1   R<   (   R#   R+   R>   (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   set_next_goodU   s
    c         C  s(   | j  |  _ | j |  _ |  j |  _ d  S(   N(   R   R5   R"   R7   R6   (   R#   R+   (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   set_next_bad\   s    c         C  sM   |  j  d  k r | j |  _  n  | j |  _ t | j |  j  t j  |  _ d  S(   N(	   R5   R(   R   R"   R7   t   minR1   R<   R6   (   R#   R+   (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   set_next_bad_enda   s    c      
   C  s@   d j  t t |  j |  j |  j |  j |  j |  j |  j g   S(   Ns   	(	   R   t   mapt   strR   R3   R4   R5   R6   R8   (   R#   (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   __str__g   s    N(   R-   R.   t   __doc__R<   R   t   extensive_misassembly_thresholdR9   R(   R%   R:   R;   R?   R@   RA   RC   RF   (    (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyR1   3   s   				c         C  s@  t  j |   } | sQ t | | d  } t | | d  } t | | d  } n! | j d d  } t | d  } | p t | | d  } t |  r t |  s t j d | d  t t	 | d  } q t j
 d |  | Sn  t |  s-t | | t j | t d	 d
 t | | | t d | n  t | d  sqt  j t d  d | g d t | d  d t n  t |   t | | d  }	 t |	 | d  }
 t |
  st |	  rt j |	 d t n  t j |	  t   } t j j   } | d c t j t 7<t |  | t  t  j d d d t  |  d d d d d d t!   d d  | d! |
 d" t |	 | d#  d$ |  d% t  |  d& |	 g d t | d  d t d' | n  t |
  r<t | d(  } t | d)  } t  j d d t!   d* d  |
 d! | d+ | d$ |  d, g	 d t | d  d t t" | |  n  | S(-   Ns   .sams   .bams   .sorted.bamt   sorteds   .beds     Existing BED-file: s0    may be corrupted. Bed file will be re-created. s     Using existing BED-file: t   filter_rules   not unmapped and proper_pairt   threadss   .bait   sambambat   indext   stderrt   at   loggert   _gridsss   .vcft   ignore_errorst   PATHt   javas   -eas   -Xmxt   gs   -Dsamjdk.create_index=trues(   -Dsamjdk.use_async_io_read_samtools=trues)   -Dsamjdk.use_async_io_write_samtools=trues(   -Dsamjdk.use_async_io_write_tribble=trues   -cps   gridss.CallVariantss   I=s   O=s	   ASSEMBLY=s   .gridss.bams   R=s   WORKER_THREADS=s   WORKING_DIR=t   envt   rawt   filtereds#   au.edu.wehi.idsv.VcfBreakendToBedpes   OF=s   INCLUDE_HEADER=TRUE(#   R   t   name_from_fpathR   t   replaceR   R   t   is_valid_bedRP   t   warningt
   output_dirt   infoR   t   sambamba_viewR   t   max_threadst   sort_bamt   call_subprocesst   sambamba_fpatht   openR   R   t   shutilt   rmtreet   Truet   ost   makedirst   get_gridss_memoryt   environt   copyt   pathsept   bwa_dirpatht	   bwa_indexRE   t   get_gridss_fpatht   reformat_bedpe(   t   cur_ref_fpatht   output_dirpatht	   err_fpathR`   t	   bam_fpatht	   bed_fpatht   ref_namet	   sam_fpatht   bam_sorted_fpatht   vcf_output_dirpatht	   vcf_fpatht   max_memRV   t   raw_bed_fpatht   filtered_bed_fpath(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   process_one_refm   sZ    
	#c      
   C  s]  t  j d  t | t j |   d t j  } t |  rQ t  j d |  | St d  sk t	 d  r| t  j
 d  d  St d  s t  j
 d  d  S| r&t t |  t j  } t d	 t j |  } g  | D] } | | | | f ^ q }	 t t |	 | d
 t }
 |
 rHt j |
 |  qHn" t |  | | t j d | d | t  j d |  | S(   Ns0     Searching structural variations with GRIDSS...t   _s       Using existing file: RT   g?s`   Java 1.8 (Java version 8) or later is required to run GRIDSS. Please install it and rerun QUAST.t   Rscripts?   R is required to run GRIDSS. Please install it and rerun QUAST.i   t   filter_resultsRu   Rv   s       Saving to: (   RP   R^   R   R   RY   R   t   sv_bed_fnameR   R   R   R\   R(   RB   R   R`   R=   R   R   Rg   t	   cat_files(   t   main_ref_fpathRu   t   meta_ref_fpathsRs   Rt   t   final_bed_fpatht   n_jobst   threads_per_jobRr   t   parallel_argst
   bed_fpaths(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   search_sv_with_gridss   s*    #%"c         C  s  g  } t  |  t j  } t j d  t } t |  rQ t } t j d |  n  | s] | r`t |  }	 d  }
 x|	 D]} t
 j |  } | ry | j d k r qy n  |
 r=|
 j | j k r=|
 j d  k r| j |
 j t j k rO|
 j |  | j t
 j k r~|
 j |  |
 j   r1| j |
  n  t | j  j |  }
 q~q:| j t
 j k rq|
 j |  q:|
 j |  q| j |
 j t j k r|
 j   r| j |
  n  t | j  j d |
 j  }
 q| j t
 j k r-|
 j |  |
 j   r| j |
  n  t | j  j |  }
 q|
 j |  nc |
 r|
 j | k r|
 j d | |
 j  |
 j   r| j |
  qn  t | j  j |  }
 | r| | j } | j j   d k s| | | j k r| | d  k	 r| | j |  qqqqy qy W|
 rW|
 j | k rW|
 j d | |
 j  |
 j   rW| j |
  qWn  Wd  QXn  | rx0 | j   D] } | d  k	 rs| j    qsqsWn  | r
t j d t! |   t j d |  t | d  / } x% | D] } | j t" |  d	  qWWd  QXn  | S(
   Ns@     Looking for trivial deletions (long zero-covered fragments)...s       Using existing file: R   R>   t   =s     Trivial deletions: %d founds       Saving to: t   ws   
(#   R   R   t   trivial_deletions_fnameRP   R^   Rg   R   t   FalseRd   R(   R   R,   R   R5   R   R4   R1   R9   RA   R    R/   R@   R:   t   appendR;   R?   R7   RC   R!   t   stript   writet   valuest   closeR   RE   (   t   temp_output_dirt   sam_sorted_fpatht	   ref_filest
   ref_labelst   seq_lengthst   need_ref_splittingt	   deletionst   trivial_deletions_fpatht   need_trivial_deletionst   sam_filet   cur_deletionR*   R+   t   cur_reft   ref_handlert   ft   deletion(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   search_trivial_deletions   s|    !(!$t   allc         C  s/  g  } t  j |   } t j p. t | | d  } t | d  } | d k rm t | |  } t | |  } n  t | | d  } t |  s | j |  n  t |  r | d k s | d k r | j |  n  t | d  }	 t |	  s t	 j
 |	  n  t | d  }
 t | d  } t |  | |	 |
 | t j d	 t j d
 t j d | d t d t d | \ } } } t j st j d k r)| d k r| rt | | |  \ } } } | st j d  q&| t _ q)| d k r)t |  r)y. t t |  j    } | r| t _ n  Wq&q&Xq)n  | s<| | | f S| sSt j d  d S| r"t |	 t | d   } t |	 t | d   } t |  rt j d |  n2 t | | t j | t d d t | | | t  t |  r"| r"t |	 |  | | | |
 | | | d | d t 	q"n  | | | f S(   Ns   .covt	   uncoveredR   s   .is.txtt   pet   temp_outputs   reads_stats.logs   reads_stats.errRx   Ru   t   required_filest   is_referencet   alignment_onlyt   using_readst   autos!     Failed calculating insert size.s%     Failed detecting uncovered regions.t   mappedRI   s"     Using existing sorted BAM-file: RJ   s   not unmappedt   uncovered_fpatht   create_cov_files(   NNN(   R   RY   R   t	   cov_fpathR   R   R   R   R   Rh   Ri   t   align_single_fileR`   t   reference_samt   reference_bamRg   t   optimal_assembly_insert_sizet   calculate_insert_sizeRP   R^   R   Rd   t   readlineR(   t   get_safe_fpathR_   Ra   t   get_coverageR   (   t	   ref_fpathR]   R   t   calculate_coverageR   Rw   R   R   t   insert_size_fpathR   t   log_pathRt   t   correct_chr_namesRx   Ru   t   insert_sizeR   t   bam_mapped_fpathRy   (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   align_reference  sd    %
c   *      C  s  g  } d& \ }	 }
 } | rt j |  } t j pC t | | d  }	 t j p_ t | | d  }
 t j p{ t | | d  } |	 |
 | g } t j r t	 j
 d  d  }	 n t |	  r
t |	  s t	 j d |	 d  | j t | | d   qat	 j
 d |	  nW t j rTt j rTt j rat j rat	 j
 d  d  }	 t t _ qan | j |	  t j rt |
  rt |
  } | rt	 j
 d	 |
  q| j |
  n  t |  rt	 j
 d
 |  q| j |  qt	 j
 d  d  }
 d  } n  t j st t j t |   d  } t d t j |  } t j pLd  g t |   } t j phd  g t |   } g  t |   D]5 \ } } | | | | | | | | | | | f	 ^ qx} n d } t j } g  } | r| j | | | | | | t j t j d  | t f  n  | rxt t | |  \ } } } t j s[| t |    t _ | t |    t _ n  t  | |  |  t! |  n  | sd' S| d } | d | d } } | t _ | t _ | s|	 |
 | f St" | | g  st	 j
 d  d( St# | t$ | d   } t# | t$ | d   } t# | t$ | d   } t |  rQt	 j
 d |  nZ t |  st% | | t j | t	 d d t& | | | t	  n  t% | | t j | t	  t j rt |
  st |  rt' | | | | | | | | |
 | 
 \ }
 } n  t |	  rt j r| r+t	 j
 d  n  g  } i  } t( |   } x | D] } | j) d  sfPn  | j) d  rd | k rd | k r| j* d  d j* d  d } t+ | j* d  d j* d  d  }  |  | | <n  | j | j,    qMWWd  QXt- }! i  }" | rIx7| D],}# t j |#  }$ t | |$ d  }% |% t. |# <t |%  rxt	 j
 d |$ |% f  d  |" |$ <qt( |% d  }& | d j) d  s|& j/ | d d   n  xh d!   | D D]V }' |' j* d  d j* d  d } | | k r| | |$ k r|& j/ |' d   qqW|& j/ | d d   |& |" |$ <t }! qWn  t0 | | |" | | |!  }( t1   rt2 t1    ry2 t3 | | | | |  }) t j4 |) |( g |	  WqqXn  t2 |(  rt |	  rt5 j6 |( |	  qn  t j sEt |	  rt	 j7 d" |	  qEt2 |	  r/t	 j7 d#  n t	 j7 d  d  }	 n  t |
  ret	 j7 d$ |
  n t j s~t	 j7 d%  n  d  }
 |	 |
 | f S()   Ns   .beds   .covs   .physical.covsH     Will not search Structural Variations (--fast or --no-sv is specified)s     Existing BED-file: s0    may be corrupted. Bed file will be re-created. s     Using existing BED-file: s@     Will not search Structural Variations (needs paired-end reads)s&     Using existing reads coverage file: s)     Using existing physical coverage file: sf     Will not calculate coverage (--fast or --no-html, or --no-icarus, or --space-efficient is specified)i   is)     Failed searching structural variations.RI   R   s"     Using existing sorted SAM-file: RJ   s   not unmappeds%     Splitting SAM-file by references...R&   s   @SQs   SN:s   LN:s   	SN:s   	i    s   	LN:s   .sams,       Using existing split SAM-file for %s: %sR   s   
c         s  s0   |  ]& } | j  d   r d | k r | Vq d S(   s   @SQs   SN:N(   R'   (   t   .0t   h(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pys	   <genexpr>  s    s     Structural variations are in s&     No structural variations were found.s9     Coverage distribution along the reference genome is in s+     Failed to calculate coverage distribution(   NNN(   NNN(   NNN(8   R(   R   RY   R   t   bedR   R   t   phys_cov_fpatht   no_svRP   R^   R   R[   R\   R   t   forward_readst   interlaced_readsR   R   Rg   t   create_icarus_htmlt   check_cov_filet   no_read_statsRB   R`   R   R=   t
   sam_fpathst
   bam_fpathst	   enumerateR   R   t   add_statistics_to_reportR   R   R   R   R_   Ra   R   Rd   R'   R)   R   R   R   t   ref_sam_fpathsR   R   Rp   R   R   R   Re   Rl   t	   main_info(*   t   contigs_fpathsR   R   R   R   R]   R   Rt   R   Rv   R   t   physical_cov_fpathRw   t   is_correct_fileR   t   max_threads_per_jobR   R   RM   t   contigs_fpatht   parallel_align_argsR   Rx   Ru   R   R   Ry   t   headersR   R   R*   t   seq_namet
   seq_lengthR   R   Rr   t   cur_ref_namet   ref_sam_fpatht   ref_sam_fileR   R   t   gridss_sv_fpath(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   run_processing_readsC  s
   					H	"	
		#' &
 
			c         C  sz  t  j |   } | d  k	 r* t  j |  n d } t j } | r` | r` t | | d  d  } n0 | pv t | | d  } t | | d  d  } | d k r t | | d | d  } | j d d  } n  | s |
 r|	 rt	 d   |	 D  r|	 j
 |  n  t |  r1t | |  | | | t |
  } n t | |  | | | t |
  } | d  k	 } | rm| rmd% St t |  | d  } | rm|	 st d	   |	 D  rm| sKt |  rt j d
 | d |  qKt |  rKt  j t d  d d t |  | g d t | d  d t | d  t | |  | | | | t  qKn  t |  s]| rm| | | f Sn  t j d
 | d  t |  r| rt j d
 | d |  t | |  | | | t |
  } ne t |  r3| r3t j d
 | d |  t | | t j | t  t | |  | | | t |
  } n  | sGt |  r| r|
 rct j d  n t j d
 | d  t |   }  t |  } t j   } t j |  t |  | t  t |  | | | | |  } t |  d k rt  | | | | |  n^ t |  d k r]t! j" | d |  | d j d d  } t |  r]t! j" | |  q]n  t j d
 | d  t j |  t |  st j# d |  d | d  d& St | |  | | | t |
  } n | st |  rd' S|
 rt j d  n t j d
 | d  | rJt |  rJt$ |  rJt j d
 | d |  nF t | | d | d   } t% | |  } t | | | | t d! d  t  j& | d"  | smt |  rt j d
 | d |  nq t |  r?t  j t d  d d t |  | g d t | d  d t | d  t | |  | | | | t  n  |
 rUt j d#  qmt j d
 | d$  n  | | | f S((   Nt    is   .sams   .bamR   t   .c         s  s   |  ] } | j  d   Vq d S(   R   N(   t   endswith(   R   R   (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pys	   <genexpr>  s    s   .statc         s  s   |  ] } t  |  Vq d  S(   N(   R   (   R   t   fpath(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pys	   <genexpr>  s    s     s$   Using existing flag statistics file RL   t   flagstats   -tt   stdoutR   RN   RO   s   Pre-processing reads...s   Using existing SAM-file: s   Using existing BAM-file: s     Running BWA for reference...s   Running BWA...i   i    s   Done.s     Failed running BWA for s   . See s    for information.s#     Sorting SAM-file for reference...s   Sorting SAM-file...s   .correct.samRJ   s   bam files%     Analysis for reference is finished.s   Analysis is finished.(   NNN(   NNN(   NNN('   R   RY   R(   t   index_to_strR   t   reads_fpathsR   R   RZ   t   anyR   R   t   get_correct_names_for_chromsRP   R	   R   R   R^   Rb   Rc   RE   Rd   t   analyse_coverageR_   R`   R   Rh   t   getcwdt   chdirRo   t   align_readsR   t   merge_sam_filesRe   t   movet   errort   all_read_names_correctt   clean_read_namest   assert_file_exists(   R   t   main_output_dirRs   R   Rt   R`   Rx   Ru   RM   R   R   R   R   t   filenamet	   index_strR   R   t	   can_reuset   stats_fpatht   prev_dirR   t   tmp_bam_fpatht   correct_sam_fpath(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyR     s    !	(!#'"!!!'c         C  sJ  g  } | d k s | d k rF t  t j |  | | | | | d d n  | d k s^ | d k r t  t j |  | | | | | d d n  | d k s | d k r t  t j |  | | | | | d d n  | d k s | d k rt  t j |  | | | | | d d n  | d k s| d k rFt  t j |  | | | | | d d n  | S(   NR   R   t
   reads_typet   mpt   singlet   pacbiot   nanopore(   t   run_alignerR   t   paired_readst
   mate_pairst   unpaired_readst   pacbio_readst   nanopore_reads(   R   Rx   R   R]   Rt   R`   t   out_sam_fpaths(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyR   F  s    (((((c         C  s  t  d  d t |  } g  }	 g  }
 x~t |   D]p\ } } t | t  r | d k sf | d k r | d k r{ d } n d } t   d t |  | | d | } q | | d	 k r d
 n d | d | } n* | \ } } | d | d | d | } t | | t | d   } | j d d  } t |  sxt j	 t
 j |  d t | d  d t | d  d t n  t |  s8t |  st | | | | t d d  n  | d	 k r8t | d  } t j	 t d  d d d t |  d | | | g	 d t | d  d t t |  r5t j | |  q5q8n  | d	 k rt | | t j |   \ } } } | d  k	 r| t j k  r|	 j |  qn  |
 j |  q3 Wt |
  d k rt | |  } | j d d  } t j |
 d |  t j |
 d j d d  |  | j |  n | j |
  |	 rt j |  } t | | d  } t | d    } | j t t |	    Wd  QXn  d  S(   Nt   bwas    mem -t R   R  s    -ax map-pb s    -ax map-ont s    -t t    R   s    -p i   s   .sams   .bamR   R   RN   RO   RP   RJ   t   dedupRL   t   markdups   -rs   -ts   --tmpdiri    s   .is.txt(   t	   bwa_fpathRE   R   t
   isinstanceR   R   RZ   R   R   Rb   t   shlexR)   Rd   RP   R_   R(   Rc   R
   Re   R   R   RY   R   t   optimal_assembly_max_ISR   R   t   extendR   R   R=   (   t   read_fpathsR   Rx   R  R]   Rt   R`   R   t   bwa_cmdt   insert_sizest   temp_sam_fpathst   idxt   readst   presett   cmdlinet   read1t   read2t   output_fpathRu   t   bam_dedup_fpathR   R   t   final_sam_fpatht   final_bam_fpathRw   R   t   out(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyR  V  sX    	*+=$$ c   	      C  s   g  } xm |  D]e } t  |  r | j d d  } t | d  } t  |  sb t | | | t  n  | j |  q q Wt j t d  d d t	 |  | g | d t
 | d  d	 t t | | | | t  | S(
   Ns   .sams   .bamRI   RL   t   merges   -tRN   RO   RP   (   R   RZ   R   Ra   RP   R   R   Rb   Rc   RE   Rd   R_   (	   t   tmp_sam_fpathsRx   Ru   R`   Rt   t   tmp_bam_fpathst   tmp_sam_fpathR   t   tmp_bam_sorted_fpath(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyR     s    +c         C  s'  t  t  } g  | d <t |   } x| D]} | j   d } d | k ra t |  | d <q, d | k r | d c t |  8<q, d | k r | d c t |  8<q, d | k r | d c t |  8<q, d | k r | | d <q, d	 | k r| | d
 <q, d | k r>d | k r>| | d <t | | d  | d <q, d | k rn| | d <t | | d  | d <q, d | k r| | d <t | | d  | d <q, d | k rd | k r| | d <t | | d  | d <q, d | k r| | d <q, d | k r, | d j t |   q, q, WWd  QX| S(   Nt   coverage_thresholdsi    t   totalt	   secondaryt   supplementaryt
   duplicatesR  t   rightR  t   leftR   t   %t   mapped_pcnts   properly pairedt   pairedt   paired_pcntt
   singletonst   singletons_pcnts   different chrt   mapQt   misjointt   misjoint_pcntt   deptht   coverage(   R   R   Rd   R)   t   get_pcnt_readsR   t   float(   R   t   reads_statsR   R*   t   value(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   parse_reads_stats  sD    




$c         C  s,   | d k r( t  d t |   d |  Sd  S(   Ni    s   %.2fg      Y@(   R9  R   R(   (   R  t   total_reads(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyR8    s    c      	   C  s  d d l  m } d  } | r t j |  } t |  | d  } t |  r t |  } t | d  d k r t	 j
 d  q q n  xt |  D]\ } } | j |  }	 t j |  }
 t j |  } t |  |
 d  } | rX|	 j | j j | d  |	 j | j j | d  |	 j | j j | d  |	 j | j j | d	  |	 j | j j | d
  |	 j | j j | d  |	 j | j j | d  |	 j | j j | d  |	 j | j j | d  | d rXt | d  t t j  k rX|	 j | j j g  t t j  D] \ } } | d | ^ q |	 j | j j | d d  qXn  t |  sjq n  t |  } |	 j | j j | d  |	 j | j j | d  |	 j | j j  | d  |	 j | j j! | d  |	 j | j j" | d  |	 j | j j# | d  |	 j | j j$ | d	  t | d  d k rot	 j
 d t j% |  d d | d  n  |	 j | j j& | d
  |	 j | j j' | d  |	 j | j j( | d  |	 j | j j) | d  |	 j | j j* | d  | d r t | d  t t j  k r |	 j | j j+ g  t t j  D] \ } } | d | ^ q9 |	 j | j j, | d d  q q Wd  S(   Ni(   t	   reportings   .statR   i    s%     BWA: nothing aligned for reference.R.  R/  R0  R1  R2  R4  R5  R6  R&  R'  R,  R+  s     s   BWA: nothing aligned for s   's   '.(-   t
   quast_libsR>  R(   R   RY   R   R   R<  R   RP   R^   R   t   gett   label_from_fpatht	   add_fieldt   Fieldst   REF_MAPPED_READSt   REF_MAPPED_READS_PCNTt   REF_PROPERLY_PAIRED_READSt   REF_PROPERLY_PAIRED_READS_PCNTt   REF_SINGLETONSt   REF_SINGLETONS_PCNTt   REF_MISJOINT_READSt   REF_MISJOINT_READS_PCNTt	   REF_DEPTHR   R   R&  t   REF_COVERAGE__FOR_THRESHOLDSt   REF_COVERAGE_1X_THRESHOLDt   TOTAL_READSt
   LEFT_READSt   RIGHT_READSt   MAPPED_READSt   MAPPED_READS_PCNTt   PROPERLY_PAIRED_READSt   PROPERLY_PAIRED_READS_PCNTR   t
   SINGLETONSt   SINGLETONS_PCNTt   MISJOINT_READSt   MISJOINT_READS_PCNTt   DEPTHt   COVERAGE__FOR_THRESHOLDSt   COVERAGE_1X_THRESHOLD(   R]   R   R   R>  t   ref_reads_statsRw   R   RM   R   t   reportt   assembly_namet   assembly_labelt   it	   thresholdR:  (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyR     sb    )1$-)1c      
   C  s  t  j |  } t |  | | | |  } t | |  }	 t |  | d  }
 t | |
 |	 | | d t d } g  t j D] } d ^ qu } t	 |
   } x | D] } | j
   } t | d  t | d  } } | d d k r | | | 7} x? t t j  D]+ \ } } | | k r | | c | 7<q q Wq q WWd  QXt	 | d  ] } | j d t |   x< t t j  D]+ \ } } | j d	 | | d
 | f  qsWWd  QXd  S(   Ns
   .genomecovt   print_all_positionsi    i   i   t   genomeRO   s	   %s depth
s   %.2f coverage >= %sx
id   (   R   RY   t
   bam_to_bedR   R   t   calculate_genome_covR   R   R&  Rd   R)   R   R9  R   R   (   Rs   R   t	   chr_namesRu   R   Rt   RP   R   Rv   t   chr_len_fpathR   t	   avg_depthRb  t   coverage_for_thresholdsR   R*   t   lR6  t   genome_fractionRa  t   out_f(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyR     s(    !$c      
   C  s   t  | d  } t |  s t j d  t |  | d  } t | | t j | t d d t j t j f t |  | d  }	 t	 | |	 | t d d t
 |  | d	 |	 | t d
 t }
 t |
 | | | t  n  | S(   NRW   s"     Calculating physical coverage...s   .physical.bamRJ   se   proper_pair and not supplementary and not duplicate and template_length > %d and template_length < %ds   .physical.sorted.bamt	   sort_rules   -ns	   .physicalt   bedpe(   R   R   RP   R^   R   R_   R   R`   t	   MAX_PE_ISRa   Re  Rg   Rf  (   Rs   Rw   Ru   R   Rt   R   Rh  t   raw_cov_fpatht   bam_filtered_fpatht   bam_filtered_sorted_fpathRv   (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   get_physical_coverage  s    "c      	   C  s  | d } t  | |  } t |  s t j d  t |  s t |  s` t | | | t  n  t | | | | t  t j | d  n  |
 r t | |
 |  n  | r t	 | | |  q n  t |	  r| rt
 |  | | | | |	 |  } t	 | |	 |  n  | |	 f S(   Nt   _raws     Calculating reads coverage...s   coverage file(   R   R   RP   R^   Ra   Rf  R   R   t   print_uncovered_regionst   proceed_cov_fileRt  (   Rs   R   Rw   Ru   Ry   R   Rt   R   R   R   R   R   Rq  Rh  (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyR   %  s$    
c         C  s%  t  t  } t   } d } t |  d  } t | d  } x| D]} t | j    }	 |	 d }
 t t |	 d   } |
 | k r | d 7} t |  | |
 <| r | |
 n |
 } | j d | d | |
 d  n  t	 |	  d	 k r0t |	 d  t |	 d
  } } | |
 j
 | g | |  n | |
 j |  t	 | |
  t k rF t	 | |
  t	 | |
  t } xc t d | t  D]O } t | |
 | | t ! t } | j d j | |
 t |  d g   qW| |
 | t | |
 <qF qF Wt j st j |   n  Wd  QXWd  QXd  S(   Ni    t   rR   ii   t   #R
  s   
i   i   (   R   t   listt   dictRd   R)   R   R9  RE   R   R   R  R   t   COVERAGE_FACTORt   ranget   sumR   R   t   debugRh   t   remove(   Rq  R   R   t	   chr_deptht   used_chromosomest	   chr_indext   in_coveraget   out_coverageR*   t   fst   nameR6  t   correct_nameR   R"   t	   max_indexRM   t	   cur_depth(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyRw  ;  s4    	

$!".	c         C  s(   t  |  d  } t  |  d  } | | f S(   Ni
   iZ   (   R   (   R  t   decile_1t   decile_9(    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   get_max_min_isY  s    R   c      
   C  s  t  | | | r d | n d d  } t |  r yb t |  = } t | j    } t | j    } t | j    } Wd  QX| r | | | f SWq q Xn  g  }	 d d d d g }
 t |    } x t |  D] \ } } | d k r Pn  | j d	  rq n  | j d
  } | d } | |
 k r2q n  t t | d   } |	 j	 |  q WWd  QX|	 r|	 j
   t |	  } | d k rd St |	  \ } } t t j |  } t | d  L } | j t |  d  | j t |  d  | j t |  d  Wd  QX| | | f Sd S(   NR   R   s   .is.txtt   99t   147t   83t   163i@B R&   s   	i   i   i    R   s   
(   NNN(   NNN(   R   R   Rd   R   R   R   R'   R)   t   absR   t   sortR   R(   R  R=   R   t   optimal_assembly_min_ISR   RE   (   Rx   R]   Rw   t   reads_suffixR   R   R   t   min_insert_sizet   max_insert_sizeR  t   mapped_flagst   sam_inRa  Rk  R  t   flagt	   median_isRm  (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyR   _  sL    '

c         C  s-  t  t  } t |    } x | D] } t | j    } | d } t t | d   } | rj | | n | }	 t |  d k r" | d k r" | |	 j | d | d f  q" q" WWd  QXt | d  ^ }
 xT | j   D]F \ } } x7 | D]/ \ } } |
 j	 d j
 | | | g  d  q Wq WWd  QXd  S(	   Ni    ii   i   i   R   s   	s   
(   R   Rz  Rd   R)   R   R9  R   R   t   itemsR   R   (   Rq  R   R   t   uncovered_regionsR  R*   R  R  R6  R  Rm  t   chromt   regionsR   R"   (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyRv    s    
,c      
   C  s  | r | a  n  t  j   t  j d  t t   sC t  j d  d St |  s_ t j |  n  t t  t	 j
  t | d  } t |  s t j |  n  t	 j st	 j rt g  t t	 j t	 j  D]$ \ } } t | | g | t   ^ q  rt  j d  d Sn  t | d  } t | d  }	 t | d  j   t |	 d  j   t  j d d	 | |	 f  t | |  | t | | | |	  \ }
 } } t	 j st j | d
 t n  t  j d  |
 | | f S(   Ns   Running Reads analyzer...s   Failed reads analysisR   s5     Read names are discordant, skipping reads analysis!s   reads_stats.logs   reads_stats.errR   s     s   Logging to files %s and %s...RR   s   Done.(   NNN(   NNN(   RP   t   print_timestampR   t   compile_reads_analyzer_toolsR(   R   Rh   Ri   t   download_gridssR   R   R   t   mkdirt   no_checkR   R   t   zipt   reverse_readst   paired_reads_names_are_equalR   Rd   R   R^   R   R   R  Re   Rf   Rg   (   R   R   R]   R   t   external_loggerR   R  R  R   Rt   Rv   R   R   (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   do  s<    	
		A	(G   t
   __future__R    R   Rh   t   reRe   R  t   collectionsR   t   mathR   t   os.pathR   R   R   R   R   R	   R
   R?  R   R   t   quast_libs.ca_utils.miscR   R   t   quast_libs.fastaparserR   t   quast_libs.ra_utils.misct   quast_libs.qutilsR   R   R   R   R   R   R   R   t   quast_libs.logR   t   quast_libs.reportingR   t   LOGGER_DEFAULT_NAMERP   R   R|  t   objectR   R1   R(   R   R   R   R   R   R   R   R   R  R   R<  R8  R   R   Rt  Rg   R   Rw  R  R   Rv  R  (    (    (    sG   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/reads_analyzer.pyt   <module>   sR   4
::3		L<	d		5		%		7				*	