ó
£žbc           @   s  d  d l  Z  d  d l Z d  d l Z d  d l m Z m Z d  d l m Z d  d l m	 Z	 e	 e j
 ƒ Z e j d e j ƒ Z e j d e j ƒ Z e j d e j ƒ Z e j d e j ƒ Z d „  Z d	 „  Z d
 „  Z d „  Z d „  Z d e f d „  ƒ  YZ d d d „  ƒ  YZ d S(   iÿÿÿÿN(   t   qutilst   qconfig(   t   open_gzipsafe(   t
   get_loggersC   (?P<seqname>\S+)\s+(?P<gene_id>.+)\s+(?P<start>\d+)\s+(?P<end>\d+)$sƒ   (?P<seqname>\S+)\s+\S+\s+(?P<feature>\S+)\s+(?P<start>\d+)\s+(?P<end>\d+)\s+\S+\s+(?P<strand>[\+\-\.]?)\s+\S+\s+(?P<attributes>.+)$s%   (?P<number>\d+)\.\s*(?P<name>\S+)\s*$c      	   C   s¨  |  s t  j j |  ƒ r g  St |  d ƒ } g  } | j ƒ  j ƒ  } x1 | d k sc | j d ƒ rx | j ƒ  j ƒ  } qH W| j d ƒ |  j d ƒ s¤ |  j d ƒ r³ t	 | ƒ } nç t
 j | ƒ sÑ t j | ƒ rà t | ƒ } nº t j | ƒ rt | | ƒ } n™ t j | ƒ rty t | ƒ } Wqšt k
 rpt j ƒ  \ } } } t j d | ƒ t j |  d ƒ g  } qšXn& t j d	 | d
 ƒ t j |  d ƒ | j ƒ  | S(   Nt   rt    t   #i    t   beds   bed.gzs   Parsing exception s    was skippeds   Incorrect format of sA   's file! GFF, NCBI and the plain TXT format accepted. See manual.(   t   ost   patht   existsR   t   readlinet   rstript
   startswitht   seekt   endswitht	   parse_bedt   txt_pattern_git   matcht   txt_patternt	   parse_txtt   gff_patternt	   parse_gfft   ncbi_start_patternt
   parse_ncbit   ParseExceptiont   syst   exc_infot   loggert   warningt   close(   t   fpatht   featuret
   genes_filet   genest   linet   exc_typet	   exc_valuet   _(    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genes_parser.pyt   get_genes_from_file   s4    
c         C   sH  t  j d t  j ƒ } t  j d t  j ƒ } t  j d t  j ƒ } g  } |  j ƒ  } xð| d k rCxA | j ƒ  d k s„ | j d ƒ r£ | d k r” Pn  |  j ƒ  } qc Wt j | j ƒ  ƒ } x | sÚ t j | j ƒ  ƒ } q¼ Wt d t	 | j
 d ƒ ƒ d t j | j
 d ƒ ƒ ƒ } g  } |  j ƒ  } xE | d k rjt j | j ƒ  ƒ rj| j | j ƒ  ƒ |  j ƒ  } q&Wx¤| D]œ}	 |	 j d ƒ r·t  j | |	 ƒ } | r·| j
 d	 ƒ | _ q·n  |	 j d
 ƒ r¦t  j | |	 ƒ } | rz| j
 d ƒ | _ t	 | j
 d ƒ ƒ | _ t	 | j
 d ƒ ƒ | _ d t | j ƒ }
 | j r£| j j |
 ƒ r£| j t |
 ƒ | _ | j j d ƒ q£q¦t j d t | j ƒ d | j d ƒ n  |	 j d ƒ rrt  j | |	 ƒ } | râ| j
 d ƒ | _ qt j d t | j ƒ d | j d ƒ qrqrW| j d  k	 rT | j d  k	 rT | j | ƒ qT qT W| S(   NsM   Annotation: (?P<seqname>.+) \((?P<start>\d+)\.\.(?P<end>\d+)(, complement)?\)s    Chromosome: (?P<chromosome>\S+);s   ID: (?P<id>\d+)R   s   ##t   numbert   names   Chromosome:t
   chromosomes   Annotation:t   seqnamet   startt   endt
   Chromosomet    s    ,s   Wrong NCBI annotation for gene s   . s   . Skipping this gene.s   ID:t   ids.   Can't parse gene's ID in NCBI format. Gene is s   . Skipping it.s   Chromosome (   t   ret   compilet   IR   R   R   R   R   t   Genet   intt   groupR    t   correct_namet   appendR*   R+   R,   R-   t   strt   lent   lstripR   R   R(   R)   R0   t   None(   t	   ncbi_filet   annotation_patternt   chromosome_patternt
   id_patternR"   R#   t   mt   genet   the_rest_linest	   info_linet   to_trim(    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genes_parser.pyR   L   sX    $	%,0c      	   C   sò   g  } d } xß |  D]× } | j  ƒ  } t j | ƒ p@ t j | ƒ } | r t d | d t j | j d ƒ ƒ ƒ } | d 7} t | j d ƒ ƒ } t | j d ƒ ƒ } t	 | | ƒ | _
 t | | ƒ | _ | j d ƒ | _ | j | ƒ q q W| S(   Ni    R(   R+   i   R,   R-   t   gene_id(   R   R   R   R   R4   R    R7   R6   R5   t   minR,   t   maxR-   R0   R8   (   t   fileR"   R(   R#   RA   RB   t   st   e(    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genes_parser.pyR      s     
c         C   sš  g  } d } x‡|  D]} t  j | j ƒ  ƒ } | r | t j k sd | j d ƒ j ƒ  | j ƒ  k r t d t j	 | j d ƒ ƒ d t
 | j d ƒ ƒ d t
 | j d ƒ ƒ ƒ } | j d ƒ j d ƒ } x¥ | D] } | rÎ | d k rÎ d	 | k rÎ | j d	 ƒ d }	 | t |	 ƒ d
 }
 |	 j ƒ  d k r7|
 | _ n  |	 j ƒ  d k rU|
 | _ n  |
 | j |	 j ƒ  <qÎ qÎ W| | _ | d
 7} | j | ƒ q q W| S(   Ni    R    R+   R,   R-   t
   attributest   ;R   t   =i   R0   R)   (   R   R   R   R   t   ALL_FEATURES_TYPER6   t   lowerR4   R    R7   R5   t   splitR:   R0   R)   RL   R(   R8   (   RI   R    R"   R(   R#   RA   RB   RL   t   attrt   keyt   val(    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genes_parser.pyR   ¬   s,    6	
c   	      C   s  g  } d } xò |  D]ê } | j  ƒ  j ƒ  } | r | d } t | d ƒ } t | d ƒ } t d | d t j | ƒ ƒ } t | | ƒ | _ t | | ƒ | _	 t
 | ƒ d k r¹ | d n d  | _ | | k  rÚ d | _ n	 d | _ | d 7} | j | ƒ q q W| S(	   Ni    i   i   R(   R+   i   t   +t   -(   R   RQ   R5   R4   R    R7   RG   R,   RH   R-   R:   R<   R0   t   strandR8   (	   RI   R"   R(   R#   t   fsR+   RJ   RK   RB   (    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genes_parser.pyR   Ë   s$    
%	
R   c           B   s   e  Z d  „  Z d „  Z RS(   c         O   s&   t  t |  ƒ j | | Ž  | |  _ d  S(   N(   t   superR   t   __init__t   value(   t   selfR[   t   argst   kwargs(    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genes_parser.pyRZ   æ   s    c         C   s   t  |  j ƒ S(   N(   t   reprR[   (   R\   (    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genes_parser.pyt   __str__é   s    (   t   __name__t
   __module__RZ   R`   (    (    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genes_parser.pyR   å   s   	R4   c           B   s5   e  Z d  d  d d d d  d d d d d d d „ Z RS(   R   c         C   s|   | |  _  | |  _ | |  _ | |  _ | |  _ | |  _ | |  _ | |  _ |	 |  _ |
 |  _	 | |  _
 t ƒ  |  _ | |  _ d  S(   N(   R0   R+   R,   R-   R(   R)   R*   t   contigRW   t   seqt   proteint   dictRL   t   is_full(   R\   R0   R+   R,   R-   R(   R)   R*   Rc   RW   Rd   Re   Rg   (    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genes_parser.pyRZ   î   s    											N(   Ra   Rb   R<   RZ   (    (    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genes_parser.pyR4   í   s   (    (   R   R1   R   t
   quast_libsR    R   t   quast_libs.ca_utils.miscR   t   quast_libs.logR   t   LOGGER_DEFAULT_NAMER   R2   R3   R   R   R   R   R'   R   R   R   R   t	   ExceptionR   R4   (    (    (    sE   /home/psgendb/BIRCHDEV/install/quast-5.2.0/quast_libs/genes_parser.pyt   <module>   s"   	1	C			