a
    bF                      @   s8  d dl mZ d dlZd dlZd dlZd dlZzd dlmZ W n eyZ   d dl	mZ Y n0 zd dl
Z
W n ey   d dlm
Z
 Y n0 ejd  dkrd dlZd dlmZ d dlmZ eejZdd	 Zd
d Zdd Zdd Zd&ddZdd Zdd Zdd Zdd Zdd Zdd Zd'd d!Zd"d# Z d$d% Z!dS )(    )with_statementN)OrderedDict)bz2   )qconfig)
get_loggerc                 C   s  d }t j| \}}t | t js6tjd|  ddd |dv rPtj| dd}n:|dv rrt	j
| d	d}t|}n|d
v rLztj| d	d}W n6 ty   t \}}}tjdt| dd Y n0 | }t|dkrtjd|  dd t|dkrtd|d   z||d }t|}W n" tyH   tjddd Y n0 n>zt| }W n0 ty   t \}}}tj|dd Y n0 |S )NzPermission denied accessing T   )	to_stderrexit_with_code)z.gzz.gzipZrt)mode)z.bz2z.bzip2r)z.zipzCan't open zip file: )r
   r   z Reading %s: zip archive is emptyz3Zip archive must contain exactly one file. Using %sz=Use python 2.6 or newer to work with contigs directly in zip.   )	exit_code)ospathsplitextaccessR_OKloggererrorgzipopenr   BZ2File_read_compressed_filezipfileZipFile	Exceptionsysexc_infostrZnamelistlenwarningAttributeErrorIOError	exception)fpath
fasta_file_extZzfileexc_type	exc_valuenames r,   @/home/psgendb/BIRCHDEV/pkg/quast-5.2.0/quast_libs/fastaparser.py_get_fasta_file_handler!   s>    
r.   c                 C   s&   t jd dkr"tt|  S | S )Nr   r   )r   version_infoioTextIOWrapperBytesIOread)Zcompressed_filer,   r,   r-   r   M   s    r   c                 C   s0   z| dd   d W S  ty*   Y dS 0 dS )zk
        Extracts name from fasta entry line:
        ">chr1  length=100500; coverage=15;" ---> "chr1"
    r   Nr    )split
IndexError)liner,   r,   r-   __get_entry_nameS   s    r8   c                 C   s   t  }d}d}t| }|D ]j}|ddkr8|d}n|g}|D ]@}|sLqB|d dkrr|rh|||< d}t|}qB|t| 7 }qBq|||< |  |S )ze
        Takes filename of FASTA-file
        Returns list of lengths of sequences in FASTA-file
    r   N>)r   r.   findr5   r8   r    stripclose)r%   chr_lengthslchr_namer&   raw_linelinesr7   r,   r,   r-   get_chr_lengths_from_fastafile^   s(    
rD   Fc           	      C   sz   d}i }i }t | D ]Z\}}| d }t|}||7 }tdd t|D ||< |rf|t|| 8 }|||< q|||fS )Nr   c                 s   s"   | ]\}}|d kr|d V  qdS )Nr   Nr,   ).0xsr,   r,   r-   	<genexpr>       z#get_genome_stats.<locals>.<genexpr>)
read_fastar5   r    set	enumerate)	fasta_fpathZskip_nsZgenome_sizeZreference_chromosomesZns_by_chromosomesnameseqrA   chr_lenr,   r,   r-   get_genome_stats|   s    
rR   c                 C   sr  d}d}d}d }| d }g }t | }|D ]}|ddkrH|d}	n|g}	|	D ]}
|
s\qR|
d dkr|r||||t| t|g ||7 }d}d}t|
}|t|
7 }qR|s|
}|t|
 7 }|t|
7 }qRq*W d    n1 s0    Y  ||||t| t|g t |d:}|D ]$}|ddd	 |D d
  q(W d    n1 sd0    Y  d S )Nr   z.fair9   r:   r;   w	c                 S   s   g | ]}t |qS r,   )r   )rF   Zfsr,   r,   r-   
<listcomp>   rJ   z#create_fai_file.<locals>.<listcomp>
)	r   r<   r5   appendr    r=   r8   writejoin)rN   r@   Ztotal_offsetZ
chr_offsetrA   Z	fai_fpathZ
fai_fieldsZin_frB   rC   r7   Zchr_lineout_ffieldsr,   r,   r-   create_fai_file   s<    
 . r\   c                 C   sz   t j|st | d}t| D ]F}|d dkrZ|r>|  tt j|t|d d}|r"|| q"|rv|  dS )z
        Takes filename of FASTA-file and directory to output
        Creates separate FASTA-files for each sequence in FASTA-file
        Returns nothing
        Oops, similar to: pyfasta split --header "%(seqid)s.fasta" original.fasta
    Nr   r;   z.farS   )	r   r   isdirmkdirr   r>   rY   r8   rX   )r%   output_dirpathZoutFiler7   r,   r,   r-   split_fasta   s    
r`   c                 c   s   d}g }d}t | }|D ]^}|d}|D ]J}|s4q*|d dkrf|sT|d|fV  d}t|}g }q*||  q*q|s|r|d|fV  |  dS )zH
        Generator that returns FASTA entries in tuples (name, seq)
    Tr4   r9   r   r;   FN)r.   r5   rY   r8   rW   r=   r>   )r%   firstrP   rO   r&   rB   rC   r7   r,   r,   r-   rK      s&    
rK   c                 C   s(   g }t | D ]\}}|||f q|S )zB
        Returns list of FASTA entries (in tuples: name, seq)
    )rK   rW   )r%   list_seqrO   rP   r,   r,   r-   read_fasta_one_time   s    rc   c                 C   s`   t | }g }|D ]8}|d}|D ]$}|s,q"|d dkr"||  q"q|  d|}|S )z
        Returns string
    r9   r   r;   r4   )r.   r5   rW   r=   r>   rY   )r%   r&   rb   rB   rC   r7   Z	fasta_strr,   r,   r-   read_fasta_str   s    

rd   c                 C   sH   | D ]>\}}t d|  tdt|dD ]}t |||d   q(qd S )Nz>%sr   <   )printranger    )fastarO   rP   ir,   r,   r-   print_fasta   s    rj   rS   c                 C   sb   t | |}|D ]F\}}|d|  tdt|dD ]}||||d  d  q4q|  d S )Nz>%s
r   re   rV   )r   rX   rg   r    r>   )r%   rh   r   outfilerO   rP   ri   r,   r,   r-   write_fasta  s    
rl   c                 C   s   dddddd|    S )NTAGCrE   )rn   rm   rp   ro   rE   )upper)Zletterr,   r,   r-   comp  s    rr   c                    s*   t tdd d fddt| D S )NZ
ATCGNatcgnZ
TAGCNtagcnr4   c                 3   s   | ]}  |d V  qdS )r4   N)get)rF   Z
nucleotidecr,   r-   rI     rJ   zrev_comp.<locals>.<genexpr>)dictziprY   reversed)rP   r,   rt   r-   rev_comp  s    ry   )F)rS   )"
__future__r   r   r   r   r   collectionsr   ImportErrorZ%quast_libs.site_packages.ordered_dictr   Zquast_libs.site_packagesr/   r0   
quast_libsr   quast_libs.logr   LOGGER_DEFAULT_NAMEr   r.   r   r8   rD   rR   r\   r`   rK   rc   rd   rj   rl   rr   ry   r,   r,   r,   r-   <module>   s>   
,
#


