3
bF                  @   s<  d dl mZ d dlZd dlZd dlZd dlZyd dlmZ W n  ek
r\   d dl	mZ Y nX yd dl
Z
W n  ek
r   d dlm
Z
 Y nX ejd  dkrd dlZd dlmZ d dlmZ eejZdd	 Zd
d Zdd Zdd Zd&ddZdd Zdd Zdd Zdd Zdd Zdd Zd'd d!Zd"d# Z d$d% Z!dS )(    )with_statementN)OrderedDict)bz2   )qconfig)
get_loggerc             C   s  d }t jj| \}}t j| t js6tjd|  ddd |dkrPtj| dd}n@|dkrrt	j
| dd}t|}n|dkrPytj| dd}W n8 tk
r   tj \}}}tjdt| dd Y nX |j }t|dkrtjd|  dd t|dkrtjd|d   y|j|d }t|}W n$ tk
rL   tjddd Y nX n@yt| }W n2 tk
r   tj \}}}tj|dd Y nX |S )NzPermission denied accessing T   )	to_stderrexit_with_code.gz.gzipZrt)mode.bz2.bzip2r.zipzCan't open zip file: )r
   r   z Reading %s: zip archive is emptyz3Zip archive must contain exactly one file. Using %sz=Use python 2.6 or newer to work with contigs directly in zip.   )	exit_code)r   r   )r   r   )r   )ospathsplitextaccessR_OKloggererrorgzipopenr   BZ2File_read_compressed_filezipfileZipFile	Exceptionsysexc_infostrZnamelistlenwarningAttributeErrorIOError	exception)fpath
fasta_file_extZzfileexc_type	exc_valuenames r1   @/home/psgendb/BIRCHDEV/pkg/quast-5.2.0/quast_libs/fastaparser.py_get_fasta_file_handler!   s>    
r3   c             C   s&   t jd dkr"tjtj| j S | S )Nr   r   )r"   version_infoioTextIOWrapperBytesIOread)Zcompressed_filer1   r1   r2   r   M   s    r   c             C   s.   y| dd j  d S  tk
r(   dS X dS )zk
        Extracts name from fasta entry line:
        ">chr1  length=100500; coverage=15;" ---> "chr1"
    r   Nr    )split
IndexError)liner1   r1   r2   __get_entry_nameS   s    r=   c             C   s   t  }d}d}t| }xv|D ]n}|jddkr:|jd}n|g}xH|D ]@}|sPqF|d dkrv|rl|||< d}t|}qF|t|j 7 }qFW qW |||< |j  |S )ze
        Takes filename of FASTA-file
        Returns list of lengths of sequences in FASTA-file
    r   Nr   >)r   r3   findr:   r=   r%   stripclose)r*   chr_lengthslchr_namer+   raw_linelinesr<   r1   r1   r2   get_chr_lengths_from_fastafile^   s(    


rI   Fc       	      C   s~   d}i }i }xft | D ]Z\}}|j d }t|}||7 }tdd t|D ||< |rh|t|| 8 }|||< qW |||fS )Nr   c             s   s"   | ]\}}|d kr|d V  qdS )Nr   Nr1   ).0xsr1   r1   r2   	<genexpr>   s    z#get_genome_stats.<locals>.<genexpr>)
read_fastar:   r%   set	enumerate)	fasta_fpathZskip_nsZgenome_sizeZreference_chromosomesZns_by_chromosomesnameseqrF   chr_lenr1   r1   r2   get_genome_stats|   s    rV   c             C   sT  d}d}d}d }| d }g }t | }x|D ]}|jddkrJ|jd}	n|g}	x|	D ]}
|
s`qV|
d dkr|r|j|||t|j t|g ||7 }d}d}t|
}|t|
7 }qV|s|
}|t|
j 7 }|t|
7 }qVW q,W W d Q R X |j|||t|j t|g t |d4}x,|D ]$}|jdjdd	 |D d
  qW W d Q R X d S )Nr   z.fair>   r   r?   w	c             S   s   g | ]}t |qS r1   )r$   )rK   Zfsr1   r1   r2   
<listcomp>   s    z#create_fai_file.<locals>.<listcomp>
r@   )	r   rA   r:   appendr%   rB   r=   writejoin)rR   rE   Ztotal_offsetZ
chr_offsetrF   Z	fai_fpathZ
fai_fieldsZin_frG   rH   r<   Zchr_lineout_fZfieldsr1   r1   r2   create_fai_file   s<    


  
r_   c             C   s~   t jj|st j| d}xRt| D ]F}|d dkr\|r@|j  tt jj|t|d d}|r$|j| q$W |rz|j  dS )z
        Takes filename of FASTA-file and directory to output
        Creates separate FASTA-files for each sequence in FASTA-file
        Returns nothing
        Oops, similar to: pyfasta split --header "%(seqid)s.fasta" original.fasta
    Nr   r?   z.farW   )	r   r   isdirmkdirr   rC   r]   r=   r\   )r*   output_dirpathZoutFiler<   r1   r1   r2   split_fasta   s    
rc   c             c   s   d}g }d}t | }xj|D ]b}|jd}xR|D ]J}|s8q.|d dkrj|sX|dj|fV  d}t|}g }q.|j|j  q.W qW |s|r|dj|fV  |j  dS )zH
        Generator that returns FASTA entries in tuples (name, seq)
    Tr9   r>   r   r?   FN)r3   r:   r]   r=   r[   rB   rC   )r*   firstrT   rS   r+   rG   rH   r<   r1   r1   r2   rO      s&    


rO   c             C   s,   g }x"t | D ]\}}|j||f qW |S )zB
        Returns list of FASTA entries (in tuples: name, seq)
    )rO   r[   )r*   list_seqrS   rT   r1   r1   r2   read_fasta_one_time   s    rf   c             C   sh   t | }g }xD|D ]<}|jd}x,|D ]$}|s0q&|d dkr&|j|j  q&W qW |j  dj|}|S )z
        Returns string
    r>   r   r?   r9   )r3   r:   r[   rB   rC   r]   )r*   r+   re   rG   rH   r<   Z	fasta_strr1   r1   r2   read_fasta_str   s    



rg   c             C   sP   xJ| D ]B\}}t d|  x,tdt|dD ]}t |||d   q,W qW d S )Nz>%sr   <   )printranger%   )fastarS   rT   ir1   r1   r2   print_fasta   s    rm   rW   c             C   sj   t | |}xR|D ]J\}}|jd|  x2tdt|dD ]}|j|||d  d  q8W qW |j  d S )Nz>%s
r   rh   rZ   )r   r\   rj   r%   rC   )r*   rk   r   outfilerS   rT   rl   r1   r1   r2   write_fasta  s    
"ro   c             C   s   dddddd| j   S )NTAGCrJ   )rq   rp   rs   rr   rJ   )upper)Zletterr1   r1   r2   comp  s    ru   c                s*   t tdd dj fddt| D S )NZ
ATCGNatcgnZ
TAGCNtagcnr9   c             3   s   | ]} j |d V  qdS )r9   N)get)rK   Z
nucleotide)cr1   r2   rN     s    zrev_comp.<locals>.<genexpr>)dictzipr]   reversed)rT   r1   )rw   r2   rev_comp  s    r{   )F)rW   )"
__future__r   r   r"   r   r   collectionsr   ImportErrorZ%quast_libs.site_packages.ordered_dictr   Zquast_libs.site_packagesr4   r5   
quast_libsr   quast_libs.logr   LOGGER_DEFAULT_NAMEr   r3   r   r=   rI   rV   r_   rc   rO   rf   rg   rm   ro   ru   r{   r1   r1   r1   r2   <module>   s>   
,
#


