U
    &cqF                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	m
Z
 ddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ ddlZdd
lmZ ddlmZ ddlmZ e e!Z"zjej#$dd% Z&ej#$dd% Z'e'dkr0e&dkr0ddl(Z(dZ)e"*d+e(j ne"*d dZ)W n e,k
rZ   dZ)Y nX zej#$dd% Z&ej#$dd% Z'e&dkre'dkrddl-Z.e/e.dre0e.jd dkst1dZ2e"*d+e.j ne"*d dZ2W n e,e1fk
r   dZ2Y nX zddl3m4Z4 e4 Z5W n< e,k
r\   ej67e8dej69e8dddZ5Y nX ej69e5d Z:e8d!e:Z;e8d"e;Z<e8d#e<Z=d$Z>d%Z?d&Z@d'ZAd(ZBdgdggdgdgggZCd)d*dddgddd+ddgdddd,d-ggZDdddddgdddddgdddddggZEd.ZFd/ZGd0d1 ZHd2d3 ZId4d5 ZJd6d7 ZKd8d9 ZLd:d; ZMdQeNeNeNd<d=d>ZOdRd?d@ZPdSdAdBZQdTeeN dCdDdEZRdUdFdGZSdVeeN dCdIdJZTG dKdL dLeUZVdMdN ZWdOdP ZXdS )Wz
Utilities for working with the local dataset cache.
This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp
Copyright by the AllenNLP authors.
    N)contextmanager)partialwraps)sha256)Path)Optional)urlparse)ZipFile
is_zipfile)FileLock)tqdm   )__version__USE_TFAUTO	USE_TORCH)1ONYESr   )r   r   r   TzPyTorch version {} available.z'Disabling PyTorch because USE_TF is setFr      z TensorFlow version {} available.z-Disabling Tensorflow because USE_TORCH is set)_get_torch_homeZ
TORCH_HOMEXDG_CACHE_HOMEz~/.cachetorchZtransformersPYTORCH_PRETRAINED_BERT_CACHEPYTORCH_TRANSFORMERS_CACHETRANSFORMERS_CACHEzpytorch_model.binztf_model.h5z
model.ckptzconfig.jsonzmodelcard.json               z3https://s3.amazonaws.com/models.huggingface.co/bertzhttps://cdn.huggingface.coc                   C   s   t S N)_torch_available r#   r#   ;/tmp/pip-unpacked-wheel-ymerj3tt/transformers/file_utils.pyis_torch_available\   s    r%   c                   C   s   t S r!   )_tf_availabler#   r#   r#   r$   is_tf_available`   s    r'   c                     s    fdd}|S )Nc                    s$   d  | jd k	r| jnd | _| S N )join__doc__fndocstrr#   r$   docstring_decoratore   s     z1add_start_docstrings.<locals>.docstring_decoratorr#   r/   r0   r#   r.   r$   add_start_docstringsd   s    r2   c                     s    fdd}|S )Nc                    sP   d | jdd }d |}d}|| d  | jd k	rD| jnd | _| S )Nz:class:`~transformers.{}`.r   zH   The {} forward method, overrides the :func:`__call__` special method.a@  

    .. note::
        Although the recipe for forward pass needs to be defined within
        this function, one should call the :class:`Module` instance afterwards
        instead of this since the former takes care of running the
        pre and post processing steps while the latter silently ignores them.
        r)   )format__qualname__splitr*   r+   )r-   
class_nameZintroZnoter.   r#   r$   r0   m   s
    
(z=add_start_docstrings_to_callable.<locals>.docstring_decoratorr#   r1   r#   r.   r$    add_start_docstrings_to_callablel   s    r8   c                     s    fdd}|S )Nc                    s   | j d  | _ | S r(   )r+   r*   r,   r.   r#   r$   r0      s    z/add_end_docstrings.<locals>.docstring_decoratorr#   r1   r#   r.   r$   add_end_docstrings~   s    r9   c                 C   s   t | }|jdkS )N)httphttps)r   scheme)url_or_filenameparsedr#   r#   r$   is_remote_url   s    r?   )model_idfilenamereturnc                 C   sD   |rt nt}d| k}|r,| d|  d| S | d|  d| S dS )a  
    Resolve a model identifier, and a file name, to a HF-hosted url
    on either S3 or Cloudfront (a Content Delivery Network, or CDN).

    Cloudfront is replicated over the globe so downloads are way faster
    for the end user (and it also lowers our bandwidth costs). However, it
    is more aggressively cached by default, so may not always reflect the
    latest changes to the underlying file (default TTL is 24 hours).

    In terms of client-side caching from this library, even though
    Cloudfront relays the ETags from S3, using one or the other
    (or switching from one to the other) will affect caching: cached files
    are not shared between the two because the cached file's name contains
    a hash of the url.
    /-N)CLOUDFRONT_DISTRIB_PREFIXS3_BUCKET_PREFIX)r@   rA   Zuse_cdnZendpointZlegacy_formatr#   r#   r$   hf_bucket_url   s
    rG   c                 C   sV   |  d}t|}| }|r@| d}t|}|d|  7 }| drR|d7 }|S )a  
    Convert `url` into a hashed filename in a repeatable way.
    If `etag` is specified, append its hash to the url's, delimited
    by a period.
    If the url ends with .h5 (Keras HDF5 weights) adds '.h5' to the name
    so that TF 2.0 can identify it as a HDF5 file
    (see https://github.com/tensorflow/tensorflow/blob/00fad90125b18b80fe054de1055770cfb8fe4ba3/tensorflow/python/keras/engine/network.py#L1380)
    utf-8r3   z.h5)encoder   	hexdigestendswith)urletagZ	url_bytesZurl_hashrA   Z
etag_bytesZ	etag_hashr#   r#   r$   url_to_filename   s    	


rN   c              	   C   s   |dkrt }t|trt|}tj|| }tj|sFtd	||d }tj|shtd	|t
|dd}t|}W 5 Q R X |d }|d }||fS )z
    Return the url and etag (which may be ``None``) stored for `filename`.
    Raise ``EnvironmentError`` if `filename` or its stored metadata do not exist.
    Nfile {} not found.jsonrH   )encodingrL   rM   )r   
isinstancer   strospathr*   existsEnvironmentErrorr4   openjsonload)rA   	cache_dir
cache_path	meta_path	meta_filemetadatarL   rM   r#   r#   r$   filename_to_url   s    
r`   )rB   c	              
   C   s  |dkrt }t| trt| } t|tr0t|}t| rPt| ||||||d}	n>tj| rb| }	n,t	| j
dkrtd| ntd| |rt|	st|	s|	S tj|	\}
}|ddd }tj|
|}tj|rt|r|s|S |	d	 }t| tj|d
d t| t|	rTt|	d}|| |  W 5 Q R X n8t|	r~t|	}|| |  ntd|	W 5 Q R X |S |	S )a  
    Given something that might be a URL (or might be a local path),
    determine which. If it's a URL, download the file and cache it, and
    return the path to the cached file. If it's already a local path,
    make sure the file exists and then return the path.
    Args:
        cache_dir: specify a cache directory to save the file to (overwrite the default cache dir).
        force_download: if True, re-dowload the file even if it's already cached in the cache dir.
        resume_download: if True, resume the download if incompletly recieved file is found.
        user_agent: Optional string or dict that will be appended to the user-agent on remote requests.
        extract_compressed_file: if True and the path point to a zip or tar file, extract the compressed
            file in a folder along the archive.
        force_extract: if True when extract_compressed_file is True and the archive was already extracted,
            re-extract the archive and overide the folder where it was extracted.

    Return:
        None in case of non-recoverable file (non-existent or inaccessible url + no cache on disk).
        Local path (string) otherwise
    N)r[   force_downloadproxiesresume_download
user_agentlocal_files_onlyr)   rO   z.unable to parse {} as a URL or as a local pathr3   rD   z
-extracted.lockT)ignore_errorsrz,Archive format of {} could not be identified)r   rR   r   rS   r?   get_from_cacherT   rU   rV   r   r<   rW   r4   
ValueErrorr
   tarfile
is_tarfiler6   replacer*   isdirlistdirr   shutilrmtreemakedirsr	   
extractallcloserX   )r=   r[   ra   rb   rc   rd   Zextract_compressed_fileZforce_extractre   Zoutput_path
output_dirZoutput_fileZoutput_extract_dir_nameZoutput_path_extracted	lock_pathzip_fileZtar_filer#   r#   r$   cached_path   sX    

	






rx   c              	   C   sF  d ttj d }t r,|d tj7 }t rB|d tj7 }t	|t
rn|dddd | D  7 }nt	|tr|d| 7 }d|i}|dkrd	|f |d
< tj| d||d}|jdkrd S |jd}|d k	r|t| nd }	tdd|	|dtt tjkd}
|jddD ]$}|r|
t| || q|
  d S )Nztransformers/{}; python/{}r   z
; torch/{}z; tensorflow/{}z; c                 s   s   | ]\}}d  ||V  qdS )z{}/{}N)r4   ).0kvr#   r#   r$   	<genexpr>8  s     zhttp_get.<locals>.<genexpr>z
user-agentz	bytes=%d-ZRangeT)streamrb   headersi  zContent-LengthBZDownloading)unitZ
unit_scaletotalinitialdescdisablei   )
chunk_size)r4   r   sysversionr6   r%   r   r'   tfrR   dictr*   itemsrS   requestsgetstatus_coder~   intr   boolloggergetEffectiveLevelloggingNOTSETiter_contentupdatelenwritert   )rL   	temp_filerb   resume_sizerd   Zuar~   responsecontent_lengthr   progresschunkr#   r#   r$   http_get1  s<    
"

r   
   c              
      sD  |dkrt }t|trt|}tj|dd d}|s~z,tj| d||d}	|	jdkr^|	j	
d}W n ttjjfk
r|   Y nX t| |}
tj||
}|dk rtj|r|S dd tt||
d	 D }t|d
krtj||d S |rtddS tj|r|s|S |d }t| tj|rN|sN|W  5 Q R  S |r|d  t fdd}|}tj rt j}nd
}nttj|dd}d
}| (}td| |j  t!| ||||d W 5 Q R X td| | t"|j | td| | |d}|d }t#|d}t$%|| W 5 Q R X W 5 Q R X |S )a.  
    Given a URL, look for the corresponding file in the local cache.
    If it's not there, download it. Then return the path to the cached file.

    Return:
        None in case of non-recoverable file (non-existent or inaccessible url + no cache on disk).
        Local path (string) otherwise
    NT)exist_ok)allow_redirectsrb   timeout   ETagc                 S   s$   g | ]}| d s| ds|qS )rP   rf   )rK   )ry   filer#   r#   r$   
<listcomp>  s   
 
z"get_from_cache.<locals>.<listcomp>z.*r   zCannot find the requested files in the cached path and outgoing traffic has been disabled. To enable model look-ups and downloads online, set 'local_files_only' to False.rf   z.incompletec               	   3   s    t  d} | V  W 5 Q R X d S )Nza+b)rX   )fZincomplete_pathr#   r$   _resumable_file_manager  s    z/get_from_cache.<locals>._resumable_file_managerF)dirdeletezF%s not found in cache or force_download set to True, downloading to %s)rb   r   rd   zstoring %s in cache at %szcreating metadata file for %s)rL   rM   rP   w)&r   rR   r   rS   rT   rr   r   headr   r~   r   rW   
exceptionsTimeoutrN   rU   r*   rV   fnmatchfilterro   r   rj   r   r   statst_sizer   tempfileNamedTemporaryFiler   infonamer   rm   rX   rY   dump)rL   r[   ra   rb   Zetag_timeoutrc   rd   re   rM   r   rA   r\   Zmatching_filesrv   r   Ztemp_file_managerr   r   metar]   r^   r#   r   r$   ri   R  sn    




 ri   c                   @   s   e Zd ZdZdddZdS )cached_propertyz
    Descriptor that mimics @property but caches output in member variable.

    From tensorflow_datasets

    Built-in in functools from Python 3.8.
    Nc                 C   sX   |d kr| S | j d krtdd| j j }t||d }|d krT|  |}t||| |S )Nzunreadable attributeZ	__cached_)fgetAttributeError__name__getattrsetattr)selfobjZobjtypeattrcachedr#   r#   r$   __get__  s    

zcached_property.__get__)N)r   
__module__r5   r+   r   r#   r#   r#   r$   r     s   r   c                    s   t   fdd}|S )Nc                     s&   t  r | |S td j dd S )NMethod `z` requires PyTorch.)r%   ImportErrorr   argskwargsfuncr#   r$   wrapper  s    
ztorch_required.<locals>.wrapperr   r   r   r#   r   r$   torch_required  s    r   c                    s   t   fdd}|S )Nc                     s&   t  r | |S td j dd S )Nr   z` requires TF.)r'   r   r   r   r   r#   r$   r     s    
ztf_required.<locals>.wrapperr   r   r#   r   r$   tf_required  s    r   )T)N)N)NFNFNFFF)Nr   N)NFNr   FNF)Yr+   r   rY   r   rT   rp   r   rk   r   
contextlibr   	functoolsr   r   hashlibr   pathlibr   typingr   urllib.parser   zipfiler	   r
   r   Zfilelockr   Z	tqdm.autor   r)   r   	getLoggerr   r   environr   upperr   r   r   r"   r   r4   r   Z
tensorflowr   hasattrr   AssertionErrorr&   Z	torch.hubr   Ztorch_cache_homerU   
expandusergetenvr*   Zdefault_cache_pathr   r   r   ZWEIGHTS_NAMEZTF2_WEIGHTS_NAMEZTF_WEIGHTS_NAMEZCONFIG_NAMEZMODEL_CARD_NAMEZMULTIPLE_CHOICE_DUMMY_INPUTSZDUMMY_INPUTSZ
DUMMY_MASKrF   rE   r%   r'   r2   r8   r9   r?   rS   rG   rN   r`   rx   r   ri   propertyr   r   r   r#   r#   r#   r$   <module>   s   


$



((

        
\
#       	r