U
    &cL                     @   s   d dl Z d dlZd dlZd dlmZmZ d dlmZmZ d dl	Z	d dl
mZ d dlmZ ddlmZmZ ddlmZ dd	lmZ d
dlmZmZmZ d
dlmZ e eZeG dd dZG dd deZdS )    N)	dataclassfield)ListOptional)FileLock)Dataset   )RobertaTokenizerRobertaTokenizerFast)PreTrainedTokenizer)XLMRobertaTokenizer   )!glue_convert_examples_to_featuresglue_output_modesglue_processors)InputFeaturesc                   @   s   e Zd ZU dZeddde  idZe	e
d< eddidZe	e
d< ed	dd
idZee
d< edddidZee
d< dd ZdS )GlueDataTrainingArgumentsz
    Arguments pertaining to what data we are going to input our model for training and eval.

    Using `HfArgumentParser` we can turn this class
    into argparse arguments to be able to specify them on
    the command line.
    helpz"The name of the task to train on: z, )metadata	task_namezUThe input data dir. Should contain the .tsv files (or other data files) for the task.data_dir   zThe maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.)defaultr   max_seq_lengthFz1Overwrite the cached training and evaluation setsoverwrite_cachec                 C   s   | j  | _ d S N)r   lowerself r   C/tmp/pip-unpacked-wheel-ymerj3tt/transformers/data/datasets/glue.py__post_init__.   s    z'GlueDataTrainingArguments.__post_init__N)__name__
__module____qualname____doc__r   joinr   keysr   str__annotations__r   r   intr   boolr!   r   r   r   r    r      s    
$  r   c                   @   s\   e Zd ZU dZeed< eed< ee ed< dee	e
e ddd	Zd
d ZedddZdS )GlueDatasetzL
    This will be superseded by a framework-agnostic approach
    soon.
    argsoutput_modefeaturesNF)r-   	tokenizerlimit_lengthc              	   C   sz  || _ t|j  }t|j | _tj|jd	|r4dnd|j
jt|j|j}|d }t| tj|r|jst }t|| _td| dt |  ntd|j  | }	|jdkr|j
tttfkr|	d	 |	d
  |	d
< |	d	< |r||jn
||j}
|d k	r(|
d | }
t|
||j|	| jd| _t }t| j| td|t |  W 5 Q R X d S )Nzcached_{}_{}_{}_{}devZtrainz.lockz"Loading features from cached file z [took %.3f s]z'Creating features from dataset file at )Zmnlizmnli-mmr      )
max_length
label_listr.   z1Saving features into cached file %s [took %.3f s])r-   r   r   r   r.   ospathr&   r   format	__class__r"   r(   r   r   existsr   timetorchloadr/   loggerinfoZ
get_labelsr	   r
   r   Zget_dev_examplesZget_train_examplesr   save)r   r-   r0   r1   evaluate	processorZcached_features_fileZ	lock_pathstartr5   Zexamplesr   r   r    __init__<   sb    
   	
 


  
zGlueDataset.__init__c                 C   s
   t | jS r   )lenr/   r   r   r   r    __len__x   s    zGlueDataset.__len__)returnc                 C   s
   | j | S r   )r/   )r   ir   r   r    __getitem__{   s    zGlueDataset.__getitem__)NF)r"   r#   r$   r%   r   r)   r(   r   r   r   r   r*   rD   rF   rI   r   r   r   r    r,   2   s   
  <r,   ) loggingr6   r;   Zdataclassesr   r   typingr   r   r<   Zfilelockr   Ztorch.utils.data.datasetr   Ztokenization_robertar	   r
   Ztokenization_utilsr   Ztokenization_xlm_robertar   Zprocessors.gluer   r   r   Zprocessors.utilsr   	getLoggerr"   r>   r   r,   r   r   r   r    <module>   s    
