U
    &cUJ                     @   s  d Z ddlZddlZddlmZ ddlmZmZmZ ddl	m
Z
 ddlmZ dd	lmZmZmZ e
 rnddlZeeZd.eee d
f eee dddZe
 redfejjeee ejjdddZd/ee eee dddZG dd deZG dd deZG dd deZG dd deZ G dd deZ!G dd deZ"G dd  d eZ#G d!d" d"eZ$G d#d$ d$eZ%G d%d& d&eZ&G d'd( d(eZ'd)dd)d)dd)d)d)d)d*	Z(e!ee ee"e#e$e%e&e'd+
Z)d,d,d,d,d,d-d,d,d,d,d+
Z*dS )0z GLUE processors and helpers     N)Enum)ListOptionalUnion   )is_tf_available)PreTrainedTokenizer   )DataProcessorInputExampleInputFeaturesztf.data.Dataset)examples	tokenizer
max_lengthc                 C   sH   t  r4t| tjjr4|dkr$tdt| |||dS t| |||||dS )aS  
    Loads a data file into a list of ``InputFeatures``

    Args:
        examples: List of ``InputExamples`` or ``tf.data.Dataset`` containing the examples.
        tokenizer: Instance of a tokenizer that will tokenize the examples
        max_length: Maximum example length. Defaults to the tokenizer's max_len
        task: GLUE task
        label_list: List of labels. Can be obtained from the processor using the ``processor.get_labels()`` method
        output_mode: String indicating the output mode. Either ``regression`` or ``classification``

    Returns:
        If the ``examples`` input is a ``tf.data.Dataset``, will return a ``tf.data.Dataset``
        containing the task-specific features. If the input is a list of ``InputExamples``, will return
        a list of task-specific ``InputFeatures`` which can be fed to the model.

    NzWWhen calling glue_convert_examples_to_features from TF, the task parameter is required.r   task)r   r   
label_listoutput_mode)r   
isinstancetfdataDataset
ValueError%_tf_glue_convert_examples_to_features"_glue_convert_examples_to_features)r   r   r   r   r   r    r   E/tmp/pip-unpacked-wheel-ymerj3tt/transformers/data/processors/glue.py!glue_convert_examples_to_features"   s         r   )r   r   r   returnc              	      s   t |  fdd| D } t| |||d  fdd}tjj|tjtjtjdtjftdgtdgtdgdtg fS )zd
        Returns:
            A ``tf.data.Dataset`` containing the task-specific features.

        c                    s   g | ]}   |qS r   )Ztfds_mapget_example_from_tensor_dict.0example)	processorr   r   
<listcomp>O   s     z9_tf_glue_convert_examples_to_features.<locals>.<listcomp>r   c                  3   s(    D ]} | j | j| jd| jfV  qd S )N	input_idsattention_masktoken_type_ids)r&   r'   r(   label)ex)featuresr   r   genR   s    z2_tf_glue_convert_examples_to_features.<locals>.genr%   N)	glue_processorsr   r   r   r   Zfrom_generatorZint32Zint64ZTensorShape)r   r   r   r   r,   r   )r+   r#   r   r   F   s    



r   c                    sV  |d kr|j }|d k	rdt|  }|d krB| }td||f  d krdt| td|f  dd t|D ttt	t
f dfddfdd	| D }|jd
d	 | D |dd g }tt| D ]8 fdd D }	tf |	d| i}
||
 qt| d d D ]6\}td td|j  td|   q|S )NzUsing label list %s for task %sz Using output mode %s for task %sc                 S   s   i | ]\}}||qS r   r   )r!   ir)   r   r   r   
<dictcomp>   s      z6_glue_convert_examples_to_features.<locals>.<dictcomp>)r"   r   c                    s0   dkr | j  S dkr$t| j S td S )Nclassification
regression)r)   floatKeyError)r"   )	label_mapr   r   r   label_from_example   s
    

z>_glue_convert_examples_to_features.<locals>.label_from_examplec                    s   g | ]} |qS r   r   r    )r5   r   r   r$      s     z6_glue_convert_examples_to_features.<locals>.<listcomp>c                 S   s   g | ]}|j |jfqS r   )text_atext_br    r   r   r   r$      s     T)r   Zpad_to_max_lengthc                    s   i | ]}| |  qS r   r   )r!   k)batch_encodingr.   r   r   r/      s      r)      z*** Example ***zguid: %szfeatures: %s)max_lenr-   
get_labelsloggerinfoglue_output_modes	enumerater   r   intr2   Zbatch_encode_plusrangelenr   appendguid)r   r   r   r   r   r   r#   labelsr+   inputsZfeaturer"   r   )r9   r.   r5   r4   r   r   r   k   s8    
  
r   c                   @   s   e Zd ZdZdZdS )
OutputModer0   r1   N)__name__
__module____qualname__r0   r1   r   r   r   r   rH      s   rH   c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )MrpcProcessorz/Processor for the MRPC data set (GLUE version).c                 C   s>   t |d  |d  d|d  dt|d  S See base class.idxZ	sentence1utf-8Z	sentence2r)   r   ZnumpydecodestrselfZtensor_dictr   r   r   r      s    
z*MrpcProcessor.get_example_from_tensor_dictc                 C   s6   t dtj|d | | tj|ddS )rN   zLOOKING AT {}	train.tsvtrain)r=   r>   formatospathjoin_create_examples	_read_tsvrU   data_dirr   r   r   get_train_examples   s    z MrpcProcessor.get_train_examplesc                 C   s   |  | tj|ddS rN   dev.tsvdevr\   r]   rY   rZ   r[   r^   r   r   r   get_dev_examples   s    zMrpcProcessor.get_dev_examplesc                 C   s   ddgS rN   01r   rU   r   r   r   r<      s    zMrpcProcessor.get_labelsc           
   	   C   s^   g }t |D ]L\}}|dkrqd||f }|d }|d }|d }	|t||||	d q|S )/Creates examples for the training and dev sets.r   %s-%sr      rE   r6   r7   r)   r@   rD   r   
rU   linesset_typer   r.   linerE   r6   r7   r)   r   r   r   r\      s    zMrpcProcessor._create_examplesN	rI   rJ   rK   __doc__r   r`   re   r<   r\   r   r   r   r   rL      s   	rL   c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )MnliProcessorz3Processor for the MultiNLI data set (GLUE version).c                 C   s>   t |d  |d  d|d  dt|d  S )rN   rO   ZpremiserP   Z
hypothesisr)   rQ   rT   r   r   r   r      s    
z*MnliProcessor.get_example_from_tensor_dictc                 C   s   |  | tj|ddS rN   rV   rW   rd   r^   r   r   r   r`      s    z MnliProcessor.get_train_examplesc                 C   s   |  | tj|ddS )rN   zdev_matched.tsvdev_matchedrd   r^   r   r   r   re      s    zMnliProcessor.get_dev_examplesc                 C   s
   dddgS )rN   Zcontradiction
entailmentZneutralr   ri   r   r   r   r<      s    zMnliProcessor.get_labelsc           
   	   C   sb   g }t |D ]P\}}|dkrqd||d f }|d }|d }|d }	|t||||	d q|S )rj   r   rk      	   rm   rn   ro   r   r   r   r\      s    zMnliProcessor._create_examplesNrs   r   r   r   r   ru      s   	ru   c                   @   s   e Zd ZdZdd ZdS )MnliMismatchedProcessorz>Processor for the MultiNLI Mismatched data set (GLUE version).c                 C   s   |  | tj|ddS )rN   zdev_mismatched.tsvrw   rd   r^   r   r   r   re      s    z(MnliMismatchedProcessor.get_dev_examplesN)rI   rJ   rK   rt   re   r   r   r   r   r|      s   r|   c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )ColaProcessorz/Processor for the CoLA data set (GLUE version).c                 C   s0   t |d  |d  ddt|d  S rN   rO   sentencerP   Nr)   rQ   rT   r   r   r   r      s    
z*ColaProcessor.get_example_from_tensor_dictc                 C   s   |  | tj|ddS rv   rd   r^   r   r   r   r`     s    z ColaProcessor.get_train_examplesc                 C   s   |  | tj|ddS ra   rd   r^   r   r   r   re     s    zColaProcessor.get_dev_examplesc                 C   s   ddgS rf   r   ri   r   r   r   r<     s    zColaProcessor.get_labelsc           	   	   C   sL   g }t |D ]:\}}d||f }|d }|d }|t||d|d q|S )rj   rk   r   r	   Nrm   rn   	rU   rp   rq   r   r.   rr   rE   r6   r)   r   r   r   r\     s    zColaProcessor._create_examplesNrs   r   r   r   r   r}      s   	r}   c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )Sst2Processorz0Processor for the SST-2 data set (GLUE version).c                 C   s0   t |d  |d  ddt|d  S r~   rQ   rT   r   r   r   r     s    
z*Sst2Processor.get_example_from_tensor_dictc                 C   s   |  | tj|ddS rv   rd   r^   r   r   r   r`   &  s    z Sst2Processor.get_train_examplesc                 C   s   |  | tj|ddS ra   rd   r^   r   r   r   re   *  s    zSst2Processor.get_dev_examplesc                 C   s   ddgS rf   r   ri   r   r   r   r<   .  s    zSst2Processor.get_labelsc           	   	   C   sV   g }t |D ]D\}}|dkrqd||f }|d }|d }|t||d|d q|S )rj   r   rk   r	   Nrm   rn   r   r   r   r   r\   2  s    zSst2Processor._create_examplesNrs   r   r   r   r   r     s   	r   c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )StsbProcessorz0Processor for the STS-B data set (GLUE version).c                 C   s>   t |d  |d  d|d  dt|d  S rM   rQ   rT   r   r   r   r   B  s    
z*StsbProcessor.get_example_from_tensor_dictc                 C   s   |  | tj|ddS rv   rd   r^   r   r   r   r`   K  s    z StsbProcessor.get_train_examplesc                 C   s   |  | tj|ddS ra   rd   r^   r   r   r   re   O  s    zStsbProcessor.get_dev_examplesc                 C   s   dgS )rN   Nr   ri   r   r   r   r<   S  s    zStsbProcessor.get_labelsc           
   	   C   sb   g }t |D ]P\}}|dkrqd||d f }|d }|d }|d }	|t||||	d q|S )rj   r   rk      ry   r{   rm   rn   ro   r   r   r   r\   W  s    zStsbProcessor._create_examplesNrs   r   r   r   r   r   ?  s   	r   c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )QqpProcessorz.Processor for the QQP data set (GLUE version).c                 C   s>   t |d  |d  d|d  dt|d  S )rN   rO   Z	question1rP   Z	question2r)   rQ   rT   r   r   r   r   h  s    
z)QqpProcessor.get_example_from_tensor_dictc                 C   s   |  | tj|ddS rv   rd   r^   r   r   r   r`   q  s    zQqpProcessor.get_train_examplesc                 C   s   |  | tj|ddS ra   rd   r^   r   r   r   re   u  s    zQqpProcessor.get_dev_examplesc                 C   s   ddgS rf   r   ri   r   r   r   r<   y  s    zQqpProcessor.get_labelsc           
   	   C   s   g }t |D ]n\}}|dkrqd||d f }z|d }|d }|d }	W n tk
rb   Y qY nX |t||||	d q|S )rj   r   rk   r   rl   r:   rm   )r@   
IndexErrorrD   r   ro   r   r   r   r\   }  s    
zQqpProcessor._create_examplesNrs   r   r   r   r   r   e  s   	r   c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )QnliProcessorz/Processor for the QNLI data set (GLUE version).c                 C   s>   t |d  |d  d|d  dt|d  S )rN   rO   ZquestionrP   r   r)   rQ   rT   r   r   r   r     s    
z*QnliProcessor.get_example_from_tensor_dictc                 C   s   |  | tj|ddS rv   rd   r^   r   r   r   r`     s    z QnliProcessor.get_train_examplesc                 C   s   |  | tj|ddS )rN   rb   rw   rd   r^   r   r   r   re     s    zQnliProcessor.get_dev_examplesc                 C   s   ddgS rN   rx   Znot_entailmentr   ri   r   r   r   r<     s    zQnliProcessor.get_labelsc           
   	   C   sb   g }t |D ]P\}}|dkrqd||d f }|d }|d }|d }	|t||||	d q|S rj   r   rk   r	      r{   rm   rn   ro   r   r   r   r\     s    zQnliProcessor._create_examplesNrs   r   r   r   r   r     s   	r   c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )RteProcessorz.Processor for the RTE data set (GLUE version).c                 C   s>   t |d  |d  d|d  dt|d  S rM   rQ   rT   r   r   r   r     s    
z)RteProcessor.get_example_from_tensor_dictc                 C   s   |  | tj|ddS rv   rd   r^   r   r   r   r`     s    zRteProcessor.get_train_examplesc                 C   s   |  | tj|ddS ra   rd   r^   r   r   r   re     s    zRteProcessor.get_dev_examplesc                 C   s   ddgS r   r   ri   r   r   r   r<     s    zRteProcessor.get_labelsc           
   	   C   sb   g }t |D ]P\}}|dkrqd||d f }|d }|d }|d }	|t||||	d q|S r   rn   ro   r   r   r   r\     s    zRteProcessor._create_examplesNrs   r   r   r   r   r     s   	r   c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )WnliProcessorz/Processor for the WNLI data set (GLUE version).c                 C   s>   t |d  |d  d|d  dt|d  S rM   rQ   rT   r   r   r   r     s    
z*WnliProcessor.get_example_from_tensor_dictc                 C   s   |  | tj|ddS rv   rd   r^   r   r   r   r`     s    z WnliProcessor.get_train_examplesc                 C   s   |  | tj|ddS ra   rd   r^   r   r   r   re     s    zWnliProcessor.get_dev_examplesc                 C   s   ddgS rf   r   ri   r   r   r   r<     s    zWnliProcessor.get_labelsc           
   	   C   sb   g }t |D ]P\}}|dkrqd||d f }|d }|d }|d }	|t||||	d q|S r   rn   ro   r   r   r   r\     s    zWnliProcessor._create_examplesNrs   r   r   r   r   r     s   	r   r   )	colamnlimrpcsst-2sts-bqqpqnlirtewnli)
r   r   zmnli-mmr   r   r   r   r   r   r   r0   r1   )NNNN)NNNN)+rt   loggingrY   enumr   typingr   r   r   Z
file_utilsr   Ztokenization_utilsr   utilsr
   r   r   Z
tensorflowr   	getLoggerrI   r=   rA   r   rS   r   r   r   r   rH   rL   ru   r|   r}   r   r   r   r   r   r   Zglue_tasks_num_labelsr-   r?   r   r   r   r   <module>   s   
    "   (    2'&#%&)&&'