import csv
import json
import logging
import os
import pickle
import sys
from abc import ABC, abstractmethod
from contextlib import contextmanager
from itertools import chain
from os.path import abspath, exists
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union

import numpy as np

from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, AutoConfig
from .configuration_utils import PretrainedConfig
from .data import SquadExample, squad_convert_examples_to_features
from .file_utils import is_tf_available, is_torch_available
from .modelcard import ModelCard
from .tokenization_auto import AutoTokenizer
from .tokenization_bert import BasicTokenizer
from .tokenization_utils import PreTrainedTokenizer


if is_tf_available():
    import tensorflow as tf

    from .modeling_tf_auto import (
        TFAutoModel,
        TFAutoModelForQuestionAnswering,
        TFAutoModelForSequenceClassification,
        TFAutoModelForTokenClassification,
        TFAutoModelWithLMHead,
    )

if is_torch_available():
    import torch

    from .modeling_auto import (
        AutoModel,
        AutoModelForQuestionAnswering,
        AutoModelForSequenceClassification,
        AutoModelForTokenClassification,
        AutoModelWithLMHead,
    )


logger = logging.getLogger(__name__)


def get_framework(model=None):
    """ Select framework (TensorFlow/PyTorch) to use.
        If both frameworks are installed and no specific model is provided, defaults to using PyTorch.
    """
    if is_tf_available() and is_torch_available() and model is not None and not isinstance(model, str):
        # Both frameworks are installed and the user supplied a model instance:
        # guess the framework from the model class name.
        framework = "tf" if model.__class__.__name__.startswith("TF") else "pt"
    elif not is_tf_available() and not is_torch_available():
        raise RuntimeError(
            "At least one of TensorFlow 2.0 or PyTorch should be installed. "
            "To install TensorFlow 2.0, read the instructions at https://www.tensorflow.org/install/ "
            "To install PyTorch, read the instructions at https://pytorch.org/."
        )
    else:
        framework = "pt" if is_torch_available() else "tf"
    return framework


class ArgumentHandler(ABC):
    """
    Base interface for handling varargs for each Pipeline
    """

    @abstractmethod
    def __call__(self, *args, **kwargs):
        raise NotImplementedError()


class DefaultArgumentHandler(ArgumentHandler):
    """
    Default varargs argument parser handling parameters for each Pipeline
    """

    @staticmethod
    def handle_kwargs(kwargs: Dict) -> List:
        if len(kwargs) == 1:
            output = list(kwargs.values())
        else:
            output = list(chain(kwargs.values()))

        return DefaultArgumentHandler.handle_args(output)

    @staticmethod
    def handle_args(args: Sequence[Any]) -> List[str]:
        # Only one argument, let's do case by case
        if len(args) == 1:
            if isinstance(args[0], str):
                return [args[0]]
            elif not isinstance(args[0], list):
                return list(args)
            else:
                return args[0]

        # Multiple arguments (x1, x2, ...)
        elif len(args) > 1:
            if all([isinstance(arg, str) for arg in args]):
                return list(args)

            # If not instance of list, then it should be an instance of iterable
            elif isinstance(args, Iterable):
                return list(chain.from_iterable(chain(args)))
            else:
                raise ValueError(
                    "Invalid input type {}. Pipeline supports Union[str, Iterable[str]]".format(type(args))
                )
        else:
            return []

    def __call__(self, *args, **kwargs):
        if len(kwargs) > 0 and len(args) > 0:
            raise ValueError("Pipeline cannot handle mixed args and kwargs")

        if len(kwargs) > 0:
            return DefaultArgumentHandler.handle_kwargs(kwargs)
        else:
            return DefaultArgumentHandler.handle_args(args)


class PipelineDataFormat:
    """
    Base class for all the pipeline supported data format both for reading and writing.
    Supported data formats currently includes:
     - JSON
     - CSV
     - stdin/stdout (pipe)

    PipelineDataFormat also includes some utilities to work with multi-columns like mapping from datasets columns
    to pipelines keyword arguments through the `dataset_kwarg_1=dataset_column_1` format.
    """

    SUPPORTED_FORMATS = ["json", "csv", "pipe"]

    def __init__(
        self, output_path: Optional[str], input_path: Optional[str], column: Optional[str], overwrite=False,
    ):
        self.output_path = output_path
        self.input_path = input_path
        self.column = column.split(",") if column is not None else [""]
        self.is_multi_columns = len(self.column) > 1

        if self.is_multi_columns:
            self.column = [tuple(c.split("=")) if "=" in c else (c, c) for c in self.column]

        if output_path is not None and not overwrite:
            if exists(abspath(self.output_path)):
                raise OSError("{} already exists on disk".format(self.output_path))

        if input_path is not None:
            if not exists(abspath(self.input_path)):
                raise OSError("{} doesnt exist on disk".format(self.input_path))

    @abstractmethod
    def __iter__(self):
        raise NotImplementedError()

    @abstractmethod
    def save(self, data: dict):
        """
        Save the provided data object with the representation for the current `DataFormat`.
        :param data: data to store
        :return:
        """
        raise NotImplementedError()

    def save_binary(self, data: Union[dict, List[dict]]) -> str:
        """
        Save the provided data object as a pickle-formatted binary data on the disk.
        :param data: data to store
        :return: (str) Path where the data has been saved
        """
        path, _ = os.path.splitext(self.output_path)
        binary_path = os.path.extsep.join((path, "pickle"))

        with open(binary_path, "wb+") as f_output:
            pickle.dump(data, f_output)

        return binary_path

    @staticmethod
    def from_str(
        format: str, output_path: Optional[str], input_path: Optional[str], column: Optional[str], overwrite=False,
    ):
        if format == "json":
            return JsonPipelineDataFormat(output_path, input_path, column, overwrite=overwrite)
        elif format == "csv":
            return CsvPipelineDataFormat(output_path, input_path, column, overwrite=overwrite)
        elif format == "pipe":
            return PipedPipelineDataFormat(output_path, input_path, column, overwrite=overwrite)
        else:
            raise KeyError("Unknown reader {} (Available reader are json/csv/pipe)".format(format))


class CsvPipelineDataFormat(PipelineDataFormat):
    def __init__(
        self, output_path: Optional[str], input_path: Optional[str], column: Optional[str], overwrite=False,
    ):
        super().__init__(output_path, input_path, column, overwrite=overwrite)

    def __iter__(self):
        with open(self.input_path, "r") as f:
            reader = csv.DictReader(f)
            for row in reader:
                if self.is_multi_columns:
                    yield {k: row[c] for k, c in self.column}
                else:
                    yield row[self.column[0]]

    def save(self, data: List[dict]):
        with open(self.output_path, "w") as f:
            if len(data) > 0:
                writer = csv.DictWriter(f, list(data[0].keys()))
                writer.writeheader()
                writer.writerows(data)


class JsonPipelineDataFormat(PipelineDataFormat):
    def __init__(
        self, output_path: Optional[str], input_path: Optional[str], column: Optional[str], overwrite=False,
    ):
        super().__init__(output_path, input_path, column, overwrite=overwrite)

        with open(input_path, "r") as f:
            self._entries = json.load(f)

    def __iter__(self):
        for entry in self._entries:
            if self.is_multi_columns:
                yield {k: entry[c] for k, c in self.column}
            else:
                yield entry[self.column[0]]

    def save(self, data: dict):
        with open(self.output_path, "w") as f:
            json.dump(data, f)


class PipedPipelineDataFormat(PipelineDataFormat):
    """
    Read data from piped input to the python process.
    For multi-column data, columns should be separated by \t

    If columns are provided, then the output will be a dictionary with {column_x: value_x}
    """

    def __iter__(self):
        for line in sys.stdin:
            # Split for multi-columns
            if "\t" in line:
                line = line.split("\t")
                if self.column:
                    # Dictionary to map arguments
                    yield {kwargs: l for (kwargs, _), l in zip(self.column, line)}
                else:
                    yield tuple(line)

            # No dictionary to map arguments
            else:
                yield line

    def save(self, data: dict):
        print(data)

    def save_binary(self, data: Union[dict, List[dict]]) -> str:
        if self.output_path is None:
            raise KeyError(
                "When using piped input on pipeline outputting large object requires an output file path. "
                "Please provide such output path through --output argument."
            )

        return super().save_binary(data)
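

# A minimal sketch of how these data formats are consumed, mirroring the
# `transformers-cli run` command (file names here are hypothetical):
#
#     nlp = pipeline("sentiment-analysis")
#     reader = PipelineDataFormat.from_str(
#         "csv", output_path="out.csv", input_path="in.csv", column="text", overwrite=True
#     )
#     outputs = []
#     for entry in reader:
#         outputs.append(nlp(**entry) if reader.is_multi_columns else nlp(entry))
#     reader.save(outputs)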


class _ScikitCompat(ABC):
    """
    Interface layer for the Scikit and Keras compatibility.
    """

    @abstractmethod
    def transform(self, X):
        raise NotImplementedError()

    @abstractmethod
    def predict(self, X):
        raise NotImplementedError()


class Pipeline(_ScikitCompat):
    """
    The Pipeline class is the class from which all pipelines inherit. Refer to this class for methods shared across
    different pipelines.

    Base class implementing pipelined operations.
    Pipeline workflow is defined as a sequence of the following operations:
        Input -> Tokenization -> Model Inference -> Post-Processing (Task dependent) -> Output

    Pipeline supports running on CPU or GPU through the device argument. Users can specify
    device argument as an integer, -1 meaning "CPU", >= 0 referring the CUDA device ordinal.

    Some pipelines, like FeatureExtractionPipeline ('feature-extraction'), output large
    tensor objects as nested lists. In order to avoid dumping such large structures as textual data we
    provide the binary_output constructor argument. If set to True, the output will be stored in the
    pickle format.

    Arguments:
        model (:obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`):
            The model that will be used by the pipeline to make predictions. This needs to be a model inheriting from
            :class:`~transformers.PreTrainedModel` for PyTorch and :class:`~transformers.TFPreTrainedModel` for
            TensorFlow.
        tokenizer (:obj:`~transformers.PreTrainedTokenizer`):
            The tokenizer that will be used by the pipeline to encode data for the model. This object inherits from
            :class:`~transformers.PreTrainedTokenizer`.
        modelcard (:obj:`str` or :class:`~transformers.ModelCard`, `optional`, defaults to :obj:`None`):
            Model card attributed to the model for this pipeline.
        framework (:obj:`str`, `optional`, defaults to :obj:`None`):
            The framework to use, either "pt" for PyTorch or "tf" for TensorFlow. The specified framework must be
            installed.

            If no framework is specified, will default to the one currently installed. If no framework is specified
            and both frameworks are installed, will default to PyTorch.
        args_parser (:class:`~transformers.pipelines.ArgumentHandler`, `optional`, defaults to :obj:`None`):
            Reference to the object in charge of parsing supplied pipeline parameters.
        device (:obj:`int`, `optional`, defaults to :obj:`-1`):
            Device ordinal for CPU/GPU supports. Setting this to -1 will leverage CPU, >=0 will run the model
            on the associated CUDA device id.
        binary_output (:obj:`bool`, `optional`, defaults to :obj:`False`):
            Flag indicating if the output the pipeline should happen in a binary format (i.e. pickle) or as raw text.

    Return:
        :obj:`List` or :obj:`Dict`:
        Pipeline returns list or dictionary depending on:

         - Whether the user supplied multiple samples
         - Whether the pipeline exposes multiple fields in the output object
    """

    default_input_names = None

    def __init__(
        self,
        model: Union["PreTrainedModel", "TFPreTrainedModel"],
        tokenizer: PreTrainedTokenizer,
        modelcard: Optional[ModelCard] = None,
        framework: Optional[str] = None,
        task: str = "",
        args_parser: ArgumentHandler = None,
        device: int = -1,
        binary_output: bool = False,
    ):

        if framework is None:
            framework = get_framework()

        self.model = model
        self.tokenizer = tokenizer
        self.modelcard = modelcard
        self.framework = framework
        self.device = device if framework == "tf" else torch.device("cpu" if device < 0 else "cuda:{}".format(device))
        self.binary_output = binary_output
        self._args_parser = args_parser or DefaultArgumentHandler()

        # Special handling
        if self.framework == "pt" and self.device.type == "cuda":
            self.model = self.model.to(self.device)

        # Update config with task specific parameters
        task_specific_params = self.model.config.task_specific_params
        if task_specific_params is not None and task in task_specific_params:
            self.model.config.update(task_specific_params.get(task))

    def save_pretrained(self, save_directory):
        """
        Save the pipeline's model and tokenizer to the specified save_directory
        """
        if not os.path.isdir(save_directory):
            logger.error("Provided path ({}) should be a directory".format(save_directory))
            return

        self.model.save_pretrained(save_directory)
        self.tokenizer.save_pretrained(save_directory)
        if self.modelcard is not None:
            self.modelcard.save_pretrained(save_directory)

    def transform(self, X):
        """
        Scikit / Keras interface to transformers' pipelines. This method will forward to __call__().
        """
        return self(X=X)

    def predict(self, X):
        """
        Scikit / Keras interface to transformers' pipelines. This method will forward to __call__().
        """
        return self(X=X)

    @contextmanager
    def device_placement(self):
        """
        Context Manager allowing tensor allocation on the user-specified device in framework agnostic way.
        example:
            # Explicitly ask for tensor allocation on CUDA device :0
            nlp = pipeline(..., device=0)
            with nlp.device_placement():
                # Every framework specific tensor allocation will be done on the request device
                output = nlp(...)
        Returns:
            Context manager
        """
        if self.framework == "tf":
            with tf.device("/CPU:0" if self.device == -1 else "/device:GPU:{}".format(self.device)):
                yield
        else:
            if self.device.type == "cuda":
                torch.cuda.set_device(self.device)

            yield

    def ensure_tensor_on_device(self, **inputs):
        """
        Ensure PyTorch tensors are on the specified device.
        :param inputs:
        :return:
        """
        return {name: tensor.to(self.device) for name, tensor in inputs.items()}

    def _parse_and_tokenize(self, *texts, pad_to_max_length=True, **kwargs):
        """
        Parse arguments and tokenize
        """
        # Parse arguments
        inputs = self._args_parser(*texts, **kwargs)
        inputs = self.tokenizer.batch_encode_plus(
            inputs, add_special_tokens=True, return_tensors=self.framework, pad_to_max_length=pad_to_max_length,
        )

        return inputs

    def __call__(self, *args, **kwargs):
        inputs = self._parse_and_tokenize(*args, **kwargs)
        return self._forward(inputs)

    def _forward(self, inputs, return_tensors=False):
        """
        Internal framework specific forward dispatching.
        Args:
            inputs: dict holding all the keyword arguments required by the model forward method.
            return_tensors: Whether to return native framework (pt/tf) tensors rather than numpy array.
        Returns:
            Numpy array
        """
        # Encode for forward
        with self.device_placement():
            if self.framework == "tf":
                predictions = self.model(inputs.data, training=False)[0]
            else:
                with torch.no_grad():
                    inputs = self.ensure_tensor_on_device(**inputs)
                    predictions = self.model(**inputs)[0].cpu()

        if return_tensors:
            return predictions
        else:
            return predictions.numpy()


class FeatureExtractionPipeline(Pipeline):
    """
    Feature extraction pipeline using Model head. This pipeline extracts the hidden states from the base transformer,
    which can be used as features in downstream tasks.

    This feature extraction pipeline can currently be loaded from the :func:`~transformers.pipeline` method using
    the following task identifier(s):

    - "feature-extraction", for extracting features of a sequence.

    All models may be used for this pipeline. See a list of all models, including community-contributed models on
    `huggingface.co/models <https://huggingface.co/models>`__.

    Arguments:
        model (:obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`):
            The model that will be used by the pipeline to make predictions. This needs to be a model inheriting from
            :class:`~transformers.PreTrainedModel` for PyTorch and :class:`~transformers.TFPreTrainedModel` for
            TensorFlow.
        tokenizer (:obj:`~transformers.PreTrainedTokenizer`):
            The tokenizer that will be used by the pipeline to encode data for the model. This object inherits from
            :class:`~transformers.PreTrainedTokenizer`.
        modelcard (:obj:`str` or :class:`~transformers.ModelCard`, `optional`, defaults to :obj:`None`):
            Model card attributed to the model for this pipeline.
        framework (:obj:`str`, `optional`, defaults to :obj:`None`):
            The framework to use, either "pt" for PyTorch or "tf" for TensorFlow. The specified framework must be
            installed.

            If no framework is specified, will default to the one currently installed. If no framework is specified
            and both frameworks are installed, will default to PyTorch.
        args_parser (:class:`~transformers.pipelines.ArgumentHandler`, `optional`, defaults to :obj:`None`):
            Reference to the object in charge of parsing supplied pipeline parameters.
        device (:obj:`int`, `optional`, defaults to :obj:`-1`):
            Device ordinal for CPU/GPU supports. Setting this to -1 will leverage CPU, >=0 will run the model
            on the associated CUDA device id.
    """

    def __init__(
        self,
        model: Union["PreTrainedModel", "TFPreTrainedModel"],
        tokenizer: PreTrainedTokenizer,
        modelcard: Optional[ModelCard] = None,
        framework: Optional[str] = None,
        args_parser: ArgumentHandler = None,
        device: int = -1,
        task: str = "",
    ):
        super().__init__(
            model=model,
            tokenizer=tokenizer,
            modelcard=modelcard,
            framework=framework,
            args_parser=args_parser,
            device=device,
            binary_output=True,
            task=task,
        )

    def __call__(self, *args, **kwargs):
        return super().__call__(*args, **kwargs).tolist()


class TextGenerationPipeline(Pipeline):
    """
    Language generation pipeline using any ModelWithLMHead head. This pipeline predicts the words that will follow a specified text prompt.

    This language generation pipeline can currently be loaded from the :func:`~transformers.pipeline` method using
    the following task identifier(s):

    - "text-generation", for generating text from a specified prompt.

    The models that this pipeline can use are models that have been trained with an autoregressive language modeling objective,
    which includes the uni-directional models in the library (e.g. gpt2).
    See the list of available community models on
    `huggingface.co/models <https://huggingface.co/models?search=&filter=lm-head>`__.
    """

    # Padding text to help Transformer-XL and XLNet with short prompts.
    PADDING_TEXT = """In 1991, the remains of Russian Tsar Nicholas II and his family
    (except for Alexei and Maria) are discovered.
    The voice of Nicholas's young son, Tsarevich Alexei Nikolaevich, narrates the
    remainder of the story. 1883 Western Siberia,
    a young Grigori Rasputin is asked by his father and a group of men to perform magic.
    Rasputin has a vision and denounces one of the men as a horse thief. Although his
    father initially slaps him for making such an accusation, Rasputin watches as the
    man is chased outside and beaten. Twenty years later, Rasputin sees a vision of
    the Virgin Mary, prompting him to become a priest. Rasputin quickly becomes famous,
    with people, even a bishop, begging for his blessing. <eod> </s> <eos>"""

    ALLOWED_MODELS = [
        "XLNetLMHeadModel",
        "TransfoXLLMHeadModel",
        "ReformerModelWithLMHead",
        "GPT2LMHeadModel",
        "OpenAIGPTLMHeadModel",
        "CTRLLMHeadModel",
        "TFXLNetLMHeadModel",
        "TFTransfoXLLMHeadModel",
        "TFGPT2LMHeadModel",
        "TFOpenAIGPTLMHeadModel",
        "TFCTRLLMHeadModel",
    ]

    def __call__(
        self, *args, return_tensors=False, return_text=True, clean_up_tokenization_spaces=False, **generate_kwargs
    ):
        if self.model.__class__.__name__ not in self.ALLOWED_MODELS:
            raise NotImplementedError(
                "Generation is currently not supported for {}. Please select a model from {} for generation.".format(
                    self.model.__class__.__name__, self.ALLOWED_MODELS
                )
            )

        text_inputs = self._args_parser(*args)

        results = []
        for prompt_text in text_inputs:
            # Manage correct placement of the tensors
            with self.device_placement():
                if self.model.__class__.__name__ in ["XLNetLMHeadModel", "TransfoXLLMHeadModel"]:
                    # For XLNet and TransformerXL, prepend an article to the prompt to give more state to the model.
                    inputs = self._parse_and_tokenize(self.PADDING_TEXT + prompt_text, pad_to_max_length=False)
                else:
                    inputs = self._parse_and_tokenize(prompt_text, pad_to_max_length=False)

                # set input_ids to None to allow empty prompt
                if inputs["input_ids"].shape[-1] == 0:
                    inputs["input_ids"] = None
                    inputs["attention_mask"] = None

                if self.framework == "pt" and inputs["input_ids"] is not None:
                    inputs = self.ensure_tensor_on_device(**inputs)

                input_ids = inputs["input_ids"]

                # Ensure that batch size = 1 (batch generation not allowed for now)
                assert (
                    input_ids is None or input_ids.shape[0] == 1
                ), "Batch generation is currently not supported. See https://github.com/huggingface/transformers/issues/3021 for more information."

                output_sequences = self.model.generate(input_ids=input_ids, **generate_kwargs)

            result = []
            for generated_sequence in output_sequences:
                generated_sequence = generated_sequence.numpy().tolist()
                record = {}
                if return_tensors:
                    record["generated_token_ids"] = generated_sequence
                if return_text:
                    # Decode text
                    text = self.tokenizer.decode(
                        generated_sequence,
                        skip_special_tokens=True,
                        clean_up_tokenization_spaces=clean_up_tokenization_spaces,
                    )

                    # Remove the padding prompt from the decoded sequence (XLNet and Transfo-XL)
                    if input_ids is None:
                        prompt_length = 0
                    else:
                        prompt_length = len(
                            self.tokenizer.decode(
                                input_ids[0],
                                skip_special_tokens=True,
                                clean_up_tokenization_spaces=clean_up_tokenization_spaces,
                            )
                        )

                    record["generated_text"] = prompt_text + text[prompt_length:]

                result.append(record)
            results += [result]

        if len(results) == 1:
            return results[0]

        return results


class TextClassificationPipeline(Pipeline):
    """
    Text classification pipeline using ModelForSequenceClassification head. See the
    `sequence classification usage <../usage.html#sequence-classification>`__ examples for more information.

    This text classification pipeline can currently be loaded from the :func:`~transformers.pipeline` method using
    the following task identifier(s):

    - "sentiment-analysis", for classifying sequences according to positive or negative sentiments.

    The models that this pipeline can use are models that have been fine-tuned on a sequence classification task.
    See the up-to-date list of available models on
    `huggingface.co/models <https://huggingface.co/models?filter=text-classification>`__.

    Arguments:
        model (:obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`):
            The model that will be used by the pipeline to make predictions. This needs to be a model inheriting from
            :class:`~transformers.PreTrainedModel` for PyTorch and :class:`~transformers.TFPreTrainedModel` for
            TensorFlow.
        tokenizer (:obj:`~transformers.PreTrainedTokenizer`):
            The tokenizer that will be used by the pipeline to encode data for the model. This object inherits from
            :class:`~transformers.PreTrainedTokenizer`.
        modelcard (:obj:`str` or :class:`~transformers.ModelCard`, `optional`, defaults to :obj:`None`):
            Model card attributed to the model for this pipeline.
        framework (:obj:`str`, `optional`, defaults to :obj:`None`):
            The framework to use, either "pt" for PyTorch or "tf" for TensorFlow. The specified framework must be
            installed.

            If no framework is specified, will default to the one currently installed. If no framework is specified
            and both frameworks are installed, will default to PyTorch.
        args_parser (:class:`~transformers.pipelines.ArgumentHandler`, `optional`, defaults to :obj:`None`):
            Reference to the object in charge of parsing supplied pipeline parameters.
        device (:obj:`int`, `optional`, defaults to :obj:`-1`):
            Device ordinal for CPU/GPU supports. Setting this to -1 will leverage CPU, >=0 will run the model
            on the associated CUDA device id.
    """

    def __call__(self, *args, **kwargs):
        outputs = super().__call__(*args, **kwargs)
        scores = np.exp(outputs) / np.exp(outputs).sum(-1, keepdims=True)
        return [{"label": self.model.config.id2label[item.argmax()], "score": item.max().item()} for item in scores]


class FillMaskPipeline(Pipeline):
    """
    Masked language modeling prediction pipeline using ModelWithLMHead head. See the
    `masked language modeling usage <../usage.html#masked-language-modeling>`__ examples for more information.

    This mask filling pipeline can currently be loaded from the :func:`~transformers.pipeline` method using
    the following task identifier(s):

    - "fill-mask", for predicting masked tokens in a sequence.

    The models that this pipeline can use are models that have been trained with a masked language modeling objective,
    which includes the bi-directional models in the library.
    See the up-to-date list of available models on
    `huggingface.co/models <https://huggingface.co/models?filter=lm-head>`__.

    Arguments:
        model (:obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`):
            The model that will be used by the pipeline to make predictions. This needs to be a model inheriting from
            :class:`~transformers.PreTrainedModel` for PyTorch and :class:`~transformers.TFPreTrainedModel` for
            TensorFlow.
        tokenizer (:obj:`~transformers.PreTrainedTokenizer`):
            The tokenizer that will be used by the pipeline to encode data for the model. This object inherits from
            :class:`~transformers.PreTrainedTokenizer`.
        modelcard (:obj:`str` or :class:`~transformers.ModelCard`, `optional`, defaults to :obj:`None`):
            Model card attributed to the model for this pipeline.
        framework (:obj:`str`, `optional`, defaults to :obj:`None`):
            The framework to use, either "pt" for PyTorch or "tf" for TensorFlow. The specified framework must be
            installed.

            If no framework is specified, will default to the one currently installed. If no framework is specified
            and both frameworks are installed, will default to PyTorch.
        args_parser (:class:`~transformers.pipelines.ArgumentHandler`, `optional`, defaults to :obj:`None`):
            Reference to the object in charge of parsing supplied pipeline parameters.
        device (:obj:`int`, `optional`, defaults to :obj:`-1`):
            Device ordinal for CPU/GPU supports. Setting this to -1 will leverage CPU, >=0 will run the model
            on the associated CUDA device id.
    """

    def __init__(
        self,
        model: Union["PreTrainedModel", "TFPreTrainedModel"],
        tokenizer: PreTrainedTokenizer,
        modelcard: Optional[ModelCard] = None,
        framework: Optional[str] = None,
        args_parser: ArgumentHandler = None,
        device: int = -1,
        topk=5,
        task: str = "",
    ):
        super().__init__(
            model=model,
            tokenizer=tokenizer,
            modelcard=modelcard,
            framework=framework,
            args_parser=args_parser,
            device=device,
            binary_output=True,
            task=task,
        )

        self.topk = topk

    def __call__(self, *args, **kwargs):
        inputs = self._parse_and_tokenize(*args, **kwargs)
        outputs = self._forward(inputs, return_tensors=True)

        results = []
        batch_size = outputs.shape[0] if self.framework == "tf" else outputs.size(0)

        for i in range(batch_size):
            input_ids = inputs["input_ids"][i]
            result = []

            if self.framework == "tf":
                masked_index = tf.where(input_ids == self.tokenizer.mask_token_id).numpy().item()
                logits = outputs[i, masked_index, :]
                probs = tf.nn.softmax(logits)
                topk = tf.math.top_k(probs, k=self.topk)
                values, predictions = topk.values.numpy(), topk.indices.numpy()
            else:
                masked_index = (input_ids == self.tokenizer.mask_token_id).nonzero().item()
                logits = outputs[i, masked_index, :]
                probs = logits.softmax(dim=0)
                values, predictions = probs.topk(self.topk)

            for v, p in zip(values.tolist(), predictions.tolist()):
                tokens = input_ids.numpy()
                tokens[masked_index] = p
                # Filter padding out:
                tokens = tokens[np.where(tokens != self.tokenizer.pad_token_id)]
                result.append({"sequence": self.tokenizer.decode(tokens), "score": v, "token": p})

            # Append
            results += [result]

        if len(results) == 1:
            return results[0]

        return results


class NerPipeline(Pipeline):
    """
    Named Entity Recognition pipeline using ModelForTokenClassification head. See the
    `named entity recognition usage <../usage.html#named-entity-recognition>`__ examples for more information.

    This token recognition pipeline can currently be loaded from the :func:`~transformers.pipeline` method using
    the following task identifier(s):

    - "ner", for predicting the classes of tokens in a sequence: person, organisation, location or miscellaneous.

    The models that this pipeline can use are models that have been fine-tuned on a token classification task.
    See the up-to-date list of available models on
    `huggingface.co/models <https://huggingface.co/models?filter=token-classification>`__.

    Arguments:
        model (:obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`):
            The model that will be used by the pipeline to make predictions. This needs to be a model inheriting from
            :class:`~transformers.PreTrainedModel` for PyTorch and :class:`~transformers.TFPreTrainedModel` for
            TensorFlow.
        tokenizer (:obj:`~transformers.PreTrainedTokenizer`):
            The tokenizer that will be used by the pipeline to encode data for the model. This object inherits from
            :class:`~transformers.PreTrainedTokenizer`.
        modelcard (:obj:`str` or :class:`~transformers.ModelCard`, `optional`, defaults to :obj:`None`):
            Model card attributed to the model for this pipeline.
        framework (:obj:`str`, `optional`, defaults to :obj:`None`):
            The framework to use, either "pt" for PyTorch or "tf" for TensorFlow. The specified framework must be
            installed.

            If no framework is specified, will default to the one currently installed. If no framework is specified
            and both frameworks are installed, will default to PyTorch.
        args_parser (:class:`~transformers.pipelines.ArgumentHandler`, `optional`, defaults to :obj:`None`):
            Reference to the object in charge of parsing supplied pipeline parameters.
        device (:obj:`int`, `optional`, defaults to :obj:`-1`):
            Device ordinal for CPU/GPU supports. Setting this to -1 will leverage CPU, >=0 will run the model
            on the associated CUDA device id.
    """

    default_input_names = "sequences"

    def __init__(
        self,
        model: Union["PreTrainedModel", "TFPreTrainedModel"],
        tokenizer: PreTrainedTokenizer,
        modelcard: Optional[ModelCard] = None,
        framework: Optional[str] = None,
        args_parser: ArgumentHandler = None,
        device: int = -1,
        binary_output: bool = False,
        ignore_labels=["O"],
        task: str = "",
    ):
        super().__init__(
            model=model,
            tokenizer=tokenizer,
            modelcard=modelcard,
            framework=framework,
            args_parser=args_parser,
            device=device,
            binary_output=binary_output,
            task=task,
        )

        self._basic_tokenizer = BasicTokenizer(do_lower_case=False)
        self.ignore_labels = ignore_labels

    def __call__(self, *args, **kwargs):
        inputs = self._args_parser(*args, **kwargs)
        answers = []
        for sentence in inputs:

            # Manage correct placement of the tensors
            with self.device_placement():

                tokens = self.tokenizer.encode_plus(
                    sentence,
                    return_attention_mask=False,
                    return_tensors=self.framework,
                    max_length=self.tokenizer.max_len,
                )

                # Forward
                if self.framework == "tf":
                    entities = self.model(tokens.data)[0][0].numpy()
                    input_ids = tokens["input_ids"].numpy()[0]
                else:
                    with torch.no_grad():
                        tokens = self.ensure_tensor_on_device(**tokens)
                        entities = self.model(**tokens)[0][0].cpu().numpy()
                        input_ids = tokens["input_ids"].cpu().numpy()[0]

            score = np.exp(entities) / np.exp(entities).sum(-1, keepdims=True)
            labels_idx = score.argmax(axis=-1)

            answer = []
            for idx, label_idx in enumerate(labels_idx):
                if self.model.config.id2label[label_idx] not in self.ignore_labels:
                    answer += [
                        {
                            "word": self.tokenizer.convert_ids_to_tokens(int(input_ids[idx])),
                            "score": score[idx][label_idx].item(),
                            "entity": self.model.config.id2label[label_idx],
                        }
                    ]

            # Append
            answers += [answer]
        if len(answers) == 1:
            return answers[0]
        return answers


TokenClassificationPipeline = NerPipeline


class QuestionAnsweringArgumentHandler(ArgumentHandler):
    """
    QuestionAnsweringPipeline requires the user to provide multiple arguments (i.e. question & context) to be mapped
    to internal SquadExample / SquadFeature structures.

    QuestionAnsweringArgumentHandler manages all the possible ways to create a SquadExample from the command-line
    supplied arguments.
    """

    def __call__(self, *args, **kwargs):
        # Position args, handling is sensibly the same as X and data, so forwarding to avoid duplicating
        if args is not None and len(args) > 0:
            if len(args) == 1:
                kwargs["X"] = args[0]
            else:
                kwargs["X"] = list(args)

        # Generic compatibility with sklearn and Keras
        # Batched data
        if "X" in kwargs or "data" in kwargs:
            inputs = kwargs["X"] if "X" in kwargs else kwargs["data"]

            if isinstance(inputs, dict):
                inputs = [inputs]
            else:
                # Copy to avoid overriding arguments
                inputs = [i for i in inputs]

            for i, item in enumerate(inputs):
                if isinstance(item, dict):
                    if any(k not in item for k in ["question", "context"]):
                        raise KeyError("You need to provide a dictionary with keys {question:..., context:...}")

                    inputs[i] = QuestionAnsweringPipeline.create_sample(**item)

                elif not isinstance(item, SquadExample):
                    raise ValueError(
                        "{} argument needs to be of type (list[SquadExample | dict], SquadExample, dict)".format(
                            "X" if "X" in kwargs else "data"
                        )
                    )

        # Tabular input
        elif "question" in kwargs and "context" in kwargs:
            if isinstance(kwargs["question"], str):
                kwargs["question"] = [kwargs["question"]]

            if isinstance(kwargs["context"], str):
                kwargs["context"] = [kwargs["context"]]

            inputs = [
                QuestionAnsweringPipeline.create_sample(q, c) for q, c in zip(kwargs["question"], kwargs["context"])
            ]
        else:
            raise ValueError("Unknown arguments {}".format(kwargs))

        if not isinstance(inputs, list):
            inputs = [inputs]

        return inputs


class QuestionAnsweringPipeline(Pipeline):
    """
    Question Answering pipeline using ModelForQuestionAnswering head. See the
    `question answering usage <../usage.html#question-answering>`__ examples for more information.

    This question answering pipeline can currently be loaded from the :func:`~transformers.pipeline` method using
    the following task identifier(s):

    - "question-answering", for answering questions given a context.

    The models that this pipeline can use are models that have been fine-tuned on a question answering task.
    See the up-to-date list of available models on
    `huggingface.co/models <https://huggingface.co/models?filter=question-answering>`__.

    Arguments:
        model (:obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`):
            The model that will be used by the pipeline to make predictions. This needs to be a model inheriting from
            :class:`~transformers.PreTrainedModel` for PyTorch and :class:`~transformers.TFPreTrainedModel` for
            TensorFlow.
        tokenizer (:obj:`~transformers.PreTrainedTokenizer`):
            The tokenizer that will be used by the pipeline to encode data for the model. This object inherits from
            :class:`~transformers.PreTrainedTokenizer`.
        modelcard (:obj:`str` or :class:`~transformers.ModelCard`, `optional`, defaults to :obj:`None`):
            Model card attributed to the model for this pipeline.
        framework (:obj:`str`, `optional`, defaults to :obj:`None`):
            The framework to use, either "pt" for PyTorch or "tf" for TensorFlow. The specified framework must be
            installed.

            If no framework is specified, will default to the one currently installed. If no framework is specified
            and both frameworks are installed, will default to PyTorch.
        args_parser (:class:`~transformers.pipelines.ArgumentHandler`, `optional`, defaults to :obj:`None`):
            Reference to the object in charge of parsing supplied pipeline parameters.
        device (:obj:`int`, `optional`, defaults to :obj:`-1`):
            Device ordinal for CPU/GPU supports. Setting this to -1 will leverage CPU, >=0 will run the model
            on the associated CUDA device id.
    """

    default_input_names = "question,context"

    def __init__(
        self,
        model: Union["PreTrainedModel", "TFPreTrainedModel"],
        tokenizer: PreTrainedTokenizer,
        modelcard: Optional[ModelCard] = None,
        framework: Optional[str] = None,
        device: int = -1,
        task: str = "",
        **kwargs
    ):
        super().__init__(
            model=model,
            tokenizer=tokenizer,
            modelcard=modelcard,
            framework=framework,
            args_parser=QuestionAnsweringArgumentHandler(),
            device=device,
            task=task,
            **kwargs,
        )

    @staticmethod
    def create_sample(
        question: Union[str, List[str]], context: Union[str, List[str]]
    ) -> Union[SquadExample, List[SquadExample]]:
        """
        QuestionAnsweringPipeline leverages the SquadExample/SquadFeatures internally.
        This helper method encapsulates all the logic for converting question(s) and context(s) to SquadExample(s).
        We currently support extractive question answering.
        Arguments:
             question: (str, List[str]) The question(s) to be asked for the associated context
             context: (str, List[str]) The context in which we will look for the answer.

        Returns:
            SquadExample initialized with the corresponding question and context.
        """
        if isinstance(question, list):
            return [SquadExample(None, q, c, None, None, None) for q, c in zip(question, context)]
        else:
            return SquadExample(None, question, context, None, None, None)

    def __call__(self, *args, **kwargs):
        """
        Args:
            We support multiple use-cases, the following are exclusive:
            X: sequence of SquadExample
            data: sequence of SquadExample
            question: (str, List[str]), batch of question(s) to map along with context
            context: (str, List[str]), batch of context(s) associated with the provided question keyword argument
        Returns:
            dict: {'answer': str, 'score': float, 'start': int, 'end': int}
            answer: the textual answer in the initial context
            score: the score the current answer scored for the model
            start: the character index in the original string corresponding to the beginning of the answer span
            end: the character index in the original string corresponding to the ending of the answer span
        """
        # Set defaults values
        kwargs.setdefault("topk", 1)
        kwargs.setdefault("doc_stride", 128)
        kwargs.setdefault("max_answer_len", 15)
        kwargs.setdefault("max_seq_len", 384)
        kwargs.setdefault("max_question_len", 64)
        kwargs.setdefault("handle_impossible_answer", False)

        if kwargs["topk"] < 1:
            raise ValueError("topk parameter should be >= 1 (got {})".format(kwargs["topk"]))

        if kwargs["max_answer_len"] < 1:
            raise ValueError("max_answer_len parameter should be >= 1 (got {})".format(kwargs["max_answer_len"]))

        # Convert inputs to features
        examples = self._args_parser(*args, **kwargs)
        features_list = [
            squad_convert_examples_to_features(
                [example],
                self.tokenizer,
                kwargs["max_seq_len"],
                kwargs["doc_stride"],
                kwargs["max_question_len"],
                False,
                tqdm_enabled=False,
            )
            for example in examples
        ]
        all_answers = []
        for features, example in zip(features_list, examples):
            model_input_names = self.tokenizer.model_input_names + ["input_ids"]
            fw_args = {k: [feature.__dict__[k] for feature in features] for k in model_input_names}

            # Manage tensor allocation on correct device
            with self.device_placement():
                if self.framework == "tf":
                    fw_args = {k: tf.constant(v) for (k, v) in fw_args.items()}
                    start, end = self.model(fw_args)[:2]
                    start, end = start.numpy(), end.numpy()
                else:
                    with torch.no_grad():
                        # Retrieve the score for the context tokens only (removing question tokens)
                        fw_args = {k: torch.tensor(v, device=self.device) for (k, v) in fw_args.items()}
                        start, end = self.model(**fw_args)[:2]
                        start, end = start.cpu().numpy(), end.cpu().numpy()

            min_null_score = 1000000  # large and positive
            answers = []
            for (feature, start_, end_) in zip(features, start, end):
                # Normalize logits and spans to retrieve the answer
                start_ = np.exp(start_) / np.sum(np.exp(start_))
                end_ = np.exp(end_) / np.sum(np.exp(end_))

                # Mask padding and question
                start_, end_ = (
                    start_ * np.abs(np.array(feature.p_mask) - 1),
                    end_ * np.abs(np.array(feature.p_mask) - 1),
                )

                if kwargs["handle_impossible_answer"]:
                    min_null_score = min(min_null_score, (start_[0] * end_[0]).item())

                start_[0] = end_[0] = 0

                starts, ends, scores = self.decode(start_, end_, kwargs["topk"], kwargs["max_answer_len"])
                char_to_word = np.array(example.char_to_word_offset)

                # Convert the answer (tokens) back to the original text
                answers += [
                    {
                        "score": score.item(),
                        "start": np.where(char_to_word == feature.token_to_orig_map[s])[0][0].item(),
                        "end": np.where(char_to_word == feature.token_to_orig_map[e])[0][-1].item(),
                        "answer": " ".join(
                            example.doc_tokens[feature.token_to_orig_map[s] : feature.token_to_orig_map[e] + 1]
                        ),
                    }
                    for s, e, score in zip(starts, ends, scores)
                ]

            if kwargs["handle_impossible_answer"]:
                answers.append({"score": min_null_score, "start": 0, "end": 0, "answer": ""})

            answers = sorted(answers, key=lambda x: x["score"], reverse=True)[: kwargs["topk"]]
            all_answers += answers

        if len(all_answers) == 1:
            return all_answers[0]
        return all_answers

    def decode(self, start: np.ndarray, end: np.ndarray, topk: int, max_answer_len: int) -> Tuple:
        """
        Take the output of any QuestionAnswering head and will generate probabilities for each span to be
        the actual answer.
        In addition, it filters out some unwanted/impossible cases like answer len being greater than
        max_answer_len or answer end position being before the starting position.
        The method supports outputting the k-best answers through the topk argument.

        Args:
            start: numpy array, holding individual start probabilities for each token
            end: numpy array, holding individual end probabilities for each token
            topk: int, indicates how many possible answer span(s) to extract from the model's output
            max_answer_len: int, maximum size of the answer to extract from the model's output
        """
        # Ensure we have batch axis
        if start.ndim == 1:
            start = start[None]

        if end.ndim == 1:
            end = end[None]

        # Compute the score of each tuple(start, end) to be the real answer
        outer = np.matmul(np.expand_dims(start, -1), np.expand_dims(end, 1))

        # Remove candidates with end < start and end - start > max_answer_len
        candidates = np.tril(np.triu(outer), max_answer_len - 1)

        # Inspired by Chen & al. (https://github.com/facebookresearch/DrQA)
        scores_flat = candidates.flatten()
        if topk == 1:
            idx_sort = [np.argmax(scores_flat)]
        elif len(scores_flat) < topk:
            idx_sort = np.argsort(-scores_flat)
        else:
            idx = np.argpartition(-scores_flat, topk)[0:topk]
            idx_sort = idx[np.argsort(-scores_flat[idx])]

        start, end = np.unravel_index(idx_sort, candidates.shape)[1:]
        return start, end, candidates[0, start, end]

    def span_to_answer(self, text: str, start: int, end: int):
        """
        When decoding from token probabilities, this method maps token indexes to actual words in
        the initial context.

        Args:
            text: str, the actual context to extract the answer from
            start: int, starting answer token index
            end: int, ending answer token index

        Returns:
            dict: {'answer': str, 'start': int, 'end': int}
        """
        words = []
        token_idx = char_start_idx = char_end_idx = chars_idx = 0

        for i, word in enumerate(text.split(" ")):
            token = self.tokenizer.tokenize(word)

            # Append words if they are in the span
            if start <= token_idx <= end:
                if token_idx == start:
                    char_start_idx = chars_idx

                if token_idx == end:
                    char_end_idx = chars_idx + len(word)

                words += [word]

            # Stop if we went over the end of the answer
            if token_idx > end:
                break

            # Append the subtokenization length to the running index
            token_idx += len(token)
            chars_idx += len(word) + 1

        # Join text with spaces
        return {
            "answer": " ".join(words),
            "start": max(0, char_start_idx),
            "end": min(len(text), char_end_idx),
        }
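

# A minimal usage sketch (the default checkpoint is downloaded on first use):
#
#     qa = pipeline("question-answering")
#     qa(question="Where is Hugging Face based?", context="Hugging Face is based in NYC.")
#     # -> {"answer": "NYC", "score": ..., "start": ..., "end": ...}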


class SummarizationPipeline(Pipeline):
    """
    Summarize news articles and other documents

    Usage::

        # use bart in pytorch
        summarizer = pipeline("summarization")
        summarizer("Sam Shleifer writes the best docstring examples in the whole world.", min_length=5, max_length=20)

        # use t5 in tf
        summarizer = pipeline("summarization", model="t5-base", tokenizer="t5-base", framework="tf")
        summarizer("Sam Shleifer writes the best docstring examples in the whole world.", min_length=5, max_length=20)

    The models that this pipeline can use are models that have been fine-tuned on a summarization task,
    which is currently, '`bart-large-cnn`', '`t5-small`', '`t5-base`', '`t5-large`', '`t5-3b`', '`t5-11b`'.
    See the up-to-date list of available models on
    `huggingface.co/models <https://huggingface.co/models?filter=summarization>`__.

    Arguments:
        model (:obj:`str` or :obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`, `optional`, defaults to :obj:`None`):
            The model that will be used by the pipeline to make predictions. This can be :obj:`None`, a string
            checkpoint identifier or an actual pre-trained model inheriting from
            :class:`~transformers.PreTrainedModel` for PyTorch and :class:`~transformers.TFPreTrainedModel` for
            TensorFlow.

            If :obj:`None`, the default of the pipeline will be loaded.
        tokenizer (:obj:`str` or :obj:`~transformers.PreTrainedTokenizer`, `optional`, defaults to :obj:`None`):
            The tokenizer that will be used by the pipeline to encode data for the model. This can be :obj:`None`,
            a string checkpoint identifier or an actual pre-trained tokenizer inheriting from
            :class:`~transformers.PreTrainedTokenizer`.

            If :obj:`None`, the default of the pipeline will be loaded.
        modelcard (:obj:`str` or :class:`~transformers.ModelCard`, `optional`, defaults to :obj:`None`):
            Model card attributed to the model for this pipeline.
        framework (:obj:`str`, `optional`, defaults to :obj:`None`):
            The framework to use, either "pt" for PyTorch or "tf" for TensorFlow. The specified framework must be
            installed.

            If no framework is specified, will default to the one currently installed. If no framework is specified
            and both frameworks are installed, will default to PyTorch.
        args_parser (:class:`~transformers.pipelines.ArgumentHandler`, `optional`, defaults to :obj:`None`):
            Reference to the object in charge of parsing supplied pipeline parameters.
        device (:obj:`int`, `optional`, defaults to :obj:`-1`):
            Device ordinal for CPU/GPU supports. Setting this to -1 will leverage CPU, >=0 will run the model
            on the associated CUDA device id.
    """

    def __call__(
        self, *documents, return_tensors=False, return_text=True, clean_up_tokenization_spaces=False, **generate_kwargs
    ):
        """
        Args:
            *documents: (list of strings) articles to be summarized
            return_text: (bool, default=True) whether to add a decoded "summary_text" to each result
            return_tensors: (bool, default=False) whether to return the raw "summary_token_ids" to each result

            clean_up_tokenization_spaces: (`optional`) bool whether to include extra spaces in the output
            **generate_kwargs: extra kwargs passed to `self.model.generate`_

        Returns:
            list of dicts with 'summary_text' and/or 'summary_token_ids' for each document_to_summarize

        .. _`self.model.generate`:
            https://huggingface.co/transformers/model_doc/bart.html#transformers.BartForConditionalGeneration.generate

        """
        assert return_tensors or return_text, "You must specify return_tensors=True or return_text=True"
        assert len(documents) > 0, "Please provide a document to summarize"

        if self.framework == "tf" and "BartForConditionalGeneration" in self.model.__class__.__name__:
            raise NotImplementedError(
                "Tensorflow is not yet supported for Bart. Please consider using T5, e.g. `t5-base`"
            )

        prefix = self.model.config.prefix if self.model.config.prefix is not None else ""

        if isinstance(documents[0], list):
            assert (
                self.tokenizer.pad_token_id is not None
            ), "Please make sure that the tokenizer has a pad_token_id when using a batch input"

            documents = ([prefix + document for document in documents[0]],)
            pad_to_max_length = True

        elif isinstance(documents[0], str):
            documents = (prefix + documents[0],)
            pad_to_max_length = False
        else:
            raise ValueError(
                " `documents[0]`: {} have the wrong format. They should be either of type `str` or type `list`".format(
                    documents[0]
                )
            )

        with self.device_placement():
            inputs = self._parse_and_tokenize(*documents, pad_to_max_length=pad_to_max_length)

            if self.framework == "pt":
                inputs = self.ensure_tensor_on_device(**inputs)
                input_length = inputs["input_ids"].shape[-1]
            elif self.framework == "tf":
                input_length = tf.shape(inputs["input_ids"])[-1].numpy()

            min_length = generate_kwargs.get("min_length", self.model.config.min_length)
            if input_length < min_length // 2:
                logger.warning(
                    "Your min_length is set to {}, but your input_length is only {}. You might consider decreasing min_length manually, e.g. summarizer('...', min_length=10)".format(
                        min_length, input_length
                    )
                )

            max_length = generate_kwargs.get("max_length", self.model.config.max_length)
            if input_length < max_length:
                logger.warning(
                    "Your max_length is set to {}, but your input_length is only {}. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=50)".format(
                        max_length, input_length
                    )
                )

            summaries = self.model.generate(
                inputs["input_ids"], attention_mask=inputs["attention_mask"], **generate_kwargs,
            )

            results = []
            for summary in summaries:
                record = {}
                if return_tensors:
                    record["summary_token_ids"] = summary
                if return_text:
                    record["summary_text"] = self.tokenizer.decode(
                        summary, skip_special_tokens=True, clean_up_tokenization_spaces=clean_up_tokenization_spaces,
                    )
                results.append(record)
            return results


class TranslationPipeline(Pipeline):
    """
    Translates from one language to another.

    Usage::
        en_fr_translator = pipeline("translation_en_to_fr")
        en_fr_translator("How old are you?")

    The models that this pipeline can use are models that have been fine-tuned on a translation task,
    currently: "t5-small", "t5-base", "t5-large", "t5-3b", "t5-11b"
    See the up-to-date list of available models on
    `huggingface.co/models <https://huggingface.co/models?filter=translation>`__.

    Arguments:
        model (:obj:`str` or :obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`, `optional`, defaults to :obj:`None`):
            The model that will be used by the pipeline to make predictions. This can be :obj:`None`, a string
            checkpoint identifier or an actual pre-trained model inheriting from
            :class:`~transformers.PreTrainedModel` for PyTorch and :class:`~transformers.TFPreTrainedModel` for
            TensorFlow.
            If :obj:`None`, the default of the pipeline will be loaded.
        tokenizer (:obj:`str` or :obj:`~transformers.PreTrainedTokenizer`, `optional`, defaults to :obj:`None`):
            The tokenizer that will be used by the pipeline to encode data for the model. This can be :obj:`None`,
            a string checkpoint identifier or an actual pre-trained tokenizer inheriting from
            :class:`~transformers.PreTrainedTokenizer`.
            If :obj:`None`, the default of the pipeline will be loaded.
        modelcard (:obj:`str` or :class:`~transformers.ModelCard`, `optional`, defaults to :obj:`None`):
            Model card attributed to the model for this pipeline.
        framework (:obj:`str`, `optional`, defaults to :obj:`None`):
            The framework to use, either "pt" for PyTorch or "tf" for TensorFlow. The specified framework must be
            installed.
            If no framework is specified, will default to the one currently installed. If no framework is specified
            and both frameworks are installed, will default to PyTorch.
        args_parser (:class:`~transformers.pipelines.ArgumentHandler`, `optional`, defaults to :obj:`None`):
            Reference to the object in charge of parsing supplied pipeline parameters.
        device (:obj:`int`, `optional`, defaults to :obj:`-1`):
            Device ordinal for CPU/GPU supports. Setting this to -1 will leverage CPU, >=0 will run the model
            on the associated CUDA device id.
    """

    def __call__(
        self, *args, return_tensors=False, return_text=True, clean_up_tokenization_spaces=False, **generate_kwargs
    ):
        """
        Args:
            *args: (list of strings) texts to be translated
            return_text: (bool, default=True) whether to add a decoded "translation_text" to each result
            return_tensors: (bool, default=False) whether to return the raw "translation_token_ids" to each result

            **generate_kwargs: extra kwargs passed to `self.model.generate`_

        Returns:
            list of dicts with 'translation_text' and/or 'translation_token_ids' for each text_to_translate
        .. _`self.model.generate`:
            https://huggingface.co/transformers/model_doc/bart.html#transformers.BartForConditionalGeneration.generate
        """
        assert return_tensors or return_text, "You must specify return_tensors=True or return_text=True"

        prefix = self.model.config.prefix if self.model.config.prefix is not None else ""

        if isinstance(args[0], list):
            assert (
                self.tokenizer.pad_token_id is not None
            ), "Please make sure that the tokenizer has a pad_token_id when using a batch input"
            args = ([prefix + text for text in args[0]],)
            pad_to_max_length = True

        elif isinstance(args[0], str):
            args = (prefix + args[0],)
            pad_to_max_length = False
        else:
            raise ValueError(
                " `documents[0]`: {} have the wrong format. They should be either of type `str` or type `list`".format(
                    args[0]
                )
            )

        with self.device_placement():
            inputs = self._parse_and_tokenize(*args, pad_to_max_length=pad_to_max_length)

            if self.framework == "pt":
                inputs = self.ensure_tensor_on_device(**inputs)
                input_length = inputs["input_ids"].shape[-1]

            elif self.framework == "tf":
                input_length = tf.shape(inputs["input_ids"])[-1].numpy()

            max_length = generate_kwargs.get("max_length", self.model.config.max_length)
            if input_length > 0.9 * max_length:
                logger.warning(
                    "Your input_length: {} is bigger than 0.9 * max_length: {}. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)".format(
                        input_length, max_length
                    )
                )

            translations = self.model.generate(
                inputs["input_ids"], attention_mask=inputs["attention_mask"], **generate_kwargs,
            )
            results = []
            for translation in translations:
                record = {}
                if return_tensors:
                    record["translation_token_ids"] = translation
                if return_text:
                    record["translation_text"] = self.tokenizer.decode(
                        translation,
                        skip_special_tokens=True,
                        clean_up_tokenization_spaces=clean_up_tokenization_spaces,
                    )
                results.append(record)
            return results


# Register all the supported tasks here
SUPPORTED_TASKS = {
    "feature-extraction": {
        "impl": FeatureExtractionPipeline,
        "tf": TFAutoModel if is_tf_available() else None,
        "pt": AutoModel if is_torch_available() else None,
        "default": {
            "model": {"pt": "distilbert-base-cased", "tf": "distilbert-base-cased"},
            "config": None,
            "tokenizer": "distilbert-base-cased",
        },
    },
    "sentiment-analysis": {
        "impl": TextClassificationPipeline,
        "tf": TFAutoModelForSequenceClassification if is_tf_available() else None,
        "pt": AutoModelForSequenceClassification if is_torch_available() else None,
        "default": {
            "model": {
                "pt": "distilbert-base-uncased-finetuned-sst-2-english",
                "tf": "distilbert-base-uncased-finetuned-sst-2-english",
            },
            "config": "distilbert-base-uncased-finetuned-sst-2-english",
            "tokenizer": "distilbert-base-uncased",
        },
    },
    "ner": {
        "impl": NerPipeline,
        "tf": TFAutoModelForTokenClassification if is_tf_available() else None,
        "pt": AutoModelForTokenClassification if is_torch_available() else None,
        "default": {
            "model": {
                "pt": "dbmdz/bert-large-cased-finetuned-conll03-english",
                "tf": "dbmdz/bert-large-cased-finetuned-conll03-english",
            },
            "config": "dbmdz/bert-large-cased-finetuned-conll03-english",
            "tokenizer": "bert-large-cased",
        },
    },
    "question-answering": {
        "impl": QuestionAnsweringPipeline,
        "tf": TFAutoModelForQuestionAnswering if is_tf_available() else None,
        "pt": AutoModelForQuestionAnswering if is_torch_available() else None,
        "default": {
            "model": {
                "pt": "distilbert-base-cased-distilled-squad",
                "tf": "distilbert-base-cased-distilled-squad",
            },
            "config": None,
            "tokenizer": ("distilbert-base-cased", {"use_fast": False}),
        },
    },
    "fill-mask": {
        "impl": FillMaskPipeline,
        "tf": TFAutoModelWithLMHead if is_tf_available() else None,
        "pt": AutoModelWithLMHead if is_torch_available() else None,
        "default": {
            "model": {"pt": "distilroberta-base", "tf": "distilroberta-base"},
            "config": None,
            "tokenizer": ("distilroberta-base", {"use_fast": False}),
        },
    },
    "summarization": {
        "impl": SummarizationPipeline,
        "tf": TFAutoModelWithLMHead if is_tf_available() else None,
        "pt": AutoModelWithLMHead if is_torch_available() else None,
        "default": {
            "model": {"pt": "bart-large-cnn", "tf": None},
            "config": None,
            "tokenizer": ("bart-large-cnn", {"use_fast": False}),
        },
    },
    "translation_en_to_fr": {
        "impl": TranslationPipeline,
        "tf": TFAutoModelWithLMHead if is_tf_available() else None,
        "pt": AutoModelWithLMHead if is_torch_available() else None,
        "default": {
            "model": {"pt": "t5-base", "tf": "t5-base"},
            "config": None,
            "tokenizer": ("t5-base", {"use_fast": False}),
        },
    },
    "translation_en_to_de": {
        "impl": TranslationPipeline,
        "tf": TFAutoModelWithLMHead if is_tf_available() else None,
        "pt": AutoModelWithLMHead if is_torch_available() else None,
        "default": {
            "model": {"pt": "t5-base", "tf": "t5-base"},
            "config": None,
            "tokenizer": ("t5-base", {"use_fast": False}),
        },
    },
    "translation_en_to_ro": {
        "impl": TranslationPipeline,
        "tf": TFAutoModelWithLMHead if is_tf_available() else None,
        "pt": AutoModelWithLMHead if is_torch_available() else None,
        "default": {
            "model": {"pt": "t5-base", "tf": "t5-base"},
            "config": None,
            "tokenizer": ("t5-base", {"use_fast": False}),
        },
    },
    "text-generation": {
        "impl": TextGenerationPipeline,
        "tf": TFAutoModelWithLMHead if is_tf_available() else None,
        "pt": AutoModelWithLMHead if is_torch_available() else None,
        "default": {"model": {"pt": "gpt2", "tf": "gpt2"}, "config": None, "tokenizer": "gpt2"},
    },
}
frt|t
rtj|d f|d	 }n
t|}t|tr$t|}t|	tr:t|	}	t|tri }
|d
krt|drtd|
d< td n(|dkr|drd|
d< td |j|fd|i|
}|f |||	|| d|S )aj  
    Utility factory method to build a pipeline.

    Pipeline are made of:

        - A Tokenizer instance in charge of mapping raw textual input to token
        - A Model instance
        - Some (optional) post processing for enhancing model's output


    Args:
        task (:obj:`str`):
            The task defining which pipeline will be returned. Currently accepted tasks are:

            - "feature-extraction": will return a :class:`~transformers.FeatureExtractionPipeline`
            - "sentiment-analysis": will return a :class:`~transformers.TextClassificationPipeline`
            - "ner": will return a :class:`~transformers.NerPipeline`
            - "question-answering": will return a :class:`~transformers.QuestionAnsweringPipeline`
            - "fill-mask": will return a :class:`~transformers.FillMaskPipeline`
            - "summarization": will return a :class:`~transformers.SummarizationPipeline`
            - "translation_xx_to_yy": will return a :class:`~transformers.TranslationPipeline`
        model (:obj:`str` or :obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`, `optional`, defaults to :obj:`None`):
            The model that will be used by the pipeline to make predictions. This can be :obj:`None`,
            a model identifier or an actual pre-trained model inheriting from
            :class:`~transformers.PreTrainedModel` for PyTorch and :class:`~transformers.TFPreTrainedModel` for
            TensorFlow.

            If :obj:`None`, the default for this pipeline will be loaded.
        config (:obj:`str` or :obj:`~transformers.PretrainedConfig`, `optional`, defaults to :obj:`None`):
            The configuration that will be used by the pipeline to instantiate the model. This can be :obj:`None`,
            a model identifier or an actual pre-trained model configuration inheriting from
            :class:`~transformers.PretrainedConfig`.

            If :obj:`None`, the default for this pipeline will be loaded.
        tokenizer (:obj:`str` or :obj:`~transformers.PreTrainedTokenizer`, `optional`, defaults to :obj:`None`):
            The tokenizer that will be used by the pipeline to encode data for the model. This can be :obj:`None`,
            a model identifier or an actual pre-trained tokenizer inheriting from
            :class:`~transformers.PreTrainedTokenizer`.

            If :obj:`None`, the default for this pipeline will be loaded.
        framework (:obj:`str`, `optional`, defaults to :obj:`None`):
            The framework to use, either "pt" for PyTorch or "tf" for TensorFlow. The specified framework must be
            installed.

            If no framework is specified, will default to the one currently installed. If no framework is specified
            and both frameworks are installed, will default to PyTorch.

    Returns:
        :class:`~transformers.Pipeline`: Class inheriting from :class:`~transformers.Pipeline`, according to
        the task.

    Examples::

        from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer

        # Sentiment analysis pipeline
        pipeline('sentiment-analysis')

        # Question answering pipeline, specifying the checkpoint identifier
        pipeline('question-answering', model='distilbert-base-cased-distilled-squad', tokenizer='bert-base-cased')

        # Named entity recognition pipeline, passing in a specific model and tokenizer
        model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
        tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
        pipeline('ner', model=model, tokenizer=tokenizer)
    z'Unknown task {}, available tasks are {}rO  Nc                    s   g | ]} d  | qS )rP  r0   r  Ztargeted_taskr0   r1   rI     s     zpipeline.<locals>.<listcomp>rN  zImpossible to guess which tokenizer to use. Please provided a PretrainedTokenizer class or a path/identifier to a pretrained tokenizer.r   r   r'   z.h5TZfrom_tfz}Model might be a TensorFlow model (ending with `.h5`) but TensorFlow is not available. Trying to load the model with PyTorch.r&   z.binZfrom_ptz{Model might be a PyTorch model (ending with `.bin`) but PyTorch is not available. Trying to load the model with Tensorflow.r   )r.   r   r   r/   r   )SUPPORTED_TASKSrx   rM   rB   r   r2   r(   r)   r   	Exceptionr[   r   Zfrom_pretrainedr   r   endswithr   rI  )r   r.   r   r   r/   r:   Z
task_classZmodel_classmodelsr   Zmodel_kwargsr0   rQ  r1   pipelineS  sV    K




rV  )N)NNNN)VrR   rQ   loggingrk   rj   r   abcr   r   
contextlibr   	itertoolsr   os.pathr   r   typingr   r	   r
   r   r   r   r   r   r   r   Zconfiguration_autor   r   Zconfiguration_utilsr   rf   r   r   Z
file_utilsr   r   r   r   Ztokenization_autor   Ztokenization_bertr   Ztokenization_utilsr   Z
tensorflowr&   Zmodeling_tf_autor   r   r   r   r    r   Zmodeling_autor!   r"   r#   r$   r%   	getLoggerr+   r   r2   r3   r?   rP   rv   ru   rw   r   r   r   r   r   r   r   ZTokenClassificationPipeliner  r  r@  rL  rR  r)   rV  r0   r0   r0   r1   <module>   s  (	


3J% 4=w+do>   
s





l    
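

if __name__ == "__main__":
    # Illustrative smoke test only, a minimal sketch: it assumes network access
    # to download the default sentiment-analysis checkpoint on first run.
    nlp = pipeline("sentiment-analysis")
    print(nlp("We are very happy to include pipeline into the transformers repository."))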