""" T5 model configuration """

import logging

from .configuration_utils import PretrainedConfig


logger = logging.getLogger(__name__)

T5_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "t5-small": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-small-config.json",
    "t5-base": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json",
    "t5-large": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-large-config.json",
    "t5-3b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-3b-config.json",
    "t5-11b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-11b-config.json",
}
edd Zedd Z  ZS )T5ConfigaS  
        :class:`~transformers.T5Config` is the configuration class to store the configuration of a
        `T5Model`.


        Arguments:
            vocab_size: Vocabulary size of `inputs_ids` in `T5Model`.
            n_positions: The maximum sequence length that this model might
                ever be used with. Typically set this to something large just
                in case (e.g., 512 or 1024 or 2048).
            d_model: Size of the encoder layers and the pooler layer.
            d_kv: Size of the key, query and value projections per attention
                head.
            d_ff: Size of the intermediate (i.e., feed-forward) layer in each
                Transformer block.
            num_layers: Number of hidden layers in the Transformer encoder.
            num_heads: Number of attention heads for each attention layer in
                the Transformer encoder.
            relative_attention_num_buckets: The number of buckets to use for
                each attention layer's relative position embeddings.
            dropout_rate: The dropout probability for all dropout layers.
            layer_norm_epsilon: The epsilon used by the layer normalization
                layers.
            initializer_factor: A factor for initializing all weight matrices
                (should be kept to 1.0, used for initialization testing).
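
        Example (a minimal usage sketch; it assumes the companion ``T5Model``
        class exported by this library)::

            from transformers import T5Config, T5Model

            # Build a configuration with the default hyperparameters
            # (the defaults match the t5-small architecture)
            configuration = T5Config()

            # The generic aliases defined as properties below map onto the
            # T5-specific attribute names, e.g. hidden_size -> d_model
            assert configuration.hidden_size == configuration.d_model

            # Instantiate a model from the configuration
            model = T5Model(configuration)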
    Zt5}     @                皙?ư>      ?Tr   r   c                    s`   t  jf |||d| || _|| _|| _|| _|| _|| _|| _|| _	|	| _
|
| _|| _d S )N)pad_token_ideos_token_idis_encoder_decoder)super__init__
vocab_sizen_positionsd_modeld_kvd_ff
num_layers	num_headsrelative_attention_num_bucketsdropout_ratelayer_norm_epsiloninitializer_factor)selfr   r   r   r   r   r   r   r   r   r   r   r   r   r   kwargs	__class__ A/tmp/pip-unpacked-wheel-ymerj3tt/transformers/configuration_t5.pyr   A   s$      zT5Config.__init__c                 C   s   | j S N)r   r   r#   r#   r$   max_position_embeddingsb   s    z T5Config.max_position_embeddingsc                 C   s   | j S r%   )r   r&   r#   r#   r$   hidden_sizef   s    zT5Config.hidden_sizec                 C   s   | j S r%   )r   r&   r#   r#   r$   num_attention_headsj   s    zT5Config.num_attention_headsc                 C   s   | j S r%   )r   r&   r#   r#   r$   num_hidden_layersn   s    zT5Config.num_hidden_layers)r   r   r   r   r   r	   r
   r   r   r   r   Tr   r   )__name__
__module____qualname____doc__ T5_PRETRAINED_CONFIG_ARCHIVE_MAPZpretrained_config_archive_mapZ
model_typer   propertyr'   r(   r)   r*   __classcell__r#   r#   r!   r$   r   "   s4                 !


r   )	r.   loggingZconfiguration_utilsr   	getLoggerr+   loggerr/   r   r#   r#   r#   r$   <module>   s   
	