U
    &c                     @   s>   d Z ddlZddlmZ eeZddiZG dd deZdS )	z Salesforce CTRL configuration     N   )PretrainedConfigctrlz?https://storage.googleapis.com/sf-ctrl/pytorch/ctrl-config.jsonc                       sZ   e Zd ZdZeZdZd fdd	Zedd Z	edd Z
edd Zedd Z  ZS )
CTRLConfiga>  
        This is the configuration class to store the configuration of an :class:`~transformers.CTRLModel`.
        It is used to instantiate an CTRL model according to the specified arguments, defining the model
        architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
        the `ctrl <https://huggingface.co/ctrl>`__ architecture from SalesForce.

        Configuration objects inherit from  :class:`~transformers.PretrainedConfig` and can be used
        to control the model outputs. Read the documentation from  :class:`~transformers.PretrainedConfig`
        for more information.

        Args:
            vocab_size (:obj:`int`, optional, defaults to 246534):
                Vocabulary size of the CTRL model. Defines the different tokens that
                can be represented by the `inputs_ids` passed to the forward method of :class:`~transformers.CTRLModel`.
            n_positions (:obj:`int`, optional, defaults to 256):
                The maximum sequence length that this model might ever be used with.
                Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
            n_ctx (:obj:`int`, optional, defaults to 256):
                Dimensionality of the causal mask (usually same as n_positions).
            n_embd (:obj:`int`, optional, defaults to 1280):
                Dimensionality of the embeddings and hidden states.
            dff (:obj:`int`, optional, defaults to 8192):
                Dimensionality of the inner dimension of the FFN.
            n_layer (:obj:`int`, optional, defaults to 48):
                Number of hidden layers in the Transformer encoder.
            n_head (:obj:`int`, optional, defaults to 16):
                Number of attention heads for each attention layer in the Transformer encoder.
            resid_pdrop (:obj:`float`, optional, defaults to 0.1):
                The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
            embd_pdrop (:obj:`int`, optional, defaults to 0.1):
                The dropout ratio for the embeddings.
            attn_pdrop (:obj:`float`, optional, defaults to 0.1):
                The dropout ratio for the attention.
            layer_norm_epsilon (:obj:`float`, optional, defaults to 1e-6):
                The epsilon to use in the layer normalization layers
            initializer_range (:obj:`float`, optional, defaults to 0.02):
                The standard deviation of the truncated_normal_initializer for initializing all weight matrices.

        Example::

            from transformers import CTRLModel, CTRLConfig

            # Initializing a CTRL configuration
            configuration = CTRLConfig()

            # Initializing a model from the configuration
            model = CTRLModel(configuration)

            # Accessing the model configuration
            configuration = model.config

        Attributes:
            pretrained_config_archive_map (Dict[str, str]):
                A dictionary containing all the available pre-trained checkpoints.
    r              0      皙?ư>{Gz?	cls_indexTNc                    sx   t  jf | || _|| _|| _|| _|| _|| _|| _|| _	|	| _
|
| _|| _|| _|| _|| _|| _|| _|| _d S N)super__init__
vocab_sizen_ctxn_positionsn_embdn_layern_headdffresid_pdrop
embd_pdrop
attn_pdroplayer_norm_epsiloninitializer_rangesummary_typesummary_use_projsummary_activationsummary_first_dropoutsummary_proj_to_labels)selfr   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r#   r"   kwargs	__class__ C/tmp/pip-unpacked-wheel-ymerj3tt/transformers/configuration_ctrl.pyr   X   s$    zCTRLConfig.__init__c                 C   s   | j S r   )r   r$   r(   r(   r)   max_position_embeddings   s    z"CTRLConfig.max_position_embeddingsc                 C   s   | j S r   )r   r*   r(   r(   r)   hidden_size   s    zCTRLConfig.hidden_sizec                 C   s   | j S r   )r   r*   r(   r(   r)   num_attention_heads   s    zCTRLConfig.num_attention_headsc                 C   s   | j S r   )r   r*   r(   r(   r)   num_hidden_layers   s    zCTRLConfig.num_hidden_layers)r   r   r   r   r	   r
   r   r   r   r   r   r   r   TNTr   )__name__
__module____qualname____doc__"CTRL_PRETRAINED_CONFIG_ARCHIVE_MAPZpretrained_config_archive_mapZ
model_typer   propertyr+   r,   r-   r.   __classcell__r(   r(   r&   r)   r      s:   8                 )


r   )	r2   loggingZconfiguration_utilsr   	getLoggerr/   loggerr3   r   r(   r(   r(   r)   <module>   s
   
