"""Tensorflow trainer class."""

import logging
import math
import os
from typing import Callable, Dict, Optional

import numpy as np
import tensorflow as tf

from .modeling_tf_utils import TFPreTrainedModel, shape_list
from .optimization_tf import GradientAccumulator, create_optimizer
from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput
from .training_args_tf import TFTrainingArguments


logger = logging.getLogger(__name__)


class TFTrainer:
    model: TFPreTrainedModel
    args: TFTrainingArguments
    train_dataset: Optional[tf.data.Dataset]
    eval_dataset: Optional[tf.data.Dataset]
    compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None
    prediction_loss_only: bool

    def __init__(
        self,
        model: TFPreTrainedModel,
        args: TFTrainingArguments,
        train_dataset: Optional[tf.data.Dataset] = None,
        eval_dataset: Optional[tf.data.Dataset] = None,
        compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
        prediction_loss_only: bool = False,
    ):
        self.model = model
        self.args = args
        self.train_dataset = train_dataset
        self.eval_dataset = eval_dataset
        self.compute_metrics = compute_metrics
        self.prediction_loss_only = prediction_loss_only
        self.gradient_accumulator = GradientAccumulator()

        self._setup_training()

    def _setup_training(self) -> None:
        """
        Setup the different steps to train a model:
          - check if all the data are given
          - create the proper strategy
          - create the features
          - prepare the model settings
        """
        self._prepare_dataset()

        with self.args.strategy.scope():
            self._create_optimizer()
            _ = self.optimizer.iterations
            self._set_loss_and_metric()
            self._create_checkpoint_manager()
            self._create_summary_writer()

    def _set_loss_and_metric(self) -> None:
        """
        Create the training loss and metric with their name. Allowed names are those listed
        in the Tensorflow documentation and those contained in the transformers library.
        """
        try:
            self.loss = tf.keras.losses.get(
                {
                    "class_name": self.args.loss_name,
                    "config": {"from_logits": True, "reduction": tf.keras.losses.Reduction.NONE},
                }
            )
        except TypeError:
            # Some Keras losses do not accept a `from_logits` argument.
            self.loss = tf.keras.losses.get(
                {"class_name": self.args.loss_name, "config": {"reduction": tf.keras.losses.Reduction.NONE}}
            )

    def _create_summary_writer(self) -> None:
        """
        Create a summary writer to be able to read the logs in Tensorboard.
        """
        self.writer = tf.summary.create_file_writer(self.args.logging_dir)

    def _prepare_dataset(self) -> None:
        """
        Prepare the training, validation and test data.
        """
        if self.train_dataset is not None:
            self.num_train_examples = self.train_dataset.reduce(tf.constant(0), lambda x, _: x + 1).numpy()

            if self.args.max_steps > 0:
                self.train_steps = self.args.max_steps
            else:
                self.train_steps = math.ceil(self.num_train_examples / self.args.train_batch_size)

            self.train_dataset = (
                self.train_dataset.cache()
                .shuffle(self.num_train_examples)
                .batch(self.args.train_batch_size)
                .prefetch(tf.data.experimental.AUTOTUNE)
            )

            if self.args.max_steps > 0:
                self.train_dataset = self.train_dataset.repeat(-1)

            self.train_dataset = self.args.strategy.experimental_distribute_dataset(self.train_dataset)
        else:
            self.train_steps = 0

        if self.eval_dataset is not None:
            self.eval_dataset = (
                self.eval_dataset.batch(self.args.eval_batch_size).cache().prefetch(tf.data.experimental.AUTOTUNE)
            )
            self.eval_dataset = self.args.strategy.experimental_distribute_dataset(self.eval_dataset)

    def _create_optimizer(self) -> None:
        """
        Create the training optimizer with its name. Allowed names are those listed
        in the Tensorflow documentation and those contained in the transformers library.
        """
        if self.args.optimizer_name == "adamw":
            self.optimizer = create_optimizer(
                self.args.learning_rate, self.train_steps, self.args.warmup_steps, self.args.end_lr
            )
        else:
            try:
                self.optimizer = tf.keras.optimizers.get(
                    {
                        "class_name": self.args.optimizer_name,
                        "config": {"learning_rate": self.args.learning_rate, "epsilon": self.args.adam_epsilon},
                    }
                )
            except TypeError:
                # Optimizers without an `epsilon` argument (e.g. SGD) fall back to a minimal config.
                self.optimizer = tf.keras.optimizers.get(
                    {"class_name": self.args.optimizer_name, "config": {"learning_rate": self.args.learning_rate}}
                )

        logger.info("Created an/a {} optimizer".format(self.args.optimizer_name))

    def _create_checkpoint_manager(self, max_to_keep: int = 5, load_model: bool = True) -> None:
        """
        Create a checkpoint manager in order to be able to make the training
        fault-tolerant.
        Args:
          max_to_keep: the maximum number of checkpoints to keep in the checkpoint path.
          load_model: if we want to start the training from the latest checkpoint.
        """
        ckpt = tf.train.Checkpoint(optimizer=self.optimizer, model=self.model)

        self.model.ckpt_manager = tf.train.CheckpointManager(ckpt, PREFIX_CHECKPOINT_DIR, max_to_keep=max_to_keep)

        if load_model:
            ckpt.restore(self.model.ckpt_manager.latest_checkpoint).expect_partial()

    @tf.function
    def _evaluate_steps(self, per_replica_features, per_replica_labels):
        """
        One step evaluation across replica.
        Args:
          per_replica_features: the batched features.
          per_replica_labels: the batched labels.
        Returns:
          The loss corresponding to the given batch.
        """
        per_replica_loss, per_replica_logits = self.args.strategy.experimental_run_v2(
            self._run_model, args=(per_replica_features, per_replica_labels, False)
        )

        try:
            reduced_loss = self.args.strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica_loss, axis=0)
        except ValueError:
            reduced_loss = self.args.strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica_loss)

        return reduced_loss, per_replica_logits

    def _prediction_loop(
        self, dataset: tf.data.Dataset, description: str, prediction_loss_only: Optional[bool] = None
    ) -> PredictionOutput:
        logger.info("***** Running %s *****", description)
        logger.info("  Batch size = %d", self.args.eval_batch_size)

        label_ids: np.ndarray = None
        preds: np.ndarray = None
        step: int = 1

        for features, labels in dataset:
            step = tf.convert_to_tensor(step, dtype=tf.int64)
            loss, logits = self._evaluate_steps(features, labels)
            loss = tf.reduce_mean(loss)

            if not prediction_loss_only:
                if self.args.n_gpu > 1:
                    # Gather the per-replica values before concatenating them.
                    for val in logits.values:
                        if preds is None:
                            preds = val.numpy()
                        else:
                            preds = np.append(preds, val.numpy(), axis=0)

                    for val in labels.values:
                        if label_ids is None:
                            label_ids = val.numpy()
                        else:
                            label_ids = np.append(label_ids, val.numpy(), axis=0)
                else:
                    if preds is None:
                        preds = logits.numpy()
                    else:
                        preds = np.append(preds, logits.numpy(), axis=0)

                    if label_ids is None:
                        label_ids = labels.numpy()
                    else:
                        label_ids = np.append(label_ids, labels.numpy(), axis=0)

            step += 1

        if self.compute_metrics is not None and preds is not None and label_ids is not None:
            metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
        else:
            metrics = {}

        metrics["eval_loss"] = loss.numpy()

        for key in list(metrics.keys()):
            if not key.startswith("eval_"):
                metrics[f"eval_{key}"] = metrics.pop(key)

        return PredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics)

    def evaluate(
        self, eval_dataset: Optional[tf.data.Dataset] = None, prediction_loss_only: Optional[bool] = None
    ) -> Dict[str, float]:
        """
        Prediction/evaluation loop, shared by `evaluate()` and `predict()`.
        """
        if eval_dataset is None:
            eval_dataset = self.eval_dataset

        output = self._prediction_loop(eval_dataset, description="Evaluation")

        return output.metrics

    def train(self) -> None:
        """
        Train method to train the model.
        """
        if self.args.debug:
            tf.summary.trace_on(graph=True, profiler=True)

        self.gradient_accumulator.reset()

        iterations = self.optimizer.iterations

        if iterations.numpy() > 0:
            logger.info("Start the training from the last checkpoint")
            start_epoch = (iterations.numpy() // self.train_steps) + 1
        else:
            start_epoch = 1

        tf.summary.experimental.set_step(iterations)

        epochs = 1 if self.args.max_steps > 0 else self.args.num_train_epochs

        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", self.num_train_examples)
        logger.info("  Num Epochs = %d", epochs)
        logger.info("  Total optimization steps = %d", self.train_steps)

        for epoch in range(start_epoch, int(epochs + 1)):
            for training_loss in self._training_steps():
                step = iterations.numpy()

                if self.args.debug:
                    with self.writer.as_default():
                        tf.summary.scalar("loss", training_loss, step=step)

                if step == 1 and self.args.debug:
                    with self.writer.as_default():
                        tf.summary.trace_export(name="training", step=step, profiler_outdir=self.args.logging_dir)

                if self.args.evaluate_during_training and step % self.args.eval_steps == 0:
                    logs = {}
                    results = self.evaluate()

                    for key, value in results.items():
                        eval_key = "eval_{}".format(key)
                        logs[eval_key] = value

                    if callable(self.optimizer.learning_rate):
                        logs["learning_rate"] = self.optimizer.learning_rate(step).numpy()
                    else:
                        logs["learning_rate"] = self.optimizer.learning_rate.numpy()

                    logger.info("Epoch {} Step {} Validation Metrics {}".format(epoch, step, logs))

                    with self.writer.as_default():
                        for k, v in logs.items():
                            tf.summary.scalar(k, v, step=step)

                if step % self.args.logging_steps == 0:
                    logger.info("Epoch {} Step {} Train Loss {:.4f}".format(epoch, step, training_loss.numpy()))

                if step % self.args.save_steps == 0:
                    ckpt_save_path = self.model.ckpt_manager.save()
                    logger.info("Saving checkpoint for step {} at {}".format(step, ckpt_save_path))

                if step % self.train_steps == 0:
                    break

    def _training_steps(self):
        """
        Returns a generator over training steps (i.e. parameters update).
        """
        for i, loss in enumerate(self._accumulate_next_gradients()):
            if i % self.args.gradient_accumulation_steps == 0:
                self._apply_gradients()
                yield loss

    @tf.function
    def _apply_gradients(self):
        """Applies the gradients (cross-replica)."""
        self.args.strategy.experimental_run_v2(self._step)

    def _step(self):
        """Applies gradients and resets accumulation."""
        gradient_scale = self.gradient_accumulator.step * self.args.strategy.num_replicas_in_sync
        gradients = [
            gradient / tf.cast(gradient_scale, gradient.dtype) for gradient in self.gradient_accumulator.gradients
        ]
        gradients = [
            tf.clip_by_value(grad, -self.args.max_grad_norm, self.args.max_grad_norm) for grad in gradients
        ]

        vars = self.model.trainable_variables
        if self.args.mode in ["token-classification", "question-answering"]:
            vars = [var for var in self.model.trainable_variables if "pooler" not in var.name]

        self.optimizer.apply_gradients(list(zip(gradients, vars)))
        self.gradient_accumulator.reset()

    def _accumulate_next_gradients(self):
        """Accumulates the gradients from the next element in dataset."""
        iterator = iter(self.train_dataset)

        @tf.function
        def _accumulate_next():
            per_replica_features, per_replica_labels = next(iterator)

            return self._accumulate_gradients(per_replica_features, per_replica_labels)

        while True:
            try:
                yield _accumulate_next()
            except tf.errors.OutOfRangeError:
                break

    def _accumulate_gradients(self, per_replica_features, per_replica_labels):
        """Accumulates the gradients across all the replica."""
        per_replica_loss = self.args.strategy.experimental_run_v2(
            self._forward, args=(per_replica_features, per_replica_labels)
        )

        try:
            reduced_loss = self.args.strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica_loss, axis=0)
        except ValueError:
            reduced_loss = self.args.strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica_loss)

        return reduced_loss

    def _forward(self, features, labels):
        """Forwards a training example and accumulates the gradients."""
        per_example_loss, _ = self._run_model(features, labels, True)

        vars = self.model.trainable_variables
        if self.args.mode in ["token-classification", "question-answering"]:
            vars = [var for var in self.model.trainable_variables if "pooler" not in var.name]

        gradients = self.optimizer.get_gradients(per_example_loss, vars)

        self.gradient_accumulator(gradients)

        return per_example_loss

    def _run_model(self, features, labels, training):
        """
        Computes the loss of the given features and labels pair.
        Args:
          features: the batched features.
          labels: the batched labels.
          training: run the model in training mode or not
        """
        if self.args.mode == "text-classification" or self.args.mode == "token-classification":
            logits = self.model(features, training=training)[0]
        else:
            logits = self.model(features, training=training)

        if self.args.mode == "token-classification":
            active_loss = tf.reshape(labels, (-1,)) != -1
            reduced_logits = tf.boolean_mask(tf.reshape(logits, (-1, shape_list(logits)[2])), active_loss)
            labels = tf.boolean_mask(tf.reshape(labels, (-1,)), active_loss)
            loss = self.loss(labels, reduced_logits)
        elif self.args.mode == "question-answering":
            start_loss = self.loss(labels["start_position"], logits[0])
            end_loss = self.loss(labels["end_position"], logits[1])
            loss = (start_loss + end_loss) / 2.0
        else:
            loss = self.loss(labels, logits)

        loss += sum(self.model.losses) * (1.0 / self.args.n_gpu)

        return loss, logits

    def predict(self, test_dataset: tf.data.Dataset) -> PredictionOutput:
        """
        Run prediction and return predictions and potential metrics.
        Depending on the dataset and your use case, your test dataset may contain labels.
        In that case, this method will also return metrics, like in evaluate().
        Args:
          test_dataset: something similar to a PT Dataset. This is just
            temporary before to have a framework-agnostic approach for datasets.
        """
        test_dataset = test_dataset.batch(self.args.eval_batch_size)
        test_dataset = self.args.strategy.experimental_distribute_dataset(test_dataset)

        return self._prediction_loop(test_dataset, description="Prediction")

    def save_model(self) -> None:
        """
        Save the pretrained model and create a Tensorflow saved model.
        """
        logger.info("Saving model in {}".format(self.args.output_dir))

        path = os.path.join(self.args.output_dir, "saved_model")

        logger.info("Saving model in {}".format(path))

        os.makedirs(path, exist_ok=True)
        self.model.save_pretrained(self.args.output_dir)
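

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module). It shows how a
# driver script might wire a TF model and tf.data datasets into TFTrainer,
# assuming a transformers ~2.x install with TensorFlow 2. The checkpoint name,
# texts, labels, output_dir, and tokenizer arguments below are hypothetical
# placeholders, and TFTrainingArguments field names vary between versions.
# The guard keeps this from running on a normal import of the module.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from transformers import BertTokenizer, TFBertForSequenceClassification

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    model = TFBertForSequenceClassification.from_pretrained("bert-base-uncased")

    # Tiny toy corpus; TFTrainer expects tf.data.Dataset objects yielding
    # (features, labels) pairs that the wrapped model understands.
    texts = ["a great movie", "a terrible movie"]
    labels = [1, 0]
    encodings = tokenizer.batch_encode_plus(texts, max_length=32, pad_to_max_length=True)
    train_dataset = tf.data.Dataset.from_tensor_slices((dict(encodings), labels))

    training_args = TFTrainingArguments(output_dir="./tf_trainer_output")  # other fields keep their defaults

    trainer = TFTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=train_dataset,  # reused here only to keep the sketch short
    )
    trainer.train()
    trainer.save_model()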