import json
import logging
import os
import random
import re
import shutil
from contextlib import contextmanager
from pathlib import Path
from typing import Callable, Dict, List, Optional, Tuple, Union

import numpy as np
import torch
from torch import nn
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.dataset import Dataset
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data.sampler import RandomSampler
from tqdm.auto import tqdm, trange

from .data.data_collator import DataCollator, DefaultDataCollator
from .modeling_utils import PreTrainedModel
from .optimization import AdamW, get_linear_schedule_with_warmup
from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, TrainOutput
from .training_args import TrainingArguments, is_tpu_available


try:
    from apex import amp

    _has_apex = True
except ImportError:
    _has_apex = False


def is_apex_available():
    return _has_apex


if is_tpu_available():
    import torch_xla.core.xla_model as xm
    import torch_xla.debug.metrics as met
    import torch_xla.distributed.parallel_loader as pl

try:
    from torch.utils.tensorboard import SummaryWriter

    _has_tensorboard = True
except ImportError:
    try:
        from tensorboardX import SummaryWriter

        _has_tensorboard = True
    except ImportError:
        _has_tensorboard = False


def is_tensorboard_available():
    return _has_tensorboard


try:
    import wandb

    wandb.ensure_configured()
    if wandb.api.api_key is None:
        _has_wandb = False
        wandb.termwarn("W&B installed but not logged in.  Run `wandb login` or set the WANDB_API_KEY env variable.")
    else:
        _has_wandb = False if os.getenv("WANDB_DISABLED") else True
except ImportError:
    _has_wandb = False


def is_wandb_available():
    return _has_wandb


logger = logging.getLogger(__name__)


def set_seed(seed: int):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
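

# Illustrative usage sketch (an addition, not part of the original module): `set_seed`
# seeds the Python, NumPy and PyTorch (including all CUDA devices) RNGs in one call,
# which is what makes two otherwise-identical training runs comparable:
#
#     set_seed(42)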


@contextmanager
def torch_distributed_zero_first(local_rank: int):
    """
    Decorator to make all processes in distributed training wait for the first one (locally) to do something.
    """
    if local_rank not in [-1, 0]:
        torch.distributed.barrier()
    yield
    if local_rank == 0:
        torch.distributed.barrier()


def get_tpu_sampler(dataset: Dataset):
    if xm.xrt_world_size() <= 1:
        return RandomSampler(dataset)
    return DistributedSampler(dataset, num_replicas=xm.xrt_world_size(), rank=xm.get_ordinal())


class Trainer:
    """
    Trainer is a simple but feature-complete training and eval loop for PyTorch,
    optimized for Transformers.
    """

    model: PreTrainedModel
    args: TrainingArguments
    data_collator: DataCollator
    train_dataset: Optional[Dataset]
    eval_dataset: Optional[Dataset]
    compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None
    prediction_loss_only: bool
    tb_writer: Optional["SummaryWriter"] = None
    optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = None
    global_step: Optional[int] = None
    epoch: Optional[float] = None

    def __init__(
        self,
        model: PreTrainedModel,
        args: TrainingArguments,
        data_collator: Optional[DataCollator] = None,
        train_dataset: Optional[Dataset] = None,
        eval_dataset: Optional[Dataset] = None,
        compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
        prediction_loss_only=False,
        tb_writer: Optional["SummaryWriter"] = None,
        optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = None,
    ):
        """
        Trainer is a simple but feature-complete training and eval loop for PyTorch,
        optimized for Transformers.

        Args:
            prediction_loss_only:
                (Optional) in evaluation and prediction, only return the loss
        """
        self.model = model
        self.args = args
        if data_collator is not None:
            self.data_collator = data_collator
        else:
            self.data_collator = DefaultDataCollator()
        self.train_dataset = train_dataset
        self.eval_dataset = eval_dataset
        self.compute_metrics = compute_metrics
        self.prediction_loss_only = prediction_loss_only
        self.optimizers = optimizers
        if tb_writer is not None:
            self.tb_writer = tb_writer
        elif is_tensorboard_available() and self.args.local_rank in [-1, 0]:
            self.tb_writer = SummaryWriter(log_dir=self.args.logging_dir)
        if not is_tensorboard_available():
            logger.warning(
                "You are instantiating a Trainer but Tensorboard is not installed. You should consider installing it."
            )
        if is_wandb_available():
            self._setup_wandb()
        else:
            logger.info(
                "You are instantiating a Trainer but W&B is not installed. To use wandb logging, "
                "run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface."
            )
        set_seed(self.args.seed)
        # Create output directory if needed
        if self.is_local_master():
            os.makedirs(self.args.output_dir, exist_ok=True)
        if is_tpu_available():
            # Set an xla_device flag on the model's config
            self.model.config.xla_device = True
ÿzTrainer.__init__)Úreturnc                 C   s„   | j d krtdƒ‚tƒ r$t| j ƒ}n | jjdkr:t| j ƒnt| j ƒ}t| j | jj	|| j
jd}tƒ r€t || jjg¡ | jj¡}|S )Nz+Trainer: training requires a train_dataset.r1   )Ú
batch_sizeÚsamplerÚ
collate_fn)r=   Ú
ValueErrorr   r8   r;   r/   r   r   r   Útrain_batch_sizer<   Úcollate_batchÚplÚParallelLoaderÚdeviceÚper_device_loader)rP   Ztrain_samplerÚdata_loaderr    r    r!   Úget_train_dataloader´   s"    

ÿýüzTrainer.get_train_dataloader)r>   rR   c                 C   s~   |d kr| j d krtdƒ‚|d k	r&|n| j }tƒ r:t|ƒnd }t||| jjd| jjd}tƒ rzt	 
|| jjg¡ | jj¡}|S )Nz-Trainer: evaluation requires an eval_dataset.F©rT   rS   ÚshufflerU   )r>   rV   r   r8   r   r;   Úeval_batch_sizer<   rX   rY   rZ   r[   r\   )rP   r>   rT   r]   r    r    r!   Úget_eval_dataloaderÌ   s    ûzTrainer.get_eval_dataloader)Útest_datasetrR   c                 C   sR   t ƒ rt|ƒnd }t||| jjd| jjd}t ƒ rNt || jj	g¡ 
| jj	¡}|S )NFr_   )r   r8   r   r;   ra   r<   rX   rY   rZ   r[   r\   )rP   rc   rT   r]   r    r    r!   Úget_test_dataloaderá   s    ûzTrainer.get_test_dataloader)Únum_training_stepsrR   c                    sˆ   | j dk	r| j S ddg‰ ‡ fdd„| j ¡ D ƒ| jjdœ‡ fdd„| j ¡ D ƒddœg}t|| jj| jjd	}t|| jj	|d
}||fS )a  
        Setup the optimizer and the learning rate scheduler.

        We provide a reasonable default that works well.
        If you want to use something else, you can pass a tuple in the Trainer's init,
        or override this method in a subclass.
        NZbiaszLayerNorm.weightc                    s*   g | ]"\‰ }t ‡ fd d„ˆD ƒƒs|‘qS )c                 3   s   | ]}|ˆ kV  qd S r   r    ©Ú.0Znd©Únr    r!   Ú	<genexpr>  s     ú4Trainer.get_optimizers.<locals>.<listcomp>.<genexpr>©Úany©rg   Úp©Zno_decayrh   r!   Ú
<listcomp>  s      z*Trainer.get_optimizers.<locals>.<listcomp>)ÚparamsÚweight_decayc                    s*   g | ]"\‰ }t ‡ fd d„ˆD ƒƒr|‘qS )c                 3   s   | ]}|ˆ kV  qd S r   r    rf   rh   r    r!   rj     s     rk   rl   rn   rp   rh   r!   rq     s      ç        )ÚlrZeps)Znum_warmup_stepsre   )
rB   r:   Znamed_parametersr;   rs   r   Úlearning_rateZadam_epsilonr   Zwarmup_steps)rP   re   Zoptimizer_grouped_parametersÚ	optimizerÚ	schedulerr    rp   r!   Úget_optimizersò   s"    

þþû
  ÿzTrainer.get_optimizersc                 C   s\   t  d¡ tjt dd¡t| jƒd t d¡dkrXtj| j	t dd¡t
d| jjƒd	 d
S )a  
        Setup the optional Weights & Biases (`wandb`) integration.

        One can override this method to customize the setup if needed.  Find more information at https://docs.wandb.com/huggingface
        You can also override the following environment variables:

        Environment:
            WANDB_WATCH:
                (Optional, ["gradients", "all", "false"]) "gradients" by default, set to "false" to disable gradient logging
                or "all" to log gradients and parameters
            WANDB_PROJECT:
                (Optional): str - "huggingface" by default, set this to a custom string to store results in a different project
            WANDB_DISABLED:
                (Optional): boolean - defaults to false, set to "true" to disable wandb entirely
        """
        logger.info('Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"')
        wandb.init(project=os.getenv("WANDB_PROJECT", "huggingface"), config=vars(self.args))
        # keep track of model topology and gradients
        if os.getenv("WANDB_WATCH") != "false":
            wandb.watch(
                self.model, log=os.getenv("WANDB_WATCH", "gradients"), log_freq=max(100, self.args.logging_steps)
            )

    def num_examples(self, dataloader: Union[DataLoader, "pl.PerDeviceLoader"]) -> int:
        """
        Helper to get num of examples from a DataLoader, by accessing its Dataset.
        N)r   Ú
isinstancerY   ZPerDeviceLoaderÚAssertionErrorÚlenÚ_loaderr5   )rP   r…   r    r    r!   Únum_examples(  s    zTrainer.num_examples)Ú
model_pathc              	   C   s@  |   ¡ }| jjdkr:| jj}| jjt|ƒ| jj  d }n$tt|ƒ| jj | jj ƒ}| jj}| j|d\}}|dk	rÖtj	 
tj	 |d¡¡rÖtj	 
tj	 |d¡¡rÖ| t tj	 |d¡¡¡ | t tj	 |d¡¡¡ | j}| | jj¡ | jjrtƒ stdƒ‚tj||| jjd\}}| jjdkr6tj |¡}| jjd	krdtjjj|| jjg| jjd
d}| jdk	rš| j d| j  ¡ ¡ | jj!| j "¡ i d t#ƒ r´| jj$t% &¡  }n,| jj$| jj | jjd	krÚtj' (¡ nd }t) *d¡ t) *d|  +|¡¡ t) *d|¡ t) *d| jj,¡ t) *d|¡ t) *d| jj¡ t) *d|¡ d| _-d| _.d}	d}
|dk	rz~t| /d¡d	  /d¡d ƒ| _-| j-t|ƒ| jj  }	| j-t|ƒ| jj  }
t) *d¡ t) *d|	¡ t) *d| j-¡ t) *d|
¡ W n& t0k
r   d| _-t) *d¡ Y nX d}d}| 1¡  t2|	t|ƒd|  3¡  d}|D ]Ü}t4|d|  3¡  d}t5|ƒD ]r\}}|
dkrt|
d8 }
qT||  6|||¡7 }|d | jj dksÀt|ƒ| jjkrš|d t|ƒkrš| jjrætjj7 8t 9|¡| jj:¡ ntjj7 8| ;¡ | jj:¡ t#ƒ rt% <|¡ n| =¡  | =¡  | 1¡  |  j-d7  _-||d t|ƒ  | _.|  3¡ rš| jj>dkrz| j-| jj> dks| j-dkrØ| jj?rØi }|| | jj> |d < | @¡ d |d!< |}|  A|¡ | jjBrØ|  C¡  | jjDdkrš| j-| jjD dkrštE|d"ƒr|jF| jks*tG‚n|| jks*tG‚tj	 | jjHtI› d| j-› ¡}|  J|¡ |  K¡  t L| M¡ tj	 |d¡¡ t L| M¡ tj	 |d¡¡ t) *d#|¡ | jjdkrT| j-| jjkrT| N¡   qÊqT| jjdkrö| j-| jjkrö| N¡   q| jjOr2t% PtQ R¡ ¡ q2| jr$| j N¡  t) *d$¡ tS| j-|| j- ƒS )%a  
        Main training entry point.

        Args:
            model_path:
                (Optional) Local path to model if model to train has been instantiated from a local path
                If present, we will try reloading the optimizer/scheduler states from there.
        """
        train_dataloader = self.get_train_dataloader()
        if self.args.max_steps > 0:
            t_total = self.args.max_steps
            num_train_epochs = (
                self.args.max_steps // (len(train_dataloader) // self.args.gradient_accumulation_steps) + 1
            )
        else:
            t_total = int(len(train_dataloader) // self.args.gradient_accumulation_steps * self.args.num_train_epochs)
            num_train_epochs = self.args.num_train_epochs

        optimizer, scheduler = self.get_optimizers(num_training_steps=t_total)

        # Check if saved optimizer or scheduler states exist
        if (
            model_path is not None
            and os.path.isfile(os.path.join(model_path, "optimizer.pt"))
            and os.path.isfile(os.path.join(model_path, "scheduler.pt"))
        ):
            # Load in optimizer and scheduler states
            optimizer.load_state_dict(torch.load(os.path.join(model_path, "optimizer.pt")))
            scheduler.load_state_dict(torch.load(os.path.join(model_path, "scheduler.pt")))

        model = self.model
        model.to(self.args.device)
        if self.args.fp16:
            if not is_apex_available():
                raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
            model, optimizer = amp.initialize(model, optimizer, opt_level=self.args.fp16_opt_level)

        # Multi-gpu training (should be after apex fp16 initialization)
        if self.args.n_gpu > 1:
            model = torch.nn.DataParallel(model)

        # Distributed training (should be after apex fp16 initialization)
        if self.args.local_rank != -1:
            model = torch.nn.parallel.DistributedDataParallel(
                model,
                device_ids=[self.args.local_rank],
                output_device=self.args.local_rank,
                find_unused_parameters=True,
            )

        if self.tb_writer is not None:
            self.tb_writer.add_text("args", self.args.to_json_string())
            self.tb_writer.add_hparams(self.args.to_sanitized_dict(), metric_dict={})

        # Train!
        if is_tpu_available():
            total_train_batch_size = self.args.train_batch_size * xm.xrt_world_size()
        else:
            total_train_batch_size = (
                self.args.train_batch_size
                * self.args.gradient_accumulation_steps
                * (torch.distributed.get_world_size() if self.args.local_rank != -1 else 1)
            )
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", self.num_examples(train_dataloader))
        logger.info("  Num Epochs = %d", num_train_epochs)
        logger.info("  Instantaneous batch size per device = %d", self.args.per_gpu_train_batch_size)
        logger.info("  Total train batch size (w. parallel, distributed & accumulation) = %d", total_train_batch_size)
        logger.info("  Gradient Accumulation steps = %d", self.args.gradient_accumulation_steps)
        logger.info("  Total optimization steps = %d", t_total)

        self.global_step = 0
        self.epoch = 0
        epochs_trained = 0
        steps_trained_in_current_epoch = 0
        # Check if continuing training from a checkpoint
        if model_path is not None:
            # Set global_step to the global_step of the last saved checkpoint from model path
            try:
                self.global_step = int(model_path.split("-")[-1].split("/")[0])
                epochs_trained = self.global_step // (len(train_dataloader) // self.args.gradient_accumulation_steps)
                steps_trained_in_current_epoch = self.global_step % (
                    len(train_dataloader) // self.args.gradient_accumulation_steps
                )

                logger.info("  Continuing training from checkpoint, will skip to saved global_step")
                logger.info("  Continuing training from epoch %d", epochs_trained)
                logger.info("  Continuing training from global step %d", self.global_step)
                logger.info("  Will skip the first %d steps in the first epoch", steps_trained_in_current_epoch)
            except ValueError:
                self.global_step = 0
                logger.info("  Starting fine-tuning.")

        tr_loss = 0.0
        logging_loss = 0.0
        model.zero_grad()
        train_iterator = trange(
            epochs_trained, int(num_train_epochs), desc="Epoch", disable=not self.is_local_master()
        )
        for epoch in train_iterator:
            epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=not self.is_local_master())
            for step, inputs in enumerate(epoch_iterator):

                # Skip past any already trained steps if resuming training
                if steps_trained_in_current_epoch > 0:
                    steps_trained_in_current_epoch -= 1
                    continue

                tr_loss += self._training_step(model, inputs, optimizer)

                if (step + 1) % self.args.gradient_accumulation_steps == 0 or (
                    # last step in epoch but step is always smaller than gradient_accumulation_steps
                    len(epoch_iterator) <= self.args.gradient_accumulation_steps
                    and (step + 1) == len(epoch_iterator)
                ):
                    if self.args.fp16:
                        torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), self.args.max_grad_norm)
                    else:
                        torch.nn.utils.clip_grad_norm_(model.parameters(), self.args.max_grad_norm)

                    if is_tpu_available():
                        xm.optimizer_step(optimizer)
                    else:
                        optimizer.step()

                    scheduler.step()
                    model.zero_grad()
                    self.global_step += 1
                    self.epoch = epoch + (step + 1) / len(epoch_iterator)

                    if self.is_local_master():
                        if (self.args.logging_steps > 0 and self.global_step % self.args.logging_steps == 0) or (
                            self.global_step == 1 and self.args.logging_first_step
                        ):
                            logs: Dict[str, float] = {}
                            logs["loss"] = (tr_loss - logging_loss) / self.args.logging_steps
                            logs["learning_rate"] = scheduler.get_last_lr()[0]
                            logging_loss = tr_loss

                            self._log(logs)

                            if self.args.evaluate_during_training:
                                self.evaluate()

                        if self.args.save_steps > 0 and self.global_step % self.args.save_steps == 0:
                            # In all cases (even distributed/parallel), self.model is always a reference
                            # to the model we want to save.
                            if hasattr(model, "module"):
                                assert model.module is self.model
                            else:
                                assert model is self.model
                            # Save model checkpoint
                            output_dir = os.path.join(
                                self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.global_step}"
                            )

                            self.save_model(output_dir)
                            self._rotate_checkpoints()
                            torch.save(optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt"))
                            torch.save(scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))
                            logger.info("Saving optimizer and scheduler states to %s", output_dir)

                if self.args.max_steps > 0 and self.global_step > self.args.max_steps:
                    epoch_iterator.close()
                    break
            if self.args.max_steps > 0 and self.global_step > self.args.max_steps:
                train_iterator.close()
                break
            if self.args.tpu_metrics_debug:
                # tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.)
                xm.master_print(met.metrics_report())

        if self.tb_writer:
            self.tb_writer.close()

        logger.info("\n\nTraining completed. Do not forget to share your model on huggingface.co/models =)\n\n")
        return TrainOutput(self.global_step, tr_loss / self.global_step)
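
    # Illustrative sketch (an addition, not from the original source): because `train`
    # parses the trailing "-<global_step>" of `model_path` and reloads optimizer.pt /
    # scheduler.pt when present, resuming looks like this (the path is hypothetical):
    #
    #     trainer.train(model_path="out/checkpoint-500")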
zTrainer.train)r´   ÚiteratorrR   c                 C   sˆ   | j d k	r| j |d< | jr>| ¡ D ]\}}| j ||| j¡ q"tƒ rTtj|| jd t 	|d| ji–¡}|d k	r|| 
|¡ nt|ƒ d S )NrD   )r§   r§   )rD   rA   ÚitemsZ
add_scalarrC   r'   r~   r}   ÚjsonÚdumpsÚwriteÚprint)rP   r´   r¶   ÚkÚvÚoutputr    r    r!   r¨   å  s    

zTrainer._log)r:   r³   rw   rR   c           	   	   C   s¤   |  ¡  | ¡ D ]\}}| | jj¡||< q|f |Ž}|d }| jjdkrR| ¡ }| jjdkrj|| jj }| jjr”t	 
||¡}| ¡  W 5 Q R X n| ¡  | ¡ S )Nr   r   )rµ   r·   r›   r;   r[   rž   Úmeanr”   rœ   r   Z
scale_lossZbackwardÚitem)	rP   r:   r³   rw   r¼   r½   Úoutputsr’   Zscaled_lossr    r    r!   r¤   ó  s    
zTrainer._training_stepc                 C   s"   t ƒ rtjddS | jjdkS d S )NT©Úlocalr0   )r   r6   Úis_master_ordinalr;   r/   r„   r    r    r!   rK   
  s    zTrainer.is_local_masterc                 C   s0   t ƒ rtjddS | jjdkp*tj ¡ dkS dS )zƒ
        This will be True only in one process, even in distributed mode,
        even when training on multiple machines.
        """
        if is_tpu_available():
            return xm.is_master_ordinal(local=False)
        else:
            return self.args.local_rank == -1 or torch.distributed.get_rank() == 0

    def save_model(self, output_dir: Optional[str] = None):
        """
        Saving best-practices: if you use default names for the model,
        you can reload it using from_pretrained().

        Will only save from the master process.
        """
        if self.is_world_master():
            self._save(output_dir)

    def _save(self, output_dir: Optional[str] = None):
        output_dir = output_dir if output_dir is not None else self.args.output_dir
        os.makedirs(output_dir, exist_ok=True)
        logger.info("Saving model checkpoint to %s", output_dir)
        # Save a trained model and configuration using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        if not isinstance(self.model, PreTrainedModel):
            raise ValueError("Trainer.model appears to not be a PreTrainedModel")
        self.model.save_pretrained(output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(self.args, os.path.join(output_dir, "training_args.bin"))

    def _sorted_checkpoints(self, checkpoint_prefix=PREFIX_CHECKPOINT_DIR, use_mtime=False) -> List[str]:
        ordering_and_checkpoint_path = []

        glob_checkpoints = [str(x) for x in Path(self.args.output_dir).glob(f"{checkpoint_prefix}-*")]

        for path in glob_checkpoints:
            if use_mtime:
                ordering_and_checkpoint_path.append((os.path.getmtime(path), path))
            else:
                regex_match = re.match(f".*{checkpoint_prefix}-([0-9]+)", path)
                if regex_match and regex_match.groups():
                    ordering_and_checkpoint_path.append((int(regex_match.groups()[0]), path))

        checkpoints_sorted = sorted(ordering_and_checkpoint_path)
        checkpoints_sorted = [checkpoint[1] for checkpoint in checkpoints_sorted]
        return checkpoints_sorted

    def _rotate_checkpoints(self, use_mtime=False) -> None:
        if self.args.save_total_limit is None or self.args.save_total_limit <= 0:
            return

        # Check if we should delete older checkpoint(s)
        checkpoints_sorted = self._sorted_checkpoints(use_mtime=use_mtime)
        if len(checkpoints_sorted) <= self.args.save_total_limit:
            return

        number_of_checkpoints_to_delete = max(0, len(checkpoints_sorted) - self.args.save_total_limit)
        checkpoints_to_be_deleted = checkpoints_sorted[:number_of_checkpoints_to_delete]
        for checkpoint in checkpoints_to_be_deleted:
            logger.info("Deleting older checkpoint [{}] due to args.save_total_limit".format(checkpoint))
            shutil.rmtree(checkpoint)

    def evaluate(
        self, eval_dataset: Optional[Dataset] = None, prediction_loss_only: Optional[bool] = None
    ) -> Dict[str, float]:
        """
        Run evaluation and return metrics.

        The calling script will be responsible for providing a method to compute metrics, as they are
        task-dependent.

        Args:
            eval_dataset: (Optional) Pass a dataset if you wish to override
            the one on the instance.
        Returns:
            A dict containing:
                - the eval loss
                - the potential metrics computed from the predictions
        """
        eval_dataloader = self.get_eval_dataloader(eval_dataset)

        output = self._prediction_loop(eval_dataloader, description="Evaluation")

        self._log(output.metrics)

        if self.args.tpu_metrics_debug:
            # tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.)
            xm.master_print(met.metrics_report())

        return output.metrics

    def predict(self, test_dataset: Dataset) -> PredictionOutput:
        """
        Run prediction and return predictions and potential metrics.

        Depending on the dataset and your use case, your test dataset may contain labels.
        In that case, this method will also return metrics, like in evaluate().
        """
        test_dataloader = self.get_test_dataloader(test_dataset)
        return self._prediction_loop(test_dataloader, description="Prediction")

    def _prediction_loop(
        self, dataloader: DataLoader, description: str, prediction_loss_only: Optional[bool] = None
    ) -> PredictionOutput:
        """
        Prediction/evaluation loop, shared by `evaluate()` and `predict()`.

        Works both with or without labels.
        """
        prediction_loss_only = prediction_loss_only if prediction_loss_only is not None else self.prediction_loss_only

        # Multi-gpu eval
        if self.args.n_gpu > 1 and not isinstance(self.model, torch.nn.DataParallel):
            model = torch.nn.DataParallel(self.model)
        else:
            model = self.model
        model.to(self.args.device)

        if is_tpu_available():
            batch_size = dataloader._loader._loader.batch_size
        else:
            batch_size = dataloader.batch_size
        logger.info("***** Running %s *****", description)
        logger.info("  Num examples = %d", self.num_examples(dataloader))
        logger.info("  Batch size = %d", batch_size)
        eval_losses: List[float] = []
        preds: np.ndarray = None
        label_ids: np.ndarray = None
        model.eval()

        for inputs in tqdm(dataloader, desc=description):
            has_labels = any(inputs.get(k) is not None for k in ["labels", "masked_lm_labels"])

            for k, v in inputs.items():
                inputs[k] = v.to(self.args.device)

            with torch.no_grad():
                outputs = model(**inputs)
                if has_labels:
                    step_eval_loss, logits = outputs[:2]
                    eval_losses += [step_eval_loss.mean().item()]
                else:
                    logits = outputs[0]

            if not prediction_loss_only:
                if preds is None:
                    preds = logits.detach().cpu().numpy()
                else:
                    preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                if inputs.get("labels") is not None:
                    if label_ids is None:
                        label_ids = inputs["labels"].detach().cpu().numpy()
                    else:
                        label_ids = np.append(label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)

        if is_tpu_available() and preds is not None and label_ids is not None:
            # tpu-comment: Get all predictions and labels from all worker shards of eval dataset
            preds = xm.mesh_reduce("eval_preds", preds, np.concatenate)
            label_ids = xm.mesh_reduce("eval_out_label_ids", label_ids, np.concatenate)

        if self.compute_metrics is not None and preds is not None and label_ids is not None:
            metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
        else:
            metrics = {}
        if len(eval_losses) > 0:
            metrics["eval_loss"] = np.mean(eval_losses)

        # Prefix all keys with eval_
        for key in list(metrics.keys()):
            if not key.startswith("eval_"):
                metrics[f"eval_{key}"] = metrics.pop(key)

        return PredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics)
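

# A minimal end-to-end sketch (an addition, not part of the original module). The model
# name, TrainingArguments values and the train_ds/eval_ds datasets below are
# illustrative assumptions, not fixtures shipped with this file:
#
#     from transformers import AutoModelForSequenceClassification, TrainingArguments
#
#     model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased")
#     args = TrainingArguments(output_dir="out", num_train_epochs=1, logging_steps=50)
#     trainer = Trainer(model=model, args=args, train_dataset=train_ds, eval_dataset=eval_ds)
#     trainer.train()
#     print(trainer.evaluate())  # returns a dict of eval_* metrics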