U
    <c                    @   sr  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	m
Z
 d dlmZmZ d dlmZ d dlmZ d dlmZmZmZmZ d dlZd dlZd dlZd dlmZ d dlm  m  m   m!Z! d dl"m  m  m   m#Z$ d dl%m  m  m   m&Z' d dl(m)Z) d dl*m)  m+Z, d dl-m)  m&  m.Z. d dl/m0Z1 d dl/m2Z3 d d	l4m5Z5m6Z6 d d
l7m8Z9m:Z;m<Z=m>Z? d dl@mAZAmBZBmCZCmDZD d dlEmFZFmGZG d dlHmIZI d dlJmKZK d dlLmMZMmNZNmOZOmPZPmQZQmRZRmSZSmTZTmUZUmVZVmWZWmXZXmYZYmZZZm[Z[m\Z\m]Z]m^Z^m_Z_ d dl`maZambZbmcZcmdZdmeZemfZfmgZgmhZhmiZi d dljm  mk  mlZl d dlmmnZn zd dloZodZpW n eqk
r   dZpY nX ejrdkrd dlsZsnd dltZtG dd de)juZvG dd dZwewdZxdex_yewezddZ{ddddidexe{ddd dd!dd"dgggZ|ej}j~ej}jej}jej}jgZej}jej}jej}j~ej}jgZej}jej}jej}j~ej}jgZd#Ze	d$eZG d%d& d&eZeiep d'Zejd( Zed)d*Zd+Zd,d-iZd.d/ Zd0Zd1Zd2Zd3Zd4ZG d5d6 d6eZG d7d8 d8e)juZG d9d: d:e)juZG d;d< d<e)juZG d=d> d>e)juZG d?d@ d@e)juZG dAdB dBe)juZG dCdD dDe)juZG dEdF dFe)juZG dGdH dHe)juZG dIdJ dJe)juZe Ze ZeddKZe)jd dLdMZdNdO ZdPZd+d"dQZdRdS ZdTdU ZdVdW ZedXdY ZedZd[ Zdejdfd\d]Zdejfd^d_Zd`da Zdbdc ZG ddde deeZG dfdg dgeMZG dhdi diZeaej dS )j    N)
namedtupleOrderedDict)contextmanagersuppress)	timedelta)reduce)Union
NamedTupleCallableAny)TEST_MASTER_ADDR)TEST_MASTER_PORT)
GradScalerautocast)post_localSGD_hookpowerSGD_hookdefault_hooksquantization)get_world_size_get_default_groupAllreduceOptionsGroupMember)$_verify_param_shape_across_processes_sync_module_states)DistributedDataParallel)_dump_DDP_relevant_env_vars)MultiProcessTestCase
TEST_SKIPSinit_multigpu_helperinitialize_temp_directoriescleanup_temp_dirsimple_sparse_reduce_testsskip_if_rocmskip_if_small_worldsizeskip_if_odd_worldsizeskip_if_lt_x_gpunccl_skip_if_lt_x_gpuskip_if_no_gpurequire_n_gpus_for_nccl_backendrequires_nccl_versioncaptured_outputwith_nccl_blocking_waitwith_dist_debug_levelsverify_ddp_error_loggedDistTestCases)	instantiate_parametrized_testsIS_MACOS
IS_WINDOWSFILE_SCHEMA	IS_FBCODENO_MULTIPROCESSING_SPAWNparametrizesandcastle_skipsandcastle_skip_if)DistributedSamplerTFwin32c                       s$   e Zd Z fddZdd Z  ZS )NetWithBuffersc                    sF   t    tjdddd| _tjdddd| _| dtdd d S N
   Fbias   buffer   	super__init__nnLinearabregister_buffertorchrandnself	__class__ X/tmp/pip-unpacked-wheel-gikjz4vx/torch/testing/_internal/distributed/distributed_test.pyrD   c   s    
zNetWithBuffers.__init__c                 C   s   | j d | | |S Nr?   )r@   Zadd_rH   rG   rM   xrP   rP   rQ   forwardi   s    zNetWithBuffers.forward__name__
__module____qualname__rD   rU   __classcell__rP   rP   rN   rQ   r:   b   s   r:   c                   @   s   e Zd Zdd Zdd ZdS )Fooc                 C   s
   || _ d S NrT   rS   rP   rP   rQ   rD   n   s    zFoo.__init__c                 C   s:   dd }| j  D ]"\}}|j | }|||s dS qdS )Nc                 S   s    t | tjrt| |S | |kS r\   )
isinstancerJ   Tensorequal)valueotherrP   rP   rQ   eqs   s    zFoo.__eq__.<locals>.eqFT)__dict__items)rM   rb   rc   attrra   Zother_valuerP   rP   rQ   __eq__r   s    

z
Foo.__eq__N)rW   rX   rY   rD   rg   rP   rP   rP   rQ   r[   m   s   r[   r<   r?         nested)key1key2Zkey3ZfoorA   string   rG   rH   r	   c                   @   s"   e Zd ZU ejed< ejed< dS )TestNamedTupleInput_1rG   rH   N)rW   rX   rY   rJ   tensor__annotations__rP   rP   rP   rQ   rp      s   

rp   zno torchvisionBACKENDINIT_METHODenv://i,  test_DistributedDataParallel  c                    s.   t |tjjr| n|j} fdd|D S )Nc                    s   g | ]}|j  r|qS rP   )nameendswith.0eventpostfixrP   rQ   
<listcomp>   s      z'get_profiling_event.<locals>.<listcomp>)r^   rJ   profilerprofileeventsZfunction_events)r~   r   Z
event_listrP   r}   rQ   get_profiling_event   s
    
r   z:Expected to have finished reduction in the prior iterationz:passing the keyword argument `find_unused_parameters=True`z.Since `find_unused_parameters=True` is enabledz:`forward` function outputs participate in calculating losszMset the environment variable TORCH_DISTRIBUTED_DEBUG to either INFO or DETAILc                   @   s^   e Zd ZU eed< ejed< eej	e
f ed< eed< dZeed< dZeed< dZeed	< dS )
DDPUnevenTestInputrx   modelinpsync_intervalFthrow_on_early_terminationNhookstate)rW   rX   rY   strrr   rE   Moduler   rJ   rq   tupleintr   boolr   r
   r   r   rP   rP   rP   rQ   r      s   

r   c                       s$   e Zd Z fddZdd Z  ZS )_FC2c                    s.   t t|   tjdddd| _d| jj_d S )Nr<   2   Tr=   F)rC   r   rD   rE   rF   fcr>   requires_gradrL   rN   rP   rQ   rD      s    z_FC2.__init__c                 C   s   |  |}|S r\   r   rS   rP   rP   rQ   rU      s    
z_FC2.forwardrV   rP   rP   rN   rQ   r      s   r   c                       s$   e Zd Z fddZdd Z  ZS )Netc                    sf   t t|   tjdddd| _t | _tjdddd| _t	 | _
tjtddg dd| _d S )NrA   r<   Fr=   r   ri   r   )rC   r   rD   rE   rF   fc1r   fc2fc3ReLUrelu	ParameterrJ   rq   longZno_grad_paramrL   rN   rP   rQ   rD      s    
 zNet.__init__c                 C   s8   |  | |}|  | |}| |}tj|ddS )Nr?   dim)r   r   r   r   FsoftmaxrS   rP   rP   rQ   rU      s    
zNet.forwardrV   rP   rP   rN   rQ   r      s   
r   c                       s$   e Zd Z fddZdd Z  ZS )LargeNetc                    s6   t t|   tjdddd| _tjdddd| _d S )N  i  Fr=   rw   )rC   r   rD   rE   rF   r   r   rL   rN   rP   rQ   rD      s    zLargeNet.__init__c                 C   s   |  |}| |}|S r\   r   r   rS   rP   rP   rQ   rU     s    

zLargeNet.forwardrV   rP   rP   rN   rQ   r      s   r   c                       s$   e Zd Z fddZdd Z  ZS )Taskc                    s"   t    ttdd| _d S NrA   )rC   rD   rE   r   rJ   onesprL   rN   rP   rQ   rD   
  s    
zTask.__init__c                 C   s
   | j | S r\   )r   rS   rP   rP   rQ   rU     s    zTask.forwardrV   rP   rP   rN   rQ   r   	  s   r   c                       s&   e Zd Zd fdd	Zdd Z  ZS )BatchNormNetTc                    sF   t t|   tjdddd| _tjd|d| _tjdddd| _d S )NrA   (   Fr=   ri   affine)	rC   r   rD   rE   rF   r   BatchNorm1dbnr   )rM   r   rN   rP   rQ   rD     s    zBatchNormNet.__init__c                 C   s@   t | |d}| |}t |d}| |}tj|ddS )N)ri   r<   )r   r   r?   r   )rJ   reshaper   r   r   r   r   rS   rP   rP   rQ   rU     s
    

zBatchNormNet.forward)TrV   rP   rP   rN   rQ   r     s   r   c                       s$   e Zd Z fddZdd Z  ZS )UnusedParamTwoLinLayerNetc                    sD   t    tjdddd| _tjdddd| _tjdddd| _d S )Nr<   Fr=   rn   )rC   rD   rE   rF   rG   rH   crL   rN   rP   rQ   rD   "  s    
z"UnusedParamTwoLinLayerNet.__init__c                 C   s   |  |}| |}||fS r\   ro   rM   rT   rG   rH   rP   rP   rQ   rU   (  s    

z!UnusedParamTwoLinLayerNet.forwardrV   rP   rP   rN   rQ   r   !  s   r   c                       s$   e Zd Z fddZdd Z  ZS )DictOutputModulec                    s   t    t | _d S r\   )rC   rD   r   modulerL   rN   rP   rQ   rD   /  s    
zDictOutputModule.__init__c                 C   s(   |  |}|d |d   }||dS )Nr   r?   )predictionsloss)r   sum)rM   rT   r   r   rP   rP   rQ   rU   3  s
    
zDictOutputModule.forwardrV   rP   rP   rN   rQ   r   .  s   r   c                       s$   e Zd Z fddZdd Z  ZS )TwoLinLayerNetc                    s2   t    tjdddd| _tjdddd| _d S )Nr<   Fr=   r?   )rC   rD   rE   rF   rG   rH   rL   rN   rP   rQ   rD   =  s    
zTwoLinLayerNet.__init__c                 C   s   |  |}| |}||fS r\   ro   r   rP   rP   rQ   rU   B  s    

zTwoLinLayerNet.forwardrV   rP   rP   rN   rQ   r   <  s   r   c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )EmbeddingNetDifferentParamsz{
    A module containing an embedding with different dimension or different # of
    parameters depending on the rank.
    Fc                    sV   t    |s|dkrdnd}tjd|d| _t|d| _|rRtjdddd| _d S )	Nr   rw   r   r<   )Znum_embeddingsembedding_dimr?   Fr=   )rC   rD   rE   Z	Embedding	embeddingrF   linlin2)rM   rankdiff_num_paramsr   rN   rP   rQ   rD   M  s    
z$EmbeddingNetDifferentParams.__init__c                 C   s   |  |}| |S r\   )r   r   rS   rP   rP   rQ   rU   U  s    
z#EmbeddingNetDifferentParams.forward)F)rW   rX   rY   __doc__rD   rU   rZ   rP   rP   rN   rQ   r   H  s   r   c                       s$   e Zd Z fddZdd Z  ZS )ControlFlowToyModelc                    s6   t t|   tjdddd| _tjdddd| _d S Nr<   Fr=   )rC   r   rD   rE   rF   lin1r   rL   rN   rP   rQ   rD   [  s    zControlFlowToyModel.__init__c                 C   sH   t |t jdd|jd}|r4| t| |S t| |S d S )N   r<   device)rJ   r`   r   r   r   r   r   r   rM   rT   Zuse_second_layerrP   rP   rQ   rU   `  s    zControlFlowToyModel.forwardrV   rP   rP   rN   rQ   r   Z  s   r   r   Gz?)momentumc                 C   s&   |  dd }|tkrt| S tS d S )N.r   )splitCUSTOMIZED_TIMEOUTDEFAULT_TIMEOUT)Ztest_id	test_namerP   rP   rQ   get_timeouto  s    r   <   )test_ddp_uneven_inputsZ test_ddp_model_diff_across_ranksc                 C   s   t | krtd|  S dd S )Nz%Test requires backend to be one of %sc                 S   s   | S r\   rP   funcrP   rP   rQ   <lambda>      z!require_backend.<locals>.<lambda>)rs   r6   backendsrP   rP   rQ   require_backend  s    r   c                    s2   dd  t  fdd| D s*td|  S dd S )Nc                 S   sf   | t jjkrt  S | t jjkr(t  S | t jjkr<t  S | t jjkrPt 	 S | t
jd krbdS dS )NpluginTF)distBackendGLOOZis_gloo_availableNCCLZis_nccl_availableMPIZis_mpi_availableUCCZis_ucc_availabler.   backend_featurebackendrP   rP   rQ   check  s    z)require_backends_available.<locals>.checkc                 3   s   | ]} t |V  qd S r\   )r   r   )r{   r   r   rP   rQ   	<genexpr>  s     z-require_backends_available.<locals>.<genexpr>z)Test requires backends to be available %sc                 S   s   | S r\   rP   r   rP   rP   rQ   r     r   z,require_backends_available.<locals>.<lambda>)allr6   r   rP   r   rQ   require_backends_available  s    r   c                 C   s&   t tjd | k rtd|  S dd S )N
WORLD_SIZEzTest requires world size of %dc                 S   s   | S r\   rP   r   rP   rP   rQ   r     r   z$require_world_size.<locals>.<lambda>)r   osenvironr6   
world_sizerP   rP   rQ   require_world_size  s    r   c                  c   s   t jd } t j| d}t|d}zBtjdkrLt|	 tjd d V  nt|	 tj d V  W 5 tjdkrt|	 tj
d nt|	 tj |  X W 5 Q R X d S )NTEMP_DIRlockfilewr9   r?   )r   r   pathjoinopensysplatformmsvcrtZlockingfilenoZLK_UNLCKfcntlZflockZLOCK_UNcloseZLK_RLCKZLOCK_EX)r   r   lfrP   rP   rQ   _lock  s    



r   c               	   c   sh   t  dkr$t \} }t|  nd }|g}t | |d }z
|V  W 5 t  dkrbt| X d S Nr   )r   get_ranktempfilemkstempr   r   broadcast_object_listremove)fdrx   Zobject_listrP   rP   rQ   _rank_temp_file  s    

r  c                 C   sN   |d kr| }|d kr,t j| | | |d|S t j| | | |d||S d S Ndtype)rJ   emptyfill_cuda)sizera   r  	device_idrP   rP   rQ   _build_tensor  s
    r  c                    s2   |d kr| }t j fddt| D |d|S )Nc                    s   g | ]} qS rP   rP   r{   _dim_sizerP   rQ   r     s     z*_build_multidim_tensor.<locals>.<listcomp>)r  r  )rJ   r  ranger	  )r   r  ra   r  rP   r  rQ   _build_multidim_tensor  s    r  c                   C   s   t jjjddS NT)record_shapes)rJ   autogradr   r   rP   rP   rP   rQ   _create_autograd_profiler  s    r  c                   C   s   t jjt jjjgddS NT)
activitiesr  )rJ   r   r   ProfilerActivityCPUrP   rP   rP   rQ   _create_torch_profiler  s
    r  c                   @   s*   e Zd ZdZedd ZedddZdS )	Barrierr   c                 C   sB   d| _ tjtjd d}t|D ]}ttj|| q$d S )Nr   r   barrier)
barrier_idr   r   r   r   listdirunlink)clsbarrier_dirf_namerP   rP   rQ   init  s    zBarrier.initNr<   c                 C   s&  |d krt  }|  jd7  _tjtjd d}tt }tj||}t	 , t
|d}|t| j W 5 Q R X W 5 Q R X t }d}t	 T t|D ]B}	t
tj||	d$}| }
t|
| jkr|d7 }W 5 Q R X qW 5 Q R X ||krq"t | |krtdtd qd S )	Nr?   r   r  r   r   rzbarrier timeout皙?)r   r   r  r   r   r   r   r   getpidr   r   writetimer   readr   RuntimeErrorsleep)r"  wait_fortimeoutr#  pidZbarrier_filef
start_timeZarrivedr$  datarP   rP   rQ   sync  s,    $zBarrier.sync)Nr<   )rW   rX   rY   r  classmethodr%  r4  rP   rP   rP   rQ   r    s
   
r  c                       s\   e Zd Ze fddZ fddZ fddZedd Zed	d
 Z	edd Z
  ZS )TestDistBackendc                    s   t ttjd< t   d S )NMASTER_ADDR)r   r7  r   r   rC   
setUpClass)r"  rN   rP   rQ   r8    s    zTestDistBackend.setUpClassc                    s"   t    t  t  g | _d S r\   )rC   setUpr   r  r%  Zskip_return_code_checksrL   rN   rP   rQ   r9    s    
zTestDistBackend.setUpc                    s   t   t   d S r\   )r    rC   tearDownrL   rN   rP   rQ   r:    s    zTestDistBackend.tearDownc                 C   s   dj t| jdS )Nz{}{file_name})	file_name)formatr2   r;  rL   rP   rP   rQ   init_method  s    zTestDistBackend.init_methodc           
   
   C   s.  ddl m} |d tdkr6tj s6ttd j	 | |}||_
||_tj rtj t|jk rttd|j  j	 z8t|t}t|d}tj|jtt|j|j
|d W nB tk
r }	 z$d	|	jd krttd
 j	  W 5 d }	~	X Y nX |  ||| |  t  td d S )Nr   )_set_ddp_with_replicated_tensorTncclZno_cudaz
multi-gpu-secondsr=  r   r   r   r/  Z	recompileZbackend_unavailable).torch.nn.parallel._replicated_tensor_ddp_utilsr>  rs   rJ   r
  Zis_availabler   exitr   Z	exit_coder   r;  device_countr   r   CUSTOM_PG_TIMEOUTgetdefault_pg_timeoutr   r   init_process_groupr=  r,  args_barrierZrun_testdestroy_process_group)
r"  r   r   r;  piper>  rM   Zpg_timeout_secondsr/  erP   rP   rQ   _run#  s<    

zTestDistBackend._runc                 C   s
   t jd S )Nr   r   r   rL   rP   rP   rQ   r   O  s    zTestDistBackend.world_size)rW   rX   rY   r5  r8  r9  r:  propertyr=  rO  r   rZ   rP   rP   rN   rQ   r6  
  s   


+r6  c                   @   s   e Zd ZG dd dZdS )DistributedTestc                   @   sh1  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zeedkdeed d d!d" Zeeedkdd#d$ Zeedkdd%d& Zd'd( Zeejd) eejd) ed*ed+d,d- Z eejd) eejd) ed*d.d/ Z!eeejd0 kd1e d2ed3ed+d4d5 Z"eeejd0 kd1e d2e#d6d7 Z$eeejd0 kd1e d2ed3ed3d8d9 Z%eeejd0 kd1e d2ed3ed3d:d; Z&eeejd0 kd1e d2ed3ed3d<d= Z'eeejd0 kd1e d2e#d>d? Z(eeejd0 kd1e d2ed3ed3d@dA Z)eeejd0 kd1e d2ed+dBdC Z*eeejd0 kd1e d2ed+dDdE Z+ed+dFdG Z,eeejd0 kd1e d2ed+dHdI Z-eeejd0 kd1e d2ed3ed3dJdK Z.e#eedLkdMe/dNdOdPdQ Z0e#eedLkdMe/dNdOdRdS Z1e#eedLkdMe/dNdOdTdU Z2e#eeedLkdMe/dNdOdVdW Z3eedkdXdYdZ Z4eedkdXd[d\ Z5eedLkdMe/dNdOd]d^ Z6eedLkdMe/dNdOd_d` Z7eedLkdMe/dNdOdadb Z8eedLkdMe/dNdOdcdd Z9e#eedLkdee/dNdOddgdhZ:e#eedLkdee/dNdOdidj Z;e#eedLkdee/dNdOdkdl Z<e#eedLkdee/dNdOee=dmee>p,e?dndodp Z@dqdr ZAeedLkdsdtdu ZBeedLkdvdwdx ZCeedLkdvee=dmee>pe?dndydz ZDd{d| ZEeeejFd} ke d~dd ZGeeejFd} ke d~dd ZHeeejFd} ke d~ee=dee>pe?dndd ZIdd ZJeedLkdvdd ZKeedLkdvdd ZLeedLkdvee=dee>pe?dndd ZMdd ZNeedLkddd ZOeedLkddd ZPeedLkdee=dee>pe?dndd ZQeedLkddd ZRdddZSeedLkddd ZTeedko@edLkde#dd ZUeeedLkddd ZVeedLkddd ZWeedLkde#dd ZXdddZYeedLkdeeejFd ke ddd ZZeedLkdeeejFd ke de#dd Z[eedLkdeeejFd ke ddd Z\eedLkdeeejFd ke ddd Z]eedLkdeeejFd ke ddd Z^eedLkdeeejFd ke dedd Z_eedLkdeeejFd ke dedd Z`eedLkdeeejFd ke dedd ZaeedLkdeeejFd ke dedd ZbeedLkdeeejFd ke ddd ZceedLkdeeejFd ke dddĄ ZdeedLkdeeejFd ke dddƄ ZeeedLkdeeejFd ke dddȄ ZfdddʄZgeedLkdeeejFd ke ddd̄ ZheedLkdeeejFd ke de#dd΄ ZieedLkdσeeejFd ke de#ddф ZjdddԄZkeedLkdՃe#ddׄ Zle#eejd) ddل Zmddfddfdڜdd܄ZnddfeojpdfddބZqeedLkddd ZreedLkddd ZseedkovedLkde#dd ZteedkoedLkde#dd ZueedLkddd ZveedLkddd ZweedkoedLkde#dd ZxeedLkddd ZyeedLkddd ZzeedLkddd Z{eeedLkddd Z|eeedLkddd Z}eeedLkddd Z~eeedLkddd ZeedLkddd ZeedLkddd ZeedLkdd d ZeedLkddd Zdd Zeedkddd Zeedkde#d	d
 Zedd Zedd Zedd Zedd ZeedLkddd ZdddZedhdd Zedhdd Zedhdd Zedhdd Zeedhdd  Zeedhd!d" Zeedhd#d$ Zeedhd%d& Zedhd'd( Zedhd)d* Zedhd+d, Zedhd-d. Zddfeojpfd/d0ZeedLkdeed1kd2d3d4 ZeedLkdeed1kd2d5d6 ZeedLkd7e#d8d9 ZeedLkdeed1kd2d:d; ZeedLkd7e#d<d= ZeedLkdeed1kd2ed>d? ZeedLkdeed1kd2d@dA ZddBdCZeedLkdeed1kd2dDdE ZeedLkdeed1kd2dFdG ZeedLkd7e#dHdI ZeedLkdeed1kd2edJdK ZeedLkdeed1kd2dLdM ZddfeojpfdNdOZeedLkddPdQ ZeedLkdRe#dSdT ZeedLkddUdV ZeedLkdRe#dWdX ZeeedLkddYdZ ZeedLkdd[d\ ZeedLkd]e#d^d_ Zdd`daZeedLkdbe#dcdd ZeedLkdbe#dedf Zdgdh ZeojpfdidjZeeejFdk ke dldmdn ZeeejFdk ke dldodp ZeeeejFdk ke dldqdr ZeeejFdk ke dldsdt ZeeejFdk ke dldudv ZddfeojpfdwdxZddfeojpfdydzZddfeojpfd{d|Zeed}kd~dd ZeedLkde#dd Zeed}kd~dd ZeedLkde#dd Zeed}kd~dd ZeedLkde#dd Zeed}kd~dd ZeedLkde#dd Zeed}kddd ZeedLkdeƐdd Zeed}kddd ZeedLkdeƐdd Zeed}kd~edd ZeedLkde#edd Zeed}kd~edd ZeedLkde#edd Zeed}kdedd ZeedLkdeeƐdd Zeed}kd~dd ZeedLkde#dd Zeed}kd~dd ZeedLkde#dd Zeed}kddd ZeedLkdeƐdd ZՐdddZe#eed}kdeed1kddd Zee#eed}kddd Zee#eed}kddd ZeeejFd ke ddd ZeeeejFd ke ddd ZeeejFd ke dddÄ ZܐdĐdń Zeed}kdƃeedLkdǃe#dȐdɄ Zeojpfdʐd˄Zeed}kdƃeedLkd̃eed1kd̓e#dΐdτ Zeed}kdƃeedLkd̃eed1kd̓e#dАdф ZdҐdӄ ZeedLkdԃe#dՐdք Zeojpfdאd؄ZeedLkdكe#dڐdۄ ZeedLkdكe#dܐd݄ Zdސd߄ Zdd Zdd Zd ddZdd Zd!ddZd"ddZd#ddZeedLkddd ZeedLkddd Zeeejd kd1e ddd ZeedLkddd Zeeejd kd1e deeejd dd Zeeejd  kd1e deeejd dd Zdd ZeedLkpJed1kded+edddgedddged	ddgd
d ZeedLkped1kded+ed	ddgdd ZeedLkped1kded+ed	ddgdd Zd$ddZeeejd  kd1e deeejd dd Zeeejd  kd1e deeejd dd Zeeejd  kd1e deeejd dd Z eeejd  kd1e deedeeejd dd Zd%ddZd&ddZd'd d!Zeed}koedLkoedkd"eed+d#d$ Zeed}koedLkoedkd"eed+d%d& Zeed}koedLkoedkd"eed+d'd( Z	eed}koLedLkoLedkd"eed+d)d* Z
eed}koedLkoedkd"eed+d+d, Zeeejd kd1e de#d-d. Zd(d/d0Zeeejd kd1e de#d1d2 Zd)d3d4Zd5d6 Zd7d8 Zd9d: Zd;d< Zd=d> Zed+eeejd kd1e dd?d@ Zed+eeejd kd1e ddAdB ZdCdD Zed3eeeejd kd1e ddEdF Zed3eeeejd kd1e ddGdH Zed+eeejd kd1e ddIdJ Zeeejd kd1e de#dKdL Zeeejd kd1e de#dMdN Zeeejd kd1e de#dOdP Zeeejd kd1e de#dQdR Zeeejd kd1e de#ed+dSdT Z eeejd kd1e de#dUdV Z!eeejd kd1e de#dWdX Z"dYdZ Z#eedLkdd[d\ Z$eeejd kd1e de#d]d^ Z%eedLkdd_d` Z&e'dadb Z(e)j*dffdcddZ+edLhedLhed+dedf Z,edLhedLhed+dgdh Z-edLhedLheeejd didj Z.edLhedLhed+dkdl Z/eeejd kd1e deeejd dmdn Z0d*dodpZ1eejd) e2eejd ejdq e3drdsdtgdudvdw Z4eejd) e2eejd ejdq e3dtdrdsgdudxdy Z5d+dzd{Z6eejd) e3dtdrdsgdud|d} Z7eejd) e3dtdrdsgdud~d Z8dd Z9ed+eeejd kd1e ddd Z:ed+eeejd kd1e ddd Z;dd Z<eejd) eejd) ed+dd Z=eejd) eejd) ed+ee=dee>$pe?dndd Z>ed+eeejd kd1e ddd Z?dd Z@ed+eeejd kd1e ddd ZAed+eeejd kd1e ddd ZBed+eeejd kd1e ddd ZCed+eeejd kd1e ddd ZDd,ddZEeejd) e2eejd ejdq e3dtgdudd ZFeejd) e2eejd ejdq e3dtgdudd ZGd-ddZHeejd) eejd) ed+dd ZIe3drdsdtgdueejd) eejd) ed+dd ZJeejd) eejd) ed+dd ZKeejd) eejd) ed+dd ZLeejd) eejd) ed+dd ZMe3drdsdtgdueejd) eejd) ed+dd ZNeejd) eejd) ed+dd ZOe3drdsdtgdueejd) eejd) ed+dd ZPedhdd ZQdd ZRdd ZSeejd) eejd) ed+dd ZTeejd) eejd) ed+dd ZUd.ddZVdd ZWeejd) eejd) ed+dd ZXeejd) eejd) ed+dd ZYddÄ ZZeejd) eejd) ed+dĐdń Z[eejd) eejd) ed+dƐdǄ Z\dȐdɄ Z]eeejd kd1e ded+dʐd˄ Z^eeejd kd1e ded+d̐d̈́ Z_eeejd kd1e ded+dΐdτ Z`d/dАdфZaeejd) eejd) ed+dҐdӄ Zbedhedhee>+pBe?dԃdՐdք Zcedhedhdאd؄ Zddِdڄ Zeefeejd) eejd) eeejd dېd܄ Zgefeejd) eejd) eeejd dݐdބ Zhedhedhdߐd Ziedhedheee>,pRe?dԃdd Zjedhedhedd Zkeejd) eejd) e3dsgdued+dd Zleeejd kd1e de3dsgdued+dd Zmdd Zne3drdsdtgdueejd) eejd) ed+dd Zoe3drdsdtgdueejd) eejd) ed+dd Zpeeejd kd1e ded+dd Zqeeejd kd1e ded+dd Zred+eeejd kd1e ddd Zsed+eeejd kd1e ddd Zted+eeejd kd1e ddd Zued+eeejd kd1e ddd Zvdd Zwed+eeejd kd1e ddd Zxed+eeejd kd1e ddd  Zydd Zzed+eeejd kd1e ddd Z{ed+eeejd kd1e ddd Z|ed+eeejd kd1e ddd Z}ed+eeejd kd1e dd	d
 Z~ed+eedLk0oedkddd Zed+eeejd kd1e ddd Zeejd) eejd) ed+dd Zdd Zeeejd  kd1e deed1kdeeejd dd ZdfS (0  zDistributedTest._DistTestBasec                 O   s   t j|| d S r\   )r  r4  )rM   rJ  kwargsrP   rP   rQ   rK  V  s    z&DistributedTest._DistTestBase._barrierc                 K   s:   ddg}t j|f|}t  }||kr0g d |fS |||fS )Nr?   rA   )r   	new_groupr   rM   rS  groupgroup_idr   rP   rP   rQ   _init_group_testY  s    
z.DistributedTest._DistTestBase._init_group_testc                 K   s0   t tdt }tjf |}t }|||fS r   )listr  r   r   rT  r   rU  rP   rP   rQ   _init_full_group_testb  s    z3DistributedTest._DistTestBase._init_full_group_testc                 C   s,   t tdt }tjj}t }|||fS r   )rY  r  r   r   rV  WORLDr   rM   rV  rW  r   rP   rP   rQ   _init_global_testh  s    z/DistributedTest._DistTestBase._init_global_testc                    s   dd |j  D }|j  D ]\}}| |||  qt| }t| }t||D ]\  fddtt D }t	|  fddtt D }	|D ]}
| |
  qt	|	 |	D ]}
| |
 qqZd S )Nc                 S   s   i | ]\}}||qS rP   rP   )r{   kvrP   rP   rQ   
<dictcomp>p  s      zGDistributedTest._DistTestBase._verify_buffers_equal.<locals>.<dictcomp>c                    s   g | ]}t  qS rP   rJ   
empty_liker  )buf1rP   rQ   r   x  s    zGDistributedTest._DistTestBase._verify_buffers_equal.<locals>.<listcomp>c                    s   g | ]}t  qS rP   ra  r  )buf2rP   rQ   r   |  s    )
r   named_buffersassertEqualrY  bufferszipr  r   r   
all_gather)rM   m1m2Zm1_buf_dictrx   bufZ
m1_buffersZ
m2_buffersZgathered_bufsZgathered_bufs_m2rH   rP   )rc  rd  rQ   _verify_buffers_equaln  s$    



z3DistributedTest._DistTestBase._verify_buffers_equalc              	   C   s   t  \}}t  |  }W 5 Q R X dd }dddddg}|D ]}||}| || qBdd	d
g}|D ]}||}| || qjd S )Nc                 S   s    d| | t jkrt j|  ndf S )Nz	env:%s=%sN/ArP  varrP   rP   rQ   format_line  s    zRDistributedTest._DistTestBase.test_dump_DDP_relevant_env_vars.<locals>.format_liner7  MASTER_PORTr   ZNCCL_TOPO_DUMP_FILEZNCCL_ASYNC_ERROR_HANDLINGZxxxZyyyZzzz)r*   r   getvalue
splitlinesZassertInassertNotIn)rM   outr  linesrq  varsrp  linerP   rP   rQ   test_dump_DDP_relevant_env_vars  s(    z=DistributedTest._DistTestBase.test_dump_DDP_relevant_env_varsc              
   C   s   t jt jd d}tt  }t }tt j||d}|	tt
  W 5 Q R X |   t }t |D ]6}tt j||d}|t|  W 5 Q R X qr| t|| |   t
 dkrt |D ]}t t j|| q|   d S )Nr   test_dirr   r&  r   )r   r   r   r   r   r(  r   r   r   r)  r   rK  setr   addr   r+  rf  lenr!  )rM   r{  r0  num_processesr1  Z	all_ranksr$  rP   rP   rQ   test_get_rank  s     z+DistributedTest._DistTestBase.test_get_rankc              	   C   s   t  dkrddg}nddg}t |}t }| t  | t  |kr`| t || n"| t	d t | W 5 Q R X d S )NrA   r?   r   zInvalid process group specified)
r   r   rT  rs   lowerrf  get_backendr   assertRaisesRegexr,  )rM   rV  rW  Zbackend_strrP   rP   rQ   test_get_backend  s    

 z.DistributedTest._DistTestBase.test_get_backendc              	   C   s   t  }| tt  | | tt | | td td W 5 Q R X | td td W 5 Q R X | t td  W 5 Q R X | t td W 5 Q R X | t tdg W 5 Q R X d S )NzInvalid backend: 'undefined'Z	undefinedzInvalid backend: 'xYz'ZxYzrh   gloo)	rs   r  rf  r   r   upperr  
ValueErrorassertRaises)rM   r   rP   rP   rQ   test_Backend_enum_class  s    z5DistributedTest._DistTestBase.test_Backend_enum_classc                 C   s>   t  dkrddg}nddg}t |}|   t | d S )NrA   r?   r   )r   r   rT  rK  rL  rM   rV  rW  rP   rP   rQ   test_destroy_group  s    

z0DistributedTest._DistTestBase.test_destroy_groupc                 C   s   t  dkrddg}nddg}t |}t  |krd| t |d | t |ttdk n$| t |d | t |d d S )NrA   r?   r   r   )r   r   rT  r   rf  
assertTruerY  r  r  rP   rP   rQ   test_get_rank_size_group  s    

z6DistributedTest._DistTestBase.test_get_rank_size_groupc                 C   s$   |   \}}}|   t| d S r\   )rZ  rK  r   rL  rM   r  rW  rP   rP   rQ   test_destroy_full_group  s    z5DistributedTest._DistTestBase.test_destroy_full_groupc                 C   s>   |   \}}}| t|t  | t|t  d S r\   )rZ  rf  r   r   r   r  rP   rP   rQ   test_get_rank_size_full_group  s    z;DistributedTest._DistTestBase.test_get_rank_size_full_groupc              	   C   s   t |}|dkr|t |  }t  t jjkr@| td}n| td}| t 	| W 5 Q R X | j
t |dd n d S )Nr   zfailed to pass monitoredBarrierz (Timed out|closed|timeout) r'  )delta)r   r   r*  total_secondsget_debug_level
DebugLevelDETAILr  	Exceptionr  assertGreaterAlmostEqual)rM   rW  r/  Z
local_rankexpected_timeexception_ctxrP   rP   rQ   _test_barrier_timeout  s     
  z3DistributedTest._DistTestBase._test_barrier_timeoutr  z#Only gloo backend supports timeoutsfile://zRequires file:// initialization method. Both tcp:// and env:// rely on the TCP store for which reinitialization has proven racy.c                 C   s\   t   | jttjd d tdd}t jtt	ttjd | j
|d | t jj| d S )Nr   )r.  r?   r@  rB  )r   rL  rK  r   r   r   r   rI  rt   rs   r   r  rV  r[  )rM   r/  rP   rP   rQ   test_barrier_timeout_global  s    
z9DistributedTest._DistTestBase.test_barrier_timeout_globalc                 C   s4   t dd}| j|d\}}}|d k	r0| || d S )Nrn   r@  r/  )r   rX  r  rM   r/  r  rW  rP   rP   rQ   test_barrier_timeout_group1  s    
z8DistributedTest._DistTestBase.test_barrier_timeout_groupc                 C   s4   t dd}| j|d\}}}|d k	r0| || d S )Nr?   r@  r  )r   rZ  r  r  rP   rP   rQ   test_barrier_timeout_full_group9  s    
z=DistributedTest._DistTestBase.test_barrier_timeout_full_groupc                 C   s  t dkrd}n t dkrd}nt tjd kr.d}||d\}}}|d krJd S |dkrd| t|tj |dkr~| t|tj | ||t	|  | t
|t| t	|}tj| td|d }tj||d |d | tddd|d	 d S )
Nr  r?  r   r   rA   ra   r   srcrV  cpu)rs   r.   r   r  r^   r   ZProcessGroupGlooProcessGroupNCCLrf  r   r~  r   rJ   r
  
set_devicer  	broadcastto)rM   ZinitializerZnew_backendrV  rW  r   Z
group_rankrq   rP   rP   rQ   _test_group_override_backendC  s(    
z:DistributedTest._DistTestBase._test_group_override_backendgpurh   rA   c                 C   s   |  | j d S r\   )r  rX  rL   rP   rP   rQ   test_backend_group`  s    z0DistributedTest._DistTestBase.test_backend_groupc                 C   s   |  | j d S r\   )r  rZ  rL   rP   rP   rQ   test_backend_full_groupg  s    z5DistributedTest._DistTestBase.test_backend_full_groupsubgroupzThe z< backend does not support creating subgroups on CUDA devicesri   c                 C   sf   d}t |\}}t  }| | | | t|||  | t | |D ]}t | qRd S r   )	r   new_subgroupsr   rf  r  r~  assertFalse_rank_not_in_grouprL  )rM   Zsubgroup_sizecur_subgroup	subgroupsr   r  rP   rP   rQ   test_new_subgroupsm  s    z0DistributedTest._DistTestBase.test_new_subgroupsc              	   C   s&   |  td td W 5 Q R X d S )Nz3The arg 'group_size' must not exceed the world sized   r  r  r   r  rL   rP   rP   rQ   0test_new_subgroups_group_size_exceeds_world_size  s
     zNDistributedTest._DistTestBase.test_new_subgroups_group_size_exceeds_world_sizec              	   C   s&   |  td td W 5 Q R X d S )Nz0The world size must be divisible by 'group_size'rh   r  rL   rP   rP   rQ   9test_new_subgroups_world_size_not_divisible_by_group_size  s
     zWDistributedTest._DistTestBase.test_new_subgroups_world_size_not_divisible_by_group_sizec           	      C   s   |   \}}}tt t}|| d }tjddgddggd\}}|dkrX| | nR| | d | t	|d |dks|dkr| ||d  n| ||d  |D ]}t
| qd S )Nr   rA   r?   rh   Zranks_per_subgroup_listri   )r]  r   r   r   rs   new_subgroups_by_enumerationassertIsNonerf  r  r~  rL  )	rM   rV  rW  r   rank_to_GPUr  r  r  r  rP   rP   rQ   !test_new_subgroups_by_enumeration  s    
z?DistributedTest._DistTestBase.test_new_subgroups_by_enumerationc              	   C   sd   |   \}}}tt t}|| d }t|}| td tjddg|dggd W 5 Q R X d S )Nr   RThe new group's rank should be within the the world_size set by init_process_groupr?   rA   r  )r]  r   r   r   rs   r  r,  r  )rM   rV  rW  r   r  r  r   rP   rP   rQ   ?test_new_subgroups_by_enumeration_input_rank_exceeds_world_size  s    z]DistributedTest._DistTestBase.test_new_subgroups_by_enumeration_input_rank_exceeds_world_sizec              	   C   sB   |   \}}}| td tjddgddggd W 5 Q R X d S )Nr  r   r  )r]  r  r,  r   r  r\  rP   rP   rQ   5test_new_subgroups_by_enumeration_negative_input_rank  s    zSDistributedTest._DistTestBase.test_new_subgroups_by_enumeration_negative_input_rankc              	   C   s8   |  td" tjdgddgddggd W 5 Q R X d S )Nz$Rank 1 has appeared in both subgroupr   r?   rA   rh   r  )r  r  r   r  rL   rP   rP   rQ   &test_new_subgroups_overlap_not_allowed  s     zDDistributedTest._DistTestBase.test_new_subgroups_overlap_not_allowedc              	   C   sN  t  }tt  t}|| d }ttjdddddt tj	dddd
|}| D ]}t|j|_q\tj| d d | D ]}| |jt|j q| D ]}t|j| |_qt jddgd	d
}tj| |d t |s | D ]}| |jt|jd  qn*| D ] }| |jt|j|  q(d S )Nr   rh   r?   )Zkernel_sizepaddingrn   Fr=   )paramsprocess_groupr?  ranksr         ?)r   r   r   r   rs   rE   
SequentialConv2dr   rF   r
  
parametersrJ   	ones_liker3  model_averaging_utilsaverage_parametersrf  rT  r  )rM   r   r  r  r   r   
group_ncclrP   rP   rQ   test_average_parameters  s<      z5DistributedTest._DistTestBase.test_average_parametersc                 C   s  t  }t  }t|t}|| d }tjdddd|}t|	 }t
|j| }t
|jtt| | }d}	dD ]}
tj|	|
d}tdd	D ]p}t||_|	 D ]}t
|j|_q||	  ||
kr||
 |	 dkr| |j| q| |j| qq|d S )
Nr   r?   rn   Fr=   ri               periodwarmup_stepsr   )r   r   r   r   rs   rE   rF   r
  nextr  rJ   r  r3  r   r  	averagersPeriodicModelAveragercopydeepcopygradr  rf  rM   r   r   r  r  r   paramrq   Zexpected_avg_tensorr  r  averagerstepr  rP   rP   rQ   test_periodic_model_averager  s(    
z:DistributedTest._DistTestBase.test_periodic_model_averagerc                 C   s  t  }t  }t|t}|| d }tjdddd|}t|	 }t
jj|	 dd}d}d	D ]&}	tj||	d
}
tddD ]}|jD ]6}|d D ](}t
|j| |_t
|j| |_qq|
|j ||	krB||	 | dkrB|jD ]J}|d D ]<}|jd krq | |jt
|jtt| |  q qq|jD ]@}|d D ]0}|jd krhqT| |jt
|j|  qTqHqqdd S )Nr   r?   rn   Fr=   r'  lrri   r  r  r   r  )r   r   r   r   rs   rE   rF   r
  r  r  rJ   optimSGDr  r  r  Zparam_groupsr  r3  r  r  rf  r   )rM   r   r   r  r  r   r  optr  r  r  r  Zparam_groupr  rP   rP   rQ   (test_periodic_model_averager_param_group,  s6    



.
zFDistributedTest._DistTestBase.test_periodic_model_averager_param_groupc                 C   s,  t  }t  }t|t}|| d }tjdddd|}t|	 }t
|j| }t
|jtt| | }d}	dD ]}
tjt|	|fg|
d}tj|	|
d	}tdd
D ]t}t||_|	 D ]}t
|j|_q||	  ||
kr||
 |	 dkr| |j| q| |j| qq|d S )Nr   r?   rn   Fr=   ri   r  period_group_size_dictr  r  r   )r   r   r   r   rs   rE   rF   r
  r  r  rJ   r  r3  r   r  hierarchicalSGDHierarchicalModelAveragerr   r  r  r  r  r  r  rf  r  rP   rP   rQ   Ntest_1_level_hierarchical_model_averager_equivalent_to_periodic_model_averagerP  s0    
 zlDistributedTest._DistTestBase.test_1_level_hierarchical_model_averager_equivalent_to_periodic_model_averagerc                 C   s\  ddl m} t }t }t|t}|| d }tjdddd	|}t
| }t|j| }d}	d}
d}d	}d	}d
}t||
f||f||fg}tj||	d}|j| }|j| }t||  }t||  }||
 |
 ttt|
  }|| | ttt|  }| || | || t|jt| |
 }t|jt| | }t|jtt| | }tddD ]}t||_| D ]}t|j|_q||  |dks|dkr| |j| nj|dks|dkr| |j| nF|dks6|dks6|dks6|dkrF| |j| n| |j| qd S )Nr   )_pg_group_ranksr?   rn   Fr=   r<   rA   ri      r           r  r   r        )"torch.distributed.distributed_c10dr  r   r   r   r   rs   rE   rF   r
  r  r  rJ   r  r3  r   r  r  Zperiod_process_group_dictrY  keysnparrayr  tolistrf  r   r  r  r  r  )rM   r  r   r   r  r  r   r  rq   r  Zsubgroup_size1Zsubgroup_avg_period1Zsubgroup_size2Zsubgroup_avg_period2Zglobal_avg_periodr  r  Z	subgroup1Z	subgroup2Zreal_group_ranks_res1Zreal_group_ranks_res2Zexpect_group_ranks_res1Zexpect_group_ranks_res2Z$expected_avg_tensor_within_subgroup1Z$expected_avg_tensor_within_subgroup2Zexpected_global_avg_tensorr  r  rP   rP   rQ   (test_3_level_hierarchical_model_averagerw  sd    
 

""(zFDistributedTest._DistTestBase.test_3_level_hierarchical_model_averagerr?  zNCCL Batch Send Recv Only)rA      r   zNeed NCCL 2.7+ for send/recvc                 C   s\  |    t }t }t|t}|| d }tj| g }dd t	|D }dd t	|D }dD ]}|t
jd< t	d|D ]}	t|d |d|	}
t|	d d	|d
d	||	< t|	d d	|d
|||	< ttj||	 |	}|| ttj|
|	}|| q~t|}|D ]}|  qt	d|D ]}	| ||	 ||	  q2qf|    d S )Nr   c                 S   s   g | ]}d qS r\   rP   r  rP   rP   rQ   r     s     zMDistributedTest._DistTestBase.test_batch_isend_irecv_nccl.<locals>.<listcomp>c                 S   s   g | ]}d qS r\   rP   r  rP   rP   rQ   r     s     )10NCCL_BLOCKING_WAITr?   r  r   ra   r  )rK  r   r   r   r   rs   rJ   r
  r  r  r   r   r  r	  P2POpirecvappendisendbatch_isend_irecvwaitrf  )rM   r   r   r  r  p2p_op_listZrecv_tensorsexpected_tensorsvalr  send_tensorrecv_opsend_opreqsreqrP   rP   rQ   test_batch_isend_irecv_nccl  s2    



z9DistributedTest._DistTestBase.test_batch_isend_irecv_ncclc                 C   s   |    t }t }t|t}|| d }tj| g }t	||d}t	|d|d}t
tj||d | }t
tj||d | | }	t||	g}
|
D ]}|  q|    d S )Nr   r  r   r  r?   )rK  r   r   r   r   rs   rJ   r
  r  r  r  r  r  r  r  )rM   r   r   r  r  r	  r  recv_tensorr  r  r  r  rP   rP   rQ   )test_batch_isend_irecv_ring_exchange_nccl  s    

zGDistributedTest._DistTestBase.test_batch_isend_irecv_ring_exchange_ncclc                 C   s   |    t  t }tt t}|| d }g }|dkrt|d |d}t|d d|d}ttj	|d}|
| ttj|d}|
| t|}	|	D ]}
|
  q|    d S )Nr   r?   r  r   r  )rK  r   r  r   r   r   rs   r  r  r  r  r  r  r  )rM   r   r  r  r	  r  r  r  r  r  r  rP   rP   rQ    test_batch_isend_irecv_self_nccl  s"    



z>DistributedTest._DistTestBase.test_batch_isend_irecv_self_ncclc                 C   s   |    t  t }tt t}|| d }tj	| g }|dkrPd}n|dkr\d}|dkrt
|d |d}t
|d d|d}ttj||}|| ttj||}	||	 t|}
|
D ]}|  q|    d S )Nr   r?   rA   )r?   rA   r  r   r  )rK  r   r  r   r   r   rs   rJ   r
  r  r  r  r  r  r  r  r  )rM   r   r  r  r	  Zpeerr  r  r  r  r  r  rP   rP   rQ   (test_batch_isend_irecv_no_rank_zero_nccl  s,    



zFDistributedTest._DistTestBase.test_batch_isend_irecv_no_rank_zero_ncclzGLOO Batch Send Recv CPUc           
      C   s   |    t }g }tdt D ]^}||kr0q"t|d }t|d dd}ttj||}|| ttj	||}|| q"t
|}|D ]}	|	  q|    d S )Nr   r?   r   r  rK  r   r   r  r   r  r  r  r  r  r  r  
rM   r   r	  r  r  r  r  r  r  r  rP   rP   rQ   test_batch_isend_irecv_gloo8  s     


z9DistributedTest._DistTestBase.test_batch_isend_irecv_glooc           
      C   s   |    t }g }tdt D ]f}||kr0q"t|d }t|d dd}tjtj|||d}|| tjtj	|||d}|| q"t
|}|D ]}	|	  q|    d S )Nr   r?   r   r  tagr  r  rP   rP   rQ    test_batch_isend_irecv_gloo_tagsO  s     


z>DistributedTest._DistTestBase.test_batch_isend_irecv_gloo_tagsc              	   C   s\   |    t }|dkrX| td. t|d }ttj|d}t|g W 5 Q R X d S )Nr   zTensors must be CUDA and denser?   )	rK  r   r   r  r,  r  r  r  r  )rM   r   r  r  rP   rP   rQ   !test_batch_isend_irecv_tensor_errf  s     z?DistributedTest._DistTestBase.test_batch_isend_irecv_tensor_errc              	   C   sz   |    t }|dkrvtt t}|| d }| td2 t|d |d}t	tj
|d}t|g W 5 Q R X d S )Nr   z^Invalid ``op``r?   r  )rK  r   r   r   r   rs   r  r,  r  r  r  r  )rM   r   r  r  r  r  rP   rP   rQ   test_batch_isend_irecv_op_errt  s    z;DistributedTest._DistTestBase.test_batch_isend_irecv_op_errc              	   C   sB   |    t }|dkr>| td tddg W 5 Q R X d S )Nr   z^Invalid ``p2p_op_list``r?   rA   )rK  r   r   r  r,  r  rM   r   rP   rP   rQ   "test_batch_isend_irecv_op_list_err  s
    z@DistributedTest._DistTestBase.test_batch_isend_irecv_op_list_errc           	   	   C   s   |    t }tt t}|| d }tjddgdd}tjddgdd}|dkr| tdD t	|d }t
tj|d|}t
tj|d|}t||g W 5 Q R X d S )Nr   r?   r  r  r?  z"All ops need to use the same group)rK  r   r   r   r   rs   rT  r  r,  r  r  r  r  )	rM   r   r  r  
group_gloor  r  Zsend_op_glooZsend_op_ncclrP   rP   rQ   (test_batch_isend_irecv_mixed_backend_err  s     zFDistributedTest._DistTestBase.test_batch_isend_irecv_mixed_backend_errzNCCL Send Recv OnlyNc              	   C   sd  t  }t  }t|t}|| d }tj| t|d |d}|d k	rN|nt	 }|}t
d|D ]j}	|	|krt
d|D ]}
|
|krqzt ||
 qzqdt|	d }t|	d d|d}t ||	 | || qd|   W 5 Q R X |d k	r`t  }|tkr`| d| dfD ]L}t||}| | dd	 t
t  D }|D ]}| |j|k qDqd S )
Nr   r?   r  r   r  :send:recvc                 S   s   g | ]}|d  gd gqS r?   rh   rP   r{   r   rP   rP   rQ   r     s    zFDistributedTest._DistTestBase._test_send_recv_nccl.<locals>.<listcomp>)r   r   r   r   rs   rJ   r
  r  r  r   r  sendrecvrf  rK  r  &SEND_RECV_PROFILING_SUPPORTED_BACKENDSr   r  input_shapes)rM   profiler_ctxr   r   r  r  rq   Zprofiler_clsprofr  dstexpected_tensoroutput_tensorr   
event_namer   expected_shapesr|   rP   rP   rQ   _test_send_recv_nccl  sD    
  




z2DistributedTest._DistTestBase._test_send_recv_ncclc                 C   s   |    d S r\   )r1  rL   rP   rP   rQ   test_send_recv_nccl  s    z1DistributedTest._DistTestBase.test_send_recv_ncclc                 C   s   t jjjdd}| | d S r  )rJ   r  r   r   r1  rM   r*  rP   rP   rQ   %test_send_recv_nccl_autograd_profiler  s    zCDistributedTest._DistTestBase.test_send_recv_nccl_autograd_profilerzKineto in fbcode causes hangzYtorch.profiler not enabled for mac/windows: https://github.com/pytorch/pytorch/pull/56124c                 C   s.   t jjt jjjt jjjgdd}| | d S r  )rJ   r   r   r  r  CUDAr1  r3  rP   rP   rQ   "test_send_recv_nccl_torch_profiler  s    	z@DistributedTest._DistTestBase.test_send_recv_nccl_torch_profilerc              	   C   sb  t  }|d }t|}|d k	r$|nt }|}tdt  D ]l}||krvtdt  D ]}||krfqXt || qXq>|d }	t|	}
t|	dd}t || | ||
 q>W 5 Q R X |d k	r^t 	 }|t
kr^| d| dfD ]v}t||}tdd |D }t  d }| || d	d
 tt  D }|D ]"}| |j | |j|k q8qd S )Nr?   r   r   r  r"  r#  c                 s   s   | ]}|j V  qd S r\   countr{   rN  rP   rP   rQ   r   	  s     z@DistributedTest._DistTestBase._test_send_recv.<locals>.<genexpr>c                 S   s   g | ]}|d  gd gqS r$  rP   r%  rP   rP   rQ   r     s    zADistributedTest._DistTestBase._test_send_recv.<locals>.<listcomp>)r   r   r  r   r  r   r&  r'  rf  r  r(  r   r   r  is_asyncr)  )rM   r*  r   Z	send_sizerq   ctxr+  r  r,  Z	recv_sizer-  r.  r   r/  r   event_countexpected_event_countr0  r|   rP   rP   rQ   _test_send_recv  s<    



z-DistributedTest._DistTestBase._test_send_recvz,Nccl send/recv tested by test_send_recv_ncclc                 C   s   | j d d d S Nr*  )r>  rL   rP   rP   rQ   test_send_recv  s    z,DistributedTest._DistTestBase.test_send_recvz,NCCL send/recv tested by test_send_recv_ncclc                 C   s   t  }| j|d d S r?  )r  r>  rM   autograd_profiler_ctxrP   rP   rQ    test_send_recv_autograd_profiler  s    z>DistributedTest._DistTestBase.test_send_recv_autograd_profilerc                 C   s   t  }| j|dS r?  )r  r>  rM   torch_profiler_ctxrP   rP   rQ   test_send_recv_torch_profiler"  s    	z;DistributedTest._DistTestBase.test_send_recv_torch_profilerc              	      sj  t  }d}t||d}t }t }|d k	r0|nt }|}tdt  D ]}	|	|krtdt  D ]}	|	|krrqddD ]n}
t|dd}|
dkrt |}|| n,|
dkrt 	|}|
  | }|| | ||  qvqdqJt ||	 t ||	 qJW 5 Q R X |d k	rft  }|tkr| d| d	fD ]d}t||}| td
d |D dt  d   |D ](}| |j | |j|gd g qvq>tt|t|fd  fddtt  D }t |  g }|D ]}|| 7 }qddlm} |  dd ||D }| t  t| | dt  d  gt   | |    d S )Nr<   r  r   )r'  r  r   r'  r  r"  z:recvAnySourcec                 s   s   | ]}|j V  qd S r\   r7  rz   rP   rP   rQ   r   [  s     zKDistributedTest._DistTestBase._test_send_recv_any_source.<locals>.<genexpr>rA   r?   rh   c                    s   g | ]}t  qS rP   ra  r  Zrecv_ranks_tensorrP   rQ   r   g  s   zLDistributedTest._DistTestBase._test_send_recv_any_source.<locals>.<listcomp>)groupbyc                 S   s   g | ]\}}t t|qS rP   )r~  rY  )r{   keyrV  rP   rP   rQ   r   s  s    )!r   r   r  rY  r   r  r   r'  r  r  r  Z_source_rankr  rc   r   r&  r  r(  r   rf  r   r:  r)  rJ   catrq   ri  r  	itertoolsrI  sortr~  rK  )rM   r*  r   send_recv_sizerq   Z
recv_ranksZirecv_ranksr;  r+  r,  r'  r.  Zsenderworkr   r/  r   r|   Zglobal_recv_ranksZglobal_recv_ranks_listrI  Z	frequencyrP   rH  rQ   _test_send_recv_any_source/  sv    





 

 z8DistributedTest._DistTestBase._test_send_recv_any_sourcezsendrecv anysourcez+ does not support send/recv from any sourcec                 C   s   | j d d d S r?  )rP  rL   rP   rP   rQ   test_send_recv_any_source|  s    z7DistributedTest._DistTestBase.test_send_recv_any_sourcec                 C   s   t  }| j|d d S r?  )r  rP  rB  rP   rP   rQ   +test_send_recv_any_source_autograd_profiler  s    zIDistributedTest._DistTestBase.test_send_recv_any_source_autograd_profilerz!Kineto in fbcode code causes hangc                 C   s   t  }| j|dS r?  )r  rP  rE  rP   rP   rQ   (test_send_recv_any_source_torch_profiler  s    	zFDistributedTest._DistTestBase.test_send_recv_any_source_torch_profilerc              	   C   s`  t  }t  }d}t||d}|d k	r,|nt }|z}td|D ]h}||krtd|D ]>}	|	|krfqXt|dd}
t j|
|	|	d | |
|		  qXqBt j
|||d qBW 5 Q R X |d k	r\t  }|tkr\| d| dfD ]t}t||}tdd	 |D }t  d
 }| || |D ]6}| |j | |j| | |j|gd g q"qd S )Nr<   r  r   r   r  r"  r#  c                 s   s   | ]}|j V  qd S r\   r7  r9  rP   rP   rQ   r     s     zIDistributedTest._DistTestBase._test_send_recv_with_tag.<locals>.<genexpr>r?   rh   )r   r   r   r  r   r  r'  r  rc   r   r&  r  r(  r   r   rf  r:  rx   r)  )rM   r*  r   r   rN  rq   r;  r+  r,  r  r.  r   r/  r   r<  r=  r|   rP   rP   rQ   _test_send_recv_with_tag  s6    


z6DistributedTest._DistTestBase._test_send_recv_with_tagc                 C   s   | j d d d S r?  )rT  rL   rP   rP   rQ   test_send_recv_with_tag  s    z5DistributedTest._DistTestBase.test_send_recv_with_tagc                 C   s   t  }| j|dS r?  )r  rT  rB  rP   rP   rQ   )test_send_recv_with_tag_autograd_profiler  s    zGDistributedTest._DistTestBase.test_send_recv_with_tag_autograd_profilerc                 C   s   t  }| j|dS r?  )r  rT  rE  rP   rP   rQ   &test_send_recv_with_tag_torch_profiler  s    	zDDistributedTest._DistTestBase.test_send_recv_with_tag_torch_profilerc              	   C   s  t  }t  }|d k	r|nt }|t}|dkrfdd td|D }|D ]}|  | |  qHn(t|d}t 	|d | 
|t|d |   W 5 Q R X |d k	rt  }	|	tkr|dkr|	 dn|	 d}
t|
|}td	d
 |D }|dkr
t  d nd}| 
|| dd tdt  D }|D ]R}| |j | 
|j|
 |dkrt| |j| k n| 
|j||  q6d S )Nr   c                 S   s   g | ]}t t|d |qS r<   )r   r  r  )r{   destrP   rP   rQ   r     s   z=DistributedTest._DistTestBase._test_isend.<locals>.<listcomp>r?   r   r<   r"  r#  c                 s   s   | ]}|j V  qd S r\   r7  r9  rP   rP   rQ   r     s     z<DistributedTest._DistTestBase._test_isend.<locals>.<genexpr>c                 S   s   i | ]}||gd  gqS rh   rP   r{   r&  rP   rP   rQ   r`    s     z=DistributedTest._DistTestBase._test_isend.<locals>.<dictcomp>)r   r   r   r   r  r  r  is_completedr  r'  rf  rK  r  r(  r   r   r:  rx   r)  values)rM   r*  r   r   r;  r+  requestsrequestrq   r   Zexpected_event_namer   r<  Zexpected_countr0  r|   rP   rP   rQ   _test_isend  sF    




z)DistributedTest._DistTestBase._test_isendzNccl does not support isendc                 C   s   | j d d d S r?  )r`  rL   rP   rP   rQ   
test_isend  s    z(DistributedTest._DistTestBase.test_isendc                 C   s   t  }| j|d d S r?  )r  r`  rB  rP   rP   rQ   test_isend_autograd_profiler  s    z:DistributedTest._DistTestBase.test_isend_autograd_profilerc                 C   s   t  }| j|d d S r?  )r  r`  rE  rP   rP   rQ   test_isend_torch_profiler  s    z7DistributedTest._DistTestBase.test_isend_torch_profilerzNccl does not support irecvc                    s   t  }t  }|dkrdd td|D   fddtd|D }td|D ]D}||d    | ||d    |  |d  t|d qNnt|d}t 	|d | 
  d S )Nr   c                 S   s   g | ]}t |d qS )r   r  r{   r  rP   rP   rQ   r     s    z<DistributedTest._DistTestBase.test_irecv.<locals>.<listcomp>r?   c                    s    g | ]}t  |d   |qS r?   )r   r  re  r
  rP   rQ   r     s   r<   )r   r   r   r  r  r  r\  rf  r  r&  rK  )rM   r   r   r^  r  rq   rP   rg  rQ   
test_irecv  s     

z(DistributedTest._DistTestBase.test_irecvFc              
   C   s  t jddft jddft jddft jddft jddft jddft jd	dffD ]6\}}}	|	r`|s`qJ|D ]}
t|
d
 ||}|r|	|| d }||
kr|rt
 }d|_|
|_| dd|j|g| n| ddt
j||
| qdt|
d
 d|}|r|	|| d }|r:t
 }d|_|
|_| dd|j|g| n| ddt
j||
| | | |  | || t d qdqJ|   d S )Ng|۽Fg0.+gTr     g     jg  4&kr?   r   
:broadcastr   )rJ   floatdoubleZhalfZint8Zuint8r   r   r  r
  r   ZBroadcastOptionsZ
rootTensorZrootRankcall_dist_opr  rf  r  nemaxrq   rK  )rM   rV  rW  r   r
  r  Zwith_optionsr  ra   Zrequires_cudar  r-  optsrq   rP   rP   rQ   _test_broadcast_helper,  s|    







	
	     z4DistributedTest._DistTestBase._test_broadcast_helperz!Nccl does not support CPU tensorsc                 C   s    |   \}}}| ||| d S r\   )r]  rq  r\  rP   rP   rQ   test_broadcastt  s    z,DistributedTest._DistTestBase.test_broadcastz2Only Gloo and Nccl backend supports CUDA allReducec                 C   sJ   |   \}}}tt t}|| d }tj| | |||d| d S )Nr   T)	r]  r   r   r   rs   rJ   r
  r  rq  rM   rV  rW  r   r  r  rP   rP   rQ   test_broadcast_cuday  s
    z1DistributedTest._DistTestBase.test_broadcast_cudac                 C   s    |   \}}}| ||| d S r\   )rX  rq  r\  rP   rP   rQ   test_broadcast_group  s    z2DistributedTest._DistTestBase.test_broadcast_groupc                 C   s    |   \}}}| ||| d S r\   )rZ  rq  r\  rP   rP   rQ   test_broadcast_full_group  s    z7DistributedTest._DistTestBase.test_broadcast_full_groupz/Only NCCL backend supports high priority streamc                 C   s   |   \}}}tt t}|| d }tj| tt	d }|t
jd< td|t }t|\}}}	t||}tj }
d|
_t|||	|
}| |||d|d d S )Nr   r?   rr  ru   FT)r]  r   r   r   rs   rJ   r
  r  r   rr  r   r   Z
rendezvousr  ZPrefixStorer  OptionsZis_high_priority_streamrq  )rM   rV  r  r   r  r  Znew_portZgen_iteratorstorer  rp  rW  rP   rP   rQ   test_nccl_high_priority_stream  s    

z<DistributedTest._DistTestBase.test_nccl_high_priority_streamc
                 C   s   |D ]v}
t |
d ||
kr |n|}|r<||	| d }| jddtj||
|||jgd ||
kr| |t |
d | q|   d S )Nr?   r   :reduceFtensor_shapes)	r  r	  r
  rm  r   r   shaperf  rK  )rM   rV  rW  r   opmaster_valueworker_valueexpected_valuer
  r  r  rq   rP   rP   rQ   _test_reduce_helper  s&    
z1DistributedTest._DistTestBase._test_reduce_helperr   z does not support reducec                 C   s<   |   \}}}| |||tjjddddt|d    d S NrA   r<   r?   )r]  r  r   ReduceOpSUMr~  r\  rP   rP   rQ   test_reduce_sum  s    z-DistributedTest._DistTestBase.test_reduce_sumzOnly Nccl supports CUDA reducec                 C   sf   |   \}}}tt t}|| d }tj| | |||tj	j
ddddt|d   d|	 d S Nr   rA   r<   r?   T)r]  r   r   r   rs   rJ   r
  r  r  r  r  r~  rs  rP   rP   rQ   test_reduce_sum_cuda  s    z2DistributedTest._DistTestBase.test_reduce_sum_cudac                 C   sF   |   \}}}| |||tjjddtdd dgt|d  d d S )NrA   r<   c                 S   s   | | S r\   rP   rT   yrP   rP   rQ   r     r   zCDistributedTest._DistTestBase.test_reduce_product.<locals>.<lambda>r?   )r]  r  r   r  PRODUCTr   r~  r\  rP   rP   rQ   test_reduce_product  s    z1DistributedTest._DistTestBase.test_reduce_productc              	   C   s,   |   \}}}| |||tjjddd d S Ni  r?   )r]  r  r   r  MINr\  rP   rP   rQ   test_reduce_min  s          z-DistributedTest._DistTestBase.test_reduce_minc              	   C   s,   |   \}}}| |||tjjddd d S Nr   r<   )r]  r  r   r  MAXr\  rP   rP   rQ   test_reduce_max  s          z-DistributedTest._DistTestBase.test_reduce_maxc                 C   s<   |   \}}}| |||tjjddddt|d    d S r  )rX  r  r   r  r  r~  r\  rP   rP   rQ   test_reduce_group_sum	  s    z3DistributedTest._DistTestBase.test_reduce_group_sumc                 C   sF   |   \}}}| |||tjjddtdd dgt|d  d d S )NrA   r<   c                 S   s   | | S r\   rP   r  rP   rP   rQ   r   $  r   zIDistributedTest._DistTestBase.test_reduce_group_product.<locals>.<lambda>r?   )rX  r  r   r  r  r   r~  r\  rP   rP   rQ   test_reduce_group_product  s    z7DistributedTest._DistTestBase.test_reduce_group_productc              	   C   s,   |   \}}}| |||tjjddd d S r  )rX  r  r   r  r  r\  rP   rP   rQ   test_reduce_group_min'  s          z3DistributedTest._DistTestBase.test_reduce_group_minc              	   C   s,   |   \}}}| |||tjjddd d S r  )rX  r  r   r  r  r\  rP   rP   rQ   test_reduce_group_max0  s          z3DistributedTest._DistTestBase.test_reduce_group_maxc                 C   s<   |   \}}}| |||tjjddddt|d    d S r  )rZ  r  r   r  r  r~  r\  rP   rP   rQ   test_reduce_full_group_sum9  s    z8DistributedTest._DistTestBase.test_reduce_full_group_sumc                 C   sF   |   \}}}| |||tjjddtdd dgt|d  d d S )NrA   r<   c                 S   s   | | S r\   rP   r  rP   rP   rQ   r   R  r   zNDistributedTest._DistTestBase.test_reduce_full_group_product.<locals>.<lambda>r?   )rZ  r  r   r  r  r   r~  r\  rP   rP   rQ   test_reduce_full_group_productG  s    z<DistributedTest._DistTestBase.test_reduce_full_group_productc              	   C   s,   |   \}}}| |||tjjddd d S r  )rZ  r  r   r  r  r\  rP   rP   rQ   test_reduce_full_group_minU  s          z8DistributedTest._DistTestBase.test_reduce_full_group_minc              	   C   s,   |   \}}}| |||tjjddd d S r  )rZ  r  r   r  r  r\  rP   rP   rQ   test_reduce_full_group_max]  s          z8DistributedTest._DistTestBase.test_reduce_full_group_maxc
                    s   |D ]fddt dD |rPt dD ]}
|
 |	 d |
< q0| jddtjd   fddd jgd		 krD ]}| |td
 | qq|   d S )Nc                    s*   g | ]"}t d  kr  nqS rf  )r  r	  r{   i)r  r   r  r  rP   rQ   r   s  s   zKDistributedTest._DistTestBase._test_reduce_twice_helper.<locals>.<listcomp>rA   r   rz  Fc                      s   t d  S rR   )r   r   rP   )rW  r~  r  tensorsrP   rQ   r     s
      zIDistributedTest._DistTestBase._test_reduce_twice_helper.<locals>.<lambda>)secondary_op_callr|  r?   )	r  r
  rm  r   r   r}  rf  r  rK  )rM   rV  rW  r   r~  r  r  r  r
  r  r  rq   rP   )rW  r  r~  r   r  r  r  rQ   _test_reduce_twice_helperf  s,    
z7DistributedTest._DistTestBase._test_reduce_twice_helperc                 C   s<   |   \}}}| |||tjjddddt|d    d S r  )r]  r  r   r  r  r~  r\  rP   rP   rQ   test_reduce_sum_twice  s    z3DistributedTest._DistTestBase.test_reduce_sum_twicec                 C   sf   |   \}}}tt t}|| d }tj| | |||tj	j
ddddt|d   d|	 d S r  )r]  r   r   r   rs   rJ   r
  r  r  r  r  r~  rs  rP   rP   rQ   test_reduce_sum_cuda_twice  s    z8DistributedTest._DistTestBase.test_reduce_sum_cuda_twicez#Only Nccl supports reduce_scatter_vc              	   C   sH  |    |  \}}}tt t}|| d }g }|D ]}||d  q8t|d | }|||  }	t|}
d}d}dD ]}t|
||d}|||	 	| t
j|| |
|
t
jd	d|}t|tt
||tjj||}|r|  ddt|d   }t
j|| |
|
t
jd}|	||}| || q||    d S )	Nr   r?   rA   r<   TFr  r  r   )rK  r]  r   r   r   rs   r  r   r  r	  rJ   r  rk  r
  Zreduce_scatterrY  r   r  r  r  r~  rf  )rM   rV  rW  r   r  r  Zinput_split_sizesr  Z	start_lenZend_lensum_lenr  r  	async_valrq   
out_tensorr  r  r-  rP   rP   rQ   test_reduce_scatter_v_cuda  s<    $z8DistributedTest._DistTestBase.test_reduce_scatter_v_cudaTc                 C   sX   |r(| || d }| || d }|jg}| jddtj||tjj|dd|d
 |S )Nr   z:reduce_scatter_tensorFexpect_eventr|  )r
  r}  rm  r   Zreduce_scatter_tensorr  r  rM   
tensor_out	tensor_inrW  r   r
  r  r|  rP   rP   rQ   _reduce_scatter_tensor_helper  s"    z;DistributedTest._DistTestBase._reduce_scatter_tensor_helperz-Only Nccl supports CUDA reduce_scatter_tensorc           	      C   s   |   \}}}tt t}d}tj|tjd}tt	|| }| 
||||d|}t|| |d | t	| }| || |   t|t	||f}| 
||||d|}| || |   d S )NrA   r  Tr?   )r]  r   r   r   rs   rJ   zerosZint64Zaranger~  r  rf  rK  r   )	rM   rV  rW  r   r  r  r  r  r-  rP   rP   rQ   test_reduce_scatter_tensor_cuda  s     z=DistributedTest._DistTestBase.test_reduce_scatter_tensor_cudac              
   C   sD  |   \}}}tt t}|D ]}||kr>t|d d}nt|d d}||| d }t }tjj	|_
|tjkrt |g|}n||g|}tdkrz$| td |  W 5 Q R X W n" tk
r   | |  Y nX |  | }	n| }	|  ddt|d   }
| |	t|d |
g q |   d S )Nr?   rA   r<   r   r  z0Work needs to be completed before calling result)r]  r   r   r   rs   r  r
  r   r  r  ZreduceOpr   r[  r   	allreducer  r,  resultAssertionErrorr  r\  r  r~  rf  rK  )rM   rV  rW  r   r  r  rq   rp  rO  r  r  rP   rP   rQ   test_all_reduce_result_cuda	  s8    



z9DistributedTest._DistTestBase.test_all_reduce_result_cuda)r  r  profile_cudar|  c             	      s    fddg}
|d k	r$|
 | tjjj|dd}|*}dd |
D }|r`|D ]}|  qRW 5 Q R X |rt tkrt	t | |}t
 tjjkr| t|t|
 |D ]b}| |j | |jd | |jd |d k	rt
 tjjkr| |j|d	|j d
|  qd S )Nc                      s
    S r\   rP   rP   rJ  rS  r~  rP   rQ   r   B	  r   z<DistributedTest._DistTestBase.call_dist_op.<locals>.<lambda>T)Zuse_cudar  c                 S   s   g | ]
}| qS rP   rP   )r{   Zop_callrP   rP   rQ   r   M	  s     z>DistributedTest._DistTestBase.call_dist_op.<locals>.<listcomp>r?   r   zevent shape: z vs tensor )r  rJ   r  r   r   r  r   r  PROFILING_SUPPORTED_BACKENDSr   r  r  r  rf  r~  r  r:  r8  assertGreaterEqualZcpu_timer)  )rM   Zprofiling_title_postfixr:  r~  r  r  r  r|  rJ  rS  Zop_callsrC  r+  ZworksrO  r   rN  rP   r  rQ   rm  6	  s@    
 
 z*DistributedTest._DistTestBase.call_dist_opc                 C   s   |D ]}||kr|n|}t |d |
d|}|rD||	| d }|jtjkr`t|jg}n|jg}| jd|t	j
|||||d |dkr|rt	 tkr| jd|t	j
||||d|d	 q|   d S )Nr?   r  r   :all_reduce)async_opr|  T)r  r  r|  )r  r	  r
  r  rJ   	complex64view_as_realr}  rm  r   
all_reducer  !CUDA_PROFILING_SUPPORTED_BACKENDSrK  )rM   rV  rW  r   r~  r  r  r  r
  r  r  r  r  
curr_valuerq   r|  rP   rP   rQ   _test_all_reduce_helperm	  sH    
z5DistributedTest._DistTestBase._test_all_reduce_helperc                 C   s<   |   \}}}| |||tjjddddt|d    d S r  r]  r  r   r  r  r~  r\  rP   rP   rQ   test_all_reduce_sum	  s    z1DistributedTest._DistTestBase.test_all_reduce_sumc                 C   s@   |   \}}}| j|||tjjddddt|d   dd d S NrA   r<   r?   T)r  r  r\  rP   rP   rQ   test_all_reduce_sum_async	  s    z7DistributedTest._DistTestBase.test_all_reduce_sum_asyncz;Only Gloo and NCCL backends will have CUDA allReduce testedc                 C   s\   t j| j |  \}}}tt t}| 	|||tj
jddddt|d   d|	 d S )NrA   r<   r?   TrJ   r
  r  r   r]  r   r   r   rs   r  r  r  r~  rM   rV  rW  r   r  rP   rP   rQ   test_all_reduce_sum_cuda	  s    z6DistributedTest._DistTestBase.test_all_reduce_sum_cudac                 C   s`   t j| j |  \}}}tt t}| j	|||tj
jddddt|d   d|dd
 d S r  r  r  rP   rP   rQ   test_all_reduce_sum_cuda_async	  s    z<DistributedTest._DistTestBase.test_all_reduce_sum_cuda_asyncc                 C   sZ   |   \}}}| j|||tjjtddtddtddtddt|d   tjd d S )NrA   rh   r<      r?   r  )	r]  r  r   r  r  complexr~  rJ   cfloatr\  rP   rP   rQ   test_all_reduce_sum_complex	  s    z9DistributedTest._DistTestBase.test_all_reduce_sum_complexc              
   C   st   t jjt jjt jjt jjt jjt jjg}|  \}}}|D ]4}| 	t
d t tdtjd|| W 5 Q R X q:d S Nzall_reduce does not supportr?   r  )r   r  r  r  r  ZBANDZBORZBXORr]  r  r,  r  r  rJ   r  )rM   Zunsupported_opsrV  rW  r   Zunsupported_oprP   rP   rQ   'test_all_reduce_complex_unsupported_ops	  s$       zEDistributedTest._DistTestBase.test_all_reduce_complex_unsupported_opsc                 C   sz   t j| j |  \}}}tt t}| j	|||tj
jtddtddtddtddt|d   d|t jd
 d S )NrA   rh   r<   r  r?   Tr  )rJ   r
  r  r   r]  r   r   r   rs   r  r  r  r  r~  r  r  rP   rP   rQ    test_all_reduce_sum_cuda_complex
  s    z>DistributedTest._DistTestBase.test_all_reduce_sum_cuda_complexc                 C   sF   |   \}}}| |||tjjddtdd dgt|d  d d S )NrA   r<   c                 S   s   | | S r\   rP   r  rP   rP   rQ   r   ,
  r   zGDistributedTest._DistTestBase.test_all_reduce_product.<locals>.<lambda>r?   )r]  r  r   r  r  r   r~  r\  rP   rP   rQ   test_all_reduce_product"
  s    z5DistributedTest._DistTestBase.test_all_reduce_productc              	   C   s,   |   \}}}| |||tjjddd d S r  )r]  r  r   r  r  r\  rP   rP   rQ   test_all_reduce_min/
  s          z1DistributedTest._DistTestBase.test_all_reduce_minc              	   C   s,   |   \}}}| |||tjjddd d S r  )r]  r  r   r  r  r\  rP   rP   rQ   test_all_reduce_max6
  s          z1DistributedTest._DistTestBase.test_all_reduce_maxc                 C   s<   |   \}}}| |||tjjddddt|d    d S r  )rX  r  r   r  r  r~  r\  rP   rP   rQ   test_all_reduce_group_sum=
  s    z7DistributedTest._DistTestBase.test_all_reduce_group_sumc                 C   sF   |   \}}}| |||tjjddtdd dgt|d  d d S )NrA   r<   c                 S   s   | | S r\   rP   r  rP   rP   rQ   r   V
  r   zMDistributedTest._DistTestBase.test_all_reduce_group_product.<locals>.<lambda>r?   )rX  r  r   r  r  r   r~  r\  rP   rP   rQ   test_all_reduce_group_productK
  s    z;DistributedTest._DistTestBase.test_all_reduce_group_productc              	   C   s,   |   \}}}| |||tjjddd d S r  )rX  r  r   r  r  r\  rP   rP   rQ   test_all_reduce_group_minY
  s          z7DistributedTest._DistTestBase.test_all_reduce_group_minc              	   C   s,   |   \}}}| |||tjjddd d S r  )rX  r  r   r  r  r\  rP   rP   rQ   test_all_reduce_group_maxa
  s          z7DistributedTest._DistTestBase.test_all_reduce_group_maxc                 C   s<   |   \}}}| |||tjjddddt|d    d S r  )rZ  r  r   r  r  r~  r\  rP   rP   rQ   test_all_reduce_full_group_sumi
  s    z<DistributedTest._DistTestBase.test_all_reduce_full_group_sumc                 C   sF   |   \}}}| |||tjjddtdd dgt|d  d d S )NrA   r<   c                 S   s   | | S r\   rP   r  rP   rP   rQ   r   
  r   zRDistributedTest._DistTestBase.test_all_reduce_full_group_product.<locals>.<lambda>r?   )rZ  r  r   r  r  r   r~  r\  rP   rP   rQ   "test_all_reduce_full_group_productv
  s    z@DistributedTest._DistTestBase.test_all_reduce_full_group_productc              	   C   s,   |   \}}}| |||tjjddd d S r  )rZ  r  r   r  r  r\  rP   rP   rQ   test_all_reduce_full_group_min
  s          z<DistributedTest._DistTestBase.test_all_reduce_full_group_minc              	   C   s,   |   \}}}| |||tjjddd d S r  )rZ  r  r   r  r  r\  rP   rP   rQ   test_all_reduce_full_group_max
  s          z<DistributedTest._DistTestBase.test_all_reduce_full_group_maxc           	         sn   |   \}}}t|t dd}|D ]D\}} fdd|D }t|d tjj| | |d |d  q$d S )Nr?   )Z
num_inputsc                    s   g | ]} |qS rP   rP   )r{   inputfnrP   rQ   r   
  s     zMDistributedTest._DistTestBase._test_sparse_all_reduce_sum.<locals>.<listcomp>r   )r]  r!   r   r   r  r  r  rf  )	rM   r  rV  rW  r   testsinputsoutputsr  rP   r  rQ   _test_sparse_all_reduce_sum
  s      z9DistributedTest._DistTestBase._test_sparse_all_reduce_sumz+Only Gloo backend support sparse all reducec                 C   s   |  dd  d S )Nc                 S   s   | S r\   rP   trP   rP   rQ   r   
  r   zJDistributedTest._DistTestBase.test_sparse_all_reduce_sum.<locals>.<lambda>r  rL   rP   rP   rQ   test_sparse_all_reduce_sum
  s    z8DistributedTest._DistTestBase.test_sparse_all_reduce_sumc                 C   s   |  dd  d S )Nc                 S   s   |    S r\   )cloner
  r  rP   rP   rQ   r   
  r   zODistributedTest._DistTestBase.test_sparse_all_reduce_sum_cuda.<locals>.<lambda>r  rL   rP   rP   rQ   test_sparse_all_reduce_sum_cuda
  s    z=DistributedTest._DistTestBase.test_sparse_all_reduce_sum_cudac                 C   sf   ddt ddgddt ddgdd| d   dd| d   t ddt dd| d   gtjtjtjgfS )NrA   rh   r<   r  r?   )r  rJ   rk  r  Z
group_sizerP   rP   rQ   $_all_reduce_coalesced_sum_test_cases
  s    zBDistributedTest._DistTestBase._all_reduce_coalesced_sum_test_casesc                 C   s8   ddgddgdd| d   dd| d   gt jt jgfS )Nr?   rA   rh   ri   rJ   rk  r  rP   rP   rQ   (_all_reduce_coalesced_product_test_cases
  s
    
zFDistributedTest._DistTestBase._all_reduce_coalesced_product_test_casesc                 C   s    ddgddgddgt jt jgfS Nr?   ri   rA   rh   r  r  rP   rP   rQ   $_all_reduce_coalesced_min_test_cases
  s
    
zBDistributedTest._DistTestBase._all_reduce_coalesced_min_test_casesc                 C   s    ddgddgddgt jt jgfS r  r  r  rP   rP   rQ   $_all_reduce_coalesced_max_test_cases
  s
    
zBDistributedTest._DistTestBase._all_reduce_coalesced_max_test_casesc              	   C   sH   |   \}}}| td$ ttdtjdgtjj	| W 5 Q R X d S r  )
r]  r  r,  r   all_reduce_coalescedr  rJ   r  r  r  r\  rP   rP   rQ   1test_all_reduce_coalesced_max_complex_unsupported
  s      zODistributedTest._DistTestBase.test_all_reduce_coalesced_max_complex_unsupportedc              
      s  t jj| jt jj| jt jj| jt jj| j	i| }|t
|\}}	}
}|D ] krX|n|	}fddt||D }|r fdd|D }g }|D ]0}|jtjkr|t|j q||j q| jddt j||||d fddt||
D }| || qH|   d S )Nc                    s"   g | ]\}}t  d  ||dqS r?   r  rd  )r{   r  r  r  rP   rQ   r   
  s   zSDistributedTest._DistTestBase._test_all_reduce_coalesced_helper.<locals>.<listcomp>c                    s   g | ]}|   d  qS r   r
  r{   r  r   r  rP   rQ   r   
  s     r  Fr{  c                    s"   g | ]\}}t  d  ||dqS r  rd  )r{   r  r  r  rP   rQ   r     s   )r   r  r  r  r  r  r  r  r  r  r~  rh  r  rJ   r  r  r  r}  rm  r  rf  rK  )rM   rV  rW  r   r~  r
  r  Ztest_case_funcZmaster_valuesZworker_valuesZexpected_valuesdtypesZcurr_valuesr  r|  rq   r
  rP   )r   r  r  rQ   !_test_all_reduce_coalesced_helper
  sP    
    
	
z?DistributedTest._DistTestBase._test_all_reduce_coalesced_helperc                 C   s,   |   \}}}| j|||tjjdd d d S NF)r
  r  )r]  r  r   r  r  r\  rP   rP   rQ   test_all_reduce_coalesced_sum  s    z;DistributedTest._DistTestBase.test_all_reduce_coalesced_sumc                 C   s,   |   \}}}| j|||tjjdd d d S r  )r]  r  r   r  r  r\  rP   rP   rQ   !test_all_reduce_coalesced_product  s    z?DistributedTest._DistTestBase.test_all_reduce_coalesced_productc                 C   s,   |   \}}}| j|||tjjdd d d S r  )r]  r  r   r  r  r\  rP   rP   rQ   test_all_reduce_coalesced_min&  s    z;DistributedTest._DistTestBase.test_all_reduce_coalesced_minc                 C   s,   |   \}}}| j|||tjjdd d d S r  )r]  r  r   r  r  r\  rP   rP   rQ   test_all_reduce_coalesced_max2  s         z;DistributedTest._DistTestBase.test_all_reduce_coalesced_maxc                 C   s,   |   \}}}| j|||tjjdd d d S r  )rX  r  r   r  r  r\  rP   rP   rQ   #test_all_reduce_coalesced_group_sum9  s         zADistributedTest._DistTestBase.test_all_reduce_coalesced_group_sumc                 C   s,   |   \}}}| j|||tjjdd d d S r  )rX  r  r   r  r  r\  rP   rP   rQ   'test_all_reduce_coalesced_group_productA  s    zEDistributedTest._DistTestBase.test_all_reduce_coalesced_group_productc                 C   s,   |   \}}}| j|||tjjdd d d S r  )rX  r  r   r  r  r\  rP   rP   rQ   #test_all_reduce_coalesced_group_minN  s         zADistributedTest._DistTestBase.test_all_reduce_coalesced_group_minc                 C   s,   |   \}}}| j|||tjjdd d d S r  )rX  r  r   r  r  r\  rP   rP   rQ   #test_all_reduce_coalesced_group_maxV  s         zADistributedTest._DistTestBase.test_all_reduce_coalesced_group_maxc                 C   s,   |   \}}}| j|||tjjdd d d S r  )rZ  r  r   r  r  r\  rP   rP   rQ   (test_all_reduce_coalesced_full_group_sum^  s         zFDistributedTest._DistTestBase.test_all_reduce_coalesced_full_group_sumc                 C   s,   |   \}}}| j|||tjjdd d d S r  )rZ  r  r   r  r  r\  rP   rP   rQ   ,test_all_reduce_coalesced_full_group_producte  s    zJDistributedTest._DistTestBase.test_all_reduce_coalesced_full_group_productc                 C   s,   |   \}}}| j|||tjjdd d d S r  )rZ  r  r   r  r  r\  rP   rP   rQ   (test_all_reduce_coalesced_full_group_minq  s    zFDistributedTest._DistTestBase.test_all_reduce_coalesced_full_group_minc                 C   s,   |   \}}}| j|||tjjdd d d S r  )rZ  r  r   r  r  r\  rP   rP   rQ   (test_all_reduce_coalesced_full_group_max}  s         zFDistributedTest._DistTestBase.test_all_reduce_coalesced_full_group_maxc                    s   |D ]ĉ t  d dd}t  d d} krH fdd|D ng }	|rv| d }fdd|	D }	tjkrdd |	D }
nd	d |	D }
| jd
dtj| |	|d|
d	 | || q|   d S )Nr?   r   r  c                    s   g | ]}t  d  |dqS r  rd  r  rY  r  rP   rQ   r     s     zFDistributedTest._DistTestBase._test_scatter_helper.<locals>.<listcomp>r   c                    s   g | ]}|   d  qS r  r  r  r  rP   rQ   r     s     c                 S   s   g | ]}t |jqS rP   )rJ   r  r}  r  rP   rP   rQ   r     s     c                 S   s   g | ]
}|j qS rP   )r}  r  rP   rP   rQ   r     s     z:scatterF)r  scatter_listrV  r  r|  )	r  r
  rJ   r  rm  r   scatterrf  rK  )rM   rV  rW  r   r
  r  r  rq   r-  r  r|  rP   rY  r  r   r  rQ   _test_scatter_helper  s4    
z2DistributedTest._DistTestBase._test_scatter_helperZuccz&CPU tensor ops not supported by UCP TLc                    s   |   \}}}tdg   d }|dkrR fdd|D }tj|d|d ntj|dd | | |    d }|dkr fdd|D }tj||d	 n
t| | | |  d S )
Nr?   r   r   c                    s   g | ]}   | qS rP   r  r  ZonerP   rQ   r     s     zEDistributedTest._DistTestBase.test_scatter_checks.<locals>.<listcomp>)r  r  r  c                    s   g | ]}   | qS rP   r	  r  r
  rP   rQ   r     s     )r  )r]  rJ   r   r  r   r  rf  )rM   rV  rW  r   outputr  rP   r
  rQ   test_scatter_checks  s    
z1DistributedTest._DistTestBase.test_scatter_checksc                 C   s    |   \}}}| ||| d S r\   )r]  r  r\  rP   rP   rQ   test_scatter  s    z*DistributedTest._DistTestBase.test_scatterzOnly Nccl supports CUDA gatherc                 C   s2   |   \}}}tt t}| |||d| d S NT)r]  r   r   r   rs   r  r  rP   rP   rQ   test_scatter_cuda  s    z/DistributedTest._DistTestBase.test_scatter_cudac                 C   s&   |   \}}}| j|||tjd d S r  )r]  r  rJ   r  r\  rP   rP   rQ   test_scatter_complex  s    z2DistributedTest._DistTestBase.test_scatter_complexc                 C   s8   |   \}}}tt t}| j|||d|tjd d S NTr  )r]  r   r   r   rs   r  rJ   r  r  rP   rP   rQ   test_scatter_cuda_complex  s    z7DistributedTest._DistTestBase.test_scatter_cuda_complexc                 C   s    |   \}}}| ||| d S r\   )rX  r  r\  rP   rP   rQ   test_scatter_group  s    z0DistributedTest._DistTestBase.test_scatter_groupc                 C   s    |   \}}}| ||| d S r\   )rZ  r  r\  rP   rP   rQ   test_scatter_full_group  s    z5DistributedTest._DistTestBase.test_scatter_full_groupc                    s   |D ]ʉ t  d } kr0 fdd|D ng }|r^| d }fdd|D }| jddtj| ||dt|dkr|d jgnd d	  kr fd	d|D }t||D ]\}	}
| |	|
 qq| 	  d S )
Nr?   c                    s   g | ]}t  d  dqS )r?   r   rd  r  rY  rP   rQ   r     s     zEDistributedTest._DistTestBase._test_gather_helper.<locals>.<listcomp>r   c                    s   g | ]}|   d  qS r  r  r  r  rP   rQ   r     s     z:gatherF)r,  gather_listrV  r  r|  c                    s   g | ]}t  d  |qS rf  rd  r  r  rP   rQ   r     s     )
r  r
  rm  r   gatherr~  r}  rh  rf  rK  )rM   rV  rW  r   r
  r  rq   r  r
  t1t2rP   )rY  r   r  rQ   _test_gather_helper  s.    z1DistributedTest._DistTestBase._test_gather_helperc                    s   |   \}}}tdg |dkrh fdd|D }tj | d|d |D ]}| ||  |  qLntj | dd |dkrƇ fdd|D }tj | |d |D ]}| ||  |  qnt |  d S )	Nr?   r   c                    s   g | ]}   qS rP   r	  r  r
  rP   rQ   r   	  s     zDDistributedTest._DistTestBase.test_gather_checks.<locals>.<listcomp>)r,  r  r,  c                    s   g | ]}   qS rP   r	  r  r
  rP   rQ   r     s     )r  )r]  rJ   r   r   r  rf  )rM   rV  rW  r   r  r  rP   r
  rQ   test_gather_checks  s    z0DistributedTest._DistTestBase.test_gather_checksc                 C   s    |   \}}}| ||| d S r\   )r]  r  r\  rP   rP   rQ   test_gather  s    z)DistributedTest._DistTestBase.test_gatherc                 C   s2   |   \}}}tt t}| |||d| d S r  )r]  r   r   r   rs   r  r  rP   rP   rQ   test_gather_cuda  s    z.DistributedTest._DistTestBase.test_gather_cudac                 C   s    |   \}}}| ||| d S r\   )rX  r  r\  rP   rP   rQ   test_gather_group&  s    z/DistributedTest._DistTestBase.test_gather_groupc                 C   s    |   \}}}| ||| d S r\   )rZ  r  r\  rP   rP   rQ   test_gather_full_group-  s    z4DistributedTest._DistTestBase.test_gather_full_groupc                    s   |D ]؉ t  d d} fdd|D }tj}	|r^| d }fdd|D }|d jtjkrt|d jg}
n|d jg}
| j	dd|	|||d|
d	  fd
d|D }t
||D ]\}}| || qq|   d S )Nr?   r  c                    s   g | ]}t  d  ddqS )r?   r   r  rd  r  r  rP   rQ   r   9  s     zIDistributedTest._DistTestBase._test_all_gather_helper.<locals>.<listcomp>r   c                    s   g | ]}|   d  qS r  r  r  r  rP   rQ   r   =  s     :all_gatherFr{  c                    s   g | ]}t  d  |dqS r  rd  r  r  rP   rQ   r   M  s    )r  r   ri  r
  r  rJ   r  r  r}  rm  rh  rf  rK  )rM   rV  rW  r   r
  r  r  rq   r  Z	allgatherr|  r
  r  r  rP   r  rQ   _test_all_gather_helper4  s4    z5DistributedTest._DistTestBase._test_all_gather_helperc                 C   s    |   \}}}| ||| d S r\   )r]  r"  r\  rP   rP   rQ   test_all_gatherU  s    z-DistributedTest._DistTestBase.test_all_gatherz"Only Nccl supports CUDA all gatherc                 C   s2   |   \}}}tt t}| |||d| d S r  )r]  r   r   r   rs   r"  r  rP   rP   rQ   test_all_gather_cudaZ  s    z2DistributedTest._DistTestBase.test_all_gather_cudac                 C   s&   |   \}}}| j|||tjd d S r  )r]  r"  rJ   r  r\  rP   rP   rQ   test_all_gather_complexa  s    z5DistributedTest._DistTestBase.test_all_gather_complexc                 C   s8   |   \}}}tt t}| j|||d|tjd d S r  )r]  r   r   r   rs   r"  rJ   r  r  rP   rP   rQ   test_all_gather_cuda_complexf  s         z:DistributedTest._DistTestBase.test_all_gather_cuda_complexc                 C   s    |   \}}}| ||| d S r\   )rX  r"  r\  rP   rP   rQ   test_all_gather_groupo  s    z3DistributedTest._DistTestBase.test_all_gather_groupc                 C   s    |   \}}}| ||| d S r\   )rZ  r"  r\  rP   rP   rQ   test_all_gather_full_groupu  s    z8DistributedTest._DistTestBase.test_all_gather_full_groupzOnly Nccl supports all_gather_vc                 C   s   |    |  \}}}tt t}|| d }g }|D ]}||d  q8t|}d}	dD ]|}
tj	|| ||tj
d|	|}t|d|d}ttt|||||
}|
r|  |	}t|||d}| || q\|    d S )Nr   r?   rA   r  r  r   r  )rK  r]  r   r   r   rs   r  r   rJ   r  rk  r	  r
  r  ri  rY  r   r  rf  )rM   rV  rW  r   r  r  Zoutput_split_sizesr,  r  ra   r  rq   r  r  r  r-  rP   rP   rQ   test_all_gather_v_cudaz  s0    $z4DistributedTest._DistTestBase.test_all_gather_v_cudac                 C   sn   |r(| || d }| || d }|jtjkrDt|jg}n|jg}| jddtj|||dd|d	 |S )Nr   z:all_gather_into_tensorFr  )	r
  r  rJ   r  r  r}  rm  r   Zall_gather_into_tensorr  rP   rP   rQ   _all_gather_into_tensor_helper  s$    z<DistributedTest._DistTestBase._all_gather_into_tensor_helperz.Only Nccl supports CUDA all_gather_into_tensorc                    s   |   \}}}tt t}d t  g| }tt|   gd }| ||||d|}t	 fdd|D }| 
|| |   d S )NrA   r   Tc                    s   g | ]}t   g| qS rP   rJ   r   r  r  rP   rQ   r     s    zVDistributedTest._DistTestBase.test_all_gather_into_cat_tensor_cuda.<locals>.<listcomp>)r]  r   r   r   rs   rJ   r   r~  r*  rK  rf  rK  rM   rV  rW  r   r  r  r  r-  rP   r,  rQ   $test_all_gather_into_cat_tensor_cuda  s    zBDistributedTest._DistTestBase.test_all_gather_into_cat_tensor_cudac                    s   |   \}}}tt t}d t  g| }tt|  gd }| ||||d|}t	 fdd|D }| 
|| |   d S )NrA   r   Tc                    s   g | ]}t   g| qS rP   r+  r  r,  rP   rQ   r     s    zXDistributedTest._DistTestBase.test_all_gather_into_stack_tensor_cuda.<locals>.<listcomp>)r]  r   r   r   rs   rJ   r   r~  r*  stackrf  rK  r-  rP   r,  rQ   &test_all_gather_into_stack_tensor_cuda  s    zDDistributedTest._DistTestBase.test_all_gather_into_stack_tensor_cudac              	   C   s   g }|D ]0}|j tjkr,|t|j q||j q| jddtj||||d t	||D ]0\}}t	||D ]\}	}
t
|	|
sp  dS qpq^dS )z|
            Helper that runs all_gather_coalesced and returns true if output
            matches expectations.
            r!  Fr{  T)r  rJ   r  r  r  r}  rm  r   Zall_gather_coalescedrh  r`   )rM   output_tensor_listsinput_tensorsr
  rW  r|  input_tensorl1l2r  r  rP   rP   rQ   $_run_all_gather_coalesced_and_verify  s&    
zBDistributedTest._DistTestBase._run_all_gather_coalesced_and_verifyc                    s~   |d k	rrt ddD ]^ fddt dD } fdd|D } fdd|D }| ||||stdq|   d S )	NrA   rn   c                    s    g | ]}t |||  d qS r  r  r{   Z	tensor_id)r  r   rP   rQ   r      s      zSDistributedTest._DistTestBase._test_all_gather_coalesced_helper.<locals>.<listcomp>r?   c                    s$   g | ]} fd dt dD qS )c                    s   g | ]}t ||d  dqS )r   r  r8  r9  r  rP   rQ   r     s      ^DistributedTest._DistTestBase._test_all_gather_coalesced_helper.<locals>.<listcomp>.<listcomp>r?   r  r  r  test_case_idrP   rQ   r     s   
c                    s&   g | ]  fd dt dD qS )c                    s    g | ]}t |||  d qS r7  r8  r9  )r  	rank_iterrP   rQ   r     s      r:  r?   r;  )r{   r<  )r>  rQ   r     s   z+output tensors do not match expected ouputs)r  r6  r  rK  )rM   rV  rW  r   r  r2  r1  r
  rP   )r  r   r=  rQ   !_test_all_gather_coalesced_helper  s(    		   z?DistributedTest._DistTestBase._test_all_gather_coalesced_helperZallgather_coalescedz& does not support all_gather_coalescedc                 C   s    |   \}}}| ||| d S r\   )r]  r?  r\  rP   rP   rQ    test_all_gather_coalesced_simple  s    z>DistributedTest._DistTestBase.test_all_gather_coalesced_simplec                 C   s&   |   \}}}| j|||tjd d S r  )r]  r?  rJ   r  r\  rP   rP   rQ   !test_all_gather_coalesced_complex&  s       z?DistributedTest._DistTestBase.test_all_gather_coalesced_complexc                 C   s    |   \}}}| ||| d S r\   )rX  r?  r\  rP   rP   rQ   test_all_gather_coalesced_group0  s    z=DistributedTest._DistTestBase.test_all_gather_coalesced_groupc                 C   s    |   \}}}| ||| d S r\   )rZ  r?  r\  rP   rP   rQ   $test_all_gather_coalesced_full_group9  s    zBDistributedTest._DistTestBase.test_all_gather_coalesced_full_groupc                 C   s   |   \}}}|tddg tdg|d tddg tdgtdgg}dd |D }dd |D }| ||||st|   d S )NrA   r   r?   rh   c              
   S   sZ   g | ]R}d t ddg d t dg d t ddg d t dg d t dg gqS )r   rA   r   rh   r+  r  rP   rP   rQ   r   N  s   zVDistributedTest._DistTestBase.test_all_gather_coalesced_with_empty.<locals>.<listcomp>c              	   S   sR   g | ]J}|t d d g t dg|d t ddg t dgt dggqS )rA   r   r?   rh   r+  r[  rP   rP   rQ   r   X  s   


)r]  rJ   r   r6  r  rK  )rM   rV  rW  r   r2  Zoutput_tensors_listsr
  rP   rP   rQ   $test_all_gather_coalesced_with_emptyA  s(    




   zBDistributedTest._DistTestBase.test_all_gather_coalesced_with_emptyc              	      s   |d k	rt |tjg d| }t fdd|D }tjg dd }	|r||| d }||| d }|	|| d }	 tjkrt|jg}
n|jg}
| jddt	j
|	|||
d | |	| |   d S )	Nr  c                    s"   g | ]}t jd g d| qS r  r+  r  r  r  rP   rQ   r   o  s     z\DistributedTest._DistTestBase._test_all_to_all_single_equal_split_helper.<locals>.<listcomp>r   r   z:all_to_allF)rV  r|  )r~  rJ   r   rK  r
  r  r  r}  rm  r   all_to_all_singlerf  rK  )rM   rV  rW  r   r
  r  r  	in_tensorr-  r  r|  rP   rE  rQ   *_test_all_to_all_single_equal_split_helperh  s2    
	zHDistributedTest._DistTestBase._test_all_to_all_single_equal_split_helperc                    s   |d k	rt |dd |D }fdd|D }tjt|g d }	tjd  g d}
t fdd|D }|r|	| d }	|| d }|
| d }
tj|
|	|||d | |
| | 	  d S )	Nc                 S   s   g | ]}|d  qS rf  rP   r  rP   rP   rQ   r     s     z^DistributedTest._DistTestBase._test_all_to_all_single_unequal_split_helper.<locals>.<listcomp>c                    s   g | ]} d  qS rf  rP   r  )r   rP   rQ   r     s     r  r?   c                    s&   g | ]}t jd  g d| qS r  r+  r  r  r   r  rP   rQ   r     s     r   rV  )
r~  rJ   r   r   rK  r
  r   rF  rf  rK  )rM   rV  rW  r   r
  r  r  	in_splitsZ
out_splitsrG  r  r-  rP   rI  rQ   ,_test_all_to_all_single_unequal_split_helper  s,        zJDistributedTest._DistTestBase._test_all_to_all_single_unequal_split_helperc                    s   |d k	rt |dd |D  fddt|D } fdd|D } fdd|D }	|rfdd|D }fdd|	D }	fdd|D }tj|||d	 t||	D ]\}
}| |
| q|   d S )
Nc                 S   s   g | ]}|d  qS rf  rP   r  rP   rP   rQ   r     s     zIDistributedTest._DistTestBase._test_all_to_all_helper.<locals>.<listcomp>c                    s*   g | ]"\}}t j| g d  qS r7  r+  )r{   r  r  )r  rK  r   r  rP   rQ   r     s   c                    s"   g | ]}t jd  g dqS r  r+  r  rI  rP   rQ   r     s    c                    s&   g | ]}t jd  g d| qS r  r+  r  rI  rP   rQ   r     s    c                    s   g | ]}|   d  qS r  r  r  r  rP   rQ   r     s     c                    s   g | ]}|   d  qS r  r  r  r  rP   rQ   r     s    c                    s   g | ]}|   d  qS r  r  r  r  rP   rQ   r     s     rJ  )r~  	enumerater   Z
all_to_allrh  rf  rK  )rM   rV  rW  r   r
  r  r  Z
in_tensorsZout_tensorsr
  r  r  rP   )r  rK  r   r  r  rQ   _test_all_to_all_helper  s,    	z5DistributedTest._DistTestBase._test_all_to_all_helpermpiz'Only MPI supports CPU all_to_all_singlec                 C   s    |   \}}}| ||| d S r\   )r]  rH  r\  rP   rP   rQ   "test_all_to_all_single_equal_split  s    z@DistributedTest._DistTestBase.test_all_to_all_single_equal_splitz)Only Nccl supports CUDA all_to_all_singlec                 C   s2   |   \}}}tt t}| |||d| d S r  )r]  r   r   r   rs   rH  r  rP   rP   rQ   'test_all_to_all_single_equal_split_cuda  s    zEDistributedTest._DistTestBase.test_all_to_all_single_equal_split_cudac                 C   s&   |   \}}}| j|||tjd d S r  )r]  rH  rJ   r  r\  rP   rP   rQ   *test_all_to_all_single_equal_split_complex  s       zHDistributedTest._DistTestBase.test_all_to_all_single_equal_split_complexc                 C   s8   |   \}}}tt t}| j|||d|tjd d S r  )r]  r   r   r   rs   rH  rJ   r  r  rP   rP   rQ   /test_all_to_all_single_equal_split_cuda_complex  s         zMDistributedTest._DistTestBase.test_all_to_all_single_equal_split_cuda_complexc                 C   s    |   \}}}| ||| d S r\   )r]  rL  r\  rP   rP   rQ   $test_all_to_all_single_unequal_split  s    zBDistributedTest._DistTestBase.test_all_to_all_single_unequal_splitc                 C   s2   |   \}}}tt t}| |||d| d S r  r]  r   r   r   rs   rL  r  rP   rP   rQ   )test_all_to_all_single_unequal_split_cuda  s    zGDistributedTest._DistTestBase.test_all_to_all_single_unequal_split_cudac                 C   s&   |   \}}}| j|||tjd d S r  )r]  rL  rJ   r  r\  rP   rP   rQ   ,test_all_to_all_single_unequal_split_complex  s       zJDistributedTest._DistTestBase.test_all_to_all_single_unequal_split_complexc                 C   s8   |   \}}}tt t}| j|||d|tjd d S r  )r]  r   r   r   rs   rL  rJ   r  r  rP   rP   rQ   1test_all_to_all_single_unequal_split_cuda_complex  s    zODistributedTest._DistTestBase.test_all_to_all_single_unequal_split_cuda_complexzOnly MPI supports all_to_allc                 C   s    |   \}}}| ||| d S r\   )r]  rN  r\  rP   rP   rQ   test_all_to_all  s    z-DistributedTest._DistTestBase.test_all_to_allz"Only NCCL supports CUDA all_to_allc                 C   s2   |   \}}}tt t}| |||d| d S r  )r]  r   r   r   rs   rN  r  rP   rP   rQ   test_all_to_all_cuda  s    z2DistributedTest._DistTestBase.test_all_to_all_cudac                 C   s&   |   \}}}| j|||tjd d S r  )r]  rN  rJ   r  r\  rP   rP   rQ   test_all_to_all_complex  s    z5DistributedTest._DistTestBase.test_all_to_all_complexc                 C   s8   |   \}}}tt t}| j|||d|tjd d S r  )r]  r   r   r   rs   rN  rJ   r  r  rP   rP   rQ   test_all_to_all_cuda_complex  s         z:DistributedTest._DistTestBase.test_all_to_all_cuda_complexc                 C   s    |   \}}}| ||| d S r\   )rX  rH  r\  rP   rP   rQ   (test_all_to_all_single_equal_split_group   s    zFDistributedTest._DistTestBase.test_all_to_all_single_equal_split_groupc                 C   s2   |   \}}}tt t}| |||d| d S r  )rX  r   r   r   rs   rH  r  rP   rP   rQ   -test_all_to_all_single_equal_split_group_cuda&  s    zKDistributedTest._DistTestBase.test_all_to_all_single_equal_split_group_cudac                 C   s    |   \}}}| ||| d S r\   )rX  rL  r\  rP   rP   rQ   *test_all_to_all_single_unequal_split_group4  s    zHDistributedTest._DistTestBase.test_all_to_all_single_unequal_split_groupc                 C   s2   |   \}}}tt t}| |||d| d S r  rU  r  rP   rP   rQ   /test_all_to_all_single_unequal_split_group_cuda:  s    zMDistributedTest._DistTestBase.test_all_to_all_single_unequal_split_group_cudac                 C   s    |   \}}}| ||| d S r\   )rX  rN  r\  rP   rP   rQ   test_all_to_all_groupH  s    z3DistributedTest._DistTestBase.test_all_to_all_groupc                 C   s2   |   \}}}tt t}| |||d| d S r  )rX  r   r   r   rs   rN  r  rP   rP   rQ   test_all_to_all_group_cudaN  s    z8DistributedTest._DistTestBase.test_all_to_all_group_cudac                 C   s    |   \}}}| ||| d S r\   )rZ  rH  r\  rP   rP   rQ   -test_all_to_all_single_equal_split_full_groupV  s    zKDistributedTest._DistTestBase.test_all_to_all_single_equal_split_full_groupc                 C   s2   |   \}}}tt t}| |||d| d S r  )rZ  r   r   r   rs   rH  r  rP   rP   rQ   2test_all_to_all_single_equal_split_full_group_cuda[  s    zPDistributedTest._DistTestBase.test_all_to_all_single_equal_split_full_group_cudac                 C   s    |   \}}}| ||| d S r\   )rZ  rL  r\  rP   rP   rQ   /test_all_to_all_single_unequal_split_full_grouph  s    zMDistributedTest._DistTestBase.test_all_to_all_single_unequal_split_full_groupc                 C   s2   |   \}}}tt t}| |||d| d S r  )rZ  r   r   r   rs   rL  r  rP   rP   rQ   4test_all_to_all_single_unequal_split_full_group_cudam  s    zRDistributedTest._DistTestBase.test_all_to_all_single_unequal_split_full_group_cudac                 C   s    |   \}}}| ||| d S r\   )rZ  rN  r\  rP   rP   rQ   test_all_to_all_full_groupz  s    z8DistributedTest._DistTestBase.test_all_to_all_full_groupc                 C   s2   |   \}}}tt t}| |||d| d S r  )rZ  r   r   r   rs   rN  r  rP   rP   rQ   test_all_to_all_full_group_cuda  s    z=DistributedTest._DistTestBase.test_all_to_all_full_group_cudac           	      C   s   d}|D ]}t dd}|r2||| d }||krt|t |  t||| t|d  t| qt||| t| | 	t
t t
|d d||f d  q| jdd	 d S )
N333333?r?           r   r'  z!destination rank: %d, my rank: %dz3 (if you see this failure, please report in #14554)r   r  )rJ   ZDoubleTensorr	  r
  r*  r   r  r-  r  r  rk  rK  )	rM   rV  rW  r   r
  r  Z	WAIT_TIMErY  r  rP   rP   rQ   _test_barrier_helper  s(    



z2DistributedTest._DistTestBase._test_barrier_helperz MPI doesn't supports GPU barrierz flaky on PyTorch CI with timeoutc                 C   s2   |   \}}}tt t}| |||d| d S r  )r]  r   r   r   rs   rk  r  rP   rP   rQ   test_barrier_cuda  s    z/DistributedTest._DistTestBase.test_barrier_cudac                 C   s2   |   \}}}tt t}| |||d| d S r  )rX  r   r   r   rs   rk  r  rP   rP   rQ   test_barrier_group_cuda  s    z5DistributedTest._DistTestBase.test_barrier_group_cudac                 C   s2   |   \}}}tt t}| |||d| d S r  )rZ  r   r   r   rs   rk  r  rP   rP   rQ   test_barrier_full_group_cuda  s    z:DistributedTest._DistTestBase.test_barrier_full_group_cudazcpu barrierz does not support CPU barrierc                 C   s    |   \}}}| ||| d S r\   )r]  rk  r\  rP   rP   rQ   test_barrier  s    z*DistributedTest._DistTestBase.test_barrierc                 C   s    |   \}}}| ||| d S r\   )rX  rk  r\  rP   rP   rQ   test_barrier_group  s    z0DistributedTest._DistTestBase.test_barrier_groupc                 C   s    |   \}}}| ||| d S r\   )rZ  rk  r\  rP   rP   rQ   test_barrier_full_group  s    z5DistributedTest._DistTestBase.test_barrier_full_groupc                    s|   |D ]j t  d } fdd|| D }| krJ|j|| d d|d< t| | |D ]}| || q\q|   d S )Nr?   c                    s"   g | ]}t  d  dj|dqS )r?   r   r   r  r
  r  r  rP   rQ   r     s    zQDistributedTest._DistTestBase._test_broadcast_multigpu_helper.<locals>.<listcomp>r   r   )r  r
  r   Zbroadcast_multigpurf  rK  )rM   rV  rW  r   r  r-  r  rq   rP   r  rQ   _test_broadcast_multigpu_helper  s    
z=DistributedTest._DistTestBase._test_broadcast_multigpu_helperz&MPI doesn't support broadcast multigpuzNCCL broadcast multigpu skippedc                 C   s0   |   \}}}tt t}| |||| d S r\   )r]  r   r   r   rs   rs  r  rP   rP   rQ   test_broadcast_multigpu  s    z5DistributedTest._DistTestBase.test_broadcast_multigpuc
              	      s~   |D ]l|kr|n|  fdd|| D }
|  ddtj|
|| td |d}|
D ]}| || q^q|   d S )Nc                    s&   g | ]}t d   dj|dqS r?   r  r   rr  r  r  r  r  rP   rQ   r     s   zRDistributedTest._DistTestBase._test_all_reduce_multigpu_helper.<locals>.<listcomp>r  Fr?   r  )rm  r   Zall_reduce_multigpur  rf  rK  )rM   rV  rW  r   r  r~  r  r  r  r  r  r-  rq   rP   rv  rQ    _test_all_reduce_multigpu_helper  s"    z>DistributedTest._DistTestBase._test_all_reduce_multigpu_helperz)CUDA all_reduce multigpu skipped for NCCLzUCC all_reduce multigpu skippedc                 C   sX   |   \}}}tt t}| ||||tjjddddt|d   t|d   d S )NrA   r<   r?   r   )	r]  r   r   r   rs   rw  r  r  r~  r  rP   rP   rQ   test_all_reduce_multigpu  s    z6DistributedTest._DistTestBase.test_all_reduce_multigpuc                 C   sv   |   \}}}tt t}| j||||tjjtddtddtddtddt	|d   t	|d  t
jd	 d S )NrA   rh   r<   r  r?   r   r  )r]  r   r   r   rs   rw  r  r  r  r~  rJ   r  r  rP   rP   rQ    test_all_reduce_multigpu_complex  s    
z>DistributedTest._DistTestBase.test_all_reduce_multigpu_complexc	                    s   |D ] | kr|n| fdd|| D }	| j ddtj|	 ||t|	dk|	d jgd	 | krt d |}
| |	d |
 q|   d S )Nc                    s"   g | ]}t  d  j|dqS )r?   r   rr  r  r  Ztensor_valuerP   rQ   r   7  s   zNDistributedTest._DistTestBase._test_reduce_multigpu_helper.<locals>.<listcomp>rz  Fr?   r   r  )rm  r   Zreduce_multigpur~  r}  r  rf  rK  )rM   rV  rW  r   r  r~  r  r  r  r  r-  rP   rz  rQ   _test_reduce_multigpu_helper*  s(    

z:DistributedTest._DistTestBase._test_reduce_multigpu_helperz*Only Nccl backend supports reduce multigpuc                 C   sp   |   \}}}tt t}|| d }tj| | ||||tj	j
ddddt|d   t|d   d S )Nr   rA   r<   r?   )r]  r   r   r   rs   rJ   r
  r  r{  r  r  r~  rs  rP   rP   rQ   test_reduce_multigpuL  s    z2DistributedTest._DistTestBase.test_reduce_multigpuc              
      s   |D ]މ  fdd|| D }g }g }t  d ddgt|d  t| }	t  d dgt|d  t| }
|| D ]4|fdd|	D  |fdd|
D  q~| jd	d
tj|||t|dkd | || q|   d S )Nc                    s$   g | ]}t  d  dj|dqS ru  rr  r  r  rP   rQ   r   d  s   zRDistributedTest._DistTestBase._test_all_gather_multigpu_helper.<locals>.<listcomp>r?   r   r  r   c                    s   g | ]}|j  d qS r   r  r  r  rP   rQ   r   x  s     c                    s   g | ]}|j  d qS r}  r  r  r~  rP   rQ   r   z  s     r!  F)r  )r  r~  r  rm  r   Zall_gather_multigpurf  rK  )rM   rV  rW  r   r  r  r  Zoutput_tensorsZexpected_outputZoutput_per_gpuZexpected_per_gpurP   )rY  r  r  rQ    _test_all_gather_multigpu_helper`  sD    


	z>DistributedTest._DistTestBase._test_all_gather_multigpu_helperz-Only Nccl backend supports allgather multigpuc                 C   sH   |   \}}}tt t}|| d }tj| | |||| d S r   )	r]  r   r   r   rs   rJ   r
  r  r  rs  rP   rP   rQ   test_all_gather_multigpu  s
    z6DistributedTest._DistTestBase.test_all_gather_multigpuc                 C   sN   |   \}}}tt t}|| d }tj| | j||||tj	d d S )Nr   r  )
r]  r   r   r   rs   rJ   r
  r  r  r  rs  rP   rP   rQ    test_all_gather_multigpu_complex  s        z>DistributedTest._DistTestBase.test_all_gather_multigpu_complexc              
   C   s@   |  D ]2}|jd k	rt  ||j7 }W 5 Q R X d |_qd S r\   )r  r  rJ   no_gradrM   r   r  rP   rP   rQ   _model_step  s
    

z)DistributedTest._DistTestBase._model_stepc              
   C   sP   |  D ]B}|jd k	rt  ||j7 }W 5 Q R X |jd |j  qd S NF)r  r  rJ   r  requires_grad_zero_r  rP   rP   rQ   _model_step_with_zero_grad  s    

z8DistributedTest._DistTestBase._model_step_with_zero_gradc                 C   sB   t tjd }|| }t|d}t|d}t }||||fS )Nr   rA   ri   )r   r   r   rJ   rK   rE   MSELoss)rM   local_bsr   	global_bs	input_cputargetr   rP   rP   rQ   _prepare_dummy_data  s    z1DistributedTest._DistTestBase._prepare_dummy_data      ?c           	      C   sD   |   ||}|||| }|  |d k	r@| |j|d d S )Nmemory_format)trainbackwardr  Zis_contiguous)	rM   r   	input_varr  r   Zscale_factorr  r  lrP   rP   rQ   _test_DDP_helper  s    z.DistributedTest._DistTestBase._test_DDP_helperc                 C   s8   |  t|t| t||D ]\}}|  || qd S r\   )rf  r~  rh  )rM   Z	param_gpuZ	param_DDPZp_gpuZp_DDPrP   rP   rQ   _assert_equal_param  s    z1DistributedTest._DistTestBase._assert_equal_paramr   rn   c              
   C   s  t |D ]6}| j|||||d |
d kr2|| }
| j|||
|
|  ||
|
|  ||dkrj|| | nd|d |r| | | | n| | | | | t| t|j  |t	| }|	r|dkrt
drt N}tjdkrt|| |d t|}nt||j t|j}W 5 Q R X qt &}t|| |d t|}W 5 Q R X | D ]"}| | | | |  q~d S )Nr  r   r?   rA   r  r9   )r  r  r  r  r  rY  r  r   rJ   randpermrt   
startswithr   NamedTemporaryFiler   r   saveseekloadrx   TemporaryFile
state_dictrf  )rM   
model_base	model_DDPr  r  r   r  r   
batch_sizeZ	test_saveoffsetr   	zero_gradr  Zn_iteridxtmptmp_fileZsaved_modelr^  rP   rP   rQ   _test_DDP_niter  sT        




 



z-DistributedTest._DistTestBase._test_DDP_niterc                 C   s  t }t|}||d  t|}	|	|d  tjj|	|||d}	|rT|	  t	 L}
t
jdkrt|	|
 |
d t|
}	nt|	|
j t|
j}	W 5 Q R X t|}| |\}}}}| ||	||d ||d ||||d	 |   d S )Nr   
device_idsgradient_as_bucket_viewstatic_graphr9   T)DDP_NETr  r  r
  rE   parallelr   _set_static_graphr   r  r   r   rJ   r  r  r  rx   r~  r  r  rK  )rM   
gpu_subsetr   output_devicer  r  set_static_graph_twicer   	model_gpur  r  r  r  r  r  r   rP   rP   rQ   _test_DistributedDataParallel  sD    




z;DistributedTest._DistTestBase._test_DistributedDataParallelc                 C   sl   |   \}}}t}t|}tjj||d}d}| |\}}	}
}| j|||	|
||||ddd
 | 	  |S )Nr  rA   FT)r  )
r]  r  r  r  rE   r  r   r  r  rK  )rM   r  rV  rW  r   r  r  r  r  r  r  r   rP   rP   rQ    _test_DistributedDataParallelCPUI  s.    
 z>DistributedTest._DistTestBase._test_DistributedDataParallelCPUz'nccl does not support DDP on CPU modelsc                 C   s   |    d S r\   r  rL   rP   rP   rQ   test_DistributedDataParallelCPUl  s    z=DistributedTest._DistTestBase.test_DistributedDataParallelCPUc                 C   s   | j dd d S )NTr  r  rL   rP   rP   rQ   ,test_DistributedDataParallelCPU_grad_is_viewp  s    zJDistributedTest._DistTestBase.test_DistributedDataParallelCPU_grad_is_viewddpz1 backend does not support DistributedDataParallelc                 C   s   |  tdd  |   d S )Nc                   S   s   t jt  S r\   )rE   r  r   r   rP   rP   rP   rQ   r   {  r   zZDistributedTest._DistTestBase.test_DistributedDataParallel_requires_grad.<locals>.<lambda>)r  r,  rK  rL   rP   rP   rQ   *test_DistributedDataParallel_requires_gradt  s
     zHDistributedTest._DistTestBase.test_DistributedDataParallel_requires_gradzGloo-only testc                 C   sb   G dd dt j}| }tj j|}tdD ].}| jdd | tdd |	 D  q.d S )Nc                       s$   e Zd Z fddZdd Z  ZS )zBDistributedTest._DistTestBase.test_ddp_create_graph.<locals>.Modelc                    s    t    ttd| _d S )Nr  )rC   rD   rE   r   rJ   rq   r   rL   rN   rP   rQ   rD     s    
zKDistributedTest._DistTestBase.test_ddp_create_graph.<locals>.Model.__init__c                 S   s   | j dS r   )r   powrL   rP   rP   rQ   rU     s    zJDistributedTest._DistTestBase.test_ddp_create_graph.<locals>.Model.forwardrV   rP   rP   rN   rQ   Model  s   r     T)Zcreate_graphc                 S   s   g | ]
}|j qS rP   r   )r{   r  rP   rP   rQ   r     s     zGDistributedTest._DistTestBase.test_ddp_create_graph.<locals>.<listcomp>)
rE   r   rJ   r  r   r  r  r  r   r  )rM   r  r   	ddp_modelr  rP   rP   rQ   test_ddp_create_graph  s    z3DistributedTest._DistTestBase.test_ddp_create_graphr   c                 C   s,  t j| j}| j}t j|  t jjjt jjdddd||gd}t	dD ]}|j
jj}|d k	r||d |  t |g |}|| }|  |j
jj}| }t| ttjd }	||	 tdd t	|	D |	 }
| j|d	 |
d
|
 d| d| j d qTW 5 Q R X d S )Nr?   Fr=   r  r   r   c                 s   s   | ]
}|V  qd S r\   rP   r  rP   rP   rQ   r     s     z`DistributedTest._DistTestBase.test_DistributedDataParallel_non_default_stream.<locals>.<genexpr>r   r   zExpected gradient of 	 but got z	 on rank msg)rJ   r
  ZStreamr   streamrE   r  r   rF   r  r   weightr  r  r  rq   rk  r   r  r  r   r  r   r   r   Zdiv_rf  )rM   r  r   netr  r  batchr   avgr   expected_gradrP   rP   rQ   /test_DistributedDataParallel_non_default_stream  s4     




zMDistributedTest._DistTestBase.test_DistributedDataParallel_non_default_streamr
  z@ backend does not support DDP communication hook on CUDA devicesc           	      C   s  t jt jtjtjtjtjg}t	j
jt	j
jg}|D ]r}tjjjtjjdddd| j| jgd}| }| |dd  |d | | }| |d|j q0|D ]r}tjjjtjjdddd| j| jgd}| }| |dd  || | }| |dt| qtjjjtjjdddd| j| jgd}| }| |dd  tdD ],}tjdd| jd}|| }|  qj| }| |ddd d S )	Nr?   Fr=   r  	comm_hookrA   r    )defaultallreduce_hookZfp16_compress_hookpowerSGDr   Zbatched_powerSGD_hookquantization_hooksZquantization_pertensor_hookZquantization_perchannel_hookr   ZBuiltinCommHookTypeZ	ALLREDUCEZFP16_COMPRESSrJ   rE   r  r   rF   r
  r   _get_ddp_logging_datarf  rG  register_comm_hookrY   Z_register_builtin_comm_hookr   r  r   r   r  )	rM   hooksZcpp_builtin_hooksr   r  ddp_logging_datar  r   r   rP   rP   rQ   test_ddp_comm_hook_logging  sT    

z8DistributedTest._DistTestBase.test_ddp_comm_hook_loggingc              
   O   s  | j }tj| t| tj| t tdd fg}trh|t	j
 tdddd f |D ]B\}	}
tjjjdddd tjjjt|	 | j g||d}tjjjt|	 | j g||d}| }| }|r t|}t|}| t|d |d g}|d g}|rB|j|f|d	|i| n|j|f|| ||f||}t| | D ]\}}| || qvtt| }td
D ]&}|  ||
}| }|  qt   td
D ].}|  ||
}| }|  |!  qt   t| | D ]\}}| || q*|r| "|d t| d  | |dd  t| dd   n| "|t|  t   W 5 Q R X qld S )Nr?   r   rh   TF)ZenabledZdeterministicZ	benchmarkr  r   Zoptim_paramsr  )#r   rJ   r
  r  manual_seedr   rK   HAS_TORCHVISIONr  torchvisionmodelsresnet50r   cudnnflagsrE   r  r   r  r  r  rY  ZassertGreaterr~  Z_register_fused_optimrh  rf  r  r  r   r  r   r  r  assertNotEqual)rM   grad_as_bucket_viewr  Z	optim_clsoptimize_subsetZfunctional_optim_argsZfunctional_optim_kwargsr   models_to_testr   r   Zddp_model_with_optimizer_hookZddp_model_with_no_hookZhook_paramsZno_hook_paramsZoptimizer_no_hookZ
hook_paramZallreduce_paramZopt_hook_init_paramsr  rv  r   rP   rP   rQ   $_test_ddp_hook_with_optimizer_parity  s    
  





zBDistributedTest._DistTestBase._test_ddp_hook_with_optimizer_parityzUIssues with async error handling, see https://github.com/pytorch/pytorch/issues/73259r  r  r  c              	   C   s,   d}d}d}| j ||tjj||||d d S )N{Gz??r   ư>ZbetasZeps)r  rJ   r  ZAdamW)rM   r  r  r  Zadamw_lrZadamw_betasZ	adamw_epsrP   rP   rQ   )test_ddp_hook_with_optimizer_parity_adamw  s    zGDistributedTest._DistTestBase.test_ddp_hook_with_optimizer_parity_adamwc              	   C   s,   d}d}d}| j ddtjj||||d d S )Nr  r  r  TFr  )r  rJ   r  ZAdam)rM   r  Zadam_lrZ
adam_betasZadam_epsrP   rP   rQ   (test_ddp_hook_with_optimizer_parity_adam  s    zFDistributedTest._DistTestBase.test_ddp_hook_with_optimizer_parity_adamc              	   C   s,   d}d}d}| j ddtjj||||d d S )Nr  r  TF)r   Zweight_decay)r  rJ   r  r  )rM   r  Zsgd_lrZsgd_momentumZsgd_weight_decayrP   rP   rQ   'test_ddp_hook_with_optimizer_parity_sgd  s    zEDistributedTest._DistTestBase.test_ddp_hook_with_optimizer_parity_sgdr  c              	   C   s  | j }tjdd}z
|j}W n tk
r6   |}Y nX tjjjt	|
||g|d}|j||d tjjjt	|
||g|d}tdD ]}	|jjj|jjjfD ]}
|
d k	r|
d |
  qt|g |}|| }|  |jjj}| }tdd tt D t  }|| }|  |jjj}| }|	|k r| j|d	  |d
| d|d	  d | j|d	 |d	 d|d	  d|d	  d qd S )Nr?   rn   r  r  r   r   r  Fc                 s   s   | ]
}|V  qd S r\   rP   r  rP   rP   rQ   r     s     zFDistributedTest._DistTestBase._test_ddp_hook_parity.<locals>.<genexpr>r  zExpected hook grad of r  r  z,Expected hook grad to be close to allreduce z
, but got )r   rJ   rE   rF   r  AttributeErrorr  r   r  r  r  r  r  r   r  r  r  r  rq   rk  r
  r   r  r  r   r   rf  item)rM   r   r   num_validated_itersr   mr  Znet_with_hookZnet_without_hookr  gr  r   r  r  r  	loss_hookZ	grad_hookZavg_hookrP   rP   rQ   _test_ddp_hook_parity  s\    




 

z3DistributedTest._DistTestBase._test_ddp_hook_parityc                 C   s   | j d tjd d S )Nr  )r  r  r  rL   rP   rP   rQ   test_ddp_hook_parity_allreduce  s    z<DistributedTest._DistTestBase.test_ddp_hook_parity_allreducec                    sH   t  }t|t  fddt|D }tj|}| j|t	j
d d S )Nc                    s   g | ]} t | d  qS r  )r   r[  r  rP   rQ   r     s     z^DistributedTest._DistTestBase.test_ddp_hook_parity_allreduce_process_group.<locals>.<listcomp>r  )r   r   r   rs   r  rJ   distributedrT  r  r  r  )rM   r   gpusr  rP   r  rQ   ,test_ddp_hook_parity_allreduce_process_group  s
    
zJDistributedTest._DistTestBase.test_ddp_hook_parity_allreduce_process_groupc                 C   s0   dD ]&}t jd dd|d}| j|t jd qd S )Nr  r?   rA   )r  matrix_approximation_rankstart_powerSGD_iter
warm_startr  )r  PowerSGDStater  r   )rM   r  powersgd_staterP   rP   rQ   test_ddp_hook_parity_powerSGD  s     z;DistributedTest._DistTestBase.test_ddp_hook_parity_powerSGDzmDisabled for environments that                          don't support multiprocessing with spawn start methodc                 C   s   t jd tjjdd}| j|t jd d}t jd tjj|dd}| j|t j|d | jt kr~t jd d dd}| j|t jd t jd d dd}| j|t jd d S )Nr<   )r  r  start_localSGD_iterr  F)r  r  r   Zpost_local_gradient_allreduce)r   r   r  r   )	post_localSGDZPostLocalSGDStater   rV  r[  r  r   r   r   )rM   r   r   rP   rP   rQ   "test_ddp_hook_parity_post_localSGD#  sP               z@DistributedTest._DistTestBase.test_ddp_hook_parity_post_localSGDc                 C   sx   t  }|r|d ntd| }tt||||d|d}	|| t|d|}
t|d|}||	|
|fS )Nr   cuda:%dMbP?)r  r  bucket_cap_mbr  rA   ri   )r   rJ   r   r   r  r  r  rK   )rM   r   r  devicesr  global_batch_sizer  r   r   r  r  r  rP   rP   rQ   _prepare_single_device_moduleY  s    	
z;DistributedTest._DistTestBase._prepare_single_device_modulec                 C   s@   t  }tt||d|d}t|d}t|d}||||fS )Nr  )r  r  r  rA   ri   )r   r   r  r  rJ   rK   )rM   r  r  r  r   r  r  r  rP   rP   rQ   _prepare_cpu_modules  s    z1DistributedTest._DistTestBase._prepare_cpu_modulec              
   C   s  |   \}}}t }tdks$tdkrB|}d}	| |||\}
}}}tdkrtt t}|| dd }dd |D }|}t|}	| ||||||\}
}}}|dk	r||| dd	 }t	
 * |  |  || W 5 Q R X W 5 Q R X t|D ]}||
|| |||	 |d |	  }|||	 |d |	  }|d
 dkrj|  |||| W 5 Q R X n|||| t|
 | D ]F\}}|jsq|d
 dkr| |j|j n| |j|j qt	d|  |t	| }qdS )a#  
            This is the recommended way to implement accumulate grads.
            If ``ddp_comm_hook`` input was specified, it will also register that hook
            to the ``ddp_model``. The hook fed into this function should not change
            the resulting gradients.
            rO  r  r?   r?  Nc                 S   s   g | ]}t d t| qS zcuda:rJ   r   r   r  rP   rP   rQ   r     s     zTDistributedTest._DistTestBase._test_accumulate_gradients_no_sync.<locals>.<listcomp>c                 S   s0   |    | |}t|||j}|  d S r\   )r  r   mse_lossr  r   r  )r   r  r  r  r   rP   rP   rQ   
step_model  s    zTDistributedTest._DistTestBase._test_accumulate_gradients_no_sync.<locals>.step_modelrA   r   9  )r]  r   rs   r	  r   r   r~  r  r  rJ   r  no_syncr  r  rh  r  r   r  r  rf  r  r  )rM   	num_itersddp_comm_hookr  rV  rW  r   r   r  Zlocal_batch_sizer   r  r  r  r  Zint_devicesr  r  	iterationZ	ddp_inputZ
ddp_targetr  jrP   rP   rQ   "_test_accumulate_gradients_no_sync  sj    	  	

 
 

z@DistributedTest._DistTestBase._test_accumulate_gradients_no_syncz2get_future is only supported on mpi, nccl and glooc                 C   s   |    dS )V
            Runs _test_accumulate_gradients_no_sync using default inputs
            Nr  rL   rP   rP   rQ   !test_accumulate_gradients_no_sync  s    	z?DistributedTest._DistTestBase.test_accumulate_gradients_no_syncc                 C   s   | j dd dS )r  Tr  Nr  rL   rP   rP   rQ   .test_accumulate_gradients_no_sync_grad_is_view  s    	zLDistributedTest._DistTestBase.test_accumulate_gradients_no_sync_grad_is_viewc                    s:   t   ttjtjjtj d fdd}| jd|d dS )z
            Runs multiple iterations on _test_accumulate_gradients_no_sync
            using allreduce hook and validates whether future result was properly
            passed as gradients in reducer.
            rW  bucketreturnc                    s&   |    g}| | dd S )Nc                 S   s   |   d S r   r  futrP   rP   rQ   r      r   zxDistributedTest._DistTestBase.test_accumulate_gradients_no_sync_allreduce_hook.<locals>.allreduce_hook.<locals>.<lambda>)r@   r  
get_futurethen)rW  r  r  r   rP   rQ   r    s
    zfDistributedTest._DistTestBase.test_accumulate_gradients_no_sync_allreduce_hook.<locals>.allreduce_hookri   r  r  N	r   objectr   Z
GradBucketrJ   futuresZFuturer_   r  )rM   r  rP   r   rQ   0test_accumulate_gradients_no_sync_allreduce_hook  s     
 zNDistributedTest._DistTestBase.test_accumulate_gradients_no_sync_allreduce_hookc                    s:   t   ttjtjjtj d fdd}| jd|d dS )ao  
            Runs multiple iterations on _test_accumulate_gradients_no_sync using allreduce
            hook that also uses then callbacks. In first then callback result is multiplied
            by 2, and the second callback divides the result by 2 * world_size. It validates
            whether final result was properly passed as gradients in reducer.
            r  c                    s8   |  | g }dd } fdd}|||S )Nc                 S   s   d|   d  S )NrA   r   r  r  rP   rP   rQ   mult  s    zDistributedTest._DistTestBase.test_accumulate_gradients_no_sync_allreduce_with_then_hook.<locals>.allreduce_with_then_hook.<locals>.multc                    s   |   d   S r   r%  r  r   rP   rQ   div  s    zDistributedTest._DistTestBase.test_accumulate_gradients_no_sync_allreduce_with_then_hook.<locals>.allreduce_with_then_hook.<locals>.div)r  r@   r  r  )rW  r  r  r&  r'  r   rP   rQ   allreduce_with_then_hook  s    zzDistributedTest._DistTestBase.test_accumulate_gradients_no_sync_allreduce_with_then_hook.<locals>.allreduce_with_then_hookri   r   Nr!  )rM   r(  rP   r   rQ   :test_accumulate_gradients_no_sync_allreduce_with_then_hook  s      zXDistributedTest._DistTestBase.test_accumulate_gradients_no_sync_allreduce_with_then_hookc                 C   s   dd }dd }|   \}}}tdd}tdkrTtt t}|| d }||}||g }	|		|	|
 }
tddt| d d	 }| |
d | d S )
Nc                 S   s   dd |   D S )Nc                 S   s   g | ]}|d  qS rZ  rP   r  rP   rP   rQ   r   0  s     zODistributedTest._DistTestBase.test_get_future.<locals>.mult.<locals>.<listcomp>r%  r  rP   rP   rQ   r&  /  s    z;DistributedTest._DistTestBase.test_get_future.<locals>.multc                 S   s   dd |   D S )Nc                 S   s   g | ]}|d  qS rf  rP   r  rP   rP   rQ   r   3  s     zNDistributedTest._DistTestBase.test_get_future.<locals>.add.<locals>.<listcomp>r%  r  rP   rP   rQ   r}  2  s    z:DistributedTest._DistTestBase.test_get_future.<locals>.addrh   rA   r?  r   r?   )r]  r  rs   r   r   r   r  r  r  r  r  r~  rf  )rM   r&  r}  rV  rW  r   r  r  r  r  resexpectedrP   rP   rQ   test_get_future)  s    

z-DistributedTest._DistTestBase.test_get_futurec           	      C   s   |   \}}}tt t}t|| }tddD ]p\}}| j||||d | j||||dd | j||t	
d||d dd |D }| j||t	
d||d q4d S )	N)FT)r  r   r  r  T)r  r   r  r  r  r
  )r  r   r  r  r  c                 S   s   g | ]}t d t| qS r
  r  r  rP   rP   rQ   r   h  s     zNDistributedTest._DistTestBase.test_DistributedDataParallel.<locals>.<listcomp>)r]  r   r   r   rs   rY  rL  productr  rJ   r   )	rM   rV  rW  r   r  r  Zuse_bucket_viewr  Z	gpus_listrP   rP   rQ   rv   A  sF     		z:DistributedTest._DistTestBase.test_DistributedDataParallelc              
   C   sz  t d tt }t jj| dd}t	 }t
jj|| jg|d}t t d d }t t d d }t
 }| D ]"}	| |	d k	 | |	jd k qtdD ]}
|  t  ||}|||}W 5 Q R X ||  | D ]B}	|	jr| |	jd k	 | |	j   | |	j   q|| |  t d|
  |t  t d  }q|S )	Nz  Q?r  r  r  rA   ri   r   r  )!rJ   r  r  r  r  r
  r  r  r  r   rE   r  r   r   rK   r   r   r  r  r  r  r  r   Zscaler  r   r  isnananyisinfr  updater  )rM   grad_is_viewr   	optimizerZscalerr  r  r  loss_fnr   r  r  r   rP   rP   rQ   &_test_DistributedDataParallel_with_ampq  s>    
  
zDDistributedTest._DistTestBase._test_DistributedDataParallel_with_ampc                 C   sR   t j| j | jdd}| jdd}t| | D ]\}}| || q8d S )NFr5  T)rJ   r
  r  r   r8  rh  r  rf  )rM   Zddp_model_grad_not_viewZddp_model_grad_is_viewr  r  rP   rP   rQ   6test_DistributedDataParallel_with_amp_and_grad_is_view  s    zTDistributedTest._DistTestBase.test_DistributedDataParallel_with_amp_and_grad_is_viewc                 C   s  |rt nt}t|}	|	|d  tjt|}
|
|d  tjj	|
|d}
t
 L}tjdkrt|
| |d t|}
nt|
|j t|j}
W 5 Q R X t|d}t|d}t }| |	|
||d ||d ||||d|t |r
dnd |   d S )Nr   r  r9   rA   ri   Trn   )BN_NETBN_NET_NO_AFFINEr  r  r
  rE   SyncBatchNormconvert_sync_batchnormr  r   r   r  r   r   rJ   r  r  r  rx   rK   r  r  r   r   rK  )rM   r  r   r  r  r  r  r   r   r  r  r  r  r  r   rP   rP   rQ   +_test_DistributedDataParallel_SyncBatchNorm  sD    
 


zIDistributedTest._DistTestBase._test_DistributedDataParallel_SyncBatchNormc                 C   s.  d}t jjjtt | jg|d}| }t j	j
| |d}t jjjtt | jg|d}| }| |||}	t t d d }
t t d d }t }tdD ]b}| ||||
| ||  | |	|||
| t| | D ]\}}| |j|j qq| |j|j d S )Nr/  r0  r  rA   ri   r   )rJ   rE   r  r   r  r  r  r
  r   r  r  r  _create_post_localSGD_optimizerrK   r   r   r  r  _perform_a_train_stepr  rh  rf  r3  r  )rM   create_averagerr5  learning_rater  r  r  net_using_post_localSGD_opt	averager2post_localSGD_optr  r  r7  r  p1p2rP   rP   rQ   $_test_post_localSGD_optimizer_parity  sF    zBDistributedTest._DistTestBase._test_post_localSGD_optimizer_parityc                 C   s   t jdddS )Nri   r<   r  )r  r  rL   rP   rP   rQ   _create_periodic_model_averager%  s    z=DistributedTest._DistTestBase._create_periodic_model_averagerc                 C   s   t jtjj| |d|dS )Nr  )r  r  )post_localSGD_optimizerZPostLocalSGDOptimizerrJ   r  r  r  )rM   r  rC  r  rP   rP   rQ   r@  (  s    z=DistributedTest._DistTestBase._create_post_localSGD_optimizerc                 C   s.   |   ||}|||}|  |  d S r\   )r  r  r  )rM   r6  r  r7  r  r  r  r   rP   rP   rQ   rA  .  s
    
z3DistributedTest._DistTestBase._perform_a_train_stepc              	   C   sj  d}t jjjtt | jgd}| }| 	|||}| }| 	|||}t 
t d d }	t 
t d d }
t }tdD ]}| ||||	|
 q| jdkrt d| i| t  dd d| j i}t j||d	}||d  | |jd | |j|j |d d
= | d
|d  | jtdd ||d  W 5 Q R X | |jd d S )Nr/  r  rA   ri   r   r   Zoptimizer_state_dictr  map_locationr  z]Loaded state dict does not contain a step counter for an averager. Setting step counter to 0.)Zexpected_warningZexpected_regex)rJ   rE   r  r   r  r  r  r
  r   r@  rK   r   r   r  r  rA  r  r  r  r  load_state_dictr  r  rf  ru  ZassertWarnsRegexUserWarning)rM   rB  
chkpt_filerC  rD  r  rF  rE  Zdummy_post_localSGD_optr  r  r7  r  rM  
checkpointrP   rP   rQ   )_test_post_localSGD_optimizer_step_reload5  sX    

zGDistributedTest._DistTestBase._test_post_localSGD_optimizer_step_reloadc                 C   s"   t j| j | j| jdd d S NFr9  rJ   r
  r  r   rI  rJ  rL   rP   rP   rQ   #test_post_localSGD_optimizer_parityt  s
    zADistributedTest._DistTestBase.test_post_localSGD_optimizer_parityc                 C   s"   t j| j | j| jdd d S NTr9  rT  rL   rP   rP   rQ   0test_post_localSGD_optimizer_parity_grad_is_view  s
    zNDistributedTest._DistTestBase.test_post_localSGD_optimizer_parity_grad_is_viewc                 C   s"   t ddt fg}tj|ddS )N)rA   rA   ri   r  )r   r   r   r  r  )rM   r  rP   rP   rQ   #_create_hierarchical_model_averager  s
     zADistributedTest._DistTestBase._create_hierarchical_model_averagerc                 C   s"   t j| j | j| jdd d S rS  rJ   r
  r  r   rI  rX  rL   rP   rP   rQ   9test_post_localSGD_optimizer_parity_with_hierarchical_sgd  s
    zWDistributedTest._DistTestBase.test_post_localSGD_optimizer_parity_with_hierarchical_sgdc                 C   s"   t j| j | j| jdd d S rV  rY  rL   rP   rP   rQ   Ftest_post_localSGD_optimizer_parity_with_hierarchical_sgd_grad_is_view  s
    zdDistributedTest._DistTestBase.test_post_localSGD_optimizer_parity_with_hierarchical_sgd_grad_is_viewc              	   C   s2   t j| j t }| | j| W 5 Q R X d S r\   )rJ   r
  r  r   r  rR  rJ  )rM   r  rP   rP   rQ   (test_post_localSGD_optimizer_step_reload  s    zFDistributedTest._DistTestBase.test_post_localSGD_optimizer_step_reloadc                 C   s   |   \}}}t }d}t|d }t|d }t}t||}	tj	j
|	|gd}
tj}tj|dddtjd|j|d}tj|dddtjd|j|d}t }| j|	|
||||||d|t |d |   d S )NrA   r  ri   r  r  T)r]  r   r   r   ONLY_SBN_NETr  r  r
  rE   r  r   rJ   Zchannels_lastrK   rk  r  r  r  rK  )rM   rV  rW  r   r  r  	bs_offsetr  r   r  r  r  Z	input_gpuZ
target_gpur   rP   rP   rQ   8test_DistributedDataParallel_SyncBatchNorm_Channels_Last  sP     zVDistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNorm_Channels_Lastc           	   	   C   s   |   \}}}t }|g}d}t|d }t|d }| j|||||d | j|||||tdd dd |D }| j|||||tdd d S )NrA   r  r   r  r  r  r
  )r  r   r  r  r  r  c                 S   s   g | ]}t d t| qS r
  r  r  rP   rP   rQ   r     s     z\DistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNorm.<locals>.<listcomp>)r]  r   r   r   r?  rJ   r   	rM   rV  rW  r   r   r  r  r^  r  rP   rP   rQ   *test_DistributedDataParallel_SyncBatchNorm  s<    	
zHDistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNormc           	      C   sR   |   \}}}t }|g}d}t|d }t|d }| j|||||dd d S )NrA   F)r  r   r  r  r  r   )r]  r   r   r   r?  ra  rP   rP   rQ   4test_DistributedDataParallel_SyncBatchNorm_No_Affine  s    zRDistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNorm_No_Affinec                 C   s   |   \}}}|g}td}t|}||d  tjt|}||d  tjj	||d}t
|d }t | }	t|	d}
t|	d}t }tjjd< | |||
|d ||d ||||	d	 |   W 5 Q R X d S )NrA   r   r  FT)r]  rE   r   r  r  r
  r=  r>  r  r   r~  r   r   rJ   rK   r  r   r  r  r  rK  rM   rV  rW  r   r  r   r  r  r  r  r  r  r   rP   rP   rQ   3test_DistributedDataParallel_SyncBatchNorm_2D_Input/  s4    

zQDistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNorm_2D_Inputc                 C   s   |   \}}}|g}td}t|}||d  tjt|}||d  tjj	||d}d}t
 }	t|	d}
t|	d}t }tjjd< | |||
|d ||d ||||	d	 |   W 5 Q R X d S )NrA   r   r  r?   FT)r]  rE   r   r  r  r
  r=  r>  r  r   r   r   rJ   rK   r  r   r  r  r  rK  rd  rP   rP   rQ   Ctest_DistributedDataParallel_SyncBatchNorm_Single_Input_Per_Process]  s4    

zaDistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNorm_Single_Input_Per_Processc              
   C   s  |   \}}}tjjt||gd}g }tt D ]\}t	j
t	ddd|d  d|d   t	ddd|d  d|d   gdd}|| q6t	j
dd	 |D dd|}td
D ]"}||| |}	|	   q|jj|jj }
}t	j|
|d t	j||d d S )Nr  rA   r?   r<   r'  ri  r   c                 S   s(   g | ] }| d dd tjdqS )r?   r   rA   r   )Zpermute
contiguousviewr]  Znum_featuresr{   rT   rP   rP   rQ   r     s   z{DistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNorm_Diff_Input_Sizes_Running_Value.<locals>.<listcomp>r  )r]  rE   r  r   r]  r
  r  r   r   rJ   rK  r   r  meanr  r   running_meanrunning_vartestingZassert_closerp  )rM   rV  rW  r   r   r  r  Zinput_var_rankZall_input_varr  rk  rl  rP   rP   rQ   Itest_DistributedDataParallel_SyncBatchNorm_Diff_Input_Sizes_Running_Value  s<       zgDistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNorm_Diff_Input_Sizes_Running_Valuec           
      C   sh   |   \}}}|g}t}t }|d }t|d | d }t|d | d }	| j||||	|d d S )NrA   rh   r`  )r]  r;  r   r   r   r?  )
rM   rV  rW  r   r  r   r  r  r^  r  rP   rP   rQ   Dtest_DistributedDataParallel_SyncBatchNorm_Diff_Input_Sizes_gradient  s    zbDistributedTest._DistTestBase.test_DistributedDataParallel_SyncBatchNorm_Diff_Input_Sizes_gradientc              	   C   s  t  }tt}|r0tjj|||gd}ntj|}d}| 	|\}}}}|rj||}||}|
d tdD ]}	|| }
| |||
|
|  ||
|
|  |d | | | }|	dkr`|	dk s|	d dkr`| |dd | |dd | |d	d | |d|d
 | |d	|d
 | |d|	 n|	dkr|| |d|	 |t| }q||S )Nr  rA   r   r?   r   r<   Zforward_compute_timeZbackward_compute_timeZbackward_comm_timeZ"backward_compute_comm_overlap_timer  )r   r   r  r  r  rE   r  r   r
  r  Z$_set_ddp_runtime_logging_sample_rater  r  r  r  r  rG  rf  r  rJ   r  )rM   is_gpur   r  r  r  r  r  r   r  r  r  rP   rP   rQ   _test_ddp_logging_data  sf    
 



    
z4DistributedTest._DistTestBase._test_ddp_logging_datac                 C   s  dd }t t jj |  \}}}| jdd}| }| |dt 	  | |dt 
  | |dd | |d	d
 | |dd | |dd | |dd | |dd | |dd | |dt | | |dd t| }d}d}	tdd tdd |D }|D ]"}
|d7 }|	|
 |
  7 }	q@| |dd | |d|	 | |d| | |dt|	 | |d |d! | |d"|d# | |d$|d% | |d&|d' |dd(krZ| |d)|d* | |d+|d, d-}| |d.| | |d/d  | |d0d  | |d1d  | |d2d  | |d3d  | |d4d  | |d5dd | |d6d | |d7t|	 |d8}ttd9d: td;D }| |d<| |d=}| |d>| | |d?d | |d@d | |dAd | |d@|dB | |dA|dB |dC}|dD}|dE}|dF}|dG}| || | || | || | || t }|  |j  tjj|dHdI}| }t| }| |dtdJ |d  |d   |d  |d   g}| |dd<dKd |D  | |ddL d S )MNc                 S   s   | t jkrt j|  S dS )Nrn  rP  ro  rP   rP   rQ   	parse_env  s    zJDistributedTest._DistTestBase.test_ddp_logging_data_cpu.<locals>.parse_envFrp  r   r   module_namer   r  r  r  r   broadcast_buffersr?   Zbucket_cap_bytesi  find_unused_parametersr   r  backend_namer  r  c                 s   s   | ]
}|V  qd S r\   rP   )r{   	parameterrP   rP   rQ   r   2  s   zJDistributedTest._DistTestBase.test_ddp_logging_data_cpu.<locals>.<genexpr>c                 S   s   | j S r\   r   )rx  rP   rP   rQ   r   5  r   zIDistributedTest._DistTestBase.test_ddp_logging_data_cpu.<locals>.<lambda>r  rk  Ztotal_parameter_size_bytesZnum_parameter_tensorsbucket_sizesZmaster_portrr  Zmaster_addrr7  Ztorch_distributed_debugZTORCH_DISTRIBUTED_DEBUGZcuda_visible_devicesZCUDA_VISIBLE_DEVICESr  Zgloo_socket_ifnameZGLOO_SOCKET_IFNAMEZgloo_device_transportZGLOO_DEVICE_TRANSPORTrA   Zgloo_num_threadsZnccl_socket_ifnameZnccl_blocking_waitZnccl_async_error_handlingZ
nccl_debugZnccl_nthreadsZnccl_ib_timeoutZunused_parameter_sizehas_rebuilt_bucketsZrebuilt_bucket_sizes'prev_iteration_grad_ready_order_indicesc                 S   s   g | ]}t |qS rP   r   ri  rP   rP   rQ   r   n  s     zKDistributedTest._DistTestBase.test_ddp_logging_data_cpu.<locals>.<listcomp>rh   ,  rebuilt_per_bucket_param_indices avg_forward_compute_timeavg_backward_compute_timeavg_backward_comm_time&avg_backward_compute_comm_overlap_timeforward_compute_time_startbackward_compute_time_startbackward_compute_time_endbackward_comm_time_startbackward_comm_time_endg      ?)r  g      8Ac                 s   s   | ]}t |V  qd S r\   r|  ri  rP   rP   rQ   r     s     zdouble, float)r   Zset_debug_levelr  INFOr]  rq  r  rf  rG  r   r   r  rY  r  filterZnumelZelement_sizer   reversedr  r   r  r   rk  r   rl  rE   r  r   r   )rM   rr  rV  rW  r   r  r  r  Z
num_paramsZ
param_sizer   Zdefault_gloo_threadsgrad_ready_orderexpected_orderbucket_indicesfwd_host_side_timebwd_comp_start_host_side_timebwd_comp_end_host_side_timebwd_comm_start_host_side_timebwd_comm_end_host_side_timer   ry  rP   rP   rQ   test_ddp_logging_data_cpu  s          

 





 z7DistributedTest._DistTestBase.test_ddp_logging_data_cpuc                 C   sV  |   \}}}| jdd}| }| |dt| | |d| |d}ttdd tdD }| |d		| |d
}| |d	| | 
|dd | 
|dd | 
|d|d | 
|d|d |d}	|d}
|d}|d}|d}| 
|| | 
||
 | 
||
 | 
|
|	 d S )NTrs  r  r  r{  c                 S   s   g | ]}t |qS rP   r|  ri  rP   rP   rQ   r     s     zKDistributedTest._DistTestBase.test_ddp_logging_data_gpu.<locals>.<listcomp>rh   r}  r~  r  r  r?   r  r  r  r  r  r  r  r  )r]  rq  r  rf  rG  r   rY  r  r  r   r  )rM   rV  rW  r   r  r  r  r  r  r  r  r  r  r  rP   rP   rQ   test_ddp_logging_data_gpu  s@    

 




z7DistributedTest._DistTestBase.test_ddp_logging_data_gpuc           	   	   C   s   t jt}d}| t|Z d}| |\}}}}t | }| 	|||||  ||||  |d |
  W 5 Q R X t|| d S )Nz,should be called before training loop startsrA   r?   )rE   r  r   r  r  r,  r  r   r   r  r  r-   )	rM   r  expected_errr  r  r  r  r   r  rP   rP   rQ   test_static_graph_api_cpu  s    z7DistributedTest._DistTestBase.test_static_graph_api_cpuc                 C   sN   d}t j|g}tj }tjt	
||}|jd jj}| || d S r   )rJ   r  rT  r  r  r  rE   r=  r>  r  r  Zlayer1Zbn1r  rf  )rM   Zprocess_idsr  Zres50_modelZres50_model_syncZprocess_group_syncrP   rP   rQ    test_SyncBatchNorm_process_group  s    
 z>DistributedTest._DistTestBase.test_SyncBatchNorm_process_groupc                 C   sj   |t jkr"|d kr"td| d|d k	rP|||| t  |krf| || n||| | || d S )NzReduction fn z must specify dst!)r   r  r  r   rf  )rM   rq   r-  r~  Zreduction_fnr,  rP   rP   rQ   _run_reduction_test  s    
z1DistributedTest._DistTestBase._run_reduction_testc                 C   s   t j| j | jd dk}tjjtjjfD ]f}t ||g	| j}| 
|t ddg	| j| t ddg	| j}| }| 
||| q,tjjtjjfD ]:}t ||g	| j}| 
|t ddg	| j| qd S )NrA   r   FT)rJ   r
  r  r   r   r  r  r  rq   r  r  r  r  r  )rM   elementr~  r3  r-  rP   rP   rQ    test_nccl_backend_bool_allreduce  s&        z>DistributedTest._DistTestBase.test_nccl_backend_bool_allreducec                    s   t j j ddgddgd}t | jd   j}| } fddtt	 D }t
||  t|t	  t|D ].\}}t ||d   j} || q || d S )NTFr   r?   rA   c                    s"   g | ]}t d d g jqS F)rJ   rq   r  r   r  rL   rP   rQ   r   #  s   zRDistributedTest._DistTestBase.test_nccl_backend_bool_allgather.<locals>.<listcomp>)rJ   r
  r  r   rq   r  r  r  r   r   ri  rf  r~  rM  )rM   r   r3  Zinput_tensor_copytensor_listr  r  r+  rP   rL   rQ    test_nccl_backend_bool_allgather  s    

z>DistributedTest._DistTestBase.test_nccl_backend_bool_allgatherc                 C   s(  t j| j ddgddgd}tjjtjjfD ]}t || jd  	| j}t ddg	| j}| j
|||tjdd t ddg	| j}| }| j
|||tjdd q0tjjtjjfD ]`}t || jd  	| j}| jdkrt ddg	| jn| }| j
|||tjdd qd S )NTFr  rA   r   r  )rJ   r
  r  r   r   r  r  r  rq   r  r  r   r  r  r  )rM   r   r~  r3  r+  r-  rP   rP   rQ   test_nccl_backend_bool_reduce1  s,        z;DistributedTest._DistTestBase.test_nccl_backend_bool_reducec                    s   dt  fddtD  j}tj|dd  fddtt D }t|| |d }|dd  D ]} 	|| qrd S )Nr<   c                    s&   g | ]} j d krt dk ndqS )r   r  F)r   randomr  rL   rP   rQ   r   S  s   zRDistributedTest._DistTestBase.test_nccl_backend_bool_broadcast.<locals>.<listcomp>r   r  c                    s,   g | ]$}t d d tD  jqS )c                 S   s   g | ]}d qS r  rP   r  rP   rP   rQ   r   [  s     z]DistributedTest._DistTestBase.test_nccl_backend_bool_broadcast.<locals>.<listcomp>.<listcomp>)rJ   rq   r  r  r   r  rM   Ztensor_sizerP   rQ   r   Z  s   r?   )
rJ   rq   r  r  r   r   r  r   ri  rf  )rM   Zbcast_tensorr  r+  rq   rP   r  rQ    test_nccl_backend_bool_broadcastM  s     

z>DistributedTest._DistTestBase.test_nccl_backend_bool_broadcastc                    s  t  d d } fddt|D }td d d} fddt|D }t|dd}|j|j }}| d	krt|  n| } 	||  	||  t
t|}	 	t|	|  fd
d}
|
| t|d}|j|j }} 	|t|   	||  t
t|}	 	t|	| |
| t|d}|j|j }} 	|t|   	||  t
t|}	 	t|	| |
| d S )Nr  r?   c                    s"   g | ]}t d  j| qS rf  rJ   r   r  r   r  rL   rP   rQ   r   n  s     zQDistributedTest._DistTestBase.test_DistributedSampler_padding.<locals>.<listcomp>rA   c                    s"   g | ]}t d  j| qS rf  r  r  rL   rP   rQ   r   r  s    T)datasetZ	drop_lastr   c                    sX    fddt D }t|t| g j dd |D } tt	|d d S )Nc                    s    g | ]}t d g jqS r  )rJ   Z
LongTensorr  r   r  rL   rP   rQ   r     s    zrDistributedTest._DistTestBase.test_DistributedSampler_padding.<locals>.validate_global_samples.<locals>.<listcomp>c                 S   s   g | ]}|  qS rP   r  )r{   samplerP   rP   rQ   r     s     r?   )
r  r   ri  rJ   rq   r  r   rf  r~  r|  )local_num_samplesZworld_samplesrM   r   rP   rQ   validate_global_samples  s    
 z^DistributedTest._DistTestBase.test_DistributedSampler_padding.<locals>.validate_global_samples)r  )r   r   r  ro  r8   Znum_samplesZ
total_sizemathceilrf  rY  iterr~  )rM   Zdataset_sizer  Zdataset_tiny_sizeZdataset_tinyZdist_samplerr  Zlocal_dataset_sizeZeffective_dataset_sizeZindices_listr  Zdist_sampler_added_samplesZdist_sampler_added_samples_tinyrP   r  rQ   test_DistributedSampler_paddingc  sT    
	


 z=DistributedTest._DistTestBase.test_DistributedSampler_paddingc           	      C   s   t  }tjd }|dkr:| jd t| j }tj	| |dkr\|
ttjdddd dd tt D }tj||| jt|  |d	 t|D ]$\}}||t|  }| || qd S )
Nrs   r?  r?   rh   r   r   c                 S   s   g | ]}d qS r\   rP   r  rP   rP   rQ   r     s     zHDistributedTest._DistTestBase._test_allgather_object.<locals>.<listcomp>rJ  )COLLECTIVES_OBJECT_TEST_LISTr  r   r   r   r   r   rJ   r
  r  r  r[   rK   r  r   r   all_gather_objectr~  rM  rf  )	rM   r  gather_objectsr   	next_rankoutput_gatheredr  r  r+  rP   rP   rQ   _test_allgather_object  s     
z4DistributedTest._DistTestBase._test_allgather_objectrs   OFFr  r  )levelsc                 C   s   |   S r\   )r  rL   rP   rP   rQ   !test_all_gather_object_default_pg  s    z?DistributedTest._DistTestBase.test_all_gather_object_default_pgc                 C   s(   t  }t|}tj|d}| j|dS )Nr   )r  )r   r   r  rT  r  rM   r  r   r  rP   rP   rQ   test_all_gather_object_subgroup  s    
z=DistributedTest._DistTestBase.test_all_gather_object_subgroupc              	      sv  t  }t|}tjd }|dkrD| jd t| j }t	j
| |dkrf|tt	jdd|d dd tt|D }d}tj|| jt|  ||kr|nd ||d	 ||kr| |d
d tt D  n.t|D ]$\}}	||t|  }
| |	|
 qG dd d}|   fddtt D }| td. tjdd tt D || j |d W 5 Q R X d S )Nrs   r?  r?   rh   r   c                 S   s   g | ]}d qS r\   rP   r  rP   rP   rQ   r     s     zEDistributedTest._DistTestBase._test_gather_object.<locals>.<listcomp>r   )Zobject_gather_listr,  rV  c                 S   s   g | ]}d qS r\   rP   r  rP   rP   rQ   r     s     c                   @   s   e Zd ZdS )z>DistributedTest._DistTestBase._test_gather_object.<locals>.BarN)rW   rX   rY   rP   rP   rP   rQ   Bar  s   r  c                    s   g | ]} qS rP   rP   r  rH   rP   rQ   r   
  s     zCan't pickle local objectc                 S   s   g | ]}d qS r\   rP   r  rP   rP   rQ   r     s     rJ  )r  r  r   r   r   r   r   r   r   rJ   r
  r  r  r[   rK   r  r   Zgather_objectr~  rf  rM  r  r  r  )rM   Zpgr  Zmy_rankr   r  r  Zgather_on_rankr  r  r+  r  rP   r  rQ   _test_gather_object  sF    

 z1DistributedTest._DistTestBase._test_gather_objectc                 C   s   |   S r\   )r  rL   rP   rP   rQ   test_gather_object  s    z0DistributedTest._DistTestBase.test_gather_objectc                 C   s&   t  }t|}tj|d}| |S Nr   )r   r   r  rT  r  r  rP   rP   rQ   test_gather_object_subgroup  s    
z9DistributedTest._DistTestBase.test_gather_object_subgroupc                    s\   t |j  }|D ]@  fddtt D }t|  |D ]}| |  qDqd S )Nc                    s   g | ]}t  qS rP   rJ   Z
zeros_liker  r  rP   rQ   r   $  s    zJDistributedTest._DistTestBase.validate_net_equivalence.<locals>.<listcomp>)	rY  r   r  r]  r  r   r   ri  rf  )rM   r  net_module_statesr  rq   rP   r  rQ   validate_net_equivalence  s    

z6DistributedTest._DistTestBase.validate_net_equivalencec                    s>  d}| j }d}t| tj||dd}tjjj||| j gdd}tj||dd|}t	||_
t|j
  }|D ]^  fddtt D }t|  t|D ]*\}	}
|	|kr|  |
 q|  |
 qq~t|j
|j|j||jd | | ||kr:|  }t||D ]\ }|  | q"d S )	NrA   r?   Fr=   r  r  c                    s   g | ]}t  qS rP   r  r  r  rP   rQ   r   A  s    zMDistributedTest._DistTestBase.test_ddp_sync_module_states.<locals>.<listcomp>)r   r  broadcast_bucket_sizer  Zparams_and_buffers_to_ignore)r   rJ   r  rE   rF   r  r   r
  r  r  r   rY  r  r]  r  r   r   ri  rM  rf  r  r   r  r  Zparameters_to_ignorer  rh  )rM   r   r   Zrank_to_broadcastr   r  Z	new_modelr  r  r  rq   Zexpected_statesr+  rP   r  rQ   test_ddp_sync_module_states+  sD    
  



z9DistributedTest._DistTestBase.test_ddp_sync_module_statesc              	   C   s  d}d}d}| j }tj||dd}tj||| j d| }tjjj||| j gdd}d}| j d	krl|d
7 }|jddt t	|D ]d}	||
 }
|
  tj||| j d| }t| d	 }| ||j |  tjj| j d qW 5 Q R X |jdd t	|D ]}||
 }
|
  t }|dkr:|d8 }tj||| j d| | t  }t| d	 }| ||j |  tjj| j d qW 5 Q R X d S )Nrn   r?   r   Fr=   r   r  rh   r   rA   )Zdivide_by_initial_world_sizeT)r   rE   rF   rJ   r   r  r   r
  r   r  r   r  rY  r  rf  r  r  synchronizer   r   )rM   r   r  Z
grad_scaler   r   r   r  n_itersr  r   r  r  r  Zeffective_wsrP   rP   rQ   test_ddp_grad_div_uneven_inputs\  sR    	  

z=DistributedTest._DistTestBase.test_ddp_grad_div_uneven_inputsc              	   C   s  d}d}d}t j| j tj||dd}t j||| jd}t jjj|| j| jgd}t	
|}|(}	t|D ]}
|| }|  qrW 5 Q R X t  d}t||	}td	d
 |D }| || |D ]}| |j | |j| qt  d}t||	}tdd
 |D }| |d |D ]}| |j| q*t jjj|| j| jgdd}tdD ]}
|| }|  qh|}	|| }|  W 5 Q R X t||	}| t|d | |d jd | |d j| |D ]}| |j qtd|	}| t|d d S )Nrh   r<   r  Fr=   r   r  r  c                 s   s   | ]}|j V  qd S r\   r7  r9  rP   rP   rQ   r     s     zDDistributedTest._DistTestBase._test_ddp_profiling.<locals>.<genexpr>rj  c                 s   s   | ]}|j V  qd S r\   r7  r9  rP   rP   rQ   r     s     r?   Tr  rv  r   Zsearch_unused_parameters)rJ   r
  r  r   rE   rF   randr  r   r  r  r  r   r  r   r  r   rf  r  r:  rx   r  r~  r8  )rM   r*  r  r   r  r   r   r  Zprofiler_ctx_copyr+  r  r   Zall_reduce_event_namer   r<  r|   Zbroadcast_event_nameZbroadcast_eventsrP   rP   rQ   _test_ddp_profiling  s^    






z1DistributedTest._DistTestBase._test_ddp_profilingc                 C   s   t jj }| j|dS r?  )rJ   r  r   r   r  rB  rP   rP   rQ   $test_ddp_profiling_autograd_profiler  s    zBDistributedTest._DistTestBase.test_ddp_profiling_autograd_profilerc                 C   s6   t jjj}t jjj}t jj||gd}| j|d d S )N)r  r@  )rJ   r   r  r  r5  r   r  )rM   Zcpu_actZcuda_actrF  rP   rP   rQ   !test_ddp_profiling_torch_profiler  s    	

z?DistributedTest._DistTestBase.test_ddp_profiling_torch_profilerc              	   C   s  d}d}d}t j||dd}tj||| jd}t|}|| j}dd tt	
 D }t| }tjj| |d	}	t|D ],}
|	  ||}| }|  |	  q|| j }tj jj|| j| jgd
}tjj| |t	
  d	}| N t|D ]>}|  ||}| }|  tjj| jd |  qW 5 Q R X t|  |j  D ]\\}
}\}
}| || qld S )Nrh   r<   r/  Fr=   r   c                 S   s   i | ]}|d |d  qS )rA   r?   rP   r%  rP   rP   rQ   r`    s     zQDistributedTest._DistTestBase.test_ddp_join_model_equivalence.<locals>.<dictcomp>r  r  )rE   rF   rJ   r  r   r  r  r
  r  r   r   r   r]  r  r  r  r  r  r  r  r   r   r  rh  r  re   r   rf  )rM   r  r   rC  r   r   local_modelZrank_to_iter_mappingZlocal_itersZlocal_optimr  rv  r   r  r  Z	ddp_optimr  Zlocal_tensorZdist_tensorrP   rP   rQ   test_ddp_join_model_equivalence  sP    




  


 z=DistributedTest._DistTestBase.test_ddp_join_model_equivalencec                    st  |j }|j}| j}|j}tj| t  tj	j
j|||gd|d}|jd k	rt||j|j td|j  || }	tj|	gtj d}
tj|
tjjd |
 }d}|jr||	kr| td| j d}q| td	}nt }| |j|jd
 t|	D ]}|| dkr$| }nt }|L t|trJ||  }n|| }|   | !| tjj"|d W 5 Q R X |d7 }qW 5 Q R X W 5 Q R X |jr| #|| n| $|| tjj"|d |jsp| %|j& tj|j&g| jd  fddtt' D }t(|  t' d }| )|ht*dd |D  | +| |, }| %|-d t  d S )Nr?   )r  r  rv  zregistered hook r   )r~  r   Rank  exhausted all inputs1Detected at least one rank that exhausted inputs.r   c                    s   g | ]}t  qS rP   r  r  Zfinal_rank_tensorrP   rQ   r   t  s   zIDistributedTest._DistTestBase._run_uneven_inputs_test.<locals>.<listcomp>c                 s   s   | ]}|  V  qd S r\   r  )r{   rq   rP   rP   rQ   r   {  s     zHDistributedTest._DistTestBase._run_uneven_inputs_test.<locals>.<genexpr>Zjoin_uneven_inputs).r   r   r   r   rJ   r
  r  r   r  rE   r  r   r   r  r   printrq   current_devicer  r  r  r  r   r  r,  r   r   r  r  r^   r   r   r  r  r  rf  r  r  Z_authoritative_rankr   ri  ZassertSetEqualr|  r  r  rG  )rM   	test_caseiteration_mappingfind_unused_paramsr   r   r   r   r  r  Znum_iters_tensormin_num_itersZtotal_itersr  r  contextr   r  Zmax_rankr  rP   r  rQ   _run_uneven_inputs_test  s    
  

  

 
z5DistributedTest._DistTestBase._run_uneven_inputs_testc                 C   sB  G dd dt jj}t j| j t}tjt	
|| j}| | j}t ddt j }||fD ]}t jjj|| jgd}d}| jdkr|}| td| j d	}n|d }| td
}d}	|R |jdd: t|D ]*}
|| }|  | | |	d7 }	qW 5 Q R X W 5 Q R X | |	| | | qnd S )Nc                       s$   e Zd Z fddZdd Z  ZS )zbDistributedTest._DistTestBase.test_ddp_uneven_inputs_stop_iteration_sync_bn.<locals>.ModelWithCommc                    s    t    tjdddd| _d S )NrA   r   Fr=   rC   rD   rE   rF   r   rL   rN   rP   rQ   rD     s    
zkDistributedTest._DistTestBase.test_ddp_uneven_inputs_stop_iteration_sync_bn.<locals>.ModelWithComm.__init__c                 S   s   |  |}t| |S r\   )r   r   r  rS   rP   rP   rQ   rU     s    

zjDistributedTest._DistTestBase.test_ddp_uneven_inputs_stop_iteration_sync_bn.<locals>.ModelWithComm.forwardrV   rP   rP   rN   rQ   ModelWithComm  s   r  r<   rA   r  rn   r   r  r  r  Tr  r?   )rJ   rE   r   r
  r  r   r;  r=  r>  r  r  rK   r  r  r   r  r,  r   r  r   r  r  rf  r  )rM   r  Zmodel_bnZ
comm_modelZmodel_inputr   r  r  r  nr  r   rP   rP   rQ   -test_ddp_uneven_inputs_stop_iteration_sync_bn  sJ    	

 
zKDistributedTest._DistTestBase.test_ddp_uneven_inputs_stop_iteration_sync_bnc                    sT  d}d}t t dddt  t dddt  t dddt  }t j||dd}t }G d	d
 d
t j}|d}|d}| j}	td|t	j
|d|	dddtd|t	j
|||||	dddtd|t	j
|||	dddtd|t	j
|d|	d|	fddtd|t	j
|d|	d|	fddg}
td|tjd t	j
|||	dddtd|tjtjd dddddt	j
|||	dddg}|
| trtj }|
td|t	
dddddd g }t|
D ]*\}}|t|j|j|j|d d qg }|
D ]&}|t|j|j|j|jdd q|
| |
| ddg}dddg}dg}t dkrR|d g }|D ]}|D ] |D ] fddtd|D }|dkr| D ]$}	|	dkr||	  d 7  < q| fddt|t D  || qjqbqZt !|
|D ]J\}}| jdkr6t"d |j d!|j d"|  | j#||d#|jkd$ qd S )%Nr   r?   r   rn          Fr=   c                       s,   e Zd Z fddZdd Zdd Z  ZS )zODistributedTest._DistTestBase.test_ddp_uneven_inputs.<locals>.UnusedParamModulec                    s$   t    t | _t | _|| _d S r\   )rC   rD   r   t0r  unused_params_rank)rM   r  rN   rP   rQ   rD     s    
zXDistributedTest._DistTestBase.test_ddp_uneven_inputs.<locals>.UnusedParamModule.__init__c                 S   s   | j j| jjfS r\   )r  r   r  rL   rP   rP   rQ   task_parameters  s    z_DistributedTest._DistTestBase.test_ddp_uneven_inputs.<locals>.UnusedParamModule.task_parametersc                 S   s$   || j kr| | |S | |S r\   )r  r  r  )rM   rT   r   rP   rP   rQ   rU     s    zWDistributedTest._DistTestBase.test_ddp_uneven_inputs.<locals>.UnusedParamModule.forward)rW   rX   rY   rD   r  rU   rZ   rP   rP   rN   rQ   UnusedParamModule  s   r  r   Zbatch_norm_netrA   r   )rx   r   r   r   Zlarge_conv_modelsmall_model&unjoined_rank_with_unused_params_model$joined_rank_with_unused_params_modelZsmall_model_allreduce_hook)rx   r   r   r   r   r   Zsmall_model_power_sgd_hook)r  r  r  r  Zuse_error_feedbackresnet_modelrh   T)rx   r   r   r   r   r<   c                    s   i | ]
}| qS rP   rP   r%  )baseline_iterrP   rQ   r`  _  s    zHDistributedTest._DistTestBase.test_ddp_uneven_inputs.<locals>.<dictcomp>c                    s   i | ]}|  qS rP   rP   r%  r  r  rP   rQ   r`  k  s    zRunning test: z' sync interval
                        z0 with iteration mapping
                        Zunused_params_model)r  )$rE   r  r  r   rF   r   r   r   r   rJ   r   r  r  r  r   r  extendr  r  r  r  r  rM  rx   r   r   r   r   r   r  r  r4  rL  r-  r  r  )rM   r   r  Zlarge_modelr  Zbn_netr  r  r  r   r  Zmodels_with_hookr  Zmodels_with_syncr  Z
test_inputZthrow_on_early_term_testsZbaseline_num_itersZiteration_offsetsZnum_uneven_ranksZiteration_mappingsZnum_early_join_ranksmappingr  r  rP   r  rQ   r     s$   &	


	







  
z4DistributedTest._DistTestBase.test_ddp_uneven_inputsc              	   C   s  t | j t jjjt jdd| j| jgd}t d| j }d}t	
 }|jdd t|D ]p}|jjj}|d k	r|d |  ||}| }|  tdd t|D | }	| |jjj |	 qfW 5 Q R X |j}
| |
j | | d S )Nr?   r  rn   F)enablec                 s   s   | ]
}|V  qd S r\   rP   r  rP   rP   rQ   r     s     zSDistributedTest._DistTestBase.test_ddp_uneven_input_join_disable.<locals>.<genexpr>)rJ   r  r   rE   r  r   rF   r
  r   r   r   r   r  r   r  r  r  r  r   r  rf  r  Z_join_configr  r  r  )rM   r  r   r  r   r  r  rv  r   r  Zjoin_configrP   rP   rQ   "test_ddp_uneven_input_join_disable  s,     

"z@DistributedTest._DistTestBase.test_ddp_uneven_input_join_disablec              
      s   d G  fdddt j}| }tj jj|| j| jgd}td}| t	 2 |
  ||}| }|  W 5 Q R X W 5 Q R X d S )NzIntentional errorc                       s(   e Zd Z fddZfddZ  ZS )zVDistributedTest._DistTestBase.test_ddp_uneven_input_exception.<locals>.ExceptionModulec                    s$   t    ttjddd| _d S )Nr?   Tr   )rC   rD   rE   r   rJ   r   r  rL   rN   rP   rQ   rD     s    
z_DistributedTest._DistTestBase.test_ddp_uneven_input_exception.<locals>.ExceptionModule.__init__c                    s   t  d S r\   )r  )rM   r  Z	error_strrP   rQ   rU     s    z^DistributedTest._DistTestBase.test_ddp_uneven_input_exception.<locals>.ExceptionModule.forwardrV   rP   r  rN   rQ   ExceptionModule  s   r  r  r?   )rE   r   rJ   r  r   r
  r   r   r  r  r   r   r  )rM   r  Zexception_moduler  r   rv  r   rP   r  rQ   test_ddp_uneven_input_exception  s    
 

z=DistributedTest._DistTestBase.test_ddp_uneven_input_exceptionc                 C   sd  t  }| jd t| j }tjd }|dkr:tj	| d}|dkr`|
ttjdddd trz|
ttdd | j|kr|ndd	 |D }|dkr|d g}| j|kr| |d |d  tj|d|td
d | |d |d  |dkrhtj t| jkrh|d g}| j|kr<| |d |d  tj|d|t|d | |d |d  |dkrtj t| jkr|d g}| j|kr| |d |d  tj|d|t|d | |d |d  |d g}| j|kr| |d |d  tj|d|d | |d |d  | j|krD| || tj|d|d | || d S )Nr?   rs   r?  r   rh   r   i
c                 S   s   g | ]}d qS r\   rP   r  rP   rP   rQ   r     s     zMDistributedTest._DistTestBase._test_broadcast_object_list.<locals>.<listcomp>r  )r  rV  r   r  )r  r  r   r   r   r   r   rJ   r
  r  r  r[   rK   r3   r  r   r  r   rf  rE  )rM   rV  r  r  r   src_rankZobjectsZsingle_obj_listrP   rP   rQ   _test_broadcast_object_list  s    


     
     
    
z9DistributedTest._DistTestBase._test_broadcast_object_listc                 C   s   |   S r\   )r  rL   rP   rP   rQ   test_broadcast_object_list  s    z8DistributedTest._DistTestBase.test_broadcast_object_listc                 C   s&   t  }t|}tj|d}| |S r  )r   r   r  rT  r  r  rP   rP   rQ   $_test_broadcast_object_list_subgroup  s    
zBDistributedTest._DistTestBase._test_broadcast_object_list_subgroupc                    s&  G  fdddt j | j}tddgddgD ]\}} | j |jdt	j
d| j | jd tj }tj }fdd	 D d
 fdd	j D }fdd	j D }t	j jj||  t	j jj|g|||d}	t jdddd||	j_|	  t|	j| j}
t	jdt	jd|| jd  }tdD ]}|	|   |
|   t|	jj |
j D ]\}}| |j |j  qt|	jj! |
j! D ]\}}| "|j |j k q|D ]}| #|j d k qqbt	jj$| jd q0d S )Nc                       s&   e Zd Z fddZdd Z  ZS )zLDistributedTest._DistTestBase._test_ddp_ignore_params_arg.<locals>.TestModelc                    sZ   || _ t |   tjdddd| _| j dkrDtjdddd| _ntjdddd| _d S )Nr?   Fr=   r   r<   )r   rC   rD   rE   rF   r   r   r  )	TestModelrO   rP   rQ   rD   &  s    
zUDistributedTest._DistTestBase._test_ddp_ignore_params_arg.<locals>.TestModel.__init__c                 S   s   |  |}| |}|S r\   r   rS   rP   rP   rQ   rU   1  s    

zTDistributedTest._DistTestBase._test_ddp_ignore_params_arg.<locals>.TestModel.forwardrV   rP   )r  rN   rQ   r  %  s   r  FTZignore_bufferrn   r   c                    s   g | ]\}}| j kr|qS rP   )r   )r{   rt  r   r   rP   rQ   r   C  s   
zMDistributedTest._DistTestBase._test_ddp_ignore_params_arg.<locals>.<listcomp>r   c                    s   g | ]\}}  d | qS r   rP   )r{   
param_namer  model_fc2_namerP   rQ   r   H  s   c                    s   g | ]\}}  d | qS r  rP   )r{   Zbuf_namer  r  rP   rQ   r   L  s   )r  rv  ru  r  r?   r=   r  r  )%rE   r   r   rL  r-  rk  r  r   rI   rJ   r  rY  r  rg  named_modulesnamed_parametersre  r  r   +_set_params_and_buffers_to_ignore_for_modelrF   r   Z_build_replicated_tensor_moduler  r  r
  r   r  r   r  rh  rf  r  r   r  r  r  )rM   r  r  find_unusedru  Zproxy_paramsZproxy_buffersZproxy_param_namesZproxy_buffer_namesr  r  r   r  Zmaterialized_paramlocal_paramZsynced_paramZproxy_paramrP   )r  r   r  rQ   _test_ddp_ignore_params_arg$  sn      



 	 
 
 z9DistributedTest._DistTestBase._test_ddp_ignore_params_argc                 C   s   | j dd | j dd d S )NFr  T)r  rL   rP   rP   rQ   test_ddp_ignore_params_arg  s    z8DistributedTest._DistTestBase.test_ddp_ignore_params_argc           	         sP  G  fdddt j tj jj  | j| jgd}tdD ]}tdd}|dkr2z||	 
  W n tk
r" } zt|}t|| tttg}t tjjkr|t n$dd	g}|d
| j d|  |D ] }| ||kd| d|  q| t|k W 5 d }~X Y nX | dd q>||	 
  q>t  d S )Nc                       s&   e Zd Z fddZdd Z  ZS )z`DistributedTest._DistTestBase.test_ddp_unused_params_rebuild_buckets_exception.<locals>.ToyModelc                    s6   t  |   tjdddd| _tjdddd| _d S r   rC   rD   rE   rF   net1net2rL   ToyModelrO   rP   rQ   rD     s    ziDistributedTest._DistTestBase.test_ddp_unused_params_rebuild_buckets_exception.<locals>.ToyModel.__init__c                 S   s
   |  |S r\   )r  rS   rP   rP   rQ   rU     s    zhDistributedTest._DistTestBase.test_ddp_unused_params_rebuild_buckets_exception.<locals>.ToyModel.forwardrV   rP   r
  rN   rQ   r
    s   r
  r  rA   r?   r<   r   r}  znet2.weightdid not receive grad for rank : 	Expected 
 to be in Tz'DDP unused parameters error not raised.)rE   r   rJ   r  r   r
  r   r  r  r   r  r,  r   r-   !ddp_prev_reduction_unfinished_str$ddp_recommend_find_unused_params_str ddp_outputs_not_used_in_loss_strr   r  r  r  r  ddp_suggest_debug_mode_strr   r  r  "ddp_find_unused_params_enabled_strr  )	rM   r  r  r   rN  r  expected_strsunreduced_paramssrP   r  rQ   0test_ddp_unused_params_rebuild_buckets_exception  s@    	 

  zNDistributedTest._DistTestBase.test_ddp_unused_params_rebuild_buckets_exceptionc                    s   G  fdddt j tj| j   tj }dD ]\}tj jj	t
|| jgd|d}tjdd| jd}td	D ]}||}|d }|  qxq:d S )
Nc                       s&   e Zd Z fddZdd Z  ZS )zVDistributedTest._DistTestBase.test_ddp_shared_grad_acc_unused_params.<locals>.ToyModelc                    sN   t  |   tjdddd| _ttd| _| j| j_tdd| _	d S )Nr<   rn   Fr=   )
rC   rD   rE   rF   r  r   rJ   r  r>   r  rL   r	  rP   rQ   rD     s
    
z_DistributedTest._DistTestBase.test_ddp_shared_grad_acc_unused_params.<locals>.ToyModel.__init__c                 S   s   |  | S r\   )r  r   rS   rP   rP   rQ   rU     s    z^DistributedTest._DistTestBase.test_ddp_shared_grad_acc_unused_params.<locals>.ToyModel.forwardrV   rP   r  rN   rQ   r
    s   r
  r  T)r  rv  r  r   r<   r   r  )rE   r   rJ   r
  r  r   r  r  r  r   r  r  rK   r  r  )rM   r   Zstaticr  r   r  r   rP   r  rQ   &test_ddp_shared_grad_acc_unused_params  s    zDDistributedTest._DistTestBase.test_ddp_shared_grad_acc_unused_paramsc                    s  t ddj}d G dd d} fdd}fdd}fd	d
}fdd}||t|t|t|t|t|iG fdddt	j j
}t	j jj| jjgdfdd}tdd t D }	||	t dd t D }	||	t |t	dd}	||	| d}
d}t	|
|}t	|
|}t||}	||	t|	 t||}	||	t|	 td |td |i}	||	t|	 d S )Nr<   rA   c                   @   s   e Zd ZddgZdd ZdS )zDDistributedTest._DistTestBase.test_ddp_device.<locals>.TensorWrapperr  moved_to_gpuc                 S   s   || _ d| _d S r  )r  r  )rM   r  rP   rP   rQ   rD     s    zMDistributedTest._DistTestBase.test_ddp_device.<locals>.TensorWrapper.__init__N)rW   rX   rY   	__slots__rD   rP   rP   rP   rQ   TensorWrapper  s   r  c                    sT    t|   dttdd | D  | d jjj | d | d  S )Nr?   c                 s   s   | ]}|j V  qd S r\   r   r  rP   rP   rQ   r     s     zbDistributedTest._DistTestBase.test_ddp_device.<locals>.tuple_and_list_validator.<locals>.<genexpr>r   )r  r~  rf  r|  r   indexr   r]   )expected_lenrM   rP   rQ   tuple_and_list_validator  s    zODistributedTest._DistTestBase.test_ddp_device.<locals>.tuple_and_list_validatorc                    sF     | jt   | jjj| jjj   | jjj j | j| j S r\   )rf  _fieldsEXPECTED_FIELDSrG   r   r  rH   r   r]   rL   rP   rQ   namedtuple_validator  s    zKDistributedTest._DistTestBase.test_ddp_device.<locals>.namedtuple_validatorc                    s8     | jpt| jjdk | j j| _d| _| jS )Nr  T)r  r  r   r  r   r  r   r]   rL   rP   rQ   custom_type_validator  s    zLDistributedTest._DistTestBase.test_ddp_device.<locals>.custom_type_validatorc                    s     td |  k   td |  k  dttdd |  D   | td  jj j	 | td  | td   S )Nr   r?   c                 s   s   | ]}|j V  qd S r\   r   r  rP   rP   rQ   r     s     zXDistributedTest._DistTestBase.test_ddp_device.<locals>.dict_validator.<locals>.<genexpr>)
r  r!  r  rf  r~  r|  r]  r   r  r   r]   rL   rP   rQ   dict_validator  s
    "zEDistributedTest._DistTestBase.test_ddp_device.<locals>.dict_validatorc                       s*   e Zd Z fddZfddZ  ZS )z?DistributedTest._DistTestBase.test_ddp_device.<locals>.ToyModelc                    s    t    tjdddd| _d S r   r  _selfrN   rP   rQ   rD     s    
zHDistributedTest._DistTestBase.test_ddp_device.<locals>.ToyModel.__init__c                    s&     t|| | |}| |S r\   )r  r^   r   )r&  rT   expected_typeZ
fwd_tensorrM   
validatorsrP   rQ   rU     s    zGDistributedTest._DistTestBase.test_ddp_device.<locals>.ToyModel.forwardrV   rP   r(  rN   rQ   r
    s   r
  r  c                    s(   t dD ]} | |}|   qd S )Nri   )r  r   r  )r   Z
input_typer  rv  r  rP   rQ   
train_iter$  s    
zADistributedTest._DistTestBase.test_ddp_device.<locals>.train_iterc                 s   s   | ]}t d d V  qdS )r<   NrJ   rK   r  rP   rP   rQ   r   +  s     z@DistributedTest._DistTestBase.test_ddp_device.<locals>.<genexpr>c                 S   s   g | ]}t d d qS rX  r+  r  rP   rP   rQ   r   0  s     zADistributedTest._DistTestBase.test_ddp_device.<locals>.<listcomp>rn   r   r?   )rE   rF   r  r   r   rY  TestNamedTupleInput_0rp   dictrJ   r   r  r   r  rK   r  typer!  )rM   r  r  r  r"  r#  r$  r
  r*  r   r  r   rG   rH   rP   )r  r   rM   r)  rQ   test_ddp_device  s\    
      	 




  z-DistributedTest._DistTestBase.test_ddp_devicec                    s   d}d}t j||jd t j||jdG  fdddt jj}t jjj| jjgd}t }||t	| t
 }||t	| d S )Nrn   r<   r   c                       s,   e Zd Z fddZfddZ  ZS )zKDistributedTest._DistTestBase.test_ddp_namedtuple.<locals>.NamedTupleModulec                    s   t    tdd| _d S )Nr<   r?   r  r%  rN   rP   rQ   rD   U  s    
zTDistributedTest._DistTestBase.test_ddp_namedtuple.<locals>.NamedTupleModule.__init__c                    sb    t||d| dt|  |jt  |j |j | t	
|j|jS )NzExpected type r  )r  r^   r.  rf  r   r!  rG   rH   r   rJ   mul)r&  r  r'  rG   rH   rM   rP   rQ   rU   Y  s    zSDistributedTest._DistTestBase.test_ddp_namedtuple.<locals>.NamedTupleModule.forwardrV   rP   r1  rN   rQ   NamedTupleModuleT  s   r2  r  )rJ   r  r   rE   r   r  r   r
  r,  r.  rp   )rM   r  r   r2  r   r   rP   r1  rQ   test_ddp_namedtupleJ  s     

z1DistributedTest._DistTestBase.test_ddp_namedtuplec                 C   sJ  d}d}t  }tj| j tjjjt	 | j| jgdd}tj
||| jd}tj||| jd}tdD ]}|d dkr||}n||}| }	|	  |j }
|d dkrtj|dg| jtjd	}ntj||g| jtjd	}|
}| || qltjjjt	 | j| jgd
d}tdD ]}|dkrH|| }	|	  nz|| }	|	  W n tk
r, } zt|}t|| d}tttd| j d| g}t  t jjkr|t n$ddg}|d| j d|  |D ]"}|  ||kd| d|  q| !t"|k W 5 d }~X Y nX | !dd q"t #  d S )Nr   r<   Tr  r   r  rA   r   r   r  Fr?   6Parameter indices which did not receive grad for rank r  r}  lin2.weightr  r  r  DDP error not raised)$r   r   rJ   r
  r  r   rE   r  r   r   rK   r   r  r   r  reducer_get_local_used_maprq   int32rf  r,  r   r-   r  r  r  r  r  r  r  r  r   r  r  r  r  )rM   r  r   r   r   random_input
ones_inputr  rv  r   local_used_mapr+  variable_usage_tensorrN  r  unused_param_indexr  r  r  rP   rP   rQ   'test_ddp_control_flow_same_across_ranksn  s~    

    


  zEDistributedTest._DistTestBase.test_ddp_control_flow_same_across_ranksc           	   	   C   s   t  }tj| j tjjjt	 | j| jgdd}tj
dd| jd}tjdd| jd}d}| t|B tdD ]2}|d dkr||}n||}| }|  qvW 5 Q R X t|| | td	B tdD ]2}|d dkr||}n||}| }|  qW 5 Q R X t|d
 d S )NTr  r  r   r<   r   z1Your training graph has changed in this iterationrA   r   af  Expected to have finished reduction in the prior iteration before starting a new one. This error indicates that your training graph has changed in this iteration, e.g., one parameter is used in first iteration, but then got unused in the second iteration. this is not compatible with static_graph set to True.
Parameter indices which did not receive grad forz#Expected to have finished reduction)r   r   rJ   r
  r  r   rE   r  r   r   rK   r   r  r,  r  r   r  r-   )	rM   r   r   r;  r<  r  r  rv  r   rP   rP   rQ   test_invalid_static_graph  s:    



z7DistributedTest._DistTestBase.test_invalid_static_graphc                    sl  ddG  fdddt j t }tj| j tj jj	 | j| j| jgdd}tj
| jd}tj| jd}tdD ]}|d	 d
kr||}n||}| }|  |j }|d	 d
krtj|d
g| jtjd}	ntj|dg| jtjd}	|}
| |
|	 qtj jj	 | j| j| jgdd}td	D ]}|d
krj|| }|  nz|| }|  W n tk
rN } zt|}t|| d}tttd| j d| g}t tjjkr|t n$ddg}|d| j d|  |D ]"}|  ||kd| d|  q| !t"|k W 5 d }~X Y nX | !dd qDt#  d S )Nr   r<   c                       s,   e Zd Z fddZfddZ  ZS )z\DistributedTest._DistTestBase.test_ddp_control_flow_different_across_ranks.<locals>.ToyModelc                    s<   t  |   tjdddd| _tjdddd| _|| _d S r   rC   rD   rE   rF   r   r   r   r  r	  rP   rQ   rD     s    zeDistributedTest._DistTestBase.test_ddp_control_flow_different_across_ranks.<locals>.ToyModel.__init__c                    sR   t |t j |jdo"| jdk}|r>| t| |S t| |S d S )Nr   r?   )	rJ   r`   r   r   r   r   r   r   r   r   )r  r   rP   rQ   rU     s    zdDistributedTest._DistTestBase.test_ddp_control_flow_different_across_ranks.<locals>.ToyModel.forwardrV   rP   r
  r  r   rN   rQ   r
    s   r
  Tr  r   r  rA   r   r4  r?   Fr5  r  r}  r6  r  r  r  r7  )$rE   r   r   r   rJ   r
  r  r   r  r   rK   r   r  r   r  r8  r9  rq   r:  rf  r,  r   r-   r  r  r  r  r  r  r  r  r   r  r  r  r  )rM   r   r   r;  r<  r  rv  r   r=  r+  r>  rN  r  r?  r  r  r  rP   rD  rQ   ,test_ddp_control_flow_different_across_ranks  s    

    


  zJDistributedTest._DistTestBase.test_ddp_control_flow_different_across_ranksc              	   C   s   d}| j |krtndd tD }t }|d | }d}t||k r\|||  |d7 }q8d g}tj|||d | |d t| j tt   | t	d tjg ||d W 5 Q R X d S )Nr   c                 S   s   g | ]}d qS r\   rP   r  rP   rP   rQ   r   b  s     zJDistributedTest._DistTestBase.test_scatter_object_list.<locals>.<listcomp>r?   r  zMExpected argument scatter_object_output_list to be a list of size at least 1.)
r   r  r   r   r~  r  Zscatter_object_listrf  r  r,  )rM   r  r  r   r  Zoutput_obj_listrP   rP   rQ   test_scatter_object_list\  s0    
z6DistributedTest._DistTestBase.test_scatter_object_listc              	   C   sv   t jdgt jdt jdgt jdt jdgt jdt jdgt jdt jdgt jdt jdgt jdg}dd |D }|S )Nr   r  r  c                 S   s   g | ]}|  qS rP   )Z	to_sparser  rP   rP   rQ   r     s     zeDistributedTest._DistTestBase._generate_sparse_tensors_for_bucket_assignment_test.<locals>.<listcomp>)rJ   r  rk  rl  )rM   r  tensors_sparserP   rP   rQ   3_generate_sparse_tensors_for_bucket_assignment_testz  s    	zQDistributedTest._DistTestBase._generate_sparse_tensors_for_bucket_assignment_testc              	   C   s   t jtddt jjd}dtjd< t jt  tddd}tj	
| j td}tjjj|| j| jg|d	}d
}| t|6 |  }|rt j|dg|jd}nt |dg}W 5 Q R X |rt|| t | d S )Nr   r@  r/  r   r  r   rn   r   r/  r   r  zNo support for sparse tensors.i  )logger)r   rT  r   r   r   r   r   r  rJ   r
  r  r   r   rE   r  r   r  r  r,  rH  Z"_compute_bucket_assignment_by_sizerK  r-   r  )rM   
use_loggerr   group_to_user  r  rG  r  rP   rP   rQ   '_test_compute_bucket_assignment_by_size  s:     
 

zEDistributedTest._DistTestBase._test_compute_bucket_assignment_by_sizec                 C   s   | j dd d S NFrL  rN  rL   rP   rP   rQ   Btest_compute_bucket_assignment_by_size_sparse_error_without_logger  s    z`DistributedTest._DistTestBase.test_compute_bucket_assignment_by_size_sparse_error_without_loggerc                 C   s   | j dd d S NTrP  rQ  rL   rP   rP   rQ   ?test_compute_bucket_assignment_by_size_sparse_error_with_logger  s    z]DistributedTest._DistTestBase.test_compute_bucket_assignment_by_size_sparse_error_with_loggerc                 C   s   |rd}|  t|}||fS t tjjk}| jdkrnt|tjj	kr^|s^d}|  t|}q~d }| 
t}nd}|  t|}||fS )Nz'DDP expects same model across all ranksr   #Caught collective operation timeoutzappears not to match)r  r,  r   r  r  r  r   r  r   r   r  )rM   rM  r   r  r;  Zis_detail_dbg_moderP   rP   rQ   2_determine_expected_error_verify_model_across_rank  s    
zPDistributedTest._DistTestBase._determine_expected_error_verify_model_across_rankc              	   C   s  t jtddt jjd}dtjd< t jt  tddd}tj	
| j | |\}}td}tjjj|| j| jg|d	}t| jdkrd
ndd|j_|B |rt|jt| |j nt|jt|  t | W 5 Q R X |r| jdkrt|| t | d S )Nr   r@  rI  r  r   rn   rJ  r   r  r  r<   r?   )r   rT  r   r   r   r   r   r  rJ   r
  r  r   rV  r   rE   r  r   r  rF   r   r   r   r  rY  r  rK  r  r-   )rM   rL  r   rM  r;  r  r  rP   rP   rQ   _test_verify_model_across_rank  sB     
 



z<DistributedTest._DistTestBase._test_verify_model_across_rankc                 C   s   | j dd d S rS  rW  rL   rP   rP   rQ   )test_verify_model_across_rank_with_logger  s    zGDistributedTest._DistTestBase.test_verify_model_across_rank_with_loggerc                 C   s   | j dd d S rO  rX  rL   rP   rP   rQ   ,test_verify_model_across_rank_without_logger  s    zJDistributedTest._DistTestBase.test_verify_model_across_rank_without_loggerc              	   C   sH   |0 t jjj|| j| jg|d}t| W 5 Q R X t| d S )Nr  )rJ   rE   r  r   r  r   r   r  )rM   r;  r  Z	ddp_groupr   rP   rP   rQ   $_run_test_ddp_model_with_diff_params  s    
zBDistributedTest._DistTestBase._run_test_ddp_model_with_diff_paramsc                 C   st   t jtddt jjd}dtjd< t jt  tddd}tj	
| j | |\}}t| j}| |||| d S )Nr   r@  rI  r  r   r<   rJ  r   rT  r   r   r   r   r   r  rJ   r
  r  r   rV  r   r[  rM   r   rM  r;  r  r  rP   rP   rQ   &test_ddp_model_diff_shape_across_ranks,  s$     
 
   zDDistributedTest._DistTestBase.test_ddp_model_diff_shape_across_ranksc                 C   s   t jtddt jjd}dtjd< t jt  tddd}tj	
| j | j|dd	\}}t| j| jd
kd	}| |||| d S )Nr   r@  rI  r  r   r<   rJ  T)r   r?   r\  r]  rP   rP   rQ   +test_ddp_model_diff_num_params_across_ranksB  s*     
  
   zIDistributedTest._DistTestBase.test_ddp_model_diff_num_params_across_ranksc                 C   s  | }t |}tjjjt || j| jgdd}tdd}|t	krl||d \}}||d \}	}
n||\}}||\}	}
|

 }|  |t	kr| |jjjjjd k | |jjjjj|jjjj n0| |jjjjd k | |jjjj|jjj d }d }|  |  tdD ]}|t	krV||d \}}||d \}	}
n||\}}||\}	}
|dk r|| }|	|
 }|
 }|
 }n|
 }|

 }|  |  |dkr|t	kr|jjjj}|jjjjj}n|jjj}|jjjj}| || nf|dkrt|t	krN| |jjjjj| | |jjjj| n&| |jjjj| | |jjj| t| | D ]"\}}|j}|j}| || qq$t  d S )NTr  r<   r   r  rA   r?   )r  r  rJ   rE   r  r   r
  r   rK   r   r   r  r  r   rG   r  r  rf  r  r  rh  r  r   r  )rM   
module_clsr  r   	local_netr  r   rG   rH   Za_distZb_distZ	loss_distZsaved_a_local_gradZsaved_a_dist_gradr  r  Zt_distr   r  
dist_param
local_grad	dist_gradrP   rP   rQ   _test_output_unused_in_loss]  s~    

 








 z9DistributedTest._DistTestBase._test_output_unused_in_lossc                 C   s   t }dD ]}| || qd S Nr  )r   re  rM   r`  r  rP   rP   rQ   'test_output_unused_in_loss_tuple_module  s    zEDistributedTest._DistTestBase.test_output_unused_in_loss_tuple_modulec                 C   s   t }dD ]}| || qd S rf  )r   re  rg  rP   rP   rQ   &test_output_unused_in_loss_dict_module  s    zDDistributedTest._DistTestBase.test_output_unused_in_loss_dict_modulec                 C   s   t dd| j}t | j}t|}t jjj	|| jgdd}||
 }||
 }t jj |  t jj |  t| | D ]B\\}}\}}	|j}
|	j}| |
|d| d|
 d| d|  qd S )	Nr?   rA   Tr  z
DDP param z with grad z0
                    does not match local param z with grad
                    )rJ   r   r  r   r   r  r  rE   r  r   r   Z_CZ
_functionsZUndefinedGradr  rh  r  r  rf  )rM   rT   r  ra  rv  Z	local_outZdist_param_namerb  Zlocal_param_namer  rd  rc  rP   rP   rQ   ,test_undefined_grad_parity_unused_parameters  s6    
 zJDistributedTest._DistTestBase.test_undefined_grad_parity_unused_parametersc           
         s   G  fdddt j td t }tj| j  | j| j}tj j	j
|| jg|d|d}tjdd| jd}tdD ]}||}| }	|	  q~|S )	Nc                       s&   e Zd Z fddZdd Z  ZS )zRDistributedTest._DistTestBase._test_different_graph_across_ranks.<locals>.ToyModelc                    s<   t  |   tjdddd| _tjdddd| _|| _d S r   rC  r  r	  rP   rQ   rD     s    z[DistributedTest._DistTestBase._test_different_graph_across_ranks.<locals>.ToyModel.__init__c                 S   s4   | j dkr | t| |S t| |S d S r   )r   r   r   r   r   rS   rP   rP   rQ   rU     s    
zZDistributedTest._DistTestBase._test_different_graph_across_ranks.<locals>.ToyModel.forwardrV   rP   r  rN   rQ   r
    s   r
  r.  T)r  rv  r  r  r   r<   r   )rE   r   rJ   r  r   r   r
  r  r   r  r   rK   r  r   r  )
rM   rv  r  r   r   r  r;  r  rv  r   rP   r  rQ   "_test_different_graph_across_ranks  s$    

z@DistributedTest._DistTestBase._test_different_graph_across_ranksc                 C   sp   | j dd}| | dd | j dd}| | dd t| | D ]\}}| || qVd S )NT)rv  rz  r   r  )rk  r  r  rG  r  rh  r  rf  )rM   Z
base_modelZstatic_modelr  r  rP   rP   rQ   !test_different_graph_across_ranks  s    z?DistributedTest._DistTestBase.test_different_graph_across_rankszUMacOS uses uv transport which does not have as robust error handling as tcp transportc              	   C   s  t d| j g}tdD ]}tt | qtdd}tj|d tdD ]}tt | qNtj|dd d}d}| j|kr| 	t
d	| d
 tj|d W 5 Q R X n@| j|krd	| j d| }| 	t
| tj|d W 5 Q R X | jdd d S )Nr<   rA   r@  r  Tr/  wait_all_ranksr?   r   r    failed to pass monitoredBarrierb successfully reached monitoredBarrier, but received errors while waiting for send/recv from rank    )rJ   r   r   r  r   r  rK  r   monitored_barrierr  r,  rK  )rM   r  r  r/  failed_rankr  	err_regexrP   rP   rQ   test_monitored_barrier_gloo"  s,    

 

z9DistributedTest._DistTestBase.test_monitored_barrier_glooc              	   C   sn   d}d}t jddgd}| j|kr&d S | jdkr^| td| d t || W 5 Q R X nt || d S )Nr?   r'  r   r  r  ro  )r   rT  r   r  r,  rr  )rM   rs  r/  r  rP   rP   rQ   $test_monitored_barrier_gloo_subgroupK  s    

 
zBDistributedTest._DistTestBase.test_monitored_barrier_gloo_subgroupc           	   	   C   sb  t jtdd tt| jD tddt jjd}t jtdd tt| jD t jj	d}t
jd| jd	| j g}||td
d | jdkrt  t jjkrd}nd}| t| ||tdd W 5 Q R X nt|rddd tdt| jD }d| d}nd}d| d}tdd}| t| |j||d W 5 Q R X | jdd d S )Nc                 s   s   | ]
}|V  qd S r\   rP   r  rP   rP   rQ   r   e  s     zWDistributedTest._DistTestBase._test_monitored_barrier_allreduce_hang.<locals>.<genexpr>r  r@  )r  r/  r   c                 s   s   | ]
}|V  qd S r\   rP   r  rP   rP   rQ   r   l  s     r  r<   r   rn   r   zTimed out waitingrU  r'  r}  c                 S   s   g | ]}t |qS rP   r|  r  rP   rP   rQ   r     s     zXDistributedTest._DistTestBase._test_monitored_barrier_allreduce_hang.<locals>.<listcomp>r?   Ranks ro  r  rn  rq  r  )r   rT  rY  r  r   r   r   r   r   r   rJ   r   r   r  r  r  r  r  r  r,  r   rr  rK  )	rM   rn  Znccl_pgZgloo_pgr  rt  rank_strZexpected_first_fail_rankZ!monitored_barrier_timeout_secondsrP   rP   rQ   &_test_monitored_barrier_allreduce_hangb  s>    
"
 zDDistributedTest._DistTestBase._test_monitored_barrier_allreduce_hangc                 C   s   | j dd d S )NFry  r{  rL   rP   rP   rQ   %test_monitored_barrier_allreduce_hang  s    zCDistributedTest._DistTestBase.test_monitored_barrier_allreduce_hangc                 C   s   | j dd d S )NTry  r|  rL   rP   rP   rQ   4test_monitored_barrier_allreduce_hang_wait_all_ranks  s    zRDistributedTest._DistTestBase.test_monitored_barrier_allreduce_hang_wait_all_ranksc              	   C   sh   t jtdd tt| jD d}tdd}| jdkrd| t	d| j d |
| W 5 Q R X d S )Nc                 s   s   | ]
}|V  qd S r\   rP   r  rP   rP   rQ   r     s     z[DistributedTest._DistTestBase.test_monitored_barrier_gloo_rank_0_timeout.<locals>.<genexpr>rv  r   r@  r  z timed out in monitoredBarrier)r   rT  rY  r  r   r   r   r   r  r,  rr  )rM   r  r/  rP   rP   rQ   *test_monitored_barrier_gloo_rank_0_timeout  s    

 zHDistributedTest._DistTestBase.test_monitored_barrier_gloo_rank_0_timeoutc              	   C   s   d}t dd}d}| j|krH| td|  tj|d W 5 Q R X n@| jdkrd| j d| }| t| tj|d W 5 Q R X d S )NrA   r@  r   r  r  r?   rp  )r   r   r  r,  r   rr  )rM   Zexpected_first_failed_rankr/  r  rt  rP   rP   rQ   $test_monitored_barrier_failure_order  s    

 
zBDistributedTest._DistTestBase.test_monitored_barrier_failure_orderc              	   C   sj   | j dkrftdd}ddd tdt| jD }d| d	}| t| tj	|d
d W 5 Q R X d S )Nr   r'  r@  r}  c                 S   s   g | ]}t |qS rP   r|  r  rP   rP   rQ   r     s     zWDistributedTest._DistTestBase.test_monitored_barrier_wait_all_ranks.<locals>.<listcomp>r?   rx  ro  Trm  )
r   r   r   r  r   r   r  r,  r   rr  )rM   r/  rz  rt  rP   rP   rQ   %test_monitored_barrier_wait_all_ranks  s    

 zCDistributedTest._DistTestBase.test_monitored_barrier_wait_all_ranksc              	   C   s  t  }tjjj|| j| jgd}ddd}| \}}||}| 	|| t  }dg}tjjj
|| tjjj|| j| jgd}ddi}| \}}||}| 	|| t  }tjjj|| j| jgd}| \}}| jdkrtt|d  |tjtdtjtdg | td || W 5 Q R X |d d }| td	 || W 5 Q R X |tjtdtjtdg d S )
Nr  za.weightzb.weightr  r   r?   zExpected param to name mappingr  zParam with name)r   rJ   rE   r  r   r
  r   _build_params_for_reducer"_build_debug_param_to_name_mappingZassertDictEqualr   r  r.  r  r   r   r  r  )rM   r   r  expected_mapping
net_paramsr  param_to_name_mappingZparams_to_ignorerP   rP   rQ   *test_ddp_build_debug_param_to_name_mapping  s\    



 


zHDistributedTest._DistTestBase.test_ddp_build_debug_param_to_name_mappingc                 C   sd   G dd dt j}| }tj jj|| j| jgd}ddi}| \}}||}| 	|| d S )Nc                       s$   e Zd Z fddZdd Z  ZS )zcDistributedTest._DistTestBase.test_ddp_build_debug_param_to_name_mapping_requires_grad.<locals>.Netc                    s*   t    tdd| _| jjd d S )Nr<   F)rC   rD   rE   rF   r   r>   r  rL   rN   rP   rQ   rD   (   s    
zlDistributedTest._DistTestBase.test_ddp_build_debug_param_to_name_mapping_requires_grad.<locals>.Net.__init__c                 S   s
   |  |S r\   r   rS   rP   rP   rQ   rU   /   s    zkDistributedTest._DistTestBase.test_ddp_build_debug_param_to_name_mapping_requires_grad.<locals>.Net.forwardrV   rP   rP   rN   rQ   r   '   s   r   r  r   z
lin.weight)
rE   r   rJ   r  r   r
  r   r  r  rf  )rM   r   r   r  r  r  r  r  rP   rP   rQ   8test_ddp_build_debug_param_to_name_mapping_requires_grad    s    
  
zVDistributedTest._DistTestBase.test_ddp_build_debug_param_to_name_mapping_requires_gradc                    s  t  t jjk}G dd dtj G  fdddtj}| }g }|r| D ]D\}}||jjj	krN|j
ddD ] \}}	| d| }
||
 qpqNtjjj|| |jjj|jjjg}nt|jj |jjjg }g }g }i }d}| D ]~\}}|j
ddD ]h\}}	| d| }
|||
< |
|kr4|d	7 }||krJ||
 n |r`||jjj	kr||
 qqtjjj|| j| jgd
}d\}}t||}tdD ]}|dkr||}| }|  nz||}| }|  W n tk
r } zt|}||dd  }|D ]B}| ||kp6| | t|| |kd||  d|  q"|D ]}| ||k qj|D ]}| ||k qW 5 d }~X Y nX | dd qd S )Nc                       s$   e Zd Z fddZdd Z  ZS )z^DistributedTest._DistTestBase._test_ddp_multiple_nested_unused_params_error.<locals>.SubModulec                    s:   t    td| _t | _t | _tj	dddd| _
d S )Nr   ri   r<   Fr=   )rC   rD   r   embedding_netr   r   r   r   rE   rF   	lin_layerrL   rN   rP   rQ   rD   A   s
    

zgDistributedTest._DistTestBase._test_ddp_multiple_nested_unused_params_error.<locals>.SubModule.__init__c                 S   s$   |  |}| |}| j|}|S r\   )r   r  r   rG   rS   rP   rP   rQ   rU   H   s    

zfDistributedTest._DistTestBase._test_ddp_multiple_nested_unused_params_error.<locals>.SubModule.forwardrV   rP   rP   rN   rQ   	SubModule@   s   r  c                       s&   e Zd Z fddZdd Z  ZS )z\DistributedTest._DistTestBase._test_ddp_multiple_nested_unused_params_error.<locals>.MyModelc                    s   t      | _d S r\   )rC   rD   
sub_modulerL   )r  rO   rP   rQ   rD   Q   s    
zeDistributedTest._DistTestBase._test_ddp_multiple_nested_unused_params_error.<locals>.MyModel.__init__c                 S   s
   |  |S r\   )r  rS   rP   rP   rQ   rU   U   s    zdDistributedTest._DistTestBase._test_ddp_multiple_nested_unused_params_error.<locals>.MyModel.forwardrV   rP   r  rN   rQ   MyModelP   s   r  F)Zrecurser   r   r?   r  )r<   rA   rA   zdid not receive gradzDid not find index z for zExpected error was not raised!)r   r  r  r  rE   r   r  r  r  r   r  r  rJ   r  r   r   r   rH   rY  modulesr
  r   r   r  r   r  r,  r   findr  r  )rM   ignore_sparseZdebug_mode_offr  r   Zsparse_embedding_fqnsrt  r   Zparameter_namer  ZfqnZunused_modulesZexpected_unused_param_fqnsZused_param_fqnsZfqn_to_param_indexr  r  r  r   r   r  rv  r   rN  Zunused_param_substrZunused_param_fqnZused_param_fqnZsparse_param_fqnrP   r  rQ   -_test_ddp_multiple_nested_unused_params_error=   s    
 






$zKDistributedTest._DistTestBase._test_ddp_multiple_nested_unused_params_errorc                 C   s   | j dd d S )NFr  r  rL   rP   rP   rQ   ,test_ddp_multiple_nested_unused_params_error   s    zJDistributedTest._DistTestBase.test_ddp_multiple_nested_unused_params_errorc                 C   s   | j dd d S )NTr  r  rL   rP   rP   rQ   8test_ddp_multiple_nested_unused_params_err_ignore_params   s    zVDistributedTest._DistTestBase.test_ddp_multiple_nested_unused_params_err_ignore_paramsc                 C   s  | j }tj| t  }t|}tjjj	||gd}tj
dddd }t|}tjjj	||gd}tjdd|d}tjdddd|d}|||f|||fg}|D ]J}	|	\}
}}| j d	kr|
  |  td
D ]}| |
||| qq| jdd d S )Nr  rA   r   Fr   Ztrack_running_statsr<   r   ri   r   r  rq  r  )r   rJ   r
  r  r   r  r  rE   r  r   r=  rK   evalr  rf  rK  )rM   r   r   r  Zsyncbn_modelZlocal_syncbn_modelr   Z
inp_syncbnr  testZ
test_modelZtest_local_modelZtest_inpr  rP   rP   rQ   test_ddp_inference   sD    

  

 

 z0DistributedTest._DistTestBase.test_ddp_inferencec           
   	   C   sX  | j }tj| tjdddd|}tjjj||gd}tjj	
 B}tdD ]2}tdddd|}||}| }|  qRW 5 Q R X td	krtd
|}n
td|}| g | |j}	| j dkrT|	  tjj	
 B}tdD ]2}tdddd|}|	|}| }|  qW 5 Q R X td	kr>td
|}n
td|}| g | d S )NrA   r   Fr  r  r  r<   ri   r?  Z_all_gather_baseri  r   )r   rJ   r
  r  rE   r=  r  r   r  r   r   r  rK   r   r  rs   r   r  r   r  rf  )
rM   r   r   r+  r  r   rv  r   Zall_gather_callsZmodel_inferencerP   rP   rQ   !test_ddp_sync_bn_training_vs_eval   s:    


z?DistributedTest._DistTestBase.test_ddp_sync_bn_training_vs_evalc              	   C   sZ   t  | j}tjjj|| jgd}d}| t| |	i i  W 5 Q R X t
|| d S )Nr  zmust be callable)r   r
  r   rJ   rE   r  r   r  	TypeErrorr  r-   )rM   r   r  rP   rP   rQ   test_ddp_python_error_logged!  s    
z:DistributedTest._DistTestBase.test_ddp_python_error_loggedc                    s4  | j }tj| G dd dtjj} fdd | |}t|}tjjj	||gd}tjjj	||gdd}t
dd	}tttd
}| D ]}tdD ]}|||d}	 |	}
|
  | | |||d}| t|||   |}|  | | t| | D ]\}}| || qqqd S )Nc                       s$   e Zd Z fddZdd Z  ZS )z\DistributedTest._DistTestBase.test_ddp_static_graph_nested_types.<locals>.NestedOutputModulec                    s    t    tjdddd| _d S )Nr  r?   Fr=   r  rL   rN   rP   rQ   rD   >!  s    
zeDistributedTest._DistTestBase.test_ddp_static_graph_nested_types.<locals>.NestedOutputModule.__init__c                 S   sr   |dkr&|  ||  ||  |ffS |dkrL|  ||  ||  |ggS |dkrn|  |d|  |idS d S )Nr   rY  r-  r   ro   r  )rM   r   output_typerP   rP   rQ   rU   B!  s$     zdDistributedTest._DistTestBase.test_ddp_static_graph_nested_types.<locals>.NestedOutputModule.forwardrV   rP   rP   rN   rQ   NestedOutputModule=!  s   r  c                    s   d}t | tjr|  S t | tr>|  D ]}| |7 }q*n>t | tsRt | trj| D ]}| |7 }qVntdt	|  |S )Nrj  zUnknown model output type )
r^   rJ   r_   r   r-  r]  r   rY  r  r.  )Zmodel_outputr   ra   rT   get_lossrP   rQ   r  [!  s    
zRDistributedTest._DistTestBase.test_ddp_static_graph_nested_types.<locals>.get_lossr  TrA  r<   r  )rY  r   r-  r  )r  )r   rJ   r
  r  rE   r   r  r  r  r   rK   rY  r   r-  r  r  r  r  r  r^   rh  r  rf  )rM   r   r  r   Zmodel_static_graphr   Ztype_mappingr  r  rv  r   Z
out_staticZloss_staticr   Zp_staticrP   r  rQ   "test_ddp_static_graph_nested_types2!  sH    


 z@DistributedTest._DistTestBase.test_ddp_static_graph_nested_typesc           
      C   s   t j| j G dd dtj}| | j}t jdd| jd}t	ddgddgD ]b\}}t
|| jg| j||d}td	D ]8}||}| |d
 j |d
 |d   }	|	  q|qTd S )Nc                       s$   e Zd Z fddZdd Z  ZS )zSDistributedTest._DistTestBase.test_ddp_returns_tensor_with_no_grad.<locals>.MyModelc                    s2   t    tjdddd| _tjdddd| _d S r   )rC   rD   rE   rF   r   r   rL   rN   rP   rQ   rD   !  s    
z\DistributedTest._DistTestBase.test_ddp_returns_tensor_with_no_grad.<locals>.MyModel.__init__c                 S   s8   |  t| |}| }| }|jr0t||fS r\   )r   r   r   r   r  detachr   r  )rM   rT   r  rP   rP   rQ   rU   !  s
    
z[DistributedTest._DistTestBase.test_ddp_returns_tensor_with_no_grad.<locals>.MyModel.forwardrV   rP   rP   rN   rQ   r  !  s   r  r?   r<   r   TF)r  r  rv  r  r  r   )rJ   r
  r  r   rE   r   r  rK   rL  r-  r   r  r  r   r   r  )
rM   r  r   r   r  r  r  r  rv  orP   rP   rQ   $test_ddp_returns_tensor_with_no_grad!  s"    zBDistributedTest._DistTestBase.test_ddp_returns_tensor_with_no_gradc           	         s  G  fdddt j tj| j    }dD ]h}tj jj|| jg|d}tjdddd}t	d	D ]2}|||d
d}|
 }|  | |j  qfq2tj jj|| jgdd}tjdddd}t	d	D ]*}||d|d dkd}|
 }|  q| |j  d S )Nc                       s&   e Zd Z fddZdd Z  ZS )zRDistributedTest._DistTestBase.test_detect_ddp_is_actually_static.<locals>.ToyModelc                    s2   t  |   tjdddd| _tdd| _d S r   r  rL   r	  rP   rQ   rD   !  s    z[DistributedTest._DistTestBase.test_detect_ddp_is_actually_static.<locals>.ToyModel.__init__c                 S   s8   |r$|r|  | |S |  |S n|  | |S d S r\   )r  r  )rM   rT   r  dynamicrP   rP   rQ   rU   !  s
    zZDistributedTest._DistTestBase.test_detect_ddp_is_actually_static.<locals>.ToyModel.forwardrV   rP   r  rN   rQ   r
  !  s   r
  r  r  r?   r<   r
  r   r  F)r  r  TrA   r   )rE   r   rJ   r
  r  r   r  r   rK   r  r   r  r  r8  Z_ddp_graph_staticr  )	rM   r   r  r  r   r  rv  r   r  rP   r  rQ   "test_detect_ddp_is_actually_static!  s4    

z@DistributedTest._DistTestBase.test_detect_ddp_is_actually_staticc           
   	   C   s  G dd dt j}| | j}dD ]}t|| jg| jd||d}dd tdD }td	D ]}|  tjd
d| jd}|||d |d
 |d	 d\}	|d< |d
< |d	< tt	|D ]<}t
|| r| || jd  q| || d jd  q|	   q\q$d S )Nc                       s,   e Zd Z fddZdd Zdd Z  ZS )zJDistributedTest._DistTestBase._test_ddp_new_tensor_in_fwd.<locals>.MyModelc                    s>   t    tjdddd| _tjdddd| _| jjj| _d S r   )rC   rD   rE   rF   r   r   r  r   rL   rN   rP   rQ   rD   !  s    
zSDistributedTest._DistTestBase._test_ddp_new_tensor_in_fwd.<locals>.MyModel.__init__c                 S   s   t jdd| jd}|S )Nr?   r<   r   )rJ   rK   r   )rM   r  rP   rP   rQ   Z
__init_opt!  s    zUDistributedTest._DistTestBase._test_ddp_new_tensor_in_fwd.<locals>.MyModel.__init_optc                 S   sd   t | |}| |}|d kr*|  }|d kr:|  }|d ksLt|sT|  }|||d|ifS )Nrq   )r   r   r   r   _MyModel__init_optrJ   	is_tensor)rM   rT   opt_1opt_2
opt_nestedrP   rP   rQ   rU   !  s    
zRDistributedTest._DistTestBase._test_ddp_new_tensor_in_fwd.<locals>.MyModel.forward)rW   rX   rY   rD   r  rU   rZ   rP   rP   rN   rQ   r  !  s   r  r  F)r  r  ru  rv  r  c                 S   s   g | ]}d qS r\   rP   r  rP   rP   rQ   r   "  s     zMDistributedTest._DistTestBase._test_ddp_new_tensor_in_fwd.<locals>.<listcomp>rh   rA   r?   r<   r   r   )r  r  r  rq   )rE   r   r  r   r   r  r  rJ   rK   r~  r  rf  Zgrad_fnrj  r  )
rM   r  r  r   r  r  r  r  rT   rv  rP   rP   rQ   _test_ddp_new_tensor_in_fwd!  s4    	   z9DistributedTest._DistTestBase._test_ddp_new_tensor_in_fwdc                 C   s   | j ddS )NFr  r  rL   rP   rP   rQ   test_ddp_new_tensor_in_fwd"  s    z8DistributedTest._DistTestBase.test_ddp_new_tensor_in_fwdc                 C   s   | j ddS )NTr  r  rL   rP   rP   rQ   'test_ddp_new_tensor_in_fwd_static_graph"  s    zEDistributedTest._DistTestBase.test_ddp_new_tensor_in_fwd_static_graphc                    s  | j }tj| t| tj|  fdd}tjjjjj	}tjjjjj
}||fD ],}t |}tjjj|| j gd}|||| tjjjt|| j gdd}	tjdd|d}
tdD ]}||
 }||krt|	j }|D ]}t| q|	|
 }||kr8t|	j }|D ]}t| q&tj   sT| ||	 |  |   r||kr| ||	 qt  qXd S )	Nc                    s@   dd |  D } fdd|D }r,|S tj|  d S )Nc                 S   s   g | ]\}}|qS rP   rP   r{   r  r@   rP   rP   rQ   r   -"  s    zkDistributedTest._DistTestBase._test_ddp_buffer_hook_allreduce.<locals>.buffer_comm_hook.<locals>.<listcomp>c                    s"   g | ]}t j| jd d qS )T)rV  r  )r   r  r  r  )r{   r@   r  rP   rQ   r   0"  s   )re   rJ   r#  Zcollect_allr  )r  re  rg  Zfutsreturn_futuresr  rQ   buffer_comm_hook,"  s    
zWDistributedTest._DistTestBase._test_ddp_buffer_hook_allreduce.<locals>.buffer_comm_hookr  F)r  ru  rA   r<   r   )r   rJ   r
  r  r  rE   r  r  Z_BufferCommHookLocationZPRE_FORWARDZPOST_FORWARDr:   r   _register_buffer_comm_hookr  r  rK   r  r   rY  r   rg  r   r  r  rm  r  r  )rM   r  r   r  Zhook_pre_fwdZhook_post_fwdZhook_run_locationr   	model_ddpmodel_ddp_no_hookr   r  r  Zmodel_no_hook_buffersrq   loss_no_hookrP   r  rQ   _test_ddp_buffer_hook_allreduce&"  sZ    



z=DistributedTest._DistTestBase._test_ddp_buffer_hook_allreducec                 C   s   | j dd d S )NTr  r  rL   rP   rP   rQ   ,test_ddp_buffer_hook_allreduce_return_futureq"  s    zJDistributedTest._DistTestBase.test_ddp_buffer_hook_allreduce_return_futurec                 C   s   | j dd d S )NFr  r  rL   rP   rP   rQ   test_ddp_buffer_hook_allreduce{"  s    z<DistributedTest._DistTestBase.test_ddp_buffer_hook_allreducec           
      C   s   | j }tj| t| tj| dd }t |}tjjj|| j gd}|	|| tjjjt
|| j gd}tjdd|d}tdD ]8}|| }|| }	| || |  |	  qd S )Nc                 S   s    dd |  D }| | d S )Nc                 S   s   g | ]\}}|qS rP   rP   r  rP   rP   rQ   r   "  s    znDistributedTest._DistTestBase.test_ddp_broadcast_buffer_via_hook.<locals>.buffer_comm_hook.<locals>.<listcomp>)re   Z_default_broadcast_coalesced)r  re  rg  rP   rP   rQ   r  "  s    zZDistributedTest._DistTestBase.test_ddp_broadcast_buffer_via_hook.<locals>.buffer_comm_hookr  rA   r<   r   )r   rJ   r
  r  r  r:   rE   r  r   r  r  r  rK   r  r   rm  r  )
rM   r   r  r   r  r  r   r  r  r  rP   rP   rQ   "test_ddp_broadcast_buffer_via_hook"  s2    
z@DistributedTest._DistTestBase.test_ddp_broadcast_buffer_via_hookc           
         s   | j }tj| t| tj| G dd dtj}| |}tjjj|| j gd tj	dd|d}t
dD ]}|dkr jjd  j_ | }|   fd	d
t
t D }t| jj |d }|dd  D ]}	| ||	 qqtd S )Nc                       s$   e Zd Z fddZdd Z  ZS )zODistributedTest._DistTestBase.test_ddp_broadcast_buffer.<locals>.NetWithBuffersc                    sF   t    tjdddd| _tjdddd| _| dtdd d S r;   rB   rL   rN   rP   rQ   rD   "  s    
zXDistributedTest._DistTestBase.test_ddp_broadcast_buffer.<locals>.NetWithBuffers.__init__c                 S   s   |  | |S r\   )rH   rG   rS   rP   rP   rQ   rU   "  s    zWDistributedTest._DistTestBase.test_ddp_broadcast_buffer.<locals>.NetWithBuffers.forwardrV   rP   rP   rN   rQ   r:   "  s   r:   r  rA   r<   r   r   r?   c                    s   g | ]}t  jjqS rP   )rJ   rb  r   r@   r  r  rP   rQ   r   "  s     zKDistributedTest._DistTestBase.test_ddp_broadcast_buffer.<locals>.<listcomp>)r   rJ   r
  r  r  rE   r   r  r   rK   r  r   r@   r   r  r   r   ri  rf  )
rM   r   r:   r   r   r  r   ZbufsZ
rank_0_bufrl  rP   r  rQ   test_ddp_broadcast_buffer"  s*    

z7DistributedTest._DistTestBase.test_ddp_broadcast_bufferz8Only Nccl & Gloo backend support DistributedDataParallelc                 C   s   t }| j}||}tjjjt|| jgd}|	 }|
dd}| | tj|}tjjj|| jgd}|	 }|
dd}| | d S )Nr  Zhas_sync_bnTF)r;  r   r
  rJ   rE   r  r   r  r  r  rG  r  r=  r>  r  )rM   r   r   r  Z
no_sync_bnr  Zsync_bn_loggedr  rP   rP   rQ   test_sync_bn_logged"  s$    

z1DistributedTest._DistTestBase.test_sync_bn_loggedc              	   C   sn  G dd dt jj}| j}| |}ddlm} |d t jjj||gd}W 5 Q R X t 	d|}t j
dgg|d	d
}t j
dg|d	d
}t j
dg|d}|||d}	|jjj }
|jj }t||	|}| || |jjj}|jj}| ||
 | || |  | |j | |j | |j | |jjjj | |jjjj | |jjj d S )Nc                       s$   e Zd Z fddZdd Z  ZS )zMDistributedTest._DistTestBase.test_stateless_api_with_ddp.<locals>.MockModulec                    s4   t    tjdd| _td}| d| d S )Nr?   r@   )rC   rD   rJ   rE   rF   r4  r   rI   )rM   r@   rN   rP   rQ   rD   "  s    

zVDistributedTest._DistTestBase.test_stateless_api_with_ddp.<locals>.MockModule.__init__c                 S   s   |  || j S r\   )r4  r@   rS   rP   rP   rQ   rU   "  s    zUDistributedTest._DistTestBase.test_stateless_api_with_ddp.<locals>.MockModule.forwardrV   rP   rP   rN   rQ   
MockModule"  s   r  r   )_ddp_replicated_tensorFr  )r?   r?   r  T)r   r   rj  r   )zmodule.l1.weightzmodule.l1.biaszmodule.buffer)rJ   rE   r   r   r  rC  r  r  r   r  rq   r   r4  r  r  r@   
_statelessZfunctional_callrf  r  ZassertIsNotNoner  r  r>   )rM   r  r   r   r  rT   r  r>   r@   r  Zprev_weightZprev_bufferr*  Z
cur_weightZ
cur_bufferrP   rP   rQ   test_stateless_api_with_ddp"  s@    


z9DistributedTest._DistTestBase.test_stateless_api_with_ddpc           
         s$  G  fdddt j dd }dd }dd }  }  }|j| |j| |j| |j| |j| |j| t|| j| jgd	}t	
d
d}||}||| j}| || |   |   dd | D }	| |	d |jjj | |	d |jjj d S )Nc                       s&   e Zd Z fddZdd Z  ZS )zTDistributedTest._DistTestBase.test_ddp_forward_backward_hook.<locals>.DummyTestModelc                    s*   t  |   td tdd| _d S )Nr   rA   )rC   rD   rJ   r  rE   rF   r   rL   )DummyTestModelrO   rP   rQ   rD   ,#  s    
z]DistributedTest._DistTestBase.test_ddp_forward_backward_hook.<locals>.DummyTestModel.__init__c                 S   s
   |  |S r\   r   rS   rP   rP   rQ   rU   1#  s    z\DistributedTest._DistTestBase.test_ddp_forward_backward_hook.<locals>.DummyTestModel.forwardrV   rP   r  rN   rQ   r  +#  s   r  c                 S   s   t j|d S r   )rE   
functionalr   )r   r  rP   rP   rQ   	relu_hook4#  s    zODistributedTest._DistTestBase.test_ddp_forward_backward_hook.<locals>.relu_hookc                 S   s   t j|S r\   )rE   r  Zgelur   _inputr  rP   rP   rQ   	gelu_hook7#  s    zODistributedTest._DistTestBase.test_ddp_forward_backward_hook.<locals>.gelu_hookc                 S   s   t j|d fS r   )rE   r  Zcelur  rP   rP   rQ   	celu_hook:#  s    zODistributedTest._DistTestBase.test_ddp_forward_backward_hook.<locals>.celu_hookr  rn   rA   c                 S   s   g | ]
}|j qS rP   )r  )r{   r   rP   rP   rQ   r   N#  s     zPDistributedTest._DistTestBase.test_ddp_forward_backward_hook.<locals>.<listcomp>r   r?   )rE   r   r   Zregister_forward_pre_hookZregister_forward_hookZregister_backward_hookr   r  r   rJ   r  rf  r   r  r  r  r  r>   )
rM   r  r  r  r  r  Z
input_dataZoutput_localZ
output_ddpZ	ddp_gradsrP   r  rQ   test_ddp_forward_backward_hook'#  s2    	
 z<DistributedTest._DistTestBase.test_ddp_forward_backward_hookc              	   C   s8  t d d}t d }| j}t jdd|d}t jdd|d}t jdd|}t	t
||gd}	t	t
||gd}
t jj|	 |d	}|	|| |	  td
D ]0}|  |	|}t||}|  |  q|	 ||d}|dkr@|  }t || W 5 Q R X | t|jd | |jd  d t  dd d| i}|  }t j ||d}W 5 Q R X | t|jd | |jd  d |
!|d  |d }|d }t jj|
 |d	}| |j"|j" | |j#|j# |j#D ]2}|dkr|dkr| t$||t$|| q| |j%t&  t'|j() |j() D ]\}}t*j+,|| qT|
|| |
  td
D ]^}|  |  |	|}|
|}t||}t||}|  |  |  |  qt'|	 |
 D ]\}}| |j-|j- qt  |dkr4t./| d S )Nr   r  z/checkpoint.ptr  r?   r   rn   r  r  r<   )r  r  comm_hook_statezHNOTE: Process group is not serializable and excluded from a saved state.r  rL  zNOTE: Process group will be set to a default group (i.e. the world size).                If a different group is desired, please set `self.process_group` after PowerSGD state is loaded.r  r  r  r  rng)0rJ   r  r   
gettempdirr   rK   rE   rF   r  r   r  r  r  r  r  r  r  r  r  r   r  r  r  r  Z
assertLogsr  rf  r~  records
getMessager   r  r  rN  rY   r  getattrr  r   rh  r  Z	get_stater  rm  Zassert_array_equalr  r   r  )rM   r   Z
hook_staterC  rP  r   r  r  r  r  Zdummy_ddp_modelr6  r  rv  r   r   ZcapturedrM  rQ  Z
dummy_hookZdummy_hook_stateZdummy_optimizerentryZentry1Zentry2Z
out_originZ	out_dummyZloss_originZ
loss_dummyZ
orig_paramZdummy_paramrP   rP   rQ   _test_hook_picklingR#  s    






z1DistributedTest._DistTestBase._test_hook_picklingzDflaky on PyTorch CI: No such file or directory: '/tmp/checkpoint.pt'c                 C   s&   t j}t jd ddd}| || d S )Nr?   ri   )r  r  r  )r  r   r  r  )rM   r   r  rP   rP   rQ   test_ddp_hook_pickling_powerSGD#  s    z=DistributedTest._DistTestBase.test_ddp_hook_pickling_powerSGD)N)FNF)FN)FN)TN)FN)FN)TN)FN)r  N)Nr   FNrn   )NFFF)F)r  )F)F)rA   NF)F)NT)N)N)N)F)F)FF(  rW   rX   rY   rK  rX  rZ  r]  rm  rz  r  r  r  r  r  r  r  r  r7   rs   rt   r  r  r#   r  r  r  r   r.   r   r   r   r%   r  r  r  r'   r  r  r  r  r  r  r  r  r  r  r  r)   r  r  r  r  r  r  r  r  r  r!  r1  r2  r4  r3   r0   r1   r6  r>  rA  rD  rG  rP  Zskip_collectiverQ  rR  rS  rT  rU  rV  rW  r`  ra  rb  rc  rh  rq  rr  rt  ru  rv  ry  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rm  rJ   rk  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  staticmethodr  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r"  r#  r$  r%  r&  r'  r(  r)  r*  r.  r0  r6  r?  r@  rA  rB  rC  rD  rH  rL  rN  rP  rQ  rR  rS  rT  rV  rW  rX  rY  r"   rZ  r[  r\  r]  r^  r_  r`  ra  rb  rc  rd  re  rf  rg  rh  rk  rl  rm  rn  ro  rp  rq  rs  rt  rw  rx  ry  r{  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r  r  r  r5   r  r  r  r  r  r  r  r4   r  r  r	  r  r&   r  r  r$  r)  r,  rv   r8  r:  r?  rI  rJ  r@  rA  rR  rU  rW  rX  r$   rZ  r[  r\  r_  rb  rc  re  rf  rn  ro  rq  r  r  r  skipIfNoTorchVisionr  r   r  r  r  r  r  r  r  r  r(   r,   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r/  r3  r@  rB  rE  rF  rH  rN  rR  rT  rV  rW  rY  rZ  r[  r^  r_  re  rh  ri  rj  rk  rl  ru  rw  r{  r+   r}  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rP   rP   rP   rQ   _DistTestBaseU  s@
  	!


	







$

#
"
E

,' 
 
 M 
 
 " 
 
 -


  
H

 
! 
)(  
2@
8








   3  !  !"  '




$    !   " )  	  
    H   <#!; 93    Q




+7 ;/?+())&E 
& N 
 
/,4<.k6 @O 
 
[2"f!P.e'6\  "#5
<t&,S!-3K%"2(sr  N)rW   rX   rY   r  rP   rP   rP   rQ   rR  T  s   rR  )r  rL  r  r   r  r   r   r*  collectionsr   r   
contextlibr   r   datetimer   	functoolsr   typingr   r	   r
   r   Znumpyr  rJ   Z
torch.cudaZtorch.distributedr  r   Z6torch.distributed.algorithms.model_averaging.averagersZ
algorithmsZmodel_averagingr  ZHtorch.distributed.algorithms.model_averaging.hierarchical_model_averagerZhierarchical_model_averagerr  Z2torch.distributed.algorithms.model_averaging.utilsutilsr  Ztorch.nnrE   Ztorch.nn.functionalr  r   Ztorch.nn.utils._statelessr  Ztorch._utils_internalr   r7  r   rr  Ztorch.cuda.ampr   r   Z+torch.distributed.algorithms.ddp_comm_hooksr   r  r   r  r   r  r   r  r  r   r   r   r   Ztorch.distributed.utilsr   r   Ztorch.nn.parallelr   Ztorch.nn.parallel.distributedr   Z*torch.testing._internal.common_distributedr   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   Z$torch.testing._internal.common_utilsr/   r0   r1   r2   r3   r4   r5   r6   r7   Z/torch.distributed.optim.post_localSGD_optimizerr  rK  Ztorch.utils.data.distributedr8   r  r  ImportErrorr   r   r   r   r:   r[   r1  barrK   Zfoo_cpu_tensorr  r   r   r   r   r   r  r  r(  r!  r,  rp   r  r   rs   getenvrt   r   r   r   r  r  r  r  r  r   r   r   r   r   r   r   r   r   r   r   r  r;  r<  r=  r]  r   rH  rF  r   r   r   r   r  rk  r  r  r  r  r"  r  r6  rR  r  rP   rP   rP   rQ   <module>   sx  T,


		


	



	'J                                                                   H