U
    <cm                     @   sJ   d dl Z d dlZd dlmZ d dlmZ d dlmZ dd Zd	ddZ	dS )
    N_get_device_index)autocast)ExceptionWrapperc                 C   s~   t | tjr| S t | ts$t | trHtt| D ]}t |tjr.|  S q.t | trztt|  D ]}t |tjr`|  S q`d S )N)	
isinstancetorchZTensorlisttuplemap	get_a_vardictitems)objresult r   D/tmp/pip-unpacked-wheel-gikjz4vx/torch/nn/parallel/parallel_apply.pyr      s    


r   c           
         sz  t | t |kst|dk	r2t | t |ks@tni ft |  }|dk	r^t | t |ksltndgt |  }dd |D }dd |D }t i t t  d
fdd	 t | dkr fddtt| ||||D }|D ]}|	  q|D ]}|
  q n& d	| d	 |d	 |d	 |d	 |d	  g }tt |D ],}| }	t|	trh|	  ||	 qH|S )a~  Applies each `module` in :attr:`modules` in parallel on arguments
    contained in :attr:`inputs` (positional) and :attr:`kwargs_tup` (keyword)
    on each of :attr:`devices`.

    Args:
        modules (Module): modules to be parallelized
        inputs (tensor): inputs to the modules
        devices (list of int or torch.device): CUDA devices

    :attr:`modules`, :attr:`inputs`, :attr:`kwargs_tup` (if given), and
    :attr:`devices` (if given) should all have same length. Moreover, each
    element of :attr:`inputs` can either be a single object as the only argument
    to a module, or a collection of positional arguments.
    Nc                 S   s   g | ]}t |d qS )Tr   .0xr   r   r   
<listcomp>/   s     z"parallel_apply.<locals>.<listcomp>c                 S   s   g | ]}t j|qS r   )r   cudacurrent_streamr   r   r   r   r   0   s     c                    s   t  |d krt| }|d kr2t j|}zt j|R t j|: t d$ t	|t
tfsp|f}|||}W 5 Q R X W 5 Q R X W 5 Q R X  || < W 5 Q R X W n: tk
r    td| |d| < W 5 Q R X Y nX d S )N)Zenabledzin replica {} on device {})where)r   Zset_grad_enabledr   Z
get_devicer   r   devicestreamr   r   r   r	   	Exceptionr   format)imoduleinputkwargsr   r   output)autocast_enabledgrad_enabledlockresultsr   r   _worker5   s"    
((
zparallel_apply.<locals>._worker   c              
      s4   g | ],\}\}}}}}t j ||||||fd qS ))targetargs)	threadingThread)r   r   r   r   r    r   r   )r&   r   r   r   I   s   r   )NN)lenAssertionErrorr*   Lockr   Zis_grad_enabledZis_autocast_enabled	enumeratezipstartjoinranger   r   reraiseappend)
modulesinputsZ
kwargs_tupZdevicesZstreamsthreadsthreadoutputsr   r!   r   )r&   r"   r#   r$   r%   r   parallel_apply   s:    

&r;   )NN)
r*   r   Ztorch.cuda._utilsr   Ztorch.cuda.ampr   Ztorch._utilsr   r   r;   r   r   r   r   <module>   s   