U
    Kc]                     @   s  d dl Z d dlZd dlZd dlmZmZmZmZ d dlZddl	m
Z
mZmZ ddlmZmZ d dlmZ d dlmZ dd	d
dddddddddddddddddgZdd Zejdd ZdFeeef d d!dZd"d	 ZdGeeef dd#d$d
Zdd%d&dZdHeeef eeef d#d'dZdIeeef eeef d#d(dZ dJeeef dd#d)dZ!dKeeef dd#d*dZ"dLeeef dd#d+dZ#dMeeef dd#d,dZ$dNeeef ed#d-dZ%dOeeef ed#d.dZ&dPeeef ed#d/dZ'dQeeef ed#d0dZ(dReeef ed#d1dZ)dSeeef ed#d2dZ*d3d Z+dTeeef e,ed5d6dZ-dUeeef ed#d7dZ.dVeeef eeef d#d8dZ/dWe,eeef d9d:d;Z0dXeeef d d<d=Z1dYd?d@Z2dZdAdBZ3edCdDdEZ4dS )[    N)AnyDictUnionTuple   )is_initialized_get_device_index
_lazy_init)segmentsmemory)Device)_Ccaching_allocator_alloccaching_allocator_deleteset_per_process_memory_fractionempty_cachememory_statsmemory_stats_as_nested_dictreset_accumulated_memory_statsreset_peak_memory_statsreset_max_memory_allocatedreset_max_memory_cachedmemory_allocatedmax_memory_allocatedmemory_reservedmax_memory_reservedmemory_cachedmax_memory_cachedmemory_snapshotmemory_summarylist_gpu_processesmem_get_infoc                   C   s   t   tj S N)r	   torchr   Z_cuda_cudaHostAllocator r$   r$   5/tmp/pip-unpacked-wheel-gikjz4vx/torch/cuda/memory.py_host_allocator   s    r&   c                   c   s&   t j  z
d V  W 5 t j  X d S r"   )r#   r   Z_cuda_lock_mutexZ_cuda_unlock_mutexr$   r$   r$   r%   _free_mutex   s    

r'   devicec              
   C   s   |dkrt j }t|}|dkr.t j|}t|t jjjrD|j}t|t	sVt
dt j| t j| |W  5 Q R  S Q R X dS )a  Performs a memory allocation using the CUDA memory allocator.

    Memory is allocated for a given device and a stream, this
    function is intended to be used for interoperability with other
    frameworks. Allocated memory is released through
    :func:`~torch.cuda.caching_allocator_delete`.

    Args:
        size (int): number of bytes to be allocated.
        device (torch.device or int, optional): selected device. If it is
            ``None`` the default CUDA device is used.
        stream (torch.cuda.Stream or int, optional): selected stream. If is ``None`` then
            the default stream for the selected device is used.

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    NzsInvalid type for stream argument, must be `torch.cuda.Stream` or `int` representing a pointer to a exisiting stream)r#   cudacurrent_devicer   Zcurrent_stream
isinstanceZstreamsZStreamZcuda_streamint	TypeErrorr)   r   Z$_cuda_cudaCachingAllocator_raw_alloc)sizer)   streamr$   r$   r%   r   #   s    

c                 C   s   t j|  dS )a  Deletes memory allocated using the CUDA memory allocator.

    Memory allocated with :func:`~torch.cuda.caching_allocator_alloc`.
    is freed here. The associated device and stream are tracked inside
    the allocator.

    Args:
        mem_ptr (int): memory address to be freed by the allocator.

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    N)r#   r   Z%_cuda_cudaCachingAllocator_raw_delete)Zmem_ptrr$   r$   r%   r   E   s    )r)   returnc                 C   sb   t   |dkrtj }t|}t| ts2td| dk sB| dkrPtd	| tj
| | dS )a  Set memory fraction for a process.
    The fraction is used to limit an caching allocator to allocated memory on a CUDA device.
    The allowed value equals the total visible memory multiplied fraction.
    If trying to allocate more than the allowed value in a process, will raise an out of
    memory error in allocator.

    Args:
        fraction(float): Range: 0~1. Allowed memory equals total_memory * fraction.
        device (torch.device or int, optional): selected device. If it is
            ``None`` the default CUDA device is used.
    .. note::
        In general, the total available free memory is less than the total capacity.
    Nz3Invalid type for fraction argument, must be `float`r   r   z.Invalid fraction value: {}. Allowed range: 0~1)r	   r#   r*   r+   r   r,   floatr.   
ValueErrorformatr   Z_cuda_setMemoryFraction)fractionr)   r$   r$   r%   r   V   s    

)r1   c                   C   s   t  rtj  dS )a  Releases all unoccupied cached memory currently held by the caching
    allocator so that those can be used in other GPU application and visible in
    `nvidia-smi`.

    .. note::
        :func:`~torch.cuda.empty_cache` doesn't increase the amount of GPU
        memory available for PyTorch. However, it may help reduce fragmentation
        of GPU memory in certain cases. See :ref:`cuda-memory-management` for
        more details about GPU memory management.
    N)r   r#   r   Z_cuda_emptyCacher$   r$   r$   r%   r   q   s    c                    s8   g  fdd t | d} d|   tS )aX  Returns a dictionary of CUDA memory allocator statistics for a
    given device.

    The return value of this function is a dictionary of statistics, each of
    which is a non-negative integer.

    Core statistics:

    - ``"allocated.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      number of allocation requests received by the memory allocator.
    - ``"allocated_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      amount of allocated memory.
    - ``"segment.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      number of reserved segments from ``cudaMalloc()``.
    - ``"reserved_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      amount of reserved memory.
    - ``"active.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      number of active memory blocks.
    - ``"active_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      amount of active memory.
    - ``"inactive_split.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      number of inactive, non-releasable memory blocks.
    - ``"inactive_split_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      amount of inactive, non-releasable memory.

    For these core statistics, values are broken down as follows.

    Pool type:

    - ``all``: combined statistics across all memory pools.
    - ``large_pool``: statistics for the large allocation pool
      (as of October 2019, for size >= 1MB allocations).
    - ``small_pool``: statistics for the small allocation pool
      (as of October 2019, for size < 1MB allocations).

    Metric type:

    - ``current``: current value of this metric.
    - ``peak``: maximum value of this metric.
    - ``allocated``: historical total increase in this metric.
    - ``freed``: historical total decrease in this metric.

    In addition to the core statistics, we also provide some simple event
    counters:

    - ``"num_alloc_retries"``: number of failed ``cudaMalloc`` calls that
      result in a cache flush and retry.
    - ``"num_ooms"``: number of out-of-memory errors thrown.

    The caching allocator can be configured via ENV to not split blocks larger than a
    defined size (see Memory Management section of the Cuda Semantics documentation).
    This helps avoid memory framentation but may have a performance
    penalty. Additional outputs to assist with tuning and evaluating impact:

    - ``"max_split_size"``: blocks above this size will not be split.
    - ``"oversize_allocations.{current,peak,allocated,freed}"``:
      number of over-size allocation requests received by the memory allocator.
    - ``"oversize_segments.{current,peak,allocated,freed}"``:
      number of over-size reserved segments from ``cudaMalloc()``.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistics for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    c                    sR   t |tr@t| dkr| d7 } | D ]\}} | | | q&n| |f d S )Nr   .)r,   dictlenitemsappend)prefixobjkv_recurse_add_to_resultresultr$   r%   r@      s    
z,memory_stats.<locals>._recurse_add_to_resultr(    )r   sortcollectionsOrderedDict)r)   statsr$   r?   r%   r      s    F	

c                 C   s"   t  s
i S t| dd} tj| S )zNReturns the result of :func:`~torch.cuda.memory_stats` as a nested dictionary.Toptional)r   r   r#   r   Z_cuda_memoryStatsr(   r$   r$   r%   r      s    c                 C   s   t | dd} tj| S )a~  Resets the "accumulated" (historical) stats tracked by the CUDA memory allocator.

    See :func:`~torch.cuda.memory_stats` for details. Accumulated stats correspond to
    the `"allocated"` and `"freed"` keys in each individual stat dict, as well as
    `"num_alloc_retries"` and `"num_ooms"`.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    TrG   )r   r#   r   Z!_cuda_resetAccumulatedMemoryStatsr(   r$   r$   r%   r      s    c                 C   s   t | dd} tj| S )a  Resets the "peak" stats tracked by the CUDA memory allocator.

    See :func:`~torch.cuda.memory_stats` for details. Peak stats correspond to the
    `"peak"` key in each individual stat dict.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    TrG   )r   r#   r   Z_cuda_resetPeakMemoryStatsr(   r$   r$   r%   r      s    c                 C   s   t dt t| dS )a  Resets the starting point in tracking maximum GPU memory occupied by
    tensors for a given device.

    See :func:`~torch.cuda.max_memory_allocated` for details.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. warning::
        This function now calls :func:`~torch.cuda.reset_peak_memory_stats`, which resets
        /all/ peak memory stats.

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    zytorch.cuda.reset_max_memory_allocated now calls torch.cuda.reset_peak_memory_stats, which resets /all/ peak memory stats.r(   warningswarnFutureWarningr   r(   r$   r$   r%   r     s
    c                 C   s   t dt t| dS )a  Resets the starting point in tracking maximum GPU memory managed by the
    caching allocator for a given device.

    See :func:`~torch.cuda.max_memory_cached` for details.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. warning::
        This function now calls :func:`~torch.cuda.reset_peak_memory_stats`, which resets
        /all/ peak memory stats.

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    zvtorch.cuda.reset_max_memory_cached now calls torch.cuda.reset_peak_memory_stats, which resets /all/ peak memory stats.r(   rI   r(   r$   r$   r%   r   !  s
    c                 C   s   t | dddS )a`  Returns the current GPU memory occupied by tensors in bytes for a given
    device.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        This is likely less than the amount shown in `nvidia-smi` since some
        unused memory can be held by the caching allocator and some context
        needs to be created on GPU. See :ref:`cuda-memory-management` for more
        details about GPU memory management.
    r(   zallocated_bytes.all.currentr   r   getr(   r$   r$   r%   r   ;  s    c                 C   s   t | dddS )a  Returns the maximum GPU memory occupied by tensors in bytes for a given
    device.

    By default, this returns the peak allocated memory since the beginning of
    this program. :func:`~torch.cuda.reset_peak_memory_stats` can be used to
    reset the starting point in tracking this metric. For example, these two
    functions can measure the peak allocated memory usage of each iteration in a
    training loop.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    r(   zallocated_bytes.all.peakr   rM   r(   r$   r$   r%   r   M  s    c                 C   s   t | dddS )a  Returns the current GPU memory managed by the caching allocator in bytes
    for a given device.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    r(   zreserved_bytes.all.currentr   rM   r(   r$   r$   r%   r   c  s    c                 C   s   t | dddS )a   Returns the maximum GPU memory managed by the caching allocator in bytes
    for a given device.

    By default, this returns the peak cached memory since the beginning of this
    program. :func:`~torch.cuda.reset_peak_memory_stats` can be used to reset
    the starting point in tracking this metric. For example, these two functions
    can measure the peak cached memory amount of each iteration in a training
    loop.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    r(   zreserved_bytes.all.peakr   rM   r(   r$   r$   r%   r   s  s    c                 C   s   t dt t| dS )z4Deprecated; see :func:`~torch.cuda.memory_reserved`.zGtorch.cuda.memory_cached has been renamed to torch.cuda.memory_reservedr(   )rJ   rK   rL   r   r(   r$   r$   r%   r     s
    c                 C   s   t dt t| dS )z8Deprecated; see :func:`~torch.cuda.max_memory_reserved`.zOtorch.cuda.max_memory_cached has been renamed to torch.cuda.max_memory_reservedr(   )rJ   rK   rL   r   r(   r$   r$   r%   r     s
    c                   C   s
   t j S )a)  Returns a snapshot of the CUDA memory allocator state across all devices.

    Interpreting the output of this function requires familiarity with the
    memory allocator internals.

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    )r#   r   _cuda_memorySnapshotr$   r$   r$   r%   r     s    
F)r)   abbreviatedr1   c                 C   st  t | dd} t| d}dd }dd }dd	|fd
d|fdd|fdd|fdd|fdd|fdd|fdd|fg}g }|d |d |d |d |d |d |D ]\}}}	|d d|fg}
|s|
d |
d d \}}}}|
D ]\}}|d! | d! }||d"  }||d#  }||d$  }||d%  }|d&krV|}|}|}|}|d'||	|||	|||	|||	|| qqd(d)|fd*d+|fg}|D ]~\}}}	|d |d! }||d"  }||d#  }||d$  }||d%  }|d'||	|||	|||	|||	|| q|d d,| d-}| D ]\}}|||d!d.< q>d/d0|jf | d1 S )2a  Returns a human-readable printout of the current memory allocator
    statistics for a given device.

    This can be useful to display periodically during training, or when
    handling out-of-memory exceptions.

    Args:
        device (torch.device or int, optional): selected device. Returns
            printout for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).
        abbreviated (bool, optional): whether to return an abbreviated summary
            (default: False).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    TrG   r(   c                 S   sV   ddddddg}|d }|dd  D ]$}|d	k r4 qJ|}| d
 } |d
 }q$d | |S )NzB ZKBZMBZGBZTBZPBr   r   i   i   z{:7d} {}r4   )szZpref_szprefixesr;   
new_prefixr$   r$   r%   _format_size  s    
z$memory_summary.<locals>._format_sizec                 S   sP   dddg}|d }|dd  D ]$}|dk r. qD|}| d } |d }qd | |S )	N KMr   r   iq i  z	{:7d} {} rQ   )ZcntZpref_cntrS   r;   rT   r$   r$   r%   _format_count  s    

z%memory_summary.<locals>._format_countZallocated_byteszAllocated memoryZactive_byteszActive memoryZreserved_byteszGPU reserved memoryZinactive_split_byteszNon-releasable memoryZ
allocationZAllocationsactivezActive allocssegmentzGPU reserved segmentsZinactive_splitzNon-releasable allocszK===========================================================================z= {_:16} PyTorch CUDA memory summary, device ID {device:<17d} zK---------------------------------------------------------------------------zX  {_:9} CUDA OOMs: {num_ooms:<12d} | {_:6} cudaMalloc retries: {num_alloc_retries:<8d}  zK        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  all)Z
large_poolz      from large pool)Z
small_poolz      from small pool)NNNNr6   currentpeak	allocatedfreedNz {:<21} | {} | {} | {} | {} Zoversize_allocationszOversize allocationsZoversize_segmentszOversize GPU segmentsrB   )_r)   -|z|
|z|
)r   r   r:   r4   r9   replacejoin)r)   rP   rF   rU   rY   Zmetrics_to_displaylinesZ
metric_keyZmetric_name	formatterZ
submetricsZcurrent_prefvalZpeak_prefvalZallocated_prefvalZfreed_prefvalZsubmetric_keyZsubmetric_namer;   r]   r^   r_   r`   Zfmt_dictr=   r>   r$   r$   r%   r     s    












	



c                 C   s   zddl }W n tk
r"   Y dS X ddl m} z|  W n |k
rR   Y dS X t| dd} || }||}g }|d|   t|dkr|d	 |D ],}|j	d
 }|d|j
dd|dd qd|S )a  Returns a human-readable printout of the running processes
    and their GPU memory use for a given device.

    This can be useful to display periodically during training, or when
    handling out-of-memory exceptions.

    Args:
        device (torch.device or int, optional): selected device. Returns
            printout for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).
    r   Nz.pynvml module not found, please install pynvml)NVMLError_DriverNotLoadedz-cuda driver can't be loaded, is cuda enabled?TrG   zGPU:zno processes are runningi   zprocess z>10dz uses z>12.3fz MB GPU memory
)pynvmlModuleNotFoundErrorrh   ZnvmlInitr   ZnvmlDeviceGetHandleByIndexZ$nvmlDeviceGetComputeRunningProcessesr:   r8   ZusedGpuMemorypidre   )r)   rj   rh   handleZprocsrf   pZmemr$   r$   r%   r    "  s(    



 c                 C   s*   | dkrt j } t| } t j | S )a  Returns the global free and total GPU memory occupied for a given
    device using cudaMemGetInfo.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more
        details about GPU memory management.
    N)r#   r*   r+   r   ZcudartZcudaMemGetInfor(   r$   r$   r%   r!   D  s    
Zenabledr)   c              	   C   s&   t j| t|  W 5 Q R X d S r"   )r#   r*   r)   r   Z_cuda_recordMemoryHistoryro   r$   r$   r%   _record_memory_historyV  s    rp   c              
   C   s,   t j|  t W  5 Q R  S Q R X d S r"   )r#   r*   r)   r   rO   r(   r$   r$   r%   	_snapshotZ  s    rq   
output.svgc              	   C   s6   |d krt  }t| d}|t| W 5 Q R X d S Nw)r   openwrite	_segmentsfilenameZsnapshotfr$   r$   r%   _save_segment_usage^  s    r{   c              	   C   s6   |d krt  }t| d}|t| W 5 Q R X d S rs   )r   ru   rv   _memoryrx   r$   r$   r%   _save_memory_usaged  s    r}   envc                 C   s   t j| S r"   )r#   r   Z1_cuda_cudaCachingAllocator_set_allocator_settingsr~   r$   r$   r%   _set_allocator_settingsj  s    r   )NN)N)N)N)N)N)N)N)N)N)N)N)N)N)NF)N)N)N)N)rr   N)rr   N)5rD   
contextlibrJ   typingr   r   r   r   r#   rB   r   r   r	   Z_memory_vizr
   rw   r   r|   Ztorch.typesr   r   __all__r&   contextmanagerr'   r-   r   r   r   r   strr   r   r   r   r   r   r   r   r   r   r   r   r   boolr   r    r!   rp   rq   r{   r}   r   r$   r$   r$   r%   <module>   sn               
""X"|""

