"""
Utilities for working with the local dataset cache.
This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp
Copyright by the AllenNLP authors.
"""

import linecache
import logging
import os
import sys
from collections import defaultdict
from typing import Iterable, List, NamedTuple, Optional, Union

from .file_utils import is_tf_available, is_torch_available


if is_torch_available():
    from torch.cuda import empty_cache as torch_empty_cache

if is_tf_available():
    from tensorflow.python.eager import context as tf_context


logger = logging.getLogger(__name__)

_is_memory_tracing_enabled = False


def is_memory_tracing_enabled():
    return _is_memory_tracing_enabled


class Frame(NamedTuple):
    """ `Frame` is a NamedTuple used to gather the current frame state.
            `Frame` has the following fields:
            - 'filename' (string): Name of the file currently executed
            - 'module' (string): Name of the module currently executed
            - 'line_number' (int): Number of the line currently executed
            - 'event' (string): Event that triggered the tracing (default will be "line")
            - 'line_text' (string): Text of the line in the python script
    """

    filename: str
    module: str
    line_number: int
    event: str
    line_text: str


class UsedMemoryState(NamedTuple):
    """ `UsedMemoryState` are named tuples with the following fields:
        - 'frame': a `Frame` namedtuple (see below) storing information on the current tracing frame (current file, location in current file)
        - 'cpu_memory': CPU RSS memory state *before* executing the line
        - 'gpu_memory': GPU used memory *before* executing the line (sum for all GPUs or for only `gpus_to_trace` if provided)
    """

    frame: Frame
    cpu_memory: int
    gpu_memory: int


class Memory(NamedTuple):
    """ `Memory` NamedTuple has a single field `bytes` and
        you can get a human readable string of the number of bytes by calling `__repr__`
            - `bytes` (integer): number of bytes,
    """

    bytes: int

    def __repr__(self) -> str:
        return bytes_to_human_readable(self.bytes)


class MemoryState(NamedTuple):
    """ `MemoryState` are namedtuples listing frame + CPU/GPU memory with the following fields:
        - `frame` (`Frame`): the current frame (see above)
        - `cpu`: CPU memory consumed during the current frame as a `Memory` named tuple
        - `gpu`: GPU memory consumed during the current frame as a `Memory` named tuple
        - `cpu_gpu`: CPU + GPU memory consumed during the current frame as a `Memory` named tuple
    """

    frame: Frame
    cpu: Memory
    gpu: Memory
    cpu_gpu: Memory


class MemorySummary(NamedTuple):
    """ `MemorySummary` namedtuple with the following fields:
        - `sequential`: a list of `MemoryState` namedtuple (see below) computed from the provided `memory_trace`
            by subtracting the memory after executing each line from the memory before executing said line.
        - `cumulative`: a list of `MemoryState` namedtuple (see below) with cumulative increase in memory for each line
            obtained by summing repeated memory increase for a line if it's executed several times.
            The list is sorted from the frame with the largest memory consumption to the frame with the smallest (can be negative if memory is released)
        - `total`: total memory increase during the full tracing as a `Memory` named tuple (see below).
            Lines with memory release (negative consumption) are ignored if `ignore_released_memory` is `True` (default).
    """

    sequential: List[MemoryState]
    cumulative: List[MemoryState]
    total: Memory


MemoryTrace = List[UsedMemoryState]


def start_memory_tracing(
    modules_to_trace: Optional[Union[str, Iterable[str]]] = None,
    modules_not_to_trace: Optional[Union[str, Iterable[str]]] = None,
    events_to_trace: str = "line",
    gpus_to_trace: Optional[List[int]] = None,
) -> MemoryTrace:
    """ Setup line-by-line tracing to record rss mem (RAM) at each line of a module or sub-module.
        See `../../examples/benchmarks.py` for a usage example.
        Current memory consumption is returned using psutil and in particular is the RSS memory
            "Resident Set Size” (the non-swapped physical memory the process is using).
            See https://psutil.readthedocs.io/en/latest/#psutil.Process.memory_info

        Args:
            - `modules_to_trace`: (None, string, list/tuple of string)
                if None, all events are recorded
                if string or list of strings: only events from the listed module/sub-module will be recorded (e.g. 'fairseq' or 'transformers.modeling_gpt2')
            - `modules_not_to_trace`: (None, string, list/tuple of string)
                if None, no module is avoided
                if string or list of strings: events from the listed module/sub-module will not be recorded (e.g. 'torch')
            - `events_to_trace`: string or list of string of events to be recorded (see official python doc for `sys.settrace` for the list of events)
                defaults to "line"
            - `gpus_to_trace`: (optional list, default None) list of GPUs to trace. Defaults to tracing all GPUs

        Return:
            - `memory_trace` is a list of `UsedMemoryState` for each event (default each line of the traced script).
                - `UsedMemoryState` are named tuples with the following fields:
                    - 'frame': a `Frame` namedtuple (see below) storing information on the current tracing frame (current file, location in current file)
                    - 'cpu_memory': CPU RSS memory state *before* executing the line
                    - 'gpu_memory': GPU used memory *before* executing the line (sum for all GPUs or for only `gpus_to_trace` if provided)

        `Frame` is a namedtuple used by `UsedMemoryState` to list the current frame state.
            `Frame` has the following fields:
            - 'filename' (string): Name of the file currently executed
            - 'module' (string): Name of the module currently executed
            - 'line_number' (int): Number of the line currently executed
            - 'event' (string): Event that triggered the tracing (default will be "line")
            - 'line_text' (string): Text of the line in the python script

    """
    try:
        import psutil
    except ImportError:
        logger.warning(
            "Psutil not installed, we won't log CPU memory usage. "
            "Install psutil (pip install psutil) to use CPU memory tracing."
        )
        process = None
    else:
        process = psutil.Process(os.getpid())

    try:
        from py3nvml import py3nvml

        py3nvml.nvmlInit()
        devices = list(range(py3nvml.nvmlDeviceGetCount())) if gpus_to_trace is None else gpus_to_trace
        py3nvml.nvmlShutdown()
    except ImportError:
        logger.warning(
            "py3nvml not installed, we won't log GPU memory usage. "
            "Install py3nvml (pip install py3nvml) to use GPU memory tracing."
        )
        log_gpu = False
    except (OSError, py3nvml.NVMLError):
        logger.warning("Error while initializing communication with GPU. We won't perform GPU memory tracing.")
        log_gpu = False
    else:
        log_gpu = is_torch_available() or is_tf_available()

    memory_trace = []

    def traceit(frame, event, args):
        """ Tracing method executed before running each line in a module or sub-module
            Record memory allocated in a list with debugging information
        """
        if not _is_memory_tracing_enabled:
            return traceit

        # Filter events (e.g. only "line" events by default)
        if events_to_trace is not None:
            if isinstance(events_to_trace, str) and event != events_to_trace:
                return traceit
            elif isinstance(events_to_trace, (list, tuple)) and event not in events_to_trace:
                return traceit

        name = frame.f_globals["__name__"]
        if not isinstance(name, str):
            return traceit

        # Filter whitelist of modules to trace
        if modules_to_trace is not None:
            if isinstance(modules_to_trace, str) and modules_to_trace not in name:
                return traceit
            elif isinstance(modules_to_trace, (list, tuple)) and all(m not in name for m in modules_to_trace):
                return traceit

        # Filter blacklist of modules not to trace
        if modules_not_to_trace is not None:
            if isinstance(modules_not_to_trace, str) and modules_not_to_trace in name:
                return traceit
            elif isinstance(modules_not_to_trace, (list, tuple)) and any(m in name for m in modules_not_to_trace):
                return traceit

        # Record current tracing state (file, location in file...)
        lineno = frame.f_lineno
        filename = frame.f_globals["__file__"]
        if filename.endswith(".pyc") or filename.endswith(".pyo"):
            filename = filename[:-1]
        line = linecache.getline(filename, lineno).rstrip()
        traced_state = Frame(filename, name, lineno, event, line)

        # Record current CPU memory state (rss memory) *before* executing the line
        cpu_mem = 0
        if process is not None:
            mem = process.memory_info()
            cpu_mem = mem.rss

        # Record current GPU used memory (sum for all GPUs or only `gpus_to_trace`)
        gpu_mem = 0
        if log_gpu:
            # Clear GPU caches before reading used memory
            if is_torch_available():
                torch_empty_cache()
            if is_tf_available():
                tf_context.context()._clear_caches()

            py3nvml.nvmlInit()
            for i in devices:
                handle = py3nvml.nvmlDeviceGetHandleByIndex(i)
                meminfo = py3nvml.nvmlDeviceGetMemoryInfo(handle)
                gpu_mem += meminfo.used
            py3nvml.nvmlShutdown()

        mem_state = UsedMemoryState(traced_state, cpu_mem, gpu_mem)
        memory_trace.append(mem_state)

        return traceit

    sys.settrace(traceit)

    global _is_memory_tracing_enabled
    _is_memory_tracing_enabled = True

    return memory_trace


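# Illustrative usage sketch, kept as a comment so the module stays importable. It assumes
# `psutil` is installed; `BertModel` and "bert-base-uncased" are only an example workload,
# not something this module requires — any Python code can be traced the same way.
#
#     trace = start_memory_tracing("transformers")        # record only lines run inside transformers.*
#
#     from transformers import BertModel                  # hypothetical workload
#     model = BertModel.from_pretrained("bert-base-uncased")
#
#     summary = stop_memory_tracing(trace)
#     print(summary.total)                                 # total memory increase, human readable
#     for state in summary.cumulative[:3]:                 # top 3 memory-consuming lines
#         print(state.frame.filename, state.frame.line_number, state.cpu_gpu)

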
def stop_memory_tracing(
    memory_trace: Optional[MemoryTrace] = None, ignore_released_memory: bool = True
) -> Optional[MemorySummary]:
    """ Stop memory tracing cleanly and return a summary of the memory trace if a trace is given.

        Args:
            - `memory_trace` (optional output of start_memory_tracing, default: None): memory trace to convert in summary
            - `ignore_released_memory` (boolean, default: True): if True we only sum memory increase to compute total memory

        Return:
            - None if `memory_trace` is None
            - `MemorySummary` namedtuple otherwise with the fields:
                - `sequential`: a list of `MemoryState` namedtuple (see below) computed from the provided `memory_trace`
                    by subtracting the memory after executing each line from the memory before executing said line.
                - `cumulative`: a list of `MemoryState` namedtuple (see below) with cumulative increase in memory for each line
                    obtained by summing repeated memory increase for a line if it's executed several times.
                    The list is sorted from the frame with the largest memory consumption to the frame with the smallest (can be negative if memory is released)
                - `total`: total memory increase during the full tracing as a `Memory` named tuple (see below).
                    Lines with memory release (negative consumption) are ignored if `ignore_released_memory` is `True` (default).

        `Memory` named tuple has fields
            - `bytes` (integer): number of bytes,
            - `string` (string): same as human readable string (ex: "3.5MB")

        `Frame` is a namedtuple used to list the current frame state and has the following fields:
            - 'filename' (string): Name of the file currently executed
            - 'module' (string): Name of the module currently executed
            - 'line_number' (int): Number of the line currently executed
            - 'event' (string): Event that triggered the tracing (default will be "line")
            - 'line_text' (string): Text of the line in the python script

        `MemoryState` are namedtuples listing frame + CPU/GPU memory with the following fields:
            - `frame` (`Frame`): the current frame (see above)
            - `cpu`: CPU memory consumed during the current frame as a `Memory` named tuple
            - `gpu`: GPU memory consumed during the current frame as a `Memory` named tuple
            - `cpu_gpu`: CPU + GPU memory consumed during the current frame as a `Memory` named tuple
    """
    global _is_memory_tracing_enabled
    _is_memory_tracing_enabled = False

    if memory_trace is not None and len(memory_trace) > 1:
        memory_diff_trace = []
        cumulative_memory_dict = defaultdict(lambda: [0, 0, 0])

        # Sequential trace: memory consumed by a line is the difference between the state
        # recorded before that line and the state recorded before the next one.
        for (frame, cpu_mem, gpu_mem), (next_frame, next_cpu_mem, next_gpu_mem) in zip(
            memory_trace[:-1], memory_trace[1:]
        ):
            cpu_mem_inc = next_cpu_mem - cpu_mem
            gpu_mem_inc = next_gpu_mem - gpu_mem
            cpu_gpu_mem_inc = cpu_mem_inc + gpu_mem_inc
            memory_diff_trace.append(
                MemoryState(
                    frame=frame, cpu=Memory(cpu_mem_inc), gpu=Memory(gpu_mem_inc), cpu_gpu=Memory(cpu_gpu_mem_inc)
                )
            )
            cumulative_memory_dict[frame][0] += cpu_mem_inc
            cumulative_memory_dict[frame][1] += gpu_mem_inc
            cumulative_memory_dict[frame][2] += cpu_gpu_mem_inc

        # Cumulative trace: sum the increases per frame and sort from the largest to the
        # smallest total CPU + GPU memory increase.
        cumulative_memory = sorted(list(cumulative_memory_dict.items()), key=lambda x: x[1][2], reverse=True)
        cumulative_memory = list(
            MemoryState(
                frame=frame, cpu=Memory(cpu_mem_inc), gpu=Memory(gpu_mem_inc), cpu_gpu=Memory(cpu_gpu_mem_inc)
            )
            for frame, (cpu_mem_inc, gpu_mem_inc, cpu_gpu_mem_inc) in cumulative_memory
        )

        if ignore_released_memory:
            total_memory = sum(max(0, step_trace.cpu_gpu.bytes) for step_trace in memory_diff_trace)
        else:
            total_memory = sum(step_trace.cpu_gpu.bytes for step_trace in memory_diff_trace)
        total_memory = Memory(total_memory)

        return MemorySummary(sequential=memory_diff_trace, cumulative=cumulative_memory, total=total_memory)

    return None


def bytes_to_human_readable(memory_amount: int) -> str:
    """ Utility to convert a number of bytes (int) in a human readable string (with units)
    """
    for unit in ["B", "KB", "MB", "GB"]:
        if memory_amount > -1024.0 and memory_amount < 1024.0:
            return "{:.3f}{}".format(memory_amount, unit)
        memory_amount /= 1024.0
    return "{:.3f}TB".format(memory_amount)
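

# Quick illustration of bytes_to_human_readable (comment only; the input values are
# arbitrary examples, the outputs follow from the conversion loop above):
#
#     bytes_to_human_readable(1024)           -> "1.000KB"
#     bytes_to_human_readable(3_500_000)      -> "3.338MB"
#     bytes_to_human_readable(5 * 1024 ** 4)  -> "5.000TB"   (falls through to the TB case)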