U
    Kc%                     @   s   d dl Z d dlZd dlZd dlZd dlmZmZ dd aG dd dZddd	Z	d
d Z
dd Zdd Zdd Zdd Zg aeedddZdddZdS )    N)profileProfilerActivityc                   C   s   d S N r   r   r   B/tmp/pip-unpacked-wheel-gikjz4vx/functorch/_src/benchmark_utils.pysynchronize	   s    r   c                   @   s   e Zd Zdd Zdd ZdS )NullContextc                 C   s   d S r   r   )selfr   r   r   	__enter__   s    zNullContext.__enter__c                 C   s   d S r   r   )r	   exc_typeexc_valexc_tbr   r   r   __exit__   s    zNullContext.__exit__N)__name__
__module____qualname__r
   r   r   r   r   r   r      s   r      c	              
   C   s*  |dkrdg}|dgkr*t j r*t jja|dkr6i }|dkrBi }|j t d tdD ]}	| |f| t  qZt d t }
t|D ]}	| |f| t  qt }W 5 Q R X ||
 }tf d|i|F}|6 t  t d t|D ]}	| |f| t  qW 5 Q R X W 5 Q R X |	| |S )a0  
    Output the chrome trace of running f(input, **kwargs_for_f) with [optimize_ctx]
    [num_runs] times to [trace_filename].

    [activities] are the activities that the profiler will record, e.g. ProfilerActivity.CUDA.
    Return total runtime without the profiler

    Outputs to trace_filename
    Ncudacpui9     
activities)
torchr   Zis_availabler   Zmanual_seedrangetimeperf_counterr   Zexport_chrome_trace)finputZtrace_filenameoptimize_ctxr   num_runsdevicesZkwargs_for_fZkwargs_for_profiler_t0t1ZtimingZprofr   r   r   dump_chrome_trace   s:    



r#   c                 C   s   t | }t|}|d }|S )NZtraceEvents)openjsonload)filenamer   dataeventsr   r   r   get_chrome_trace_eventsF   s    
r*   c                 C   s(   d| ko&| d t ko&d| ko&| d dkS )NpidphX)gpu_pidseventr   r   r   is_gpu_compute_eventM   s    r1   c                 C   s2   g }| D ]}t |sq|| qt|dd dS )Nc                 S   s   | d S )Ntsr   )xr   r   r   <lambda>X       z'get_sorted_gpu_events.<locals>.<lambda>)key)r1   appendsorted)r)   sorted_gpu_eventsr0   r   r   r   get_sorted_gpu_eventsR   s    r:   c                 C   s   t | dkrdS | d }|d |d  }|d }| dd  D ]>}t|d |}|d |d  }|t|| d }t||}q<|S )Nr   r2   Zdurr   )lenmax)r9   r0   Zcurrent_end_timeZtotal_duration
start_timeZend_timer   r   r   get_duration[   s    r>   c                 C   s6   dd }t | }g }|D ]}||s&q|| q|S )Nc                 S   s8   d| ko6d| d kp6d| d kp6d| d kp6d| d kS )NnameZgemmconvZcutlassZwgradr   r/   r   r   r   is_mm_conv_eventj   s
     

z7get_sorted_gpu_mm_conv_events.<locals>.is_mm_conv_event)r:   r7   )r)   rA   Z
gpu_eventsZsorted_eventsr0   r   r   r   get_sorted_gpu_mm_conv_eventsi   s    rB   )r'   total_lengthc                 C   s   t | }g a|D ]8}d|krq|d dkrd|d d krt|d  q|d }t|}t|| }t|}t|| }||fS )a  
    Process the chrome traces outputs by the pytorch profiler to compute GPU Utilization
    and percent of times spent on matmal and convolution

    Args:
        filename(str): Name of chrome traces file produced by pytorch profiler

        total_length(float): total length of the process without profiler in second

    Return:
        tuple: (GPU Utilization, percent of time spent on matmal and convolution)
    r?   Zprocess_labelsZGPUargslabelsr+   g    .A)r*   r.   r7   r:   r>   rB   )r'   rC   r)   r0   r9   utilizationZsorted_gpu_mm_conv_eventsmm_conv_utilizationr   r   r   compute_utilizationy   s    rH   tmp_chrome_tracec              	   C   sv   t j|}|s&t | td|  |dkr4t }t j||d }t| |||tj	g|dd}t
||\}	}
|	|
fS )a  
    Benchmark the GPU Utilization and percent of time spent on matmal and convolution operations of
    running f(input, **kwargs_for_f) with [optimize_ctx] [num_runs] times.
    It will produce a chrome trace file in trace_folder/trace_file_name.json

    Example:

    ```
    def f(a):
        return a.sum()
    a = torch.rand(2**20, device="cuda")
    utilization, mm_conv_utilization = benchmark_utilization(f, a, "tmp", trace_file_name = "tmp_chrome_trace")
    ```

    Args:
        f: function to benchmark

        input: input to :attr:`f`

        trace_folder: name of the folder to store the chrome trace

        optimize_ctx: the context in which f will run

        trace_file_name: name of the dumped chrome trace file, default to "tmp_chrome_trace"

        num_runs: number of times to run f, excluding the warm-up runs, default to 1.

    Return:
        tuple: (GPU Utilization, percent of time spent on matmal and convolution)

    zcreate folder Nz.jsonr   )r   r   )ospathexistsmakedirsprintr   joinr#   r   ZCUDArH   )r   r   Ztrace_folderr   Ztrace_file_namer   ZisExistZchrome_trace_file_namerC   rF   rG   r   r   r   benchmark_utilization   s     

  rP   )r   NNN)NrI   r   )r   rJ   r%   r   Ztorch.profilerr   r   r   r   r#   r*   r1   r:   r>   rB   r.   strfloatrH   rP   r   r   r   r   <module>   s&         
1	"