U
    <c=9                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlZddlmZ ddlmZmZ ddlmZmZmZ d\ZZdZdZd	Zd
\ZZdZedediZ dZ!edediZ"edediedediededid	Z#ej$e%  Z&dddd	Z'ej(ej)ej*dZ+dd Z,dZ-dd Z.dd Z/dd Z0dd Z1d d! Z2d"d# Z3d$d% Z4e5d&d'd(Z6d4d*d+Z7d,d- Z8d.d/ Z9d0d1 Z:e;d2kre, Z<e<j=d1kre:e< e<j=d3krze/e< W n e>k
r   Y nX dS )5a  End-to-end example to test a PR for regressions:

$ python -m examples.end_to_end --pr 39850
$ python -m examples.end_to_end --pr 39967
$ python -m examples.end_to_end --pr 39744

NOTE:
  This example assumes that you have and environment prefixed with
  `ref_`, and another prefixed with `pr_` for the PR
  in question. (e.g. `ref_39850` and `pr_39850`).

  A helper script (examples/prepare_e2e.sh) is provided to build
  the required environments with the correct configuration.
    N)unary)TimerMeasurement)DictTupleList)main
subprocesszpr_{pr}zref_{pr})398503996739744)cpugpu             @   FT)int8float32float64)r   r   )r   r   r   c                  C   s   t  } | jdttd td | jdtd d | jddd | jd	tttftd
 | jdttt	fd d
 | jdtd d | jdtd d | jdtd d | 
 }|jd krtj |_|S )Nz--prr   )typedefaultchoicesz
--num_gpus)r   r   z--test_variance
store_true)actionz--DETAIL_context)r   r   r   z--DETAIL_devicez--DETAIL_envz--DETAIL_result_filez--DETAIL_seed)argparseArgumentParseradd_argumentstr_PR_LISTint_MAIN_SUBPROCESS_CPU_GPU
parse_argsnum_gpustorchcudaZdevice_count)parserargs r,   M/tmp/pip-unpacked-wheel-gikjz4vx/torch/utils/benchmark/examples/end_to_end.pyr&   R   s    
r&   zsource activate {source_env} && python -m examples.end_to_end --pr {pr} --DETAIL_context subprocess --DETAIL_device {device} --DETAIL_env {env} --DETAIL_result_file {result_file} --DETAIL_seed {seed}c              
      s   | dkr fdddD \}}}}t j d }|jd|d}|||g| }ttt d|jdt |d d	}	d
| d|	 ddfS | dkrdS | dkrt j d }|jd d d}
d|
 ddfS t	dd S )Nr
   c                    s   g | ]} | qS r,   r,   .0iparamsr,   r-   
<listcomp>r   s     z,construct_stmt_and_label.<locals>.<listcomp>)k0k1k2dimZrandom_valuer   )lowhigh   r   ztorch.topk(x, dim=z, k=)Ztopkr   )ztorch.std(x)Zstdr   r7   ztorch.sort(x, dim=sortz
Unknown PR)
nprandomZRandomStaterandintmaxr!   flooruniformlog2
ValueError)prr2   r4   r5   r6   r7   stateZtopk_dimZdim_sizekZsort_dimr,   r1   r-   construct_stmt_and_labelp   s    (rH   c                 C   s   | j }| jtk}t| jd}t| j D ]}t| }tj	|||d
t}t|D ]~\}\}}	}
||
d< t| j|
\}}t|||d| d| d| d| | jd}|jtd	}|	|
d
|_t| t|| qRq(W 5 Q R X d S )Nab)seeddtyper)   	dtype_str[z, seed=z] (z
), stmt = )stmtglobalslabeldescriptionenv)Zmin_run_time)tensor_parametersr2   )ZDETAIL_seedZDETAIL_devicer%   openZDETAIL_result_file_DTYPES_TO_TESTrE   _DTYPE_STR_TO_DTYPEr   ZUnaryOpFuzzerZtake_RUNS_PER_LOOP	enumeraterH   r   Z
DETAIL_envZblocked_autorange_MIN_RUN_SECmetadataprintpickledump)r+   rJ   r)   frL   rK   iteratorr0   ZtensorsrS   r2   rN   rP   Ztimermeasurementr,   r,   r-   subprocess_main   s8    
  ra   c                    s  i i i   }} j tjdtjdft j  t rdt< t jD ]}t	| qNt
j j|t<  fddttt D tt  }|t t||t< t j  t r dt< tt
 d }t
j||t<  fddttt D tt  }|t t||t< g }| D ]F}|D ]:}|| dd  D }	tdd	|	 d
d q4q,t  | D ]}
|
  qt| j d S )NrE   r   c                    s   g | ]}|d  j fqS )Ttest_variancer/   rJ   r+   envsfinished_countsrE   r,   r-   r3      s   z_main.<locals>.<listcomp>r   c                    s   g | ]}|d  j fqS )Frc   re   rf   r,   r-   r3      s   c              	   S   s0   g | ](\}}| d | dt | t|   qS )z: z / )
_NUM_LOOPS_REPLICATESr/   rG   vr,   r,   r-   r3      s   z
           )end)rE   _REF_ENV_TEMPLATEformat_PR_ENV_TEMPLATE_DEVICES_TO_TESTr%   ranger'   _AVAILABLE_GPUSputmultiprocessingdummyZPoolri   rj   imapmap_fnr$   r!   	cpu_countvaluesappenditemsr[   joincloseprocess_resultsrd   )r+   poolsZ	map_itersr0   ZtrialsZcpu_workersresultsZmap_iterrprogresspoolr,   rf   r-   _main   sH    


 r   c                    s   | sd S dd | D d   D ]*  dkr0q"t fddD s"tq"dd | D }t|d	krp| nd	d }fd
d| D }ttj| |d< |d< d d |d< tf |S )Nc                 S   s   g | ]}|  qS r,   )__getstate__r/   mr,   r,   r-   r3      s     zmerge.<locals>.<listcomp>r   )number_per_runtimesrZ   c                 3   s"   | ]}|  d    kV  qdS )r   Nr,   )r/   s)rG   statesr,   r-   	<genexpr>   s     zmerge.<locals>.<genexpr>c                 S   s   h | ]
}|j qS r,   r   r   r,   r,   r-   	<setcomp>   s     zmerge.<locals>.<setcomp>r   c                    s"   g | ]  fd d j D qS )c                    s   g | ]}| j   qS r,   r   )r/   t)r   nr,   r-   r3      s     z$merge.<locals>.<listcomp>.<listcomp>)r   )r/   )r   )r   r-   r3      s     r   r   rZ   )	keysallAssertionErrorlenpoplistitchainr   )ZmeasurementsZnumbers_per_runZmerged_stater   r,   )rG   r   r   r-   merge   s    r   c                    s6  i }| D ]`\\}}}|D ]N}|j |j|j||f}||g g g |jdrPdnd}|| | | qqdd | D }t  | D ],\}\}	}
t	dd |	|
fD r 
| q fdd| D }tt  d	t| d
 dd | D }dd | D }|r t|d| |r2t|d| d S )Nrefr   r   c                 S   s&   i | ]\}\}}|t |t |gqS r,   )r   )r/   keyZ
r_ref_listZ	r_pr_listr,   r,   r-   
<dictcomp>   s   
 z#process_results.<locals>.<dictcomp>c                 s   s   | ]}|d kp|j V  qd S N)Zhas_warnings)r/   r   r,   r,   r-   r      s     z"process_results.<locals>.<genexpr>c                    s   i | ]\}}| kr||qS r,   r,   rk   Zflagged_for_removalr,   r-   r      s    z samples were culled, z remainc                 S   s    g | ]\}}|d  r||fqS r   r,   rk   r,   r,   r-   r3      s      z#process_results.<locals>.<listcomp>c                 S   s    g | ]\}}|d  s||fqS r   r,   rk   r,   r,   r-   r3      s      ZCPUZGPU)rP   rQ   Znum_threads
setdefaultrR   
startswithr}   r~   setanyaddr[   r   construct_table)r   rd   Zpaired_resultsrJ   use_gpuZresult_batchr   r   indexr_refr_prZgpu_resultsZcpu_resultsr,   r   r-   r      s0    
r   c                 C   s`  d| d|rdnd d dd}td d	| d	d d	 td
d | D dd d} t| }tdd | D }tdd | D }|| | }dddg}t||||gD ]4\}}	t|dd|	dd|	t|  d dd qdd | dd D dd | t|d  d! t|d  d!  D B d"d | d#d$ D B }
| d% d& | t|d  d'  d& h}d(dd)d*dd+d,dd-d.dd-d/dd0d1d2 }tjd3d4\}}t|d5}|	| d6| d	 td	| d7 | D ]X\}\}}}t
||j|j |}|	| d	 ||
krt| ||krtd8 qtd9 W 5 Q R X ttd: td;|  d S )<Nz==  z (Variance Test)rn     (   =z(========================================
c                 s   s0   | ](\}\}}|||f|j |j  d  fV  qdS r   N)median)r/   r   r   r   r,   r,   r-   r     s   
z"construct_table.<locals>.<genexpr>c                 S   s   | d S )Nr:   r,   )r0   r,   r,   r-   <lambda>      z!construct_table.<locals>.<lambda>)r   c                 S   s   g | ]}|d  dkr|qS )r:   g?r,   r.   r,   r,   r-   r3     s      z#construct_table.<locals>.<listcomp>c                 S   s   g | ]}|d  dk r|qS )r:   gr,   r.   r,   r,   r-   r3     s      zImproved  (>5%):zRegressed (>5%):z
Within 5%:z<17>6z  (d   z>3.0fz%)c                 S   s   h | ]}|d  qS r   r,   r.   r,   r,   r-   r     s     z"construct_table.<locals>.<setcomp>      c                 S   s   h | ]}|d  qS r   r,   r.   r,   r,   r-   r     s     r:   r   c                 S   s   h | ]}|d  qS r   r,   r.   r,   r,   r-   r     s     ii   r      u-   Relative Δ     Absolute Δ      |      numel>8rK   z>14shape>10stepsZlayoutz>7ztask specific
z~==============================================================================================================================z.logsuffixwtz

z1
[First twenty omitted (these tend to be noisy) ]z...z/[Last twenty omitted (these tend to be noisy) ]a  
        steps:
            Indicates that `x` is sliced from a larger Tensor. For instance, if
            shape is [12, 4] and steps are [2, 1], then a larger Tensor of size
            [24, 4] was created, and then x = base_tensor[::2, ::1]. Omitted if
            all elements are ones.

        layout:
            Indicates that `x` is not contiguous due to permutation. Invoking
            `x.permute(layout)` (e.g. x.permute((2, 0, 1)) if layout = [2, 0, 1])
            would produce a Tensor with physical memory layout matching logical
            memory layout. (Though still not contiguous if `steps` contains
            non-one elements.)
        z
Complete results in: )ljustr[   sortedr   zipr!   tempfilemkstemprT   writerow_strr   textwrapdedent)r   Z
device_strrd   r   Zn_regressedZ
n_improvedZn_unchangedZlegendsZlegendcountZkeys_to_printZellipsis_afterZcolumn_labels_Zresult_log_filer^   r   r   r   rel_diffrowr,   r,   r-   r   	  sJ    "
.,$0

r   c                    sr  |j d  |j d } d }|d d } fddt|D }|d d }ttd	d
 t|t|D rldn|}d}|jdr|jd d ddd\}	}
|	 d|
d}n0|jdrn"|jdr|jd d dd}| d ddt	|d ddddd|dd d ddt fd dt|D d!d"td#d
 |D sVt|nddd"|ddd$| S )%Nr2   rS   r7   xZnumelc                    s   g | ]} d |  qS )Zx_step_r,   r.   r1   r,   r-   r3   J  s     zrow_str.<locals>.<listcomp>orderc                 s   s   | ]\}}||kV  qd S r   r,   )r/   r0   jr,   r,   r-   r   L  s     zrow_str.<locals>.<genexpr>rn   z
torch.topkztorch.topk(x, z, z<8z	torch.stdz
torch.sortztorch.sort(x, r   z>5.0fz%     g    .Az>11.1fz usr   |z>12z   rL   r   c                    s   g | ]} d |  qS )rG   r,   r.   r1   r,   r-   r3   X  s     z>17r   c                 s   s   | ]}|d kV  qdS r   r,   r.   r,   r,   r-   r   X  s     r   )
rZ   rt   r   r   r   rN   r   replacesplitabs)r   Zdiff_secondsr`   rS   r7   Zx_numelr   r   Ztask_specificZdim_strZk_strr,   r1   r-   r   D  s"    

( r   )result_filec              	   C   sN   g }t | d6}z|t| W q tk
r<   Y q@Y qX qW 5 Q R X |S )Nrb)rT   r}   r\   loadEOFError)r   outputr^   r,   r,   r-   read_resultsc  s    r   rn   c                 C   s&   t j| t|tdddt jddS )NPATHrn   )ZCUDA_VISIBLE_DEVICESr   T)rR   stdoutshell)r	   runr   osgetenvPIPEcmdZcuda_visible_devicesr,   r,   r-   r   n  s    
r   c                 C   s6   | D ],}t d| }|jdkrtd| dqdS )zEnsure that subprocesszsource activate r   zFailed to source environment ``N)r   
returncoderD   )rg   rR   resultr,   r,   r-   test_sourcez  s    
r   c              	   C   s   | \}}}}}}|rt  nd }zztjdd\}	}|D ]B}
t	j
|rL|d n|
|
||rZtnt||d}t||rr|ndd q8||rtnt  d7  < ||ft|fW W S  tk
r   Y nX W 5 |d k	rt | tj|rt| X d S )Nz.pklr   r   )Z
source_envrR   rE   Zdevicer   rJ   rn   r   r   )ru   getrv   r   pathexistsremover   r   _SUBPROCESS_CMD_TEMPLATErq   r%   r$   r   r   KeyboardInterrupt)r+   rJ   rg   rE   r   rh   rd   r   r   r   rR   r   r,   r,   r-   rz     s,      
 

rz   c                 C   s,   t tj| jdtj| jdg t|  d S )Nrb   )r   rp   rq   rE   rr   r   )r+   r,   r,   r-   r     s
    r   __main__r	   )rn   )?__doc__r   	itertoolsr   rw   Zmultiprocessing.dummyr   r\   queuer	   r   r   Znumpyr=   r(   Z torch.utils.benchmark.op_fuzzersr   Ztorch.utils.benchmarkr   r   typingr   r   r   r"   r#   rr   rp   r    r$   r%   rY   rj   rW   ri   rs   Queuer!   ru   rU   r   r   r   rV   r&   r   rH   ra   r   r   r   r   r   r   r   r   r   rz   r   __name__r+   ZDETAIL_contextr   r,   r,   r,   r-   <module>   s       


.#;

