
    BvhaO                     @   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
m
Z
 d dlmZ d dlZd dlZd dlZd dlmZmZ e	 G d d             Ze	 G d d	             Z	 	 	 dd
Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d Z!d Z"e#dk(  r
dZ$ e"        yy)    N)	dataclass)datetime)Path)generate_test_dataget_bert_inputsc                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   eed<   eed<   y)TestSetting
batch_sizesequence_length
test_cases
test_timesuse_gpuuse_io_bindingproviderintra_op_num_threadsseedverboselog_severityaverage_sequence_lengthrandom_sequence_lengthN)__name__
__module____qualname__int__annotations__boolstr     Q/RAG/venv/lib/python3.12/site-packages/onnxruntime/transformers/bert_perf_test.pyr	   r	   !   sI    OOOMM
IM    r   r	   c                   j    e Zd ZU eed<   eed<   eed<   eed<   eed<   edz  ed<   edz  ed<   eed	<   y)
ModelSetting
model_pathinput_ids_namesegment_ids_nameinput_mask_name	opt_levelNinput_tuning_resultsoutput_tuning_results	mask_type)r   r   r   r   r   r   r   r   r    r"   r"   2   s7    ON*$:%Nr   r"   c                    dd l }|j                  |       |rd|j                         vrt        d       |r7|dk(  rddg}n0|dk(  rddg}n&|d	k(  rg d
}n|dk(  rddg}n|dk(  rg d}nddg}ndg}|j	                         }	||	_        |j                  j                  |	_        ||j                  j                  |	_        n|dk(  r|j                  j                  |	_        nj|dk(  r|j                  j                  |	_        nI|dk(  r|j                  j                  |	_        n(|dk(  r|j                  j                  |	_        n||	_        |||	_        |j!                  | |	|      }
|r|dk(  rd|
j#                         v sJ |dk(  rd|
j#                         v sJ |d	k(  r(d|
j#                         v sJ d|
j#                         v spJ |dk(  rd|
j#                         v sWJ |dk(  r(d|
j#                         v sJ d|
j#                         v s*J d|
j#                         v sJ d|
j#                         v sJ |:t%        |      5 }|
j'                  t)        j*                  |             d d d        |
S |
S # 1 sw Y   |
S xY w)Nr   CUDAExecutionProviderzWarning: Please install onnxruntime-gpu package instead of onnxruntime, and use a machine with GPU for testing gpu performance.dmlDmlExecutionProviderCPUExecutionProviderrocmROCMExecutionProvidermigraphx)MIGraphXExecutionProviderr1   r/   cudatensorrt)TensorrtExecutionProviderr,   r/         c   )	providersr3   r6   )onnxruntimeset_default_logger_severityget_available_providersprintSessionOptionslog_severity_levelExecutionModeORT_SEQUENTIALexecution_modeGraphOptimizationLevelORT_ENABLE_ALLgraph_optimization_levelORT_DISABLE_ALLORT_ENABLE_BASICORT_ENABLE_EXTENDEDr   InferenceSessionget_providersopenset_tuning_resultsjsonload)r#   r   r   r   rF   r   tuning_results_pathr;   execution_providerssess_optionssessionfs               r    create_sessionrU   >   s    ++L9+;3V3V3XX N	
 u#9;Q"R#:<R"S##
 #:<R"S## $;<R"S56--/L&2L#"-";";"J"JL'0;0R0R0a0a-	!Q	&0;0R0R0b0b-	!Q	&0;0R0R0c0c-	!Q	&0;0R0R0f0f-	!R	'0;0R0R0a0a-0H-',@)**:|Ob*cGu)W-B-B-DDDD*g.C.C.EEEE#.'2G2G2IIII*g.C.C.EEEE*g.C.C.EEEE#.'2G2G2IIII*g.C.C.EEEE*g.C.C.EEEE%)>)>)@@@@&%&!&&tyy|4 ' N7N ' Ns   %I66J c                     t         j                  t        j                  t         j                  t        j                  t         j                  t        j                  t         j
                  t        j                  i}||    S N)torchfloat32npfloat16int32int64longlong)
torch_typetype_maps     r    
numpy_typera      sJ    rzzrzzRXXR[[	H Jr   c                 6   | j                         D ci c]*  \  }}|t        j                  |      j                  |      , }}}|j                         D ci c]*  \  }}|t        j                  |      j                  |      , }}}||fS c c}}w c c}}w rW   )itemsrX   
from_numpyto)inputsoutputsdevicenamearrayinput_tensorsoutput_tensorss          r    create_input_output_tensorsrm      s    QWQ]Q]Q_`Q_+$T5++E255f==Q_M`RYR_R_RabRa;4dE,,U366v>>RaNb.(( abs   /B/Bc           
         | j                         }|j                         D ]Z  \  }}|j                  ||j                  j                  dt        |j                        |j                  |j                                \ |j                         D ]Z  \  }}|j                  ||j                  j                  dt        |j                        |j                  |j                                \ |S Nr   )

io_bindingrc   
bind_inputrh   typera   dtypeshapedata_ptrbind_output)sessrk   rl   rp   ri   tensors         r    create_io_bindingry      s    "J%++-fMMv||$LLOO	
 . ',,.fMMv||$LLOO	
 / r   c                    g }g }|j                   rdnd}t        |      D ]  \  }}| j                  ||      }	|j                  |	       i }
t	        t        |            D ]  }|	|   |
||   <    t        ||
|      \  }}t        | ||      }| j                  |       t        j                         }| j                  |       t        j                         |z
  }|j                  |        ||fS )Nr4   cpu)r   	enumeraterunappendrangelenrm   ry   run_with_iobindingtimeitdefault_timer)rS   
all_inputsoutput_namestest_settingresultslatency_listrh   _test_case_idrf   resultrg   irk   rl   rp   
start_timelatencys                    r    %onnxruntime_inference_with_io_bindingr      s    GL#++VF!*:!6v\62vs<()A'-ayGLO$ * )DFGU[(\%~&w~N
 	"":.))+
"":.&&(:5G$! "7$ L  r   c                 \   t        |      dkD  r%| j                  |t        j                  |             g }g }t	        |      D ]d  \  }}t        j                         }| j                  ||      }t        j                         |z
  }	|j                  |       |j                  |	       f ||fS ro   )r   r}   randomchoicer|   r   r   r~   )
rS   r   r   r   r   r   rf   r   r   r   s
             r    onnxruntime_inferencer      s    
:L&--
";<GL!*:!6v))+
\62&&(:5vG$ "7 L  r   c                    |j                         }dt        j                  j                  |        d}|d|j                   d|j
                   dj                  dd      z  }|d|j                   d|j                   dz  }|d	|j                   d
|j                   dz  }|d|j                   d|j                   dz  }|d|j                   dz  }|d|j                   z  }|S )Nzmodel=,zgraph_optimization_level=z,intra_op_num_threads=zGraphOptimizationLevel.ORT_ zbatch_size=z,sequence_length=ztest_cases=z,test_times=zuse_gpu=z,use_io_binding=zaverage_sequence_length=zrandom_sequence_length=)get_session_optionsospathbasenamerF   r   replacer
   r   r   r   r   r   r   r   )r#   rS   r   rR   options        r    	to_stringr      s9   ..0Lbgg&&z2315F
),*O*O)PPfgs  hI  hI  gJ  JK  L  T  T%r F L3344ElFbFbEccdeeF
L334LAXAX@YYZ[[F
--..>|?Z?Z>[[\]]F
()M)M(NaPPF
'(K(K'LMMFMr   c           	      $   t        | j                  |j                  |j                  || j                  |j
                  | j                        }|j                         D cg c]  }|j                   }}t        | j                  ||      }||v rt        d|       y t        d|       g }	|j                  r=t        |j                        D ]$  }
t        ||||      \  }}|	j                  |       & n;t        |j                        D ]#  }
t!        |||      \  }}|	j                  |       % t#        j$                  |	      dz  }t'        j(                  |      }t#        j*                  |d      }t#        j*                  |d      }t#        j*                  |d      }t#        j*                  |d      }t#        j*                  |d	      }|j,                  d
|z  z  }|||||||f||<   t        dj/                  t/        |d      t/        |d                   | j0                  rt2        j4                  j7                  | j0                        }t2        j4                  j9                  |      rL|}|j;                  dd      d    dt=        j>                         jA                          d}t        d|d|d       |jC                         }tE        |d      5 }tG        jH                  ||       d d d        t        d|       y y c c}w # 1 sw Y   xY w)N)r   rP   zskip duplicated test:zRunning test:  2   K   Z   _   r9   g     @@z,Average latency = {} ms, Throughput = {} QPS.2fz.jsonr7   r   .zWARNING:zexists, will write tozinstead.wzTuning results is saved to)%rU   r#   r   r   r'   r   r(   get_outputsri   r   r>   r   r   r   r   extendr   rZ   rj   
statisticsmean
percentiler
   formatr)   r   r   abspathexistsrsplitr   now	timestampget_tuning_resultsrL   rN   dump)model_settingr   perf_resultsr   r   rS   outputr   keyall_latency_list_ir   r   
latency_msaverage_latency
latency_50
latency_75
latency_90
latency_95
latency_99
throughputoutput_pathold_output_pathtrsrT   s                            r    run_one_testr      s     !..)>>G /6.A.A.CD.CFFKK.CLD
M,,g|
DC
l%s+	/3""//0B$I\<%!G\ ##L1	 1 //0B$9':|$\!G\##L1 1
 *+d2J ooj1Oz2.Jz2.Jz2.Jz2.Jz2.J((F_,DEJ 	L 
6==f_V[>\^deoqv^wx **ggoom&I&IJ77>>+&)O*11'1=a@A8<<>C[C[C]B^^cfK*o/FU_`((*+s#qIIc1 $*K8 +] El $#s   !LLLc                     t        j                  t        | ||||f      }|j                          |j	                          y )N)targetargs)multiprocessingProcessr   startjoin)r   r   r   r   r   processs         r    launch_testr   ;  s=    %% 
	G MMOLLNr   c                 t   |j                   t        | ||||j                          y t        j                  d      }t        j                  d      }t	        ||h      }t        dt        d|            D ]  }||vs|j                  |        |j                  d       |D ]  }t        | ||||        y )NF)logicalTr7      )reverse)	r   r   psutil	cpu_countlistr   minr~   sort)	r   r   r   r   r   logical_corescandidate_threadsr   r   s	            r    run_perf_testsr   J  s    ((4--	
 	  /I$$T2MmY781c"m,-%%$$Q' . 4( 1M<zK_` !2r   c                    t        | j                  | j                  | j                  | j                        \  }}}t        d|j                   d|j                   d|j                          t        |j                  |j                  |j                  |j                  |j                  ||||j                  |j                  | j                        }t        | |||       y )NzGenerating z samples for batch_size=z sequence_length=)r*   )r   r#   r$   r%   r&   r>   r   r
   r   r   r   r   r   r   r*   r   )r   r   r   	input_idssegment_ids
input_maskr   s          r    run_performancer   b  s    )8  $$&&%%	*&I{J 

l--..F|G^G^F__pq}  rN  rN  qO  	P $$$,,++))J =,jIr   c                     t        j                         } | j                  ddt        d       | j                  dddt        dd	       | j                  d
ddt        d       | j                  ddt        dd       | j                  dddt        dd       | j                  ddt        g ddd       | j                  ddt        dd       | j                  dddd !       | j                  d"       | j                  d#dt        d$g d%d&'       | j                  d(ddd)!       | j                  d*       | j                  d+ddd,!       | j                  d-       | j                  d.dt        d d/       | j                  d0d1dt        d d2       | j                  d3dt        d d4       | j                  d5dt        d d6       | j                  d7dt        d d8       | j                  d9d t        d:;       | j                  d<d t        d=;       | j                  d>d?d@t        dA;       | j                  dBdCdddD!       | j                  dE       | j                  dFdt        d$dG       | j                         }|S )HNz--modelTzbert onnx model path)requiredrr   helpz-bz--batch_size+zKbatch size of input. Allow one or multiple values in the range of [1, 128].)r   rr   nargsr   z-sz--sequence_lengthz maximum sequence length of inputz	--samplesF
   z!number of samples to be generated)r   rr   defaultr   z-tz--test_timesr   zJnumber of times to run per sample. By default, the value is 1000 / samplesz--opt_level)r   r7   r8   r9   r9   zZonnxruntime optimization level: 0 - disable all, 1 - basic, 2 - extended, 99 - enable all.)r   rr   choicesr   r   z--seed   zPrandom seed. Use the same seed to make sure test data is same in multiple tests.z	--verbose
store_truezprint verbose information)r   actionr   )r   z--log_severityr8   )r   r7   r8   r      z.0:Verbose, 1:Info, 2:Warning, 3:Error, 4:Fatal)r   rr   r   r   r   z	--use_gpuzuse GPU)r   z--use_io_bindingzuse io_binding)r   z
--providerzExecution provider to usez-nz--intra_op_num_threadsz>=0, set intra_op_num_threadsz--input_ids_namezinput name for input idsz--segment_ids_namezinput name for segment idsz--input_mask_namezinput name for attention maskz--input_tuning_resultsz3tuning results (json) to be loaded before benchmark)r   rr   r   z--output_tuning_resultsz1tuning results (json) to be saved after benchmarkz-az--average_sequence_lengthz)average sequence length excluding paddingz-rz--random_sequence_lengthz3use uniform random instead of fixed sequence length)r   z--mask_typezmmask type: (1: mask index or sequence length, 2: raw 2D mask, 3: key len, cumulated lengths of query and key))argparseArgumentParseradd_argumentr   r   set_defaults
parse_args)parserr   s     r    parse_argumentsr     s=   $$&F
	DsAWX
Z   /   0   Y   i   _   (	   &
=   eLyY
&
*U<Vfg
u-
(    ,   '   )   ,    B	   !@	   #8   "B   u5
|   DKr   c                     t               } | j                  dk(  r't        dt        d| j                  z              | _        | j
                  dk  r| j                  | _        t        j                         }|j                         }t        | j                        }t        |      dk\  rt        |      dk  st        d      t        | j                  | j                   | j"                  | j$                  | j&                  | j(                  | j*                  | j,                        }|D ]  }t/        || j                  | j                  | j                  | j0                  | j2                  | j4                  | j6                  | j8                  | j:                  | j<                  | j
                  | j>                        }tA        d|       tC        |||        tE        |jG                         dd 	      }tH        jJ                  jM                  tO        | j                        jP                  d
jS                  | j0                  rdnddjM                  tE        |      D cg c]  }tU        |       c}      | j                  tW        jX                         j[                  d                  }	t]        |	dd      5 }
t_        j`                  |
dd      }d }|D ]  \  }}|jc                  d      }|Fg d}|je                  |D cg c]  }|jc                  d      d    c}       |jg                  |       |D cg c]  }tS        |d       }}|je                  |D cg c]  }|jc                  d      d    c}       |jg                  |        	 d d d        tA        d|	       y c c}w c c}w c c}w c c}w # 1 sw Y   *xY w)Nr   r7   r      z batch_size not in range [1, 128]ztest settingFc                     | d   S )Nr7   r   )xs    r    <lambda>zmain.<locals>.<lambda>Q  s	    qQRtr   )r   r   zperf_results_{}_B{}_S{}_{}.txtGPUCPU-z%Y%m%d-%H%M%Szw+r   )newline	
)	delimiterlineterminatorr   )zLatency(ms)Latency_P50Latency_P75Latency_P90Latency_P95Latency_P99zThroughput(QPS)=r   zTest summary is saved to)4r   r   maxr   samplesr   r   r   Managerdictsetr
   r   	Exceptionr"   modelr$   r%   r&   r'   r(   r)   r*   r	   r   r   r   r   r   r   r   r   r>   r   sortedrc   r   r   r   r   parentr   r   r   r   strftimerL   csvwritersplitr   writerow)r   managerr   batch_size_setr   r
   r   sorted_resultsr   summary_filetsv_file
tsv_writerheadersr   perf_resultparamsvaluess                    r    mainr  !  s   D!aTDLL%8!9:##q('+';';$%%'G<<>L)N1$^)<)C:;; 

!!""	M %
"  LLOOLLMM%%IILL((''
  	nl+|\B% %* L..0%^TN77<<TZZ(//\\EuHHf^&<=&<c!f&<=>  LLN##O4		
L 
lD"	-ZZDN
 .CYYs^F @AQ@A##G,0;<1fQ&F<MMF;Fq1773<?F;<'# !/ 
., 

$l37 >(  A =;' 
.	-s=   N#4A	N7=N(N74N-N7N23N7(N77O __main__)Nr8   N)%r   r  rN   r   r   r   r   r   dataclassesr   r   pathlibr   numpyrZ   r   rX   bert_test_datar   r   r	   r"   rU   ra   rm   ry   r   r   r   r   r   r   r   r   r  r   __spec__r   r   r    <module>r%     s     
   	    !      > ! ! !      "Vr ).!2! B9Ja0J:_DQ4h zHF	 r   