
    BvhT                        d Z ddlZddlZddlZddlZddlmZ ddlmZ ddlZddl	m
Z
 ddlmZ ddlmZmZmZ ddlmZmZmZ dd	lmZ  G d
 d      Zd"dedefdZd"dedefdZ G d d      ZdefdZdefdZ	 d"dej:                  dej<                  dz  fdZd Z d Z!e"dk(  r e!       Z# e$de#        e#jJ                  e#jL                  dk(  rdnde#_%        e#jN                  r>ejP                  jS                         sJ e#jT                  dk(  r/d  e       v sJ d!e#_+        ne#jX                  rJ e#jV                  rJ e#jX                  se#jV                  r	 ee#       y e e#       yy)#z]
Benchmark performance of SAM2 encoder with ORT or PyTorch. See benchmark_sam2.sh for usage.
    N)Mapping)datetime)SAM2ImageDecoder)SAM2ImageEncoder)decoder_shape_dictencoder_shape_dictload_sam2_model)InferenceSessionSessionOptionsget_available_providers)CudaSessionc            +          e Zd Zddddddddddddej                  dddddd	dfd
edededej                  dededededededededededededededededef*dZ	d  Z
d!eeee   f   fd"Zd!eeej                  f   fd#Zy$)%
TestConfigimage_encoderCPUExecutionProvidermax-autotune      FT     
model_type	onnx_pathsam2_dirdevice	component
batch_sizeheightwidth
num_labels
num_points	num_masksmulti_mask_outputuse_tf32enable_cuda_graphprefer_nhwcwarm_upenable_nvtx_profileenable_ort_profileenable_torch_profilerepeatsverbosec                    |dv sJ |	dk\  r|	dk  sJ |
dk\  r|
dk  sJ || _         || _        || _        || _        || _        || _        || _        |	| _        |
| _        || _	        || _
        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        | j                  dk(  r&| j                  dk(  r| j                  dk(  sJ d       y y )Nsam2_hiera_tinysam2_hiera_smallsam2_hiera_largesam2_hiera_base_plus   i   r   r   z7Only image size 1024x1024 is allowed for image encoder.)r   r   r   r   providertorch_compile_moder   r   r   r   r    r!   r"   r   r#   r$   dtyper%   r&   r'   r(   r)   r*   r+   )selfr   r   r   r   r   r3   r4   r   r   r   r   r    r!   r"   r#   r$   r5   r%   r&   r'   r(   r)   r*   r+   s                            ]/RAG/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/sam2/benchmark_sam2.py__init__zTestConfig.__init__   s   6 pppp}4//|--$" " "4$
$$"!2 !2
&#6 "4$8!>>_,;;$&4::+=x?xx=+= -    c                     t        |        S N)varsr6   s    r7   __repr__zTestConfig.__repr__V   s    t*r9   returnc                     | j                   dk(  r+t        | j                  | j                  | j                        S t        | j                  | j                  | j                  | j                  | j                        S )Nr   )	r   r   r   r   r   r   r   r    r!   r=   s    r7   
shape_dictzTestConfig.shape_dictY   sV    >>_,%doot{{DJJOO%dkk4::tPTP_P_aeaoaoppr9   c                    | j                   }| j                  dk(  rEdt        j                  | j                  d| j
                  | j                  || j                        iS t        j                  dddd|| j                        t        j                  ddd	d	|| j                        t        j                  dddd|| j                        t        j                  d
d| j                  | j                  df|| j                        t        j                  d
d| j                  | j                  ft        j                  | j                        t        j                  | j                  ddd|| j                        t        j                  | j                  || j                        t        j                  | j
                  | j                  gt        j                  | j                        dS )Nr   image   )r5   r   r          @      r   r      )image_features_0image_features_1image_embeddingspoint_coordspoint_labelsinput_maskshas_input_masksoriginal_image_size)r5   r   torchrandnr   r   r   r   randrandintr   r    int32zerosonestensor)r6   r5   s     r7   random_inputszTestConfig.random_inputs_   ss   

>>_,U[[!T[[$**\ajnjujuvww %*JJq"c3eTXT_T_$`$)JJq"c3eTXT_T_$`$)JJq#r2USWS^S^$_ %tdootB%X\XcXc! !&q4??DOO<EKKX\XcXc!  %{{4??AsCu]a]h]hi#(::dooUSWS^S^#_',||T[[$**4MUZU`U`imitit'u r9   N)__name__
__module____qualname__rR   float32strr   intboolr8   r>   r   listrA   TensorrZ    r9   r7   r   r      sn    )')"'"'mm!$)#(%*39y9y 9y 	9y
 9y 9y 9y 9y 9y 9y 9y 9y  9y  !9y"  #9y& '9y( )9y* "+9y, !-9y. #/9y0 19y2 39yvqGCcN3 qwsELL'89 r9   r   configr?   c                    | j                   rt        dt        |               | j                  dk(  rt	        | j
                  t              rt        j                  j                         n| j
                  j                  }t        j                  || j                        }t        | j                        |d<   | j                   rd|d<   | j                  |fdg}ndg}t#        | j$                  ||      }|S )Nzcreate session for CUDAExecutionProviderr#   r   r%   r   )	providers)r+   printr<   r3   
isinstancer   r_   rR   cudacurrent_deviceindexr   get_cuda_provider_optionsr$   r`   r#   r%   r
   r   )re   session_options	device_idprovider_optionsrh   ort_sessions         r7   create_ort_sessionrs   t   s    ~~#DL>23113=fmmS3QEJJ--/W]WdWdWjWj	&@@FLdLde'*6??';$./]+oo'78:PQ	+,	"6#3#3_PYZKr9   c                     t        | |      }t        || j                  | j                        }|j	                  | j                                |S r;   )rs   r   r   r$   allocate_buffersrA   )re   ro   rr   cuda_sessions       r7   create_sessionrw      sC    $V_=K{FMM6;S;STL!!&"3"3"56r9   c                   $    e Zd ZdZddefdZd Zy)OrtTestSessionz;A wrapper of ORT session to test relevance and performance.Nre   c                 P    t        ||      | _        |j                         | _        y r;   )rw   rr   rZ   	feed_dict)r6   re   ro   s      r7   r8   zOrtTestSession.__init__   s!    )&/B--/r9   c                 L    | j                   j                  | j                        S r;   )rr   inferr{   r=   s    r7   r}   zOrtTestSession.infer   s    %%dnn55r9   r;   )r[   r\   r]   __doc__r   r8   r}   rd   r9   r7   ry   ry      s    E0z 06r9   ry   rv   c                 ~    t        j                          }| j                  |      }t        j                          }||z
  S r;   )timer}   )rv   
input_dictstart_ends        r7   measure_latencyr      s2    IIKE:&A
))+C;r9   c                    | j                   j                  }|dk(  }|rt        j                  j	                  d      j
                  dk\  rT| j                  rHdt        j                  j                  j                  _	        dt        j                  j                  _	        |xr | j                  t        j                  k7  }| j                         }t        j                         5  t        j                  || j                  |      5  t!        | j"                  | j$                  | j                         }| j&                  dk(  r|rU| j(                  dk7  rFt        j*                  |j,                  j.                  | j(                  dd	
      |j,                  _        | j1                         d   }t        j2                  |      j5                  | j                   | j                        }t7        |      }|r(| j(                  dk7  rt9        d| j(                   d       t;        | j<                        D ]  }	 ||      \  }
}} |re| j>                  rYdd l }ddlm!} |jE                          t9        d       |jG                  d      5   ||d       d d d        |jI                          |r| jJ                  rt        jL                  jO                  t        jL                  jP                  jR                  t        jL                  jP                  jT                  gd      5 }t9        d       t        jL                  jW                  d      5   ||       d d d        d d d        t9        jY                         j[                  dd             |j]                  d       | j^                  dk(  r	 d d d        d d d        y t9        d| j^                   d       ta        j`                         }t;        | j^                        D ]/  }	 ||      \  }
}}|st        j                  jc                          1 n|d   |d   |d   |d   |d    |d!   |d"   |d#   f}te        || jf                  $      }|rA| j(                  dk7  r2t        j*                  |j.                  | j(                  dd	
      |_        t;        | j<                        D ]  }	 || \  }}} |rc| j>                  rWdd l }ddlm!} |jE                          t9        d%       |jG                  d      5   ||d&di d d d        |jI                          |r| jJ                  rt        jL                  jO                  t        jL                  jP                  jR                  t        jL                  jP                  jT                  gd      5 }t9        d'       t        jL                  jW                  d(      5   ||  d d d        d d d        t9        jY                         j[                  dd             |j]                  d)       | j^                  dk(  r	 d d d        d d d        y t9        d| j^                   d       ta        j`                         }t;        | j^                        D ],  }	 || \  }}}|st        j                  jc                          . ta        j`                         }||z
  | j^                  z  cd d d        cd d d        S # 1 sw Y   dxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   KxY w# 1 sw Y   PxY w# 1 sw Y   nxY wd d d        y # 1 sw Y   y xY w)*Nrk   r      T)device_typer5   enabled)r   r   noneF)mode	fullgraphdynamicrC   )r   r5   zBRunning warm up. It will take a while since torch compile mode is .cudartz#Start nvtx profiling on encoder ...one_run)r'   )
activitiesrecord_shapesz$Start torch profiling on encoder ...encodercuda_time_total
   )sort_by	row_limitztorch_image_encoder.jsonzStart z runs of performance tests...rJ   rK   rL   rM   rN   rO   rP   rQ   )multimask_outputz"Start nvtx profiling on decoder...r'   z$Start torch profiling on decoder ...decoderztorch_image_decoder.json)4r   typerR   rk   get_device_propertiesmajorr#   backendsmatmul
allow_tf32cudnnr5   r^   rZ   inference_modeautocastr	   r   r   r   r4   compiler   forwardrA   rS   tor   ri   ranger&   r'   nvtxr   cudaProfilerStartannotatecudaProfilerStopr)   profilerprofileProfilerActivityCPUCUDArecord_functionkey_averagestableexport_chrome_tracer*   r   synchronizer   r"   )re   r   is_cudaenabled_auto_cast
ort_inputs
sam2_modelimage_shapeimgsam2_encoderr   _image_features_0_image_features_1_image_embeddingsr   r   profr   torch_inputssam2_decoder_masks_iou_predictions_low_res_masksr   s                          r7   	run_torchr      s   --$$KV#G 5::33A6<<Afoo04""-*.'AFLLEMM$A%%'J				Kv||ev!w$V__f6G6GPVP]P]^
.644>38==,,4422"!	4
((0 !++-g6K++k*--V]]&,,-WC+J7L644>Z[a[t[tZuuvwx6>>*JVWZJ[G!#46G + 655'((*;<]]9- $? .'')666^^++ % ? ? C CU^^EdEdEiEij"& ,  @A77	B$S) C d'')//8IUW/XY(()CD~~"W "x		Z F6>>**GHIIIKE6>>*JVWZJ[G!#46GJJ**, + -.-.-.>*>*=),-01	L ,!'!9!9L
 644>',}} ((22"!	($ 6>>*;G;V8(. + 655'((*:;]]9- ,IDI .'')666^^++ % ? ? C CU^^EdEdEiEij"& ,  @A77	B$l3 C d'')//8IUW/XY(()CD~~"M "x		P F6>>**GHIIIKE6>>*;G;V8(.JJ**, +
 iikev~~-a "x!w		6 .- CB l .- CB w "x!w			s   ##]F ][3B]+\		\ 	\A]-]>A]D]	\ B]-+\4\'	\4&A]9]
A]A] 	]3[=8] \
\\]\$]'\1,\44\>9]]
	]]args
csv_writerc                 b	   | j                   }| j                  }| j                  }|r7t        j                  j                         }t        j                  d|      }d}nd}t        j                  d      }d}d}t        j                  t        j                  t        j                  d}t        d+i d| j                  d	| j                  d
| j                  d| j                  d|d| j                  d| j                   d| j"                  d|ddd|d|| j$                     d| j&                  d| j                  d| j(                  d| j*                  d| j,                  d| j.                  d| j0                  dd}	| j2                  dk(  rSt5               }
| j6                  |
_        |	j,                  rd|
_        d|
_        d|
_        t?        |	|
      }|	jA                         }	 tC        |	j(                        D ]  }tE        ||      } 	 |	j*                  rUdd l%}dd!lm&} |jO                          |jQ                  d"      5  |jS                  |      }d d d        |jU                          |	j,                  r|jV                  jY                          |dk(  ry g }tC        |      D ]  }tE        ||      }|j[                  |       ! t]        j^                  |      }~n0t        j`                         5  	 tc        |	      }	 d d d        |dk(  ry | j2                  d#z   |rdndz   }i d| j                  d| j                  d| j$                  d$|d|d|	j&                  d|	jd                  d| j                  d| j                   d| j"                  d%| jf                  d&|	jh                  d'|	jj                  d(|	jl                  d)| j6                  d|	j(                  d|| j*                  | j0                  |d*}||jo                  |       tI        tq        |	              tI        |        y # tF        $ r}tI        d|	d |        Y d }~y d }~ww xY w# 1 sw Y   
xY w# tF        $ r#}tI        d|	d |        Y d }~d d d        y d }~ww xY w# 1 sw Y   xY w),Nrk   rg   r   cpuFr   fp32fp16bf16r   r   r   r   r3   r   r   r   r   r#   Tr$   r5   r%   r*   r&   r'   r(   r)   r4   r+   ort   zFailed to run config=z. Exception: r   r   :use_gpur"   r   r    r!   intra_op_num_threads)r'   r4   engineaverage_latencyrd   )9r   use_cuda_graphr*   rR   rk   rl   r   r^   float16bfloat16r   r   r   r   r   r   r   r   r5   r%   r&   r'   r(   r)   r4   r   r   r   enable_profilinglog_severity_levellog_verbosity_levelrw   rZ   r   r   	Exceptionri   r   r   r   r   r}   r   rr   end_profilingappend
statisticsmeanno_gradr   r#   r   r   r    r!   writerowr<   )r   r   r   r$   r*   rp   r   r3   dtypesre   sess_optionssessionr   r   er   r   latency_listlatencyr   r   rows                         r7   run_testr     s    LLG"11<<GJJ--/	fi0*	e$!)mmU]]ENNSF ??..  ..	
  ?? {{ jj   , TZZ  $$    !44!"  22#$ "66%&  22'( )F. {{e%',0,E,E)$$,0L)./L+/0L, 6))+
	6>>*#GZ8 + %%#$$&y)MM*- *##%$$--/a<wA%gz:G(   %//,7]]_"+F"3  a<[[3G&?FdooT^^ 	 	7	
 	. 	v)) 	FOO 	doo 	$++ 	 	T22 	f'' 	f'' 	V%% 	 9 9  	6>>!" 	7#$  $77"55*+C0 C 	T&\N	SEOE  	*6)=<=	 *)*  .vi}QC@A _ _sT   0&Q Q(>R$ Q5	Q%
Q  Q%(Q25	R!>RR$R!!R$$R.c                 T   | j                   rdnd}dj                  || j                  t        j                         j                  d            }t        |dd      5 }g d}t        j                  ||	      }|j                          t        | |       d d d        y # 1 sw Y   y xY w)
Ngpur   zbenchmark_sam_{}_{}_{}.csvz%Y%m%d-%H%M%Sa )r   newline)r   r   r5   r   r$   r%   r#   r   r   r   r"   r   r    r!   r   r&   r*   r'   r4   r   r   )
fieldnames)r   formatr   r   nowstrftimeopencsv
DictWriterwriteheaderr   )r   featurescsv_filenamecsv_filecolumn_namesr   s         r7   run_perf_testr     s    u%H/660L
 
lb	1X
. ^^HF
 z"7 
2	1	1s   8BB'c                  ~   t        j                  d      } | j                  ddddgdd       | j                  d	dg d
dd       | j                  dddd       | j                  d       | j                  dddd       | j                  d       | j                  ddt        g ddd       | j                  ddt        dd       | j                  ddt        dd        | j                  d!dt        dd"       | j                  d#dt        d$d%       | j                  d&dt        d'd(       | j                  d)dt
        d*d*d+gd,-       | j                  d.dddd/0       | j                  d1dddd20       | j                  d3dddd40       | j                  d5dddd60       | j                  d7dddd80       | j                  d9dt
        d:g d;d<-       | j                  d=dt
        d>d?       | j                  d@dt
        dAdB       | j                  dCdt
        d g dDdE-       | j                         }|S )FNz,Benchmark SMA2 for ONNX Runtime and PyTorch.)descriptionz--componentFr   image_decoderzDcomponent to benchmark. Choices are image_encoder and image_decoder.)requiredchoicesdefaulthelpz--dtyper   r   zData type for inference.z	--use_gpu
store_truezUse GPU for inference.)r   actionr  )r   z--use_cuda_graphzUse cuda graph in onnxruntime.)r   z--intra_op_num_threads)r   r   rI   r   r      r   z&intra_op_num_threads for onnxruntime. )r   r   r  r  r  z--batch_sizer   z
batch size)r   r   r  r  z--heightr   zimage heightz--widthzimage widthz	--repeatsr   z8number of repeats for performance test. Default is 1000.z	--warm_upr   z)number of runs for warm up. Default is 5.z--enginer   rR   zengine for inference)r   r   r  r  r  z--multimask_outputz:Export mask_decoder or image_decoder with multimask_output)r   r  r  r  z--prefer_nhwcz;Use prefer_nhwc=1 provider option for CUDAExecutionProviderz--enable_nvtx_profilezVEnable nvtx profiling. It will add an extra run for profiling before performance test.z--enable_ort_profilezEnable ORT profiling.z--enable_torch_profilezYEnable PyTorch profiling. It will add an extra run for profiling before performance test.z--model_typer0   r-   zsam2 model namez
--sam2_dirz./segment-anything-2z6The directory of segment-anything-2 git root directoryz--onnx_pathz6./sam2_onnx_models/sam2_hiera_large_image_encoder.onnxzpath of onnx modelz--torch_compile_mode)zreduce-overheadr   zmax-autotune-no-cudagraphsr   z4torch compile mode. none will disable torch compile.)argparseArgumentParseradd_argumentset_defaultsr`   r_   
parse_args)parserr   s     r7   _parse_argumentsr    s&   $$1_`F
 /2S   E+CVZt   %	   &
-	   u-
 #5            G   8    #   I   J   e   $    h   "c   &E   H!   YC   DKr9   __main__z
arguments:r   r   r   r   rg   Fr;   )-r~   r  r   r   r   collections.abcr   r   rR   r   r   r   r   
sam2_utilsr   r   r	   onnxruntimer
   r   r   *onnxruntime.transformers.io_binding_helperr   r   rs   rw   ry   r   r   	Namespacer   r   r   r  r[   r   ri   r4   r   r   rk   is_availabler   r)   r'   rd   r9   r7   <module>r     s    
   #   * * N N Q Q BW Wtz DT $:  6 6+ |.j |.B )-{


{%{|"#JeP zD	Jtf
&48NNo4U.[a||zz&&(((;;%*.E.GGGG(-D% ++++,,,,4#<#<d+ r9   