
    Bvh&1                         d Z ddlZddlZ ed      ZddZd ZddZddZd Z	d	 Z
ed
k(  rM e       Z ede       ddlmZ  eej                          ej"                  Z e
ee      ZeD ]
  Z ee        yy)zThis profiler result processor print out the kernel time spent on each Node of the model.
Example of importing profile result file from onnxruntime_perf_test:
    python profile_result_processor.py --input profile_2021-10-25_12-02-41.json
    N)ScanLoopIfc                    t        j                         }|j                  dddt        d       |j                  ddt        dd	       |j                  d
dt        dd	       |j                  dddd       |j                  d       |j                  dddd       |j                  d       |j                  |       S )Nz-iz--inputFz2Set the input file for reading the profile results)requiredtypehelpz--thresholdg{Gz?zfThreshold of run time ratio among all nodes. Nodes with larger ratio will show in top expensive nodes.)r   r   defaultr	   z
--providercudazExecution provider to usez--kernel_time_only
store_truez.Only include the kernel time and no fence time)r   actionr	   )kernel_time_onlyz-vz	--verbose)r   r   )verbose)argparseArgumentParseradd_argumentstrfloatset_defaults
parse_args)argvparsers     [/RAG/venv/lib/python3.12/site-packages/onnxruntime/transformers/profile_result_processor.pyparse_argumentsr      s    $$&F
A   u   (   =	   /
kE,O
&T""    c                     t        d|  d       t        |       5 }t        j                  |      }d d d        t	        t
              sJ |S # 1 sw Y   xY w)Nzloading profile output z ...)printopenjsonload
isinstancelist)profile_fileopened_file	sess_times      r   load_profile_jsonr&   ;   sR    	#L>
67	l	{IIk*	 
 i&&&	 
	s   AAc                    i }i }i }d}d}| D ]  }|d   dk(  r
|d   dk(  rd}|s|d   dk(  s!d	|v s&d
|v s+d|d
   v s3|d   }|d
   d   }	|	t         v rI|	sd| d}	||v r||xx   |d	   z  cc<   ||xx   dz  cc<   n|d	   ||<   d||<   |	||<   ||d	   z  } |sdgS g }
|
j                  d|dz  dd       |
j                  d       |
j                  d       t        |j                         d d      D ]I  \  }}||z  }||k  r||   }|t	        |      z  }|
j                  |dd|dz  dd|dd|dd| 	       K i }|j                         D ]!  \  }}	||   }|	|v r||	xx   |z  cc<   |||	<   # |
j                  d       |
j                  d       |
j                  d       t        |j                         d  d      D ](  \  }	}||z  }|
j                  |dd|dz  dd|	        * |
S )!a<  Parse profile data and output nodes in two sections - nodes in the original order, and top expensive nodes.

    Args:
        sess_time (List[Dict]): profile data
        threshold (int, optional): Minimum ratio of duration among all. Defaults to 0.

    Returns:
        List[str]: lines of string for output.
    r   FcatSessionnamesession_initializationTKerneldurargsop_name()   zNo kernel record found!z%
Top expensive kernels with Time% >= d   .2f:@----------------------------------------------------------------u&   Total(μs)	Time%	Calls	Avg(μs)	Kernelc                     | d   S Nr2    xs    r   <lambda>z&parse_kernel_results.<locals>.<lambda>x   s    1Q4r   keyreverse10d	      Y@5.2f5d8.1fz
Group kernel time by operator:u   Total(μs)	Time%	Operatorc                     | d   S r8   r9   r:   s    r   r<   z&parse_kernel_results.<locals>.<lambda>   s    1Q4r   )_NODES_TYPE_CONTAINING_SUBGRAPHappendsorteditemsr   )r%   	thresholdkernel_name_to_op_namekernel_timekernel_freqtotalsession_inititemkernel_namer/   linesdurationratiocallsavg_timeop_times                   r   parse_kernel_resultsrY   E   s     KKEL;)#V8P(PL;("u}4IY]^dYeLev,K6l9-G99 k]!,k)K(DK7(K(A-(+/;K(+,K(6=&{3T%[ E5 8 )** E	LL9)c/#9NaPQ	LL	LL=>!'(9(9(;Y]!^X5 9K(eEl*~Rd';2eBZr(SWXZ[fZghi "_ G 6 < < >W{+gG('GG !? 
LL34	LL	LL./#GMMOQUV5 ~Rd';2gYGH W Lr   c                 d   g }i }i }i }d}| D ]  }|d   dk(  sd|v sd|v sd|d   v s|d   j                  dd	      j                  d
d	      j                  dd	      }	d|d   v r=|d   d   dk(  rd}
n|d   d   dk(  rd}
n|d   d   dk(  rd}
|	|vr
||	<   n||	   
k(  sJ |r|d   d   }|t        v r|	|v r||	xx   |d   z  cc<   ||	xx   dz  cc<   n|d   ||	<   d||	<   |j                  |	       ||d   z  } g d}d}|D ]d  }	||	   }||	   }|t        |      z  }||z  dz  }|j	                  |	d	      }||z  }|j                  |dd|dd|dd|dd|dd|dd|	        f |j                  d|dz  dd        |j                  d!       |j                  d"       t        |j                         d# d$%      D ]d  \  }	}||z  }||k  r||	   }|t        |      z  }||z  dz  }|j	                  |	d	      }|j                  |dd|dd|dd|dd|dd|	        f |S )&a  Parse profile data and output nodes in two sections - nodes in the original order, and top expensive nodes.

    Args:
        sess_time (List[Dict]): profile data
        kernel_time_only (bool, optional): Only include items for kernel time. Defaults to False.
        threshold (int, optional): Minimum ratio of duration among all. Defaults to 0.

    Returns:
        List[str]: lines of string for output.
    r   r(   Noder-   r.   r/   r*   _kernel_time _fence_before_fence_afterproviderCPUExecutionProviderCPUCUDAExecutionProviderCUDADmlExecutionProviderDMLr2   )z
Nodes in the original order:r6   u3   Total(μs)	Time%	Acc %	Avg(μs)	Calls	Provider	Nodeg        rB   r@   rA   rC   rE   rD   8sz#
Top expensive nodes with Time% >= r3   r4   r5   r6   u-   Total(μs)	Time%	Avg(μs)	Calls	Provider	Nodec                     | d   S r8   r9   r:   s    r   r<   z$parse_node_results.<locals>.<lambda>   s    qtr   Tr=   )replacerG   rH   r   getrI   rJ   )r%   r   rK   node_name_list	node_time	node_freqnode_providerrO   rQ   	node_namedevicer/   rS   before_percentagerT   rV   rW   
percentager`   rU   s                       r   parse_node_resultsrs      s2    NIIME;& Ud]v~)W[\bWcJcV$$^R8@@RTU]]^lnpq  T&\)<
+/EE"F&\*-1HH#F&\*-1GG"FM1/5M),(3v===!6l9-G99I%)$U3$)$)$'+E{	)$'(	)$%%i0T%[ EC HE
 #	Y')$eEl*&%/
 $$Y3Z'nBz$/r2CD1IHUY?Z\]bce\ffhiqrthuuw  yB  xC  D	
 $ 
LL7	C7LANO	LL	LLEF%ioo&7^UYZ	85 9)$eEl*&%/
 $$Y3~R
4'88D/ERT:UWX`acWddfgpfqrs  [ Lr   c                 2   i }i }d}i }i }i }i }d}i }	| D ]  }
|
d   dk(  sd|
v sd|
v sd|
d   v s|
d   d   }|t         v r0d|
d   vr-d|
d	   v r%||v r||xx   |
d   z  cc<   n|
d   ||<   ||
d   z  }d|
d   j                  dd
      }||	v r|	|xx   dz  cc<   nd|	|<   | d| }||v r||xx   |
d   z  cc<   ||xx   dz  cc<   n|
d   ||<   d||<   ||v r||xx   |
d   z  cc<   n|
d   ||<   ||v r||xx   |
d   z  cc<   ||xx   dz  cc<   n|
d   ||<   d||<   ||
d   z  } d
dg}|j                  d       |j                  d       t        |j	                         d d      D ]h  \  }}|j                  |d      }||z  }||z   }|||z   z  }||   }||z  }|j                  |dd|dz  dd|dd|dz  dd|dd|dd|dd|        j |d
dgz  }|j                  d       |j                  d       t        |j	                         d d      D ]n  \  }}|j                  d      }|d   }|d   }|j                  dd
      }||   }||z  }|||   z  }|j                  |dd|dz  dd|dd|dd|dd|        p |S ) zGroup results by operator name.

    Args:
        sess_time (List[Dict]): profile data

    Returns:
        List[str]: lines of string for output.
    r   r(   r[   r-   r.   r/   r`   fencer*   r]   r2   r5   zGrouped by operatorr6   uM   Total(μs)	Time%	Kernel(μs)	Kernel%	Calls	AvgKernel(μs)	Fence(μs)	Operatorc                     | d   S r8   r9   r:   s    r   r<   z$group_node_results.<locals>.<lambda>0  s    QqTr   Tr=   r@   rA   rB   rC   11drD   z14.1fzGrouped by provider + operatoru<   Kernel(μs)	Provider%	Calls	AvgKernel(μs)	Provider	Operatorc                     | d   S r8   r9   r:   s    r   r<   z$group_node_results.<locals>.<lambda>>  s    RSTURVr   ExecutionProviderz9.2frg   )rG   rj   rH   rI   rJ   splitri   )r%   op_kernel_timeop_kernel_recordstotal_kernel_timeprovider_op_kernel_timeprovider_op_kernel_recordsprovider_kernel_timeop_fence_timetotal_fence_timeprovider_counterrQ   r/   r`   r>   rS   rM   
fence_timekernel_time_ratio
total_time
time_ratiokernel_callsavg_kernel_timepartsshort_eprV   provider_time_ratios                             r   group_node_resultsr      s*    N !#M;& Ud]v~)W[\bWcJc6l9-G 99f-d6l*-/%g.$u+=.15eg.$U3$F|''
B7H++ *a/*-. *Jay)C--',U;,*3/14//3E{',23*3///$X.$u+=.15e$X..(w'4;6'!'*a/**.u+w'-.!'*e,W Z &'E	LL	LLgh &~';';'=>[_ `"&&w2
'*;; :-
#47G#GH
(1%4#be!3D 9K;LBO`chOhimNnnpq}  A  qB  BD  ET  UZ  D[  []  ^h  il  ]m  mo  pw  ox  y	
 !a 
b233E	LL	LLTU"#:#@#@#B`de[		#8(##$7<*3/%-),@,JJ3r"5"=d!C2eBZrRabgQhhjkstvjwwy  {B  zC  D	
 f Lr   c                     t        |       }t        ||j                        }|t        ||j                  |j                        z  }|t        |      z  }|S N)r&   rY   rK   rs   r   r   )r#   r.   profile_recordsrS   s       r   process_resultsr   M  sO    '5O $..AE	1F1FWWE	00ELr   __main__	Arguments)setup_loggerr   )r   )Fr   )__doc__r   r   	frozensetrG   r   r&   rY   rs   r   r   __name__	argumentsr   benchmark_helperr   r   inputr#   resultsliner9   r   r   <module>r      s   
  "+,B"C '#TL^Tn_D	 z!I	+y!-""#??LlI6Gd  r   