
    BvhM                     ~   d dl mZ d dlmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1m2Z2 d dl3m4Z4 d dl5m6Z6m7Z7m8Z8 d dl9m:Z:  ee;      Z< G d de:      Z=y)    )	getLogger)PackingMode)AttentionMaskFusionAttention)FusionBartAttention)FusionBiasGelu)FusionConstantFold)FusionEmbedLayerNormalization)FusionFastGelu)
FusionGelu)FusionGeluApproximation)FusionGemmFastGelu)FusionLayerNormalizationFusionLayerNormalizationTF)AttentionMaskFormatFusionOptions)FusionQOrderedAttention)FusionQOrderedGelu) FusionQOrderedLayerNormalization)FusionQOrderedMatMul)FusionQuickGelu)FusionReshape)FusionRotaryEmbeddings)FusionShape)"FusionSimplifiedLayerNormalization&FusionSkipSimplifiedLayerNormalization) FusionBiasSkipLayerNormalizationFusionSkipLayerNormalization)FusionUtils)
ModelProtoTensorProtohelper)	OnnxModelc                       e Zd Zd(dededef fdZd Zd Zd Zd Z	d	 Z
d
 Zd Zd Zd Zd Zd Zd Zd)dZd Zd Zd Zdedee   defdZdefdZd Zd*dZd Zd Zd Zd Z d+d!e!d z  d"efd#Z"d$ Z#d,d%Z$d-d&efd'Z% xZ&S ).BertOnnxModelmodel	num_headshidden_sizec                 v   |dk(  r|dk(  s|dkD  r||z  dk(  sJ t         |   |       || _        || _        t	        |       | _        t        | | j                  | j                  | j
                        | _        t        | | j                  | j                  | j
                        | _	        t        |       | _        y)aG  Initialize BERT ONNX Model.

        Args:
            model (ModelProto): the ONNX model
            num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically).
            hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically).
        r   N)super__init__r'   r(   r   attention_maskr   attention_fusionr   qordered_attention_fusionr   utils)selfr&   r'   r(   	__class__s       R/RAG/venv/lib/python3.12/site-packages/onnxruntime/transformers/onnx_model_bert.pyr+   zBertOnnxModel.__init__&   s     Q;!#3Q;YbKbfgKghh"&+D1 /d6F6FX\XkXk l)@$""DNND4G4G*
& !&
    c                 :    t        |       }|j                          y N)r	   applyr0   fusions     r2   fuse_constant_foldz BertOnnxModel.fuse_constant_fold;       #D)r3   c                 l    | j                   j                          | j                  j                          y r5   )r-   r6   r.   r0   s    r2   fuse_attentionzBertOnnxModel.fuse_attention?   s&    ##%&&,,.r3   c                     t        |       }|j                          t        |       }|j                          t        |       }|j                          t	        |       }|j                          y r5   )r   r6   r   r   r   r7   s     r2   	fuse_geluzBertOnnxModel.fuse_geluD   sN    D!% &#D)r3   c                 <    t        | |      }|j                          y r5   )r   r6   )r0   is_fastgelur8   s      r2   fuse_bias_geluzBertOnnxModel.fuse_bias_geluO   s    k2r3   c                 :    t        |       }|j                          y r5   )r   r6   r7   s     r2   gelu_approximationz BertOnnxModel.gelu_approximationS   s    (.r3   c                 :    t        |       }|j                          y r5   )r   r6   r7   s     r2   fuse_gemm_fast_geluz!BertOnnxModel.fuse_gemm_fast_geluW   r:   r3   c                 :    t        |       }|j                          y r5   )r   r6   r7   s     r2   fuse_add_bias_skip_layer_normz+BertOnnxModel.fuse_add_bias_skip_layer_norm[   s    1$7r3   c                 :    t        |       }|j                          y r5   )r   r6   r7   s     r2   fuse_reshapezBertOnnxModel.fuse_reshape_   s    t$r3   c                 :    t        |       }|j                          y r5   )r   r6   r7   s     r2   
fuse_shapezBertOnnxModel.fuse_shapec   s    T"r3   c                 <    t        | |      }|j                          y r5   )r
   r6   )r0   use_mask_indexr8   s      r2   fuse_embed_layerzBertOnnxModel.fuse_embed_layerg   s    .t^Dr3   c                     t        |       }|j                          t        |       }|j                          t        |       }|j                          y r5   )r   r6   r   r   r7   s     r2   fuse_layer_normzBertOnnxModel.fuse_layer_normk   s=    )$/+D1 2$7r3   c                 :    t        |       }|j                          y r5   )r   r6   r7   s     r2   fuse_simplified_layer_normz(BertOnnxModel.fuse_simplified_layer_normv   s    3D9r3   c                 >    t        | |      }|j                          y )N)shape_infer)r   r6   )r0   rU   r8   s      r2   fuse_skip_layer_normz"BertOnnxModel.fuse_skip_layer_normz   s    -dLr3   c                 :    t        |       }|j                          y r5   )r   r6   r7   s     r2   fuse_skip_simplified_layer_normz-BertOnnxModel.fuse_skip_simplified_layer_norm~   s    7=r3   c                 .   t        |       }|j                          t        t        d | j                  j
                  j                              }|D ch c]  }|j                   }}d}|t        | j                  j                        k  r| j                  j                  |   }d|j                  v r4|j                  |vr&| j                  j                  j                  |       n|dz  }|t        | j                  j                        k  ry y c c}w )Nc                 B    | j                   dk(  xr | j                  dk7  S )NRotaryEmbeddingcom.microsoft)op_typedomain)nodes    r2   <lambda>z6BertOnnxModel.fuse_rotary_embeddings.<locals>.<lambda>   s     T\\->>a4;;RaCaar3   r   r[      )r   r6   listfilterr&   graphr_   r^   len	functionsnameremove)r0   r8   rot_emb_nodesr_   non_ms_domains_to_keepifns          r2   fuse_rotary_embeddingsz$BertOnnxModel.fuse_rotary_embeddings   s    '-a

  %%
 ;H!H-$$++-!H#djj**++%%a(B BGG+		AW0W

$$++B/Q #djj**++ "Is   Dc                 :    t        |       }|j                          y r5   )r   r6   r7   s     r2   fuse_qordered_mamtulz"BertOnnxModel.fuse_qordered_mamtul   s    %d+r3   r]   input_indicescastedc                    g }| j                         }| j                  |      }|D ]  }|D cg c]*  }|t        |j                        k  s|j                  |   , }	}|	D ]  }
| j	                  |
      r|r|j                  |
       )|
|v s.||
   }|j                  dk(  sC| j	                  |j                  d         b|se|j                  |j                  d            |S c c}w )z
        Get graph inputs that feed into node type (like EmbedLayerNormalization or Attention).
        Returns a list of the graph input names based on the filter whether it is casted or not.
        Castr   )output_name_to_nodeget_nodes_by_op_typere   inputfind_graph_inputappendr]   )r0   r]   rp   rq   graph_inputsrt   nodesr_   rk   bert_inputs
bert_inputparents               r2   get_graph_inputs_from_node_typez-BertOnnxModel.get_graph_inputs_from_node_type   s    
 "668))'2D2?W-Q1s4::CV4::a=-KW)
((4!$++J7#660<F~~/D4I4I&,,WX/4Z4f!(//Q@ *   Xs   C)C)c                 ^    | j                  dg d|      }|| j                  ddg|      z  }|S )NEmbedLayerNormalization)r   ra      	Attention   )r~   )r0   rq   inputss      r2   !get_graph_inputs_from_fused_nodesz/BertOnnxModel.get_graph_inputs_from_fused_nodes   s9    556OQZ\bc$66{QCPPr3   c                     | j                         }d}d}|j                  D ]:  }| j                  |t        j                        \  }}|r|dz  }|t        |      z  }< t        j                  d| d| d       y)zPChange data type of all graph inputs to int32 type, and add Cast node if needed.r   ra   z)Graph inputs are changed to int32. Added z Cast nodes, and removed z Cast nodes.N)rd   rv   change_graph_input_typer!   INT32re   loggerinfo)r0   rd   add_cast_countremove_cast_countgraph_inputnew_noderemoved_nodess          r2   change_graph_inputs_to_int32z*BertOnnxModel.change_graph_inputs_to_int32   s    

 ;;K&*&B&B;P[PaPa&b#Hm!#]!33	 '
 	77GG`ar`ss  A	
r3   c                 >   | j                  d      | j                  d      z   }| j                  j                  j                  D ]|  }|j                  |v s|j
                  j                  j                  j                  d   }||_	        |I|j
                  j                  j                  j                  d   }||_	        ~ | j                  j                  j                  D ]6  }|j
                  j                  j                  j                  d   }||_	        8 y)zD
        Update input and output shape to use dynamic axes.
        T)rq   Fr   Nra   )r   r&   rd   rv   rg   typetensor_typeshapedim	dim_paramoutput)r0   dynamic_batch_dimdynamic_seq_lenbert_graph_inputsrv   	dim_protor   s          r2   use_dynamic_axeszBertOnnxModel.use_dynamic_axes   s     !BB C 
22%2@A ZZ%%++Ezz..!JJ2288<<Q?	&7	#". %

 6 6 < < @ @ CI*9I' , jj&&--F//5599!<I"3I .r3   c                 $    | j                          y r5   )adjust_reshape_and_expandr<   s    r2   
preprocesszBertOnnxModel.preprocess   s    &&(r3   c                 "   g }| j                         D ]D  }|j                  dk(  s| j                  |j                  d         }|N|j                  dk(  r?|j                  |g       | j                  |j                  d   |j                  d          | j                  |g dg d| j                               }||d   }| j                  |j                  d         }|d   }| j                  |j                  d         }|d   }	||t        |      d	k(  st        |      dk(  s|d   |d   k(  s)|	j                  d   |j                  d<   G |r3| j                  |       t        j                  d
t        |              y y )NReshapera   r   )Expandr   r   Slice)r   r   r   r      z"Removed Reshape and Expand count: )rz   r]   get_constant_valuerv   sizeextendreplace_input_of_all_nodesr   match_parent_pathrt   re   remove_nodesr   r   )
r0   nodes_to_remover_   reshape_shapereshape_pathexpand_nodeexpand_shape_valuereshape_before_expandshape_value
slice_nodes
             r2   r   z'BertOnnxModel.adjust_reshape_and_expand   s   JJLD||y( !% 7 7

1 F ,1C1Cq1H#**D6233DKKNDJJqMR  $55< ,,.	   +".r"2K)-)@)@ARARSTAU)V&,8,<)"&"9"9:O:U:UVW:X"YK!-b!1J*6'3 23q8,1.q1[^C(2(9(9!(<

1C !F o.KK<S=Q<RST r3   c                 @   | j                         }g }| j                         D ]  }dddd}|j                  |v r||j                     }| j                  |g d|dddddg|      }|l|\  }}}	}
}}|j                  d   | j                         j                  d   j                  k(  r,|j                  d   |j                  d<   | j                         }|j                  dk(  s| j                  |g dg d|      }||d	   j                  d   | j                         j                  d   j                  k(  st        j                  d|j                  dt        |j                        dz
   |j                  |j                  d
z         }d|_        |j                  j                  t        j                  d| j                        g       | j!                  || j#                  |      j                         |j%                  |        | j'                  |       y )Nra   r   r   )r   	ReduceSumr   )rs   ConstantOfShapeConcat	UnsqueezeGatherShaper   )r   rs   r   r   )r   r   r   r   r   _remove_mask)r   outputsrg   r\   r'   )rt   rz   r]   r   rv   rd   rg   r   r"   	make_nodere   r^   	attributer   make_attributer'   add_nodeget_graph_by_noderx   r   )r0   rt   r   r_   op_input_idrk   parent_nodescastconstantOfShapeconcat	unsqueezegatherr   attention_nodes                 r2   clean_graphzBertOnnxModel.clean_graph  s   "668JJLD 78aVWXK||{*-#55 1aA&'   + %'!{{1~););A)>)C)CC38<<?--a0.2.F.F.H+||{*
  $55E '	   +#B'--a0DJJL4F4Fq4I4N4NN)/)9)9'#'::a#djj/A2E#F$(KK!%^!;	* 1@-&00779N9N{\`\j\j9k8lmnd6L6L^6\6a6ab'..t4y !z 	/*r3   c                 D    | j                          | j                          y r5   )r   prune_graphr<   s    r2   postprocesszBertOnnxModel.postprocessF  s    r3   Noptionsadd_dynamic_axesc                 l   ||j                   s| j                          | j                  j                          | j                  j	                          | j                          ||j                  r | j                          | j                          ||j                  r| j                          | j                          | j                          ||j                  r+| j                  |j                          | j                          ||j                   r| j#                          || j$                  j'                  |j(                         |j*                  rVt-        | j.                  t0              s<t3        | | j4                  | j6                  | j$                  |j*                        | _        ||j8                  r| j;                          ||j<                  r| j?                          | jA                          ||jB                  r.|j(                  tD        jF                  k(  }| jI                  |       | j                  jK                          | jM                          ||jN                  r$| jQ                  d       | jQ                  d       ||jR                  r| jU                          ||jV                  r| jY                          ||jZ                  r| j]                          | j_                          |r| ja                          tb        je                  d| jg                                 y )NT)rA   Fzopset version: )4enable_shape_inferencedisable_shape_inferencer/   remove_identity_nodesremove_useless_cast_nodesr9   enable_layer_normrQ   rS   enable_gelur?   r   rJ   enable_skip_layer_normrV   rX   enable_rotary_embeddingsrm   r,   set_mask_formatattention_mask_formatuse_multi_head_attention
isinstancer-   r   r   r(   r'   enable_attentionr=   enable_qordered_matmulro   rL   enable_embed_layer_normr   MaskIndexEndrO   remove_useless_reshape_nodesr   enable_bias_gelurB   enable_bias_skip_layer_normrH   enable_gelu_approximationrD   enable_gemm_fast_gelurF   remove_unused_constantr   r   r   get_opset_version)r0   r   r   rN   s       r2   optimizezBertOnnxModel.optimizeJ  s   )G)G((*

((* 	

,,. 	!O 9 9  "++-O 3 3NNO > >%%g&D&DE002O @ @'')//0M0MN//
4CXCXZm8n(7$$NN''44)% O 8 8! O > >%%'O ? ?$::>Q>^>^^N!!.1 	

//1 O 8 8D1E2O C C..07#D#D##%7#@#@$$&##% !!#od&<&<&>%?@Ar3   c                     i }g d}g d}||z   D ]!  }| j                  |      }t        |      ||<   # t        j                  d|        |S )z8
        Returns node count of fused operators.
        )r   r   MultiHeadAttentionGeluFastGeluBiasGeluGemmFastGeluLayerNormalizationSimplifiedLayerNormalizationSkipLayerNormalization SkipSimplifiedLayerNormalizationr[   )QOrderedAttentionQOrderedGeluQOrderedLayerNormalizationQOrderedMatMulzOptimized operators: )ru   re   r   r   )r0   op_countopsq_opsoprz   s         r2   get_fused_operator_statisticsz+BertOnnxModel.get_fused_operator_statistics  s_     

 +B--b1Eu:HRL  	+H:67r3   c                 R   | j                         dt        ffd} |d      } |d       |d      z    |d      z   } |d       |d      z    |d	      z   } |d
       |d      z   } |d       |d      z   }|dkD  xr  |dkD  xr ||k(  xr |d|z  k\  xs |d|z  k\  }|dk(  rt        j                  d       |dk(  rt        j                  d       |dk(  rt        j                  d       |dk(  rt        j                  d       |dk(  rt        j	                  d       |S )zA
        Returns True when the model is fully optimized.
        op_namec                 .    j                  |       xs dS )Nr   )get)r   fused_op_counts    r2   r   z2BertOnnxModel.is_fully_optimized.<locals>.op_count  s    !%%g.3!3r3   r   r   r   r   r   r   r   r   r   r   r   r   r   zLayer Normalization not fusedz$Simple Layer Normalization not fusedzGelu (or FastGelu) not fusedz!EmbedLayerNormalization not fusedz+Attention (or MultiHeadAttention) not fused)r   strr   debugwarning)	r0   r  r   embed	attentiongelu
layer_normsimple_layer_norm
is_perfects	    `       r2   is_fully_optimizedz BertOnnxModel.is_fully_optimized  sV    !!??AN	4c 	4 23[)H5I,JJXViMjj	(:"66*9MM23h?W6XX
$%CDxPrGss QY XQXd"X I-V3DI3U	 	 ?LL89!LL?@19LL78A:LL<=>NNHIr3   use_symbolic_shape_inferc                 <    t        |       }|j                  |       y r5   )r   convert)r0   r  packing_modes      r2   convert_to_packing_modez%BertOnnxModel.convert_to_packing_mode  s    "4(56r3   )r   r   )T)
batch_sizemax_seq_len)NFr5   )F)'__name__
__module____qualname__r    intr+   r9   r=   r?   rB   rD   rF   rH   rJ   rL   rO   rQ   rS   rV   rX   rm   ro   r  rb   boolr~   r   r   r   r   r   r   r   r   r   r   r  r  __classcell__)r1   s   @r2   r%   r%   %   s    'j 'S '3 '*/
		(s 4PS9 ^b , 

4('UR@+DRB 4 RBt RBh@&P7 7r3   r%   N)>loggingr   r  r   fusion_attentionr   r   fusion_bart_attentionr   fusion_biasgelur   fusion_constant_foldr	   fusion_embedlayerr
   fusion_fastgelur   fusion_gelur   fusion_gelu_approximationr   fusion_gemmfastgelur   fusion_layernormr   r   fusion_optionsr   r   fusion_qordered_attentionr   fusion_qordered_gelur   fusion_qordered_layernormr   fusion_qordered_matmulr   fusion_quickgelur   fusion_reshaper   fusion_rotary_attentionr   fusion_shaper   fusion_simplified_layernormr   r   fusion_skiplayernormr   r   fusion_utilsr   onnxr    r!   r"   
onnx_modelr#   r  r   r%    r3   r2   <module>r4     sp     / ; 5 * 3 ; * " = 2 Q = = 3 F 7 , ( : $ r _ $ 0 0  	8	C7I C7r3   