
    BvhR                        d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlmZmZ d dlmZ d dlmZmZmZmZmZ d d	lmZ  ee      Z G d
 d      Z G d d      Z G d d      Z G d d      Z G d d      Z G d d      Z G d d      Z  G d d      Z! G d de      Z" G d de      Z# G d de"      Z$ G d  d!e"      Z% G d" d#e"      Z& G d$ d%e"      Z' G d& d'e      Z(y)(    )	getLoggerN)DynamoOnnxHelper)Fusion)AttentionOpTypeFusionOptions) FusionBiasSkipLayerNormalizationFusionSkipLayerNormalization)NumpyHelper)
ModelProto	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                       e Zd Zd Zy)ProcessGemmWFuncc                 .    t        j                  |d      S )N   r   )np	transposeselfxs     Q/RAG/venv/lib/python3.12/site-packages/onnxruntime/transformers/onnx_model_phi.py__call__zProcessGemmWFunc.__call__   s    ||Av&&    N__name__
__module____qualname__r    r   r   r   r      s    'r   r   c                       e Zd Zd Zy)ProcessMatMulQFuncc                 ^    t        j                  t        j                  |dd      d   d      S )N   r   r   r   r   splitr   s     r   r   zProcessMatMulQFunc.__call__   %    ||BHHQ1-a0&99r   Nr   r"   r   r   r$   r$          :r   r$   c                       e Zd Zd Zy)ProcessMatMulKFuncc                 ^    t        j                  t        j                  |dd      d   d      S )Nr&   r   r   r   r'   r   s     r   r   zProcessMatMulKFunc.__call__   r)   r   Nr   r"   r   r   r,   r,      r*   r   r,   c                       e Zd Zd Zy)ProcessMatMulVFuncc                 ^    t        j                  t        j                  |dd      d   d      S )Nr&   r      r   r'   r   s     r   r   zProcessMatMulVFunc.__call__$   r)   r   Nr   r"   r   r   r/   r/   #   r*   r   r/   c                       e Zd Zd Zy)ProcessBiasQFuncc                 :    t        j                  |dd      d   }|S )Nr&   r   r   r(   r   s     r   r   zProcessBiasQFunc.__call__)       HHQ2q!r   Nr   r"   r   r   r3   r3   (       r   r3   c                       e Zd Zd Zy)ProcessBiasKFuncc                 :    t        j                  |dd      d   }|S )Nr&   r5   r   r6   r   s     r   r   zProcessBiasKFunc.__call__/   r7   r   Nr   r"   r   r   r:   r:   .   r8   r   r:   c                       e Zd Zd Zy)ProcessBiasVFuncc                 :    t        j                  |dd      d   }|S )Nr&   r5   r1   r6   r   s     r   r   zProcessBiasVFunc.__call__5   r7   r   Nr   r"   r   r   r=   r=   4   r8   r   r=   c                       e Zd Zd Zy)ProcessRotCacheFuncc                 t    t        |j                        dk(  sJ |j                  d   dk(  r|d d ddf   S |S )Nr1   r       r      )lenshaper   s     r   r   zProcessRotCacheFunc.__call__;   s?    177|q   771:Q"W:r   Nr   r"   r   r   r@   r@   :   s    r   r@   c                       e Zd Zdedee   f fdZdefdZd Z	d Z
d Zd	 Zdd
Zd Zd Zd Zdee   dedee   fdZddee   dee   defdZddee   dee   defdZd dee   dee   defdZddee   dee   defdZddee   dee   defdZd!dee   dee   defdZd!dee   dee   defdZd!dee   dee   defdZ	 	 	 	 d"dee   dee   defdZ xZS )#Fissionmodelnodes_to_findc                 (    t         |   |d|       y )NDONOTUSEsuper__init__)r   rH   rI   	__class__s      r   rN   zFission.__init__E   s    
 	
M:r   attn_op_typec                     || _         y N)rP   )r   rP   s     r   set_attention_op_typezFission.set_attention_op_typeL   s
    (r   c                 $    |dz   t        |      z   S )N_)str)r   layer_idnames      r   	get_unamezFission.get_unameO   s    czCM))r   c                     |D ]-  }||k(  s#|j                  |      s|j                  |      s+|c S  t        d| d      )NzEdge z
 not found)endswith
startswith
ValueError)r   edgesrX   edges       r   get_edge_by_namezFission.get_edge_by_nameR   sD    Dt|t}}T2dood6K  5j122r   c                 :    | j                  |j                  |      S rR   )r`   inputr   noderX   s      r   get_input_by_namezFission.get_input_by_nameX   s    $$TZZ66r   c                 :    | j                  |j                  |      S rR   )r`   outputrc   s      r   get_output_by_namezFission.get_output_by_name[   s    $$T[[$77r   c                    | j                   j                  |      }t        j                  |      } ||      }t	        j
                  ||dz   n|t        j                  |j                  |j                         j                         d      }| j                   j                  || j                         |j                  S )N
_processedT	data_typedimsvalsraw)rH   get_initializerr
   to_arrayr   make_tensorr   FLOATrE   flattentobytesadd_initializerthis_graph_namerX   )r   initializer_namefunctorcustom_namei
i_np_arrayprocessed_i_np_array
new_tensors           r   process_initializerzFission.process_initializer^   s    JJ&&'78 ))!,
&z2''/:/B|+!''%++%--/779

 	

"":t/C/CDr   c                     | j                   j                         j                  j                         }||_        t
        j                  |j                  j                  _	        y rR   )
rH   graph
value_infoaddrX   r   rs   typetensor_type	elem_typer   rX   new_value_infos      r   add_fp32_value_infozFission.add_fp32_value_infol   E    ))+66::<"4?4E4E''1r   c                     | j                   j                         j                  j                         }||_        t
        j                  |j                  j                  _	        y rR   )
rH   r   r   r   rX   r   INT64r   r   r   r   s      r   add_int64_value_infozFission.add_int64_value_infoq   r   r   c                    | j                   j                         j                  D ]F  }|j                  |k(  s| j                   j                         j                  j	                  |        n t        j                  |t        j                  |      }| j                   j                         j                  j                  |g       y )Nr   rE   )
rH   r   r   rX   remover   make_tensor_value_infor   rs   extend)r   rX   rE   r   r   s        r   replace_fp32_value_infozFission.replace_fp32_value_infov   s    ****,77J$&

  "--44Z@ 8  66!''

 	

%%,,n-=>r   subgraph_nodesrW   layer_known_edges_namesc                 h   |D ],  }t        |j                        D ]M  \  }}|dk(  r||vs| j                  ||      |j                  |<   | j                  |j                  |          O t        |j                        D ]M  \  }}|dk(  r||vs| j                  ||      |j                  |<   | j                  |j                  |          O | j                  ||j
                        |_        | j                  j                  |       | j                  | j                  |j
                  <   / y )N )
	enumeraterb   rY   r   rg   rX   nodes_to_addappendrw   node_name_to_graph_name)r   r   rW   r   new_noder{   rX   s          r   set_unique_name_and_add_nodesz%Fission.set_unique_name_and_add_nodes   s
    'H$X^^442:!88(,x(FHNN1%,,X^^A->? 5 %X__542:!88)-$)GHOOA&,,X__Q-?@ 6 !NN8X]]CHM$$X.:>:N:ND((7 'r   inputsoutputsprefixc                     t        |      dk(  sJ t        |      dk(  sJ t        j                  d|||dz   d      }|gS )Nr&   r   LayerNormalization_LayerNormalizationg   >)r   r   rX   epsilonrD   r   	make_noder   r   r   r   rd   s        r   	layernormzFission.layernorm   sT    6{a7|q    //)
 vr   c                     t        |      dk(  sJ t        |      dk(  sJ t        j                  d|d   |d   g|dz   g|dz         }t        j                  d|dz   |d   g||d	z         }||gS )
Nr&   r   MatMulr   
matmul_outr   r   rX   Addr1   Biasr   )r   r   r   r   matmulr   s         r   gemmzFission.gemm   s    6{a7|q   !!1Ivay)l*+("	
 \)6!95&	
 }r   c           	          t        |      dk(  sJ t        |      dk(  sJ t        j                  d|||dz   d||      }|gS )N   r   RotaryEmbeddingcom.microsoft)r   r   rX   domainrotary_embedding_dim	num_headsr   )r   r   r   r   rot_dimr   rd   s          r   rotaryzFission.rotary   sZ    6{a7|q   ++"!(
 vr   c                     t        |      dk(  sJ t        |      dk(  sJ t        j                  d|||dz   d      }|gS )Nr   FastGelur   )r   r   rX   r   r   r   s        r   fastgeluzFission.fastgelu   sS    6{a7|q   *$"
 vr   c                     t        |      dk(  sJ t        |      dk(  sJ t        j                  d|||dz         }|gS )Nr1   r   r   r   r   r   s        r   r   zFission.add   sO    6{a7|q   %	
 vr   c           	          t        |      dk(  sJ t        |      dk(  sJ t        j                  d|||dz   d|d      }|gS )N   r&   MultiHeadAttentionr   r   )r   r   rX   r   r   unidirectionalr   r   r   r   r   r   rd   s         r   mhazFission.mha   sZ    6{a7|q    .."
 vr   c           	          t        |      dk(  sJ t        |      dk(  sJ t        j                  d|||dz   d||      }|gS )N   r&   GroupQueryAttentionr   )r   r   rX   r   r   kv_num_headsr   r   s         r   gqazFission.gqa   sZ    6{a7|q   !//""
 vr   c                     t        |      dk(  sJ t        |      dk(  sJ t        j                  d|||dz   d|ddd	      }|gS )N   r1   	Attentionr   r   rB   )r   r   rX   r   r   r   	do_rotaryr   r   r   s         r   	attentionzFission.attention   s_    6{a7|q   +%"!#

 vr   c                     t        |      dk(  sJ t        |      dk(  sJ t        j                  d|||dz   d||||	      }|gS )N   r   PagedAttentionzvllm.ort.ext)r   r   rX   r   r   num_kv_heads	head_sizescaler   )r   r   r   r   r   r   r   rd   s           r   
paged_attnzFission.paged_attn  sb     6{a7|q   **!"

 vr   rR   )r   )r   rB   rB   )r   rB   )r   rB   P   g   %?)r   r    r!   r   listrV   rN   r   rS   rY   r`   re   rh   r   r   r   r   r   intr   r   r   r   r   r   r   r   r   r   __classcell__rO   s   @r   rG   rG   D   s   ;; Cy;)/ )*378F
F

?O"9oO9<OW[\_W`O(
S	 
DI 
s 
49 tCy # "T#Y c C 
tCy 
49 
c 
	$s) 	d3i 	 	$s) d3i  $s) d3i  S	 DI s ( !S	 c 	r   rG   c                   V     e Zd Zdededef fdZdefdZd Zde	fd	Z
de	fd
Z xZS )Phi2PreProcessorrH   r   hidden_sizec                 \    t         |   |       d| _        || _        || _        d| _        y )NrB   modeling_phi_PhiModel_model_1)rM   rN   num_hidden_layersnum_attention_headsr   	func_namer   rH   r   r   rO   s       r   rN   zPhi2PreProcessor.__init__  s/    !##, &8r   returnc                    i }d|d<   d|d<   d|d<   d|d<   t        d	| j                  d	      D ]0  }d
| |d| <   d| |d| <   d| |d| d<   d| |d| d<   2 | j                  j                  j                  D cg c]  }|j
                   }}d|v rd|v rd|d<   d|d<   |S d|v rd|v sJ d|d<   d|d<   |S c c}w )Nlogits	lm_head_1	input_idsl_input_ids_
past_key_0
key_statespast_value_0value_statesr   	past_key_key_states_past_value_value_states_present_key_model_layers__1present_value__1_1model_layers_0_1_1model_layers_0_1_2present_key_0present_value_0model_layers_0_1)ranger   rH   r   rg   rX   )r   	edge_dictr{   or   s        r   get_phi2_edge_dictz#Phi2PreProcessor.get_phi2_edge_dict&  s>   	!)	+$/	.!".	,$2	.!q$00!4A-6qc?IA3'(/:1#->IaS)*1=aS/AIaS+,3A!1EIaS-.	 5 $(::#3#3#:#:;#:a166#:;7*/Cw/N.=I*+.?I*+
  &05IW5TTT,;I().?I*+ <s   Cc                     d}| j                   j                  j                  D ]7  }|j                  j	                  |      }|dk7  s$|j                  |d  |_        9 y )N)modeling_phi_PhiDecoderLayer_model_layersr5   )rH   r   rd   op_typefind)r   phi2_transformer_layer_namerd   indexs       r   simplify_phi2_op_typez&Phi2PreProcessor.simplify_phi2_op_type<  sP    &Q#JJ$$))DLL%%&ABE{#||EF3 *r   rP   c                 
   |t         j                  k(  | _        |t         j                  k(  | _        | j
                  j                  }g }|j                  D ]A  }d|j                  v r t        j                  |j                  | j                  st        j                  nt        j                  ddg      }t        j                  dt        j                  dg      }t        j                  dt        j                  ddg      }t        j                  dt        j                  ddg      }t        j                  d	t        j                  dg      }	| j                  s|j                  |||g      n|j                  |||	g       | j                  rd
|j                  v sOt        j                  |j                  j                  d
d      |j                   j"                  j$                  dd| j&                  d| j(                  | j&                  z  g      }
|j                  |
g       | j                  rd
|j                  v rTt        j                  |j                  |j                   j"                  j$                  g d      }
|j                  |
g       d|j                  v sWt        j                  |j                  |j                   j"                  j$                  g d      }
|j                  |
g       d
|j                  v sd|j                  v st        j                  |j                  |j                   j"                  j$                  d| j&                  d| j(                  | j&                  z  g      }
|j                  |
g       D |j+                  d       |j                  j                  |       g }t-        |j.                        D ]F  \  }}|dk(  r|j                  |g       | j                  rd|j                  v s:t        j                  |j                  j                  dd      |j                   j"                  j$                  dd| j&                  d| j(                  | j&                  z  g      }
|j                  |
g       | j                  rt        j                  |j                  |j                   j"                  j$                  d| j&                  d| j(                  | j&                  z  g      }
|j                  |
g       I |j+                  d       |j.                  j                  |       y )Nr   
batch_sizeseq_lenr   stepr   position_idsattention_maskinput_metadatapast_keypastr1   past_seq_len)
num_blocksr   head_size_x
block_sizeblock_x
past_value)r  r   r   r  rb   r   present_keypresenttotal_seq_lenrg   )r   r   use_attnr   use_vllmrH   r   rb   rX   r   r   r   INT32r   r   replacer   r   r   r   r   
ClearFieldr   rg   )r   rP   r   
new_inputsvivi_iidvi_stepvi_pidvi_maskvi_metavi_cachenew_outputsr{   s                r   process_graph_ioz!Phi2PreProcessor.process_graph_ioC  s^   $(A(AA$(F(FF

  
++Bbgg%66GG7;}}k//+J[J['3
 !77)//#
  66")//'3
 !77$)//'3
 !77$)//#  == %%vw&@A#**FFG+DE}}(%<<
F;"$''"5"5"?"?( 44* ,,0H0HH
 H %%xj1(%<<"$''"5"5"?"?a H
 %%xj1277*%<<"$''"5"5"?"?	 H %%xj1(LBGG,C%<<"$''"5"5"?"?( 44* ,,0H0HH		 H %%xj1] ` 	!:&u||,EArAv""B4(==$/#)#@#@GGOOM9E&(gg&9&9&C&C ! , $ 8 8 / $ 0 0D4L4L L#
$ $**H:6]]%<<"$''"5"5"?"?( 44+ ,,0H0HH		 H  &&z2? -B 	"K(r   c                    d }| j                   j                  D ]5  }|j                  j                  | j                        s)|j                  } n |J | j                  |       | j                  | j                                | j                          | j                          |t        j                  k(  r| j                          | j                  |       y rR   )rH   	functionsrX   r[   r   unroll_functionupdate_edgesr   r  remove_dropout_layerr   r   remove_lm_head_layerr&  )r   rP   function_namefuncs       r   preprocess_onnxz Phi2PreProcessor.preprocess_onnx  s    JJ((Dyy!!$..1 $		 ) (((]+$1134""$!!#?999%%'l+r   )r   r    r!   r   r   rN   dictr   r  r   r&  r/  r   r   s   @r   r   r     sI    9j 9S 9s 9D ,4{)_ {)z,O ,r   r   c                   *     e Zd Zdef fdZd Z xZS )FissionTransformerEmbeddingPhirH   c                 (    t         |   |dg       y )N6torch_nn_modules_sparse_Embedding_model_embed_tokens_1rL   r   rH   rO   s     r   rN   z'FissionTransformerEmbeddingPhi.__init__  s     	!Y Z[r   c                    t         j                  d|j                         t        |j                        dk(  sJ t        |j
                        dk(  sJ |j                  d   }|j
                  d   }| j                  |d      }|||g}t        j                  d||g|gd      g}| j                  |d|       | j                  j                  |       d	| _        y )
NOptimizing %s...r1   r   r   zembed_tokens.weightGatherEmbedding_Gatherr   T)loggerinforX   rD   rb   rg   re   r   r   r   nodes_to_remover   prune_graph)	r   rd   input_name_to_nodesoutput_name_to_noderb   rg   	embeddingr   r   s	            r   fusez#FissionTransformerEmbeddingPhi.fuse  s    &		24::!###4;;1$$$

1Q**41FG	#(&)"< !5)'	
 	**>1>UV##D)r   r   r    r!   r   rN   rA  r   r   s   @r   r2  r2    s    \\ r   r2  c                   *     e Zd Zdef fdZd Z xZS )FissionTransformerLayerNormPhirH   c                 (    t         |   |dg       y )N@torch_nn_modules_normalization_LayerNorm_model_final_layernorm_1rL   r5  s     r   rN   z'FissionTransformerLayerNormPhi.__init__  s     	!c der   c                 F   t         j                  d|j                         t        |j                        dk(  sJ t        |j
                        dk(  sJ |j                  d   }|j
                  d   }| j                  |d      }| j                  |d      }||||g}g }	|	j                  | j                  |||g|gd             | j                  |	d|       | j                  |g d	       | j                  |g d	       | j                  j                  |       d
| _        y )Nr7  r&   r   r   zfinal_layernorm.weightzfinal_layernorm.biasFinalc   r  r  r   T)r:  r;  rX   rD   rb   rg   re   r   r   r   r   r<  r   r=  )
r   rd   r>  r?  rb   rg   	ln_weightln_biasr   r   s
             r   rA  z#FissionTransformerLayerNormPhi.fuse  s   &		24::!###4;;1$$$

1Q**41IJ	((/EF#(&)W"EdnneY-H6(T[\]**>2?VW$$U,TU$$V-UV##D)r   rB  r   s   @r   rD  rD    s    ff r   rD  c                   *     e Zd Zdef fdZd Z xZS )!FissionTransformerCausalLMHeadPhirH   c                 (    t         |   |dg       y )N(torch_nn_modules_linear_Linear_lm_head_1rL   r5  s     r   rN   z*FissionTransformerCausalLMHeadPhi.__init__  s     	!K LMr   c                 v   t         j                  d|j                         t        |j                        dk(  sJ t        |j
                        dk(  sJ |j                  d   }|j
                  d   }| j                  | j                  |d      t                     }| j                  |d      }||||g}g }	|	j                  | j                  |||g|gd             | j                  |	d	|       | j                  |g d
       | j                  |g d       | j                  j                  |       d| _        y )Nr7  r   r   r1   r   zlm_head.weightzlm_head.biasLMHead_rI  rJ  )r  r  i   T)r:  r;  rX   rD   rb   rg   r   re   r   r   r   r   r   r<  r   r=  )
r   rd   r>  r?  rb   rg   	fc_weightfc_biasr   r   s
             r   rA  z&FissionTransformerCausalLMHeadPhi.fuse  s   &		24::!###4;;1$$$

1Q,,T-C-CDJZ-[]m]op	((~>#(&)W"Edii	7(CfXyYZ**>2?VW$$U,TU$$V-MN##D)r   rB  r   s   @r   rN  rN    s    NN r   rN  c                   @     e Zd Zdedef fdZd Zd Zd Zd Z	 xZ
S )FissionTransformerBlockPhirH   r   c                     || _         d}i | _        g }t        |      D ](  }d| d}|j                  |       || j                  |<   * t        |   ||       y )NrB   *modeling_phi_PhiDecoderLayer_model_layers_r   )r   func_to_layer_idr   r   rM   rN   )r   rH   r   max_num_layersrI   layerr   rO   s          r   rN   z#FissionTransformerBlockPhi.__init__5  sj    
 # ">*EDUG2NI  +/4D!!), +
 	.r   c                 4    | j                   |j                     S rR   )rY  r  )r   rd   s     r   get_layer_idz'FissionTransformerBlockPhi.get_layer_idE  s    $$T\\22r   c                    t        j                  ddgdgdt        j                        t        j                  dddgdgd	
      t        j                  dddgdgd
      t        j                  ddgdgdt        j                        t        j                  ddgdgd
      t        j                  dddgdgdd      t        j                  ddgdgdt        j                        g}|S )NCastr  
mask_int64Cast_gqa_aux_0)r   r   rX   to	ReduceSumonemask_row_sumsReduceSum_gqa_auxr   Subseqlens_k_int64Sub_gqa_aux	seqlens_kCast_gqa_aux_1Shape
mask_shapeShape_gqa_aux_0r8  total_seq_len_int64Gather_gqa_aux_0r   )r   r   rX   axistotal_sequence_lengthCast_gqa_aux_2)r   r   r   r   r  )r   gqa_aux_nodess     r   get_gqa_aux_nodesz,FissionTransformerBlockPhi.get_gqa_aux_nodesH  s   ()%%$$ $e,()(	 '/*+"	 )*$%$$ Wl^l^Zkl$e,./' -.01%$$G*
V r   c	                 n   | j                   j                  |      }	| j                   j                  |      }
| j                   j                  |      }t        j                  t	        j
                  |	      d      }t        j                  t	        j
                  |
      d      }t        j                  t	        j
                  |      d      }t        j                  |||fd      }| j                   j                  |      }| j                   j                  |      }| j                   j                  |      }t	        j
                  |      }t	        j
                  |      }t	        j
                  |      }t        j                  |||fd      }|j                  d   }t        j                  |t        j                  ||dz  g|j                         j                         d      }| j                   j                  || j                         t        j                  |t        j                  |dz  g|j                         j                         d      }| j                   j                  || j                         | j!                  |j"                         | j!                  |j"                         ||fS )Nr   r   )rq  r   r&   Trk   )rH   rp   r   r   r
   rq   stackrE   r   rr   r   rs   rt   ru   rv   rw   r   rX   )r   q_wk_wv_wq_bk_bv_bweight_name	bias_nameq_weightk_weightv_weightqwkwvw
qkv_weightq_biask_biasv_biasqbkbvbqkv_biasr   weightbiass                             r   pack_qkv_gemmz(FissionTransformerBlockPhi.pack_qkv_gemmv  s   ::--c2::--c2::--c2\\+..x8&A\\+..x8&A\\+..x8&AXXr2rl3
++C0++C0++C0!!&)!!&)!!&)88RRLq1 &&q)##!''{Q/##%--/
 	

""64+?+?@!!!''/"!!#++-
 	

""4)=)=>  -  +I%%r   c                    t         j                  d|j                         t         j                  d| j                          | j	                  |      }|j
                  d   }| j                  |d      }| j                  |d      }|j                  d   }| j                  |d      }	| j                  |d      }
| j                  |d	      }| j                  |d
      }d\  }}}}}}d\  }}d\  }}| j                  t        j                  k7  r	| j                  | j                  |d      t                     }| j                  | j                  |d      t                     }| j                  | j                  |d      t                     }| j                  |d      }| j                  |d      }| j                  |d      }| j                  | j                  |d      t                     }| j                  | j                  |d      t                     }n| j                  | j                  |d      | j                  |d      | j                  |d      | j                  |d      | j                  |d      | j                  |d      | j                  |d      | j                  |d            \  }}| j                  | j                  |d      t                     }| j                  |d      }| j                  | j                  |d      t                     }| j                  | j                  |d      t                     }| j                  |d      }| j                  |d      }g }|j!                  |||g       |j!                  ||	|
g       |j!                  ||g       | j                  t        j                  k7  r|j!                  ||||||||g       n|j!                  ||g       |j!                  ||||||g       |j!                  g d       g }|j!                  | j#                  |||gdg             |j!                  | j%                  d||gd gd!             |j!                  | j%                  d||gd"gd#             |j!                  | j'                  d"gd$g             |j!                  | j%                  d$||gd%gd&             |j!                  | j)                  d d%gd'gd(             |j!                  | j)                  |d'g|gd)             | j                  t        j                  k7  rg|j!                  | j%                  d||gd*gd+             |j!                  | j%                  d||gd,gd-             |j!                  | j%                  d||gd.gd/             | j                  t        j*                  k(  rd0nd1}|j!                  | j-                  d*|||gd2gd+             |j!                  | j-                  d,|||gd3gd-             | j                  t        j.                  k(  r.|j!                  | j1                  d2d3d.d4d5d4||gd|	|
g             n| j                  t        j2                  k(  r|j!                  | j5                  d2d3d.||d6d7gd|	|
g             |dk(  r2| j7                         } | D ]@  }!| j8                  j;                  |!       | j<                  | j>                  |!j                  <   B | j@                  jC                  tE        jF                  tI        jJ                  d8gd9:      d;<      | j<                         n| j                  t        j*                  k(  rn|j!                  | jM                  d2d3d.||d=gdg             nEd>| }"d?| }#|j!                  |"|#g       |j!                  | jO                  d||d5|"gd|#g             | jQ                  |||       | jS                  |g d@       | jS                  |g d@       | jT                  j;                  |       dA| _+        y )BNr7  zAttentionOpType: r   r  r  r5   r  present_valuezinput_layernorm.weightzinput_layernorm.bias)NNNNNN)NNzself_attn.q_proj.weightzself_attn.k_proj.weightzself_attn.v_proj.weightzself_attn.q_proj.biaszself_attn.k_proj.biaszself_attn.v_proj.biaszrotary_emb.cos_cachedzrotary_emb.sin_cachedattn_qkv_weightattn_qkv_biaszself_attn.dense.weightzself_attn.dense.biaszmlp.fc1.weightzmlp.fc2.weightzmlp.fc1.biaszmlp.fc2.bias)r  r	  rj  rr  r  r
  ln_outattn_outattn_add_outOutProj_fc1_outFC1_gelu_outfc2_outFC2_residual_1_out
Residual_1
Residual_2queryQ_keyK_valueV_r
  r	  	query_rotkey_rotr   r  rj  rr  r   int64)dtyperd  )rX   r  past_present_rJ  T),r:  r;  rX   rP   r]  rb   re   rg   rh   r   r   r   r   r@   r  rY   r   r   r   r   r   r   r   r   r   r   r   ru  r   r   rw   r   rH   rv   r   
from_arrayr   arrayr   r   r   r   r<  r=  )$r   rd   r>  r?  rW   i_hidden_statesi_key_cachei_value_cacheo_hidden_stateso_key_cacheo_value_cacherK  rL  attn_q_weightattn_q_biasattn_k_weightattn_k_biasattn_v_weightattn_v_biasr  r  	cos_cache	sin_cacheattn_out_weightattn_out_biasmlp_fc1_weightmlp_fc2_weightmlp_fc1_biasmlp_fc2_biasr   r   pos_ids_namert  r   	past_namepresent_names$                                       r   rA  zFissionTransformerBlockPhi.fuse  sA    	&		2'(9(9':;<$$T***Q-,,T:>..t\B++b/--dMB//oF**41IJ	((/EF^
Z{M;{ *4&)	9 9 99 44&&t-FGIYI[M !44&&t-FGIYI[M !44&&t-FGIYI[M 007NOK007NOK007NOK00&&t-DEGZG\I 00&&t-DEGZG\I .2-?-?&&t-FG&&t-FG&&t-FG&&t-DE&&t-DE&&t-DEx):;x9	.*O] 22""4)ABDTDV
 ..t5KL11$2H2HO_2`brbtu11$2H2HO_2`brbtu--dNC--dNC"$&&m'TU&&m'TU&&	7';< 9 99#**!!!	 $**O]+KL&&m^\>[gh	
 	 &&n	
 dnnoy'-RU]T^_`dii_m(TWeVfhrstdii><(PS\R]_efgdmmYK*FGdii^\(RU^T_aghidhh	'BEUDVXdefdhh9I'J_L]_klm 9 99!!$))X}k,RU\T]_c"de!!$))X}k,RUZT[]a"bc!!$))X}k,RU\T]_c"de-1->->/B`B`-`>flL!!$++wiQZ.[^i]jlp"qr!!$++ulIy.Y\e[fhl"mn  O$F$FF%%HH$i">NPRT_ano#[-@ ""o&I&II%%HH'%#')'3 $[-@ q=$($:$:$<M$1))00:FJFZFZ44X]]C %2 JJ..$//!G0LSXY[_[o[o ""o&D&DD%%OO$i+}Vfg#  z*I%hZ0L#**I|+DE!!?OQZ[^hjv]w 	**>8E\]$$_6^_$$_6^_##D)r   )r   r    r!   r   r   rN   r]  ru  r  rA  r   r   s   @r   rV  rV  4  s1    // / 3,\(&Tk r   rV  c                   V     e Zd Zdededef fdZddedz  def fdZd	 Z	dd
Z
 xZS )PhiOnnxModelrH   r   r   c                     t         |   |       t        | j                  ||      | _        t        | |      | _        t        |       | _        t        |       | _
        t        |       | _        y rR   )rM   rN   r   rH   phi2_preprocessorrV  fission_transformer_blockrN  fission_causal_lm_headrD  fission_transformer_layernormr2  fission_transformer_embeddingr   s       r   rN   zPhiOnnxModel.__init__O  s[    !1$**i!U)CD))T&&G&M#-KD-Q*-KD-Q*r   Noptionsadd_dynamic_axesc                 &   |J |j                   }| j                  j                  |       | j                  j	                  |       | j                  j                          | j                  j                          | j                  j                          | j                  j                          t        | )          t        |       | _        t        |       | _        | j                  j                          | j                  j                          y rR   )attention_op_typer  rS   r  r/  applyr  r  r  rM   r=  r	   fuse_slnr   fuse_bias_sln)r   r  r  rP   rO   s       r   optimizezPhiOnnxModel.optimizeW  s    """00&&<<\J..|<&&,,.**002##))+**002 5T:=dC  "r   c                     i }g d}|D ]!  }| j                  |      }t        |      ||<   # t        j                  d|        |S )z8
        Returns node count of fused operators.
        )	r   r   r   r   GeluBiasGelur   r   SkipLayerNormalizationzOptimized operators: )get_nodes_by_op_typerD   r:  r;  )r   op_countopsopnodess        r   get_fused_operator_statisticsz*PhiOnnxModel.get_fused_operator_statisticsl  sT     

 B--b1Eu:HRL  	+H:67r   c                    | j                         dt        ffd} |d       |d      z    |d      z    |d      z   } |d       |d      z    |d	      z   } |d
       |d      z   }|dkD  xr ||k(  xr ||k\  }|dk(  rt        j                  d       |dk(  rt        j                  d       |dk(  rt        j	                  d       |S )zA
        Returns True when the model is fully optimized.
        op_namec                 .    j                  |       xs dS )Nr   )get)r  fused_op_counts    r   r  z1PhiOnnxModel.is_fully_optimized.<locals>.op_count  s    !%%g.3!3r   r   r   r   r   r  r  r   r   r  r   zLayer Normalization not fusedzGelu (or FastGelu) not fusedz+Attention (or MultiHeadAttention) not fused)r  rV   r:  debugwarning)r   r  r  r   gelu
layer_norm
is_perfects    `     r   is_fully_optimizedzPhiOnnxModel.is_fully_optimized  s     !!??AN	4c 	4 [!+,-,-. '() 	 (:"66*9MM23h?W6XX
!mZ)t*;Z*PYBY
?LL8919LL78>NNHIr   )NFrR   )r   r    r!   r   r   rN   r   boolr  r  r  r   r   s   @r   r  r  N  sC    Rj RS Rs R# 4 #t #*.r   r  ))loggingr   numpyr   dynamo_onnx_helperr   fusion_baser   fusion_optionsr   r   fusion_skiplayernormr   r	   fusion_utilsr
   onnxr   r   r   r   r   
onnx_modelr   r   r:  r   r$   r,   r/   r3   r:   r=   r@   rG   r   r2  rD  rN  rV  r  r"   r   r   <module>r     s      /  9 _ $ I I  	8	' '
: :
: :
: :
    Vf Vrp,' p,f W  D W  B   BW  W tS9 Sr   