
    Bvh                         d dl Z d dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlmZ  e j*                  e      Z G d	 d
e      Z G d de      Z G d de      Zy)    N)AttentionMaskFusionAttention)Fusion)"FusionSimplifiedLayerNormalization&FusionSkipSimplifiedLayerNormalization)NumpyHelper)	NodeProtoTensorProtohelper)	OnnxModel)BertOnnxModelc                        e Zd ZdZdedededef fdZdedz  d	e	d
e	de	dedededededz  de
de	dz  fdZdededededz  dedz  dedz  dedz  dededz  dedz  dedede	dz  fdZd Zd Zd Z xZS )FusionT5Attentionz=
    Fuse T5 Attention subgraph into one Attention node.
    modelhidden_size	num_headsattention_maskc                 @    t         |   ||||ddg       d| _        y )NFSoftmax)use_multi_head_attentionsearch_op_types   )super__init__	static_kv)selfr   r   r   r   	__class__s        P/RAG/venv/lib/python3.12/site-packages/onnxruntime/transformers/onnx_model_t5.pyr   zFusionT5Attention.__init__   s4     	%*&K 	 	
     
mask_indexNq_matmulk_matmulv_matmulinputoutput	attn_biasscalereturnc                 2   |dkD  sJ |dkD  r$||z  dk7  rt         j                  d| d|        y| j                  j                  |j                  d         }| j                  j                  |j                  d         }| j                  j                  |j                  d         }|||&||n||n|}t        |j                  d    d       yt        j                  |      }t        j                  |      }t        j                  |      }|j                  |j                  k(  sJ |j                  d   }|j                  d   }|j                  d   }||cxk(  r|k(  sJ  J |dkD  r!||k7  rt         j                  d| d| d	       t        j                  |j                  dd       }t        j                  |||fd
      }d|z  }| j                  j                  d      }t        j                  |dz   t         j"                  ||g|j%                         d      }| j                  j'                  || j(                         ||dz   dg}|r|j+                  |       n|j+                  d       |	r"|j+                  d       |j+                  |	       |r#|d   dk(  r|j-                          |r	|d   dk(  rt        j.                  d||g|      }d|_        |j2                  j5                  t        j6                  d|      g       |
0|j2                  j5                  t        j6                  d|
      g       | j8                  C|j2                  j5                  t        j6                  dt;        | j8                              g       |S )a  Create an Attention node.
        Args:
            mask_index (str): mask input
            q_matmul (NodeProto): MatMul node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            input (str): input name
            output (str): output name
        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   input hidden size # is not a multiple of num of heads Nr   zl is not an initializer. Please set do_constant_folding=True in torch.onnx.export to unblock attention fusionzInput hidden size (z3) is not same as weight matrix dimension of q,k,v (z:). Please provide a correct input hidden size or pass in 0)axis   	Attention_qkv_weightTname	data_typedimsvalsraw inputsoutputsr1   com.microsoftr   r'   mask_filter_value)loggerdebugr   get_initializerr$   printr   to_arrayshapewarningnpprodstackcreate_node_namer   make_tensorr
   FLOATtobytesadd_initializerthis_graph_nameappendpop	make_nodedomain	attributeextendmake_attributer<   float)r   r    r!   r"   r#   r   r   r$   r%   r&   r'   q_weightk_weightv_weightmatmulqwkwvw
qw_in_size
kw_in_size
vw_in_sizeqw_out_size
qkv_weightqkv_weight_dimattention_node_nameweightattention_inputsattention_nodes                               r   make_attention_nodez%FusionT5Attention.make_attention_node)   s   4 1}}?i 7A=LL-k]:]^g]hij::--hnnQ.?@::--hnnQ.?@::--hnnQ.?@x/83C!)!1X8CSxYaF<<?# $g g !!(+!!(+!!(+ xx288###XXa[
XXa[
XXa[
Z5:55555?{j8NN%k]2efpeq rJ J
 ggbhhqrl+XXr2rl3
["jj99+F##$}4!''n-##%
 	

""64+?+?@ -/

 ##J/##B'##B'##I.#3B#72#=  " #3B#72#=  ))#H$	
 !0  '')>)>{I)V(WX$$++V-B-B7E-R,ST!!-$$++V-B-BCVX]^b^t^tXu-v,wxr   querykeyvaluepast_key
past_valuepresent_keypresent_valuec                     |dkD  r|dkD  r|r|r|sJ ||z  dk7  rt         j                  d| d|        y | j                  j                  d      }|||dg}|r|j	                  |       n|j	                  d       |r|j	                  |       n|j	                  d       |r&|sJ |j	                  |       |j	                  |       |r#|d   dk(  r|j                          |r	|d   dk(  r|g}|	r&|
sJ |j	                  |	       |j	                  |
       t        d|d|d	|       t        j                  d|||
      }d|_	        |j                  j                  t        j                  d|      g       |j                  j                  t        j                  dd      g       | j                  C|j                  j                  t        j                  dt        | j                              g       | j                  d       |S )Nr   r*   r+   MultiHeadAttentionr6   r7   zattention_inputs=z, attention_outputs=z, attention_node_name=r8   r;   r   r'         ?r<   )r=   r>   r   rG   rM   rN   r@   r   rO   rP   rQ   rR   rS   r<   rT   increase_counter)r   rg   rh   ri   r    r&   rj   rk   r%   rl   rm   r   r   rb   rd   attention_outputsre   s                    r   create_mha_nodez!FusionT5Attention.create_mha_node   s    1}qUsuLL)#)LL-k]:]^g]hij"jj99:NO	
 ##J/##B'##I.##B':##H-##J/#3B#72#=  " #3B#72#= $H  =$$[1$$]3"!##8&7%99P<O;QRS)) #%$	
 !0  '')>)>{I)V(WX  '')>)>w)L(MN!!-$$++V-B-BCVX]^b^t^tXu-v,wx23r   c                 R    | j                  |||      ry | j                  |||       y N)fuse_t5_encoderfuse_t5_decoder)r   nodeinput_name_to_nodesoutput_name_to_nodes       r   fusezFusionT5Attention.fuse   s-    &9;NOT#68KLr   c                     |j                   dk(  sJ | j                  j                  |g dg d|      }|y|\  }}}| j                  j                  |g dg d|      }|y|d   }	| j                  j                  |g d	g d
|      }
|
y|
\  }}}| j                  j                  |g dg d|      }|y|\  }}}| j                  j                  |g dg d|      }|d u }||d   }nN| j                  j                  |g dg d|      }|%| j                  j                  |g dg d|      }|y|d   }| j                  j	                  |      \  }}|y|dk7  rt        |      | _        | j                  j                  |d   g dg d|       | j                  j                  |d   g dg d|      } {t         fd| j                  j                         j                  D              rE|C d   j                  d   |d   j                  d   k(  rt         d   j                        dk(  rd}n+| j                  j                  |d   j                  d         }d }| j                  j                  |ddgddg      }|#|r!| j                  j                  |g dg d
      }|y|d   j                  d   }| j                  j                  |g d	g d
      }|y|\  }}}| j                  j                  |g d	g d      }|y|\  }}}|j                  d   |	j                  d   k7  ry| j                  |      \  }}| j                  |||||||	j                  d   |j                  d   |d 
      }|y| j                   j#                  |       | j$                  | j&                  |j(                  <   | j*                  j#                  |       d!| _        y!)"Nr   MatMul	TransposeReshaper   r   r   r   edgesry   FConcat	UnsqueezeGatherShaper   r   r   r   r7   r   r   r~   r   r   r   r   Addr~   r   r   r   r   MulSubCastr   r   r   r   r   r   r   r   r   )r   Slicer   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r      )ConstantOfShaper   r   r   r   r   r   r   r   r   )r   r   r   r   r   c              3   \   K   | ]#  }|j                   d    j                  d   k(   % yw)r7   r   N)r1   r$   ).0r$   mask_nodes_2s     r   	<genexpr>z4FusionT5Attention.fuse_t5_encoder.<locals>.<genexpr>F  s-     bIaEJJ,r"2"8"8";;Ias   ),r   r6   r   RelativePositionBias)r   r   r   rp   )r   r   r$   r%   r&   r'   T)op_typer   match_child_pathmatch_parent_pathget_constant_inputrT   r<   anygraphr$   lenr   process_maskr%   get_num_heads_and_hidden_sizerf   nodes_to_addrM   rL   node_name_to_graph_namer1   nodes_to_removeprune_graph)!r   softmax_nodery   rz   	qkv_nodes
matmul_qkv_reshape_qkvqkv_shape_nodesinput_shape_nodev_nodes	reshape_vmatmul_vqk_nodesadd_qk	matmul_qk
mask_nodesis_pattern_for_one_graph_inputmul_nodemul_valmask_nodes_3r    res_pos_bias	rpb_nodesk_nodesmatmul_kq_nodes	reshape_qmatmul_qq_num_headsq_hidden_sizenew_noder   s!                                   @r   rv   z!FusionT5Attention.fuse_t5_encoder   s\   ##y000JJ//.* 3	 0 
	 %."
A{**666	
 "*2.**...	
 ?!(9h :://(	
 '69ZZ11C	

 *4t);&!!!}H 55H"#	J !!ZZ99T)'	
 % !!}HZZ228<
7?f%*7^D" zz33rNI	
 zz33rNI	
 $bIYIYI[IaIabb(R &&q)\"-=-C-CA-FFLO))*a/J,,99*R.:N:Nq:QRJJJ00*+F
	
 !?

448I
  }++A.**...

 ? 1h **...

 ?!(9h >>! 0 6 6q 99%)%G%G	%R"]++!%"((+%%a(" , 
   *6:6J6J$$X]]3##K0r   c                    |j                   dk(  sJ | j                  j                  |g dg d|      }|y |\  }}}| j                  j                  |g dg d      }|y |d   }	d }
d }d }| j                  j                  |g dg d	      }|| j                  j                  |g d
g d      }|X|\  }}}|j                  d   }
|j
                  d   }d|vry |j                  d   |	j                  d   k7  rd| _        nsd| _        nk|j                  d   }||v ry d|vry d| _        nJ|\  }}}}|j                  d   }||v ry d|vry |j
                  d   }d|vry |j                  d   }
d| _        | j                  j                  |g dg d      }|y |\  }}}d }d }| j                  dk(  r| j                  j                  |g dg d      }||d   }n)| j                  j                  |g dg d      }|y |d   }| j                  j                  |      \  }}|dk7  r|| _        | j                  j                  |d   j                  d         }nY| j                  j                  |ddgddgfddgddgfg|      \  }}}|dk  rt        j                  d       y |j                  d   }d }d }d }| j                  dk(  r| j                  j                  |g d
g d      }|o|\  } }!}|!j                  d   }||!j
                  d      }"|"D ]:  }#| j                  j                  |#j
                  d         }$|$.|$j                  } n |y d|vry | j                  j                  |dgdg      }|y |d   } | j                  d   }||v ry d |vry | j                  j                  |g d!g d"fg d#g d$fg|      \  }%}}d }&d }"||d   |d%   }!}'|!j                  d   }|%dk(  r"||'j                  d      }&|&j                  d   }n|'j                  d   }||v ry d&|vry |%dk(  rR||'j
                  d      }"|"D ]:  }#| j                  j                  |#j
                  d         }$|$.|$j                  } n n|'j
                  d   }|y d'|vry | j                  j                  |g d
g d      }|y |\  }}!}|!j                  d   }||!j
                  d      }"|"D ]:  }#| j                  j                  |#j
                  d         }$|$.|$j                  } n |y d'|vry | j                  j                  |g d
g d      }(|(y |(\  })}*}+|+j                  d   |	j                  d   k7  ry | j!                  |*      \  },}-| j                  dk(  r
||}|}
d }d }|r|
r
|,dkD  r|-dkD  sy | j#                  |+j
                  d   ||
|||||j
                  d   |||,|-(      }.|.r| j$                  j'                  |.       | j(                  | j*                  |.j                  <   |s|rq||fD ]j  }/|/r| j                  j                  |/      st-        d)|/d*        y |/|v sJ |/d+z   ||/   j
                  d<   | j                  j/                  |/|/d+z          l | j0                  j'                  |       d,| _        y y )-Nr   r}   r   r   r   r   r7   )r   r   r   r~   )r   r   r   r   r   r   r   rm   r   past_value_crosspast_value_selfpresent_value_selfr   r   r   r   r   r   r   r   r   r   r   zGSkip MultiHeadAttention fusion since attention bias pattern not matchedpresent_key_crossr   past_key_cross)r   r   r   r~   )r   r   r   r   )r   r   r   r   r~   )r   r   r   r   r   past_key_selfpresent_key_self)rg   rh   ri   r    r&   rj   rk   r%   rl   rm   r   r   zgraph_output=z does not exist in graph output_copyF)r   r   r   r   r$   r%   r   r   r<   r   r   match_parent_pathsr=   r>   find_graph_outputr1   r   rs   r   rM   rL   r   r@   replace_input_of_all_nodesr   r   )0r   r   ry   rz   r   r   _transpose_qkvr   r   r   ri   rk   rm   r   transpose_vr   r   concat_vr   r   r   r   r    r   r   r   r   matched_path_indexrh   rj   rl   r   transpose_k	reshape_kpresent_key_transpose_nodespresent_key_transpose_nodepresent_key_candidateidxpast_key_transpose_nodeconcat_kr   transpose_qr   r   r   r   r   graph_outputs0                                                   r   rw   z!FusionT5Attention.fuse_t5_decoder  s   ##y000JJ//.* 3	 0 
	 2;/
NK**666

 "*2.
**..8

 ?jj222G
 "3:0Y!* + 2 21 5"-7>>!$(8(>(>q(AA%&DN%&DN'--a0
!44%Z7!"(/%HaA!*J00 
2$OOA.M#=8OOA&EDN:://(

 '69
>>Q55G"J
 %%a=!ZZ99T)

 %%a=66x@JAw& )0&,,99*R.:N:Nq:QRJ'+zz'D'DW%1v.34q!f= $($1 "A%fg!<<?L>>Qjj222G
 ",3)Yooa(.A)BRBRSTBU.V+2M.,0JJ,H,HIcIjIjklIm,n),8&;&@&@	 3N
 &&k9**66 MC
 ?%aj&,,Q/22#83"jj;;A<PNP_` $OC! '+#*.'"&-aj'"+)ooa(!8.A(..QRBS.T+6<<Q?H'~~a0H22"(2!82EhooVWFX2Y/6Q204

0L0LMgMnMnopMq0r-0<*?*D*DK!	 7R #+//!"4K&%[8**666
 ?")9aooa(.A)BRBRSTBU.V+2M.,0JJ,H,HIcIjIjklIm,n),8&;&@&@	 3N
 &%[8**...

 ?+2(Y>>! 0 6 6q 99%)%G%G	%R"]>>Q8#7CEHJ+/ma6G''//!$!"!%%a(#'!% ( 
 $$X.:>:N:ND((7 m%0-$@L(TZZ-I-I,-W/NOP'+>>>>BNQXBX'5<<Q?JJ99,W^H^_ %A   ''4$D r   )__name__
__module____qualname____doc__r   intr   r   strr	   rT   rf   rs   r{   rv   rw   __classcell__r   s   @r   r   r      s     	
 &"o$Jo o 	o
 o o o o o :o o 
T	obDD D 	D
 $JD :D *D $JD D 4ZD TzD D D 
T	DLMiVI%r   r   c                   *     e Zd Zdef fdZd Z xZS )FusionRelativePositionBiasBlockr   c                 *    t         |   |ddg       y )Nr   r   )r   r   )r   r   r   s     r   r   z(FusionRelativePositionBiasBlock.__init__  s     6Dr   c                    | j                   j                  |g dg d|      }|%| j                   j                  |g dg d|      }|y |d   }|d   }|d   }|d   }|| j                  v ry | j                   j                  |g d	g d
|      }	|	y | j                   j                  |	d   j                  d         }
|	d   }| j                   j                  |g dg d|      }d}|'| j                   j                  |g dg d|      }d}|y |d   }t        t        j                  t        j                  |
      d|rdndz  z              }|dk7  rt        j                  d| d       | j                   j                  dd|rdndz         }| j                   j                  |j                  d         }|y t        j                  |      }t        j                  |      }t!        j"                  |dz   t$        j&                  t        j(                  |      d   t        j(                  |      d   g|j+                         d      }| j                   j-                  || j.                         |j0                  |j                  d   |j                  d   g}|d z   }||j                  d<   t!        j2                  d||g|!      }d"|_        |j6                  j9                  t!        j:                  d#|      g       |j6                  j9                  t!        j:                  d$|      g       | j.                  | j<                  |j0                  <   | j>                  jA                  |       d| _!        y )%N)r   r   r   r   r   r   Where)r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r      r7   r   r-   )	Minr   r   r   r   r   DivLogr   )	r   r   r   r   r   r   r   r   r   r   )r   Negr   r   r   r   r   Range)r   r   r   r   r   r   r   r   F)r   Absr   r   r   r   T          zmax_distance is z], which is different from the default value 128. Please double check the model configuration.r   RelPosBias_encoderdecodername_prefixr   _bias_table_weightr0   _rel_pos_biasr8   r;   max_distanceis_bidirectional)"r   r   r   get_constant_valuer$   r   rD   roundexpr=   rC   rG   r?   r   rA   	transposer   rH   r
   rI   rB   rJ   rK   rL   r1   rO   rP   rQ   rR   rS   r   r   rM   r   )r   rx   ry   rz   compute_bias_nodesgatherwhereslice	unsqueezecompute_buckets_nodeslog_maxdivrange_nodesr  
range_noder  	node_nametable_weight_itable_weighttable_weight_t
bias_tabler9   bias_outputrpb_nodes                           r   r{   z$FusionRelativePositionBiasBlock.fuse  ss   !ZZ99P!	
 %!%!=!=[(#	" ")#A&"2&"1%&q)	 ,,, $

 < <Z'	!
 !( **//0Eb0I0O0OPQ0RS#B'jj22[$	
 !**66A?TgK  $" _
 288BFF7OrCSaYZ7[$\]^3NN"<. 1? ?
 JJ//"N^dm0n 0 
	 33FLLOD!"++N;l3''11!''((<(+RXXl-CA-FG'')

 	

"":t/C/CDP //:#3#3A#6
8H8H8KL  /1$A##" M	
 *!!6#8#8#V"WX!!6#8#89KM]#^"_`6:6J6J$$X]]3  *r   )r   r   r   r   r   r{   r   r   s   @r   r   r     s    Ei ER r   r   c                   \     e Zd Zddedef fdZd Zd ZddZd Zd Z	d	 Z
d
 Zd Z xZS )T5OnnxModelr   r   c                    t         |   |||       t        |       | _        t	        | j
                  j                  j                        dk(  r!ddlm	} |j                  | j                  _        t        | | j                  | j                  | j                        | _        t!        |       | _        t%        |       | _        t)        |       | _        y )Nr   r   )AttentionMaskFormat)r   r   r   r   r   r   r   r$   fusion_optionsr  NoMaskmask_formatr   r   r   attention_fusionr   layer_norm_fusionr   skip_layer_norm_fusionr   
rpb_fusion)r   r   r   r   r  r   s        r   r   zT5OnnxModel.__init__7  s    	;7+D1 tzz%%&!+:.A.H.HD+ 1$8H8H$..Z^ZmZm n!CD!I&LT&R#9$?r   c                 8    | j                   j                          y ru   )r"  applyr   s    r   fuse_attentionzT5OnnxModel.fuse_attentionF  s    ##%r   c                 8    | j                   j                          y ru   )r#  r'  r(  s    r   fuse_layer_normzT5OnnxModel.fuse_layer_normI  s    $$&r   c                 8    | j                   j                          y ru   )r$  r'  )r   shape_infers     r   fuse_skip_layer_normz T5OnnxModel.fuse_skip_layer_normL  s    ##))+r   c           
         | j                         D ]m  }|j                  dk(  s| j                  |g dg d      }|@| j                  j                  j
                  D cg c]  }|j                   }}|d   j
                  d   |v r| j                  dd      }t        j                  d|d   j
                  d   g|d	z   g|
      }t        j                  dt        j                  dgdg      }| j                  |       t        j                  d|d	z   dg|dz   g| j                  dd      d      }| j                  |       | j                  |       |dz   |j
                  d<   |dz   |j
                  d<    y  y c c}w )Nr   )
r   r   r   r   r   r   r   r   SimplifiedLayerNormalizationr   )
r   r   r   r   r   r   r   r   r   r   r7   r   r   Added_Shape_r   _Outputr8   Constant_Index_1)r1   r2   r3   r4   r   _Output_Gather_1Added_Gather_r   )r9   r:   r1   r,   r   )nodesr   r   r   r   r$   r1   rG   r   rO   rH   r
   INT64rK   add_node)	r   rx   r6  r$   graph_input_namesr  
shape_node	indices_1r  s	            r   adjust_rel_pos_bis_length_inputz+T5OnnxModel.adjust_rel_pos_bis_length_inputO  s    JJLD||55.. 3" $AEAQAQAWAW(XAWAW%(XRyq)->>$($9$9'~$9$^	%+%5%5#$)"IOOA$6#7%.%:$;!*	&
 %+$6$6!3&1&7&7"#"#	%	 ,,Y7!'!1!1$$-	$9;M#N%.1C%C$D!%!6!6x_!6!]!"" j1f-(14F(F

1(14F(F

1g !( )Ys   !Fc                 ^   g }| j                         D ]  }|j                  dk(  s| j                  |g dg d      }|-| j                  |dgdg      }|E|d   }|j                  d   |j                  d<   |j	                  |       |j                  |       | j                  |        y )Nr   )r   r   r   r   r   LessOrEqualTiler   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r6  r   r   r%   rR   rM   remove_nodesr   r   rx   extended_mask_nodesr   r  s         r   !remove_extended_mask_decoder_initz-T5OnnxModel.remove_extended_mask_decoder_init  s    JJLD||u$&*&<&< 6'#" '. 224:P9QTUSVW	$$Q<%)[[^"&&':;&&t,!!/2? !r   c                 b   g }| j                         D ]  }|j                  dk(  s| j                  |g dg d      }|-| j                  |ddgddg      }|G|d   }|j                  d   |j                  d<   |j	                  |       |j                  |       | j                  |        y )Nr   )r   r   r   r   r   r   r>  r?  r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r@  rB  s         r   remove_extended_mask_decoderz(T5OnnxModel.remove_extended_mask_decoder  s    JJLD||u$&*&<&< 9!'#$ '. 224'CY9Z]^`a\bc	$$Q<%)[[^"&&':;&&t,!!/2A !r   c                 X    | j                          | j                  j                          y ru   )adjust_reshape_and_expandr%  r'  r(  s    r   
preprocesszT5OnnxModel.preprocess  s    &&(r   c                     | j                          | j                          | j                          | j                          y ru   )rD  rF  r<  r   r(  s    r   postprocesszT5OnnxModel.postprocess  s2    ..0))+,,.r   r   )T)r   r   r   r   r   r)  r+  r.  r<  rD  rF  rI  rK  r   r   s   @r   r  r  6  sD    @ @s @&',6r!3F"3H r   r  )loggingnumpyrD   fusion_attentionr   r   fusion_baser   fusion_simplified_layernormr   r   fusion_utilsr   onnxr	   r
   r   
onnx_modelr   onnx_model_bertr   	getLoggerr   r=   r   r   r   r   r   <module>rW     sc   
   ;  r $ / /   )			8	$G
% G
%TV f V rc- cr   