
    BvhG                        d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
 d dlmZ  e j                  e      Z G d de      Z G d d	e      Zy)
    N)FusionAttention)Fusion)FunctionProto	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                        e Zd ZdZdededef fdZ	 	 	 	 	 	 	 ddeded	ed
edededededededede	dz  dedz  fdZ
d Zd Zd Z xZS )FusionRotaryAttentionze
    Fuse Attention subgraph with rotary positional embeddings into one MultiHeadAttention node.
    modelhidden_size	num_headsc                 2    t         |   |||dg d       y )NT)SimplifiedLayerNormalization SkipSimplifiedLayerNormalizationLayerNormalizationSkipLayerNormalizationAdd)use_multi_head_attentionsearch_op_types)super__init__)selfr   r   r   	__class__s       Z/RAG/venv/lib/python3.12/site-packages/onnxruntime/transformers/fusion_rotary_attention.pyr   zFusionRotaryAttention.__init__   s(     	%) 	 	
    Ninputoutputq_rotaryk_rotaryv_matmul	attn_maskadd_qkpast_kpast_v	present_k	present_vscalereturnc                    | j                   dkD  sJ | j                  dkD  rL| j                  | j                   z  dk7  r0t        j                  d| j                   d| j                           y | j                  j                  d      }|j                  d   |j                  d   |j                  d   d||||	g}|g}|
r|r|j                  |
|g       t        j                  d|||      }d|_
        |j                  j                  t        j                  d| j                         g       |0|j                  j                  t        j                  d	|      g       | j                  C|j                  j                  t        j                  d
t        | j                              g       | j                  d       |S )Nr   z)fuse_rotary_attention: input hidden size z# is not a multiple of num of heads MultiHeadAttention inputsoutputsnamecom.microsoftr   r)   mask_filter_value)r   r   loggerdebugr   create_node_namer   extendr   	make_nodedomain	attributemake_attributer3   floatincrease_counter)r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   mha_node_name
mha_inputsmha_outputsmha_nodes                    r   create_mha_nodez%FusionRotaryAttention.create_mha_node(   s    ~~!!!aT%5%5%F1$LLL;D<L<L;MMpquqq  qA  B 

334HIOOAOOAOOA	

 h	956## 	
 *!!6#8#8dnn#U"VW%%v'<'<We'L&MN!!-%%v'<'<=PRWX\XnXnRo'p&qr23r   c	                 
   | j                   j                  |dgdg      }	| j                   j                  |dgdg      }
|	|
y|	d   |
d   }}| j                   j                  |g dg d      }| j                   j                  |g dg d      }| j                   j                  |g dg d      }| j                   j                  |g dg d      }||||y|\  }}}|\  }}}|j                  d   |k7  s|j                  d   |k7  ry|d   j                  |j                  k7  s|d   j                  |j                  k7  ry| j                   j                  |dgdg      }| j                   j                  |dgdg      }||y|d   |d   }}| j                   j                  |g d	g d
      }| j                   j                  |g dg d      }| j                   j                  |g dg d      }| j                   j                  |g dg d      }||||y|d   j                  |j                  k7  sT|d   j                  |j                  k7  s8|d   j                  |j                  k7  s|d   j                  |j                  k7  ry| j                   j                  |dgdg      }|y|d   }| j                   j                  |g d	g d
      } | j                   j                  |g dg d      }!| |!y| d   j                  |j                  k7  s|!d   j                  |j                  k7  ry| j                   j                  |dgdg      }"|"y|"d   }#| j                   j                  |#g d	g d
      }$| j                   j                  |#g dg d      }%|$|%y|$d   j                  |j                  k7  s|%d   j                  |j                  k7  ry|$d   }&| d   }'|d   }(|j                  d   })|&j                  d   |)k7  s$|'j                  d   |)k7  s|(j                  d   |)k7  ry| j                   j                  |g dg d      }*| j                   j                  |g dg d      }+|*|*\  }},}-n|+|+\  }}},}-ny|-j                  d   dvry| j                   j                  |,g dg d      }.| j                   j                  |-g dg d      }/| j                   j                  |-dgdg      }0|.|/|0y|.d   j                  |/d   j                  k7  s|.d   j                  |/d   j                  k7  ry|/d   j                  d   |0d   j                  d   k7  ryy)NConcat   Fr   	UnsqueezeGatherShaper   r   r   rE   r   r   )   r   r   )rG   MulrH   rI   r   r   r   r   )rG   r   rH   rI   rE   r   r   r   rL   )rL   r   r   r   rD   SlicerQ   CastrD   rQ   rQ   >   r#   attention_mask)rL   r   rE   r   rG   T)r   match_parent_pathr   r1   r   )1r   reshape_qkv_2reshape_qkv_1reshape_q_2reshape_k_2reshape_v_2reshape_v_1r$   
root_inputconcat_qkv_2_pathconcat_qkv_1_pathconcat_qkv_2concat_qkv_1reshape_qkv_2_path_1reshape_qkv_2_path_2reshape_qkv_1_path_1reshape_qkv_1_path_2_gather_1shape_1gather_2shape_2concat_v_2_pathconcat_v_1_path
concat_v_2
concat_v_1reshape_v_2_path_1reshape_v_2_path_2reshape_v_1_path_1reshape_v_1_path_2concat_k_2_path
concat_k_2reshape_k_2_path_1reshape_k_2_path_2concat_q_2_path
concat_q_2reshape_q_2_path_1reshape_q_2_path_2mul_qmul_kmul_vgather_1_outattn_mask_path_1attn_mask_path_2
slice_qk_2
slice_qk_1slice_qk_2_pathslice_qk_1_path_1slice_qk_1_path_2s1                                                    r   &check_runtime_shape_paths_for_functionz<FusionRotaryAttention.check_runtime_shape_paths_for_function`   s    !JJ88
UVTWX JJ88
UVTWX$(9(A%6q%9;LQ;Ol#zz;;LJjluv#zz;;LJjluv#zz;;LJjluv#zz;;LJjluv (#+#+#+38W38W ==z)W]]1-=-K  "''8==8<PQR<S<X<X\d\i\i<i **66{XJQRPST**66{XJQRPST"o&=!0!3_Q5GJ
!ZZ99?
 "ZZ99?
 "ZZ99*Ffhqr!ZZ99*Ffhqr&!)!)!) q!&&(--7!!$))X]]:!!$))X]]:!!$))X]]: **66{XJQRPST"$Q'
!ZZ99?
 "ZZ99?
 %);)C a %%6:LQ:O:T:TX`XeXe:e **66{XJQRPST"$Q'
!ZZ99?
 "ZZ99*Ffhqr%);)C a %%6:LQ:O:T:TX`XeXe:e #1%"1%"1%q);;q>\)U[[^|-Ku{{[\~amOm  ::77@\^gh::77@dfrs'(8%Az:)+;(Aq*jA&EE**66?
 !JJ88?
 !JJ88k]UVTWX"&7&?CTC\ 1""&7&:&?&???STCUCZCZ^opq^r^w^wCw Q%%a(,=a,@,F,Fq,IIr   c                 ,   | j                   j                  |dgdg      }|y|d   }| j                   j                  |g dg d      }| j                   j                  |g dg d      }	||	y|\  }
}}|	\  }
}}|j                  d   |k7  s|j                  d   |k7  ry| j                   j                  |dgdg      }|y|d   }| j                   j                  |g dg d      }| j                   j                  |g dg d      }||y|d   j                  |j                  k7  s|d   j                  |j                  k7  ry| j                   j                  |dgdg      }|y|d   }| j                   j                  |g dg d      }| j                   j                  |g dg d      }||y|d   j                  |j                  k7  s|d   j                  |j                  k7  ry| j                   j                  |dgdg      }|y|d   }| j                   j                  |g dg d      }| j                   j                  |g dg d      }||y|d   j                  |j                  k7  s|d   j                  |j                  k7  ryy)	NrD   rE   Fr   rF   rJ   rK   T)r   rU   r   r1   )r   reshape_qkv	reshape_q	reshape_k	reshape_vr\   concat_qkv_path
concat_qkvreshape_qkv_path_1reshape_qkv_path_2re   rf   rg   rh   ri   concat_v_pathconcat_vreshape_v_path_1reshape_v_path_2concat_k_pathconcat_kreshape_k_path_1reshape_k_path_2concat_q_pathconcat_qreshape_q_path_1reshape_q_path_2s                              r   #check_runtime_shape_paths_for_nodesz9FusionRotaryAttention.check_runtime_shape_paths_for_nodes   s    **66{XJQRPST"$Q'
!ZZ99*Ffhqr!ZZ99*Ffhqr%);)C18W18W ==z)W]]1-=-K 

44Y
QCP  #::77Bbdmn::77Bbdmn#'7'? A##x}}48H8K8P8PT\TaTa8a 

44Y
QCP  #::77Bbdmn::77Bbdmn#'7'? A##x}}48H8K8P8PT\TaTa8a 

44Y
QCP  #::77Bbdmn::77Bbdmn#'7'? A##x}}48H8K8P8PT\TaTa8ar   c                 |    |j                   dvry d } j                  j                  |g dg d      } j                  j                  |g dg d      } j                  j                  |g dg d      }||\  }}	}}
}|}n/|
|\  }}}}|}n#||\  }}}}}|}nt        j	                  d       y d\  }}}d }d } j                  j                  |g d	g d
      } j                  j                  |g dg d      } j                  j                  |g dg d      } j                  j                  |g dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg	d       \  }}} j                  j                  |g d g d!      }|x|\  }}}}}}|} j                  j                  |d"d#gd$d%g      }|t        j	                  d&       y |d$   j                  d$   }|d'   j                  d$   }|j                  d$   }n|(|\  }}}}|}|j                  d$   }|j                  d$   }n||\  }}}|}|j                  d$   }n|<t        |      d(k(  r.|d$   d)d  \  }}}}|}|j                  d$   }|j                  d$   }nC|+|\  }}}}}|}|}|j                  d$   }|j                  d$   }nt        j	                  d*       y  j                  j                  |g d+g d,      }d-\  }} ||\  }}}} nt        j	                  d.       y d/\  }!}" j                  j                  |g d0g d      }# j                  j                  |g d1g d      }$ j                  j                  |g d2g d3      }% j                  j                  |g d4g d5      }& j                  j                  |g d6g d7      }' j                  j                  |g d8g d3      }( j                  j                  |g d9g d:      })|#|#\  }}*}+|*j                  d$   }!n|$|$\  }}}*}+|*j                  d$   }!n|%" j                  |%d$   j                  d$         }"n|&" j                  |&d$   j                  d$         }"nd|'|'d$   j                  d$   }"nO|(|(d$   j                  d$   }"n:|)" j                  |)d$   j                  d$         }"nt        j	                  d;       y d/\  },}-d }.d }/d }0 j                  j                  | g d<g d
      }1 j                  j                  | g d=g d      }2 j                  j                  | g d>g d?      }3 j                  j                  | g d@g d:fg dAg dBfg dCg dDfg dEg dFfg dGg dHfg dIg dJfg dKg dLfg dIg dMfg dIg dNfg	d       \  }}4} j                  j                  | g dOg dP      }5|1~|1\  }6}}7}}8}9|1}. j                  j                  |7d"d#gd$d%g      }:|:t        j	                  dQ       y |:d$   j                  d$   },|:d'   j                  d$   };|7j                  d$   }-||;k(  sJ |2|2\  }}8}}<}9|2}.|8j                  d$   }-n|3*|3\  }}7}8}}<}9|3}.|7j                  d$   },|7j                  d$   }-n|4Et        |4      d(k(  r7|4d$   dRd  \  }<}9|4d$   dSdT \  }7}8|4}.|7j                  d$   },|7j                  d$   }-nE|5-|5\	  }}7}0}8}/}}<}}9|5}.|7j                  d$   },|7j                  d$   }-nt        j	                  dU       y d }=d }>d }? j                  j                  | g dVg d,      }@ j                  j                  | g dWg d,      }A j                  j                  | g dXg dY      }B|@
@\  }C}}D}E|@}=n1A
A\  }D}}F}E|A}=n%BB\  }?}D}>}}F}}E|B}=nt        j	                  dZ       y Ej                  d$   |9j                  d$   k7  r5|9j                  d$   |j                  d$   k7  rt        j	                  d[       y d\}G||k(  rK j                  	
C6|Ej                  d$         st        j	                  d]       y |	j                  d$   }Gn|||fv rɉ j                  F<Ej                  d$         st        j	                  d]       y |j                  d$   }G|>r|>j                  d$   nEj                  d$   Dj                  d$<   |/r|/j                  d$   n|9j                  d$   |8j                  d$<   |?|8j                  d^z   |8j                  d$<   ||k(  r|d_d  } fd`}H|?r|0r j                  j                  da      }I|Id^z   }Jt        j                  da|0j                  d$   g|Jg|Ib      }K|Kj                   j#                  t        j$                  dcg dd      g        j                  j                  da      }L|Ld^z   }Mt        j                  da|?j                  d$   g|Mg|Lb      }N|Nj                   j#                  t        j$                  dcg dd      g        H<      }O|Ot        j	                  de       y  j                  j                  dfdgh      }Pt        j                  dfKj                  d$   Oj                  d$   g|Pd^z   g|Pb      }Q j                  j                  dfdih      }Rt        j                  dfNj                  d$   |Oj                  d$   g|Rd^z   g|Rb      }S|Q}8|S}D j&                  j)                  |O        j&                  j)                  |K        j&                  j)                  |N        j&                  j)                  |Q        j&                  j)                  |S        j*                   j,                  |Oj                  <    j*                   j,                  |Kj                  <    j*                   j,                  |Nj                  <    j*                   j,                  |Qj                  <    j*                   j,                  |Sj                  <    j/                  Ej                  d$   GD|8||!|"|,||-|      }T|Tt        j	                  dj       y  j&                  j)                  T        j*                   j,                  |Tj                  <    j0                  j#                  |d_d         ||k7  r& j0                  j#                  ||d d' n|d dR        n"|d$   d'   g}U|D ]  }V j3                  |VU         j0                  j#                  |       |.|1k(  r  j0                  j#                  |.d dR        nN|.|2k(  r[ j0                  j)                  |.d$           j0                  j)                  |.d%           j0                  j)                  |.dk          n|.|3k(  ry j0                  j)                  |.d$           j0                  j)                  |.d_           j0                  j)                  |.dk           j0                  j)                  |.dl          np|.|5k(  r= j0                  j)                  |.d$           j0                  j)                  |.d_          n.|.|4k(  r)|.d$   d'   |.d$   d)   g}U|.D ]  }V j3                  |VU        |=@k(  r& j0                  j#                  |=d dR        dm _        y |=Ak(  r< j0                  j)                  |=d_           j0                  j)                  |=d%          dm _        y )nN>   r   r   r   )MatMulReshape	Transposer   r   rE   r   r   r   r   )r   r   r   r   rO   )	AllReducer   r   r   r   z0fuse_rotary_attention: failed to match qkv nodes)r-   r-   r-   )r   r   rD   r   r   r   )rE   r   r   rE   r   r   )rD   r   r   r   )rE   rE   r   r   )r   r   r   rK   )r   ExpandrG   rD   r   r   r   )rE   r   r   r   rE   r   r   )r   r   WhereEqualr   rD   rG   rH   rI   rD   r   r   r   )rE   r   rE   r   r   r   r   r   r   r   rE   r   r   )r   r   r   r   rM   ConstantOfShaperI   r   rD   rG   rH   rI   rD   r   r   r   )rE   r   rE   r   rE   r   r   r   r   rE   r   r   r   rE   r   r   )r   r   r   r   rI   r   rD   rG   rH   rI   rD   r   r   r   )rE   r   rE   rE   r   r   r      r   r   r   rE   r   r   )r   r   r   r   rD   rG   rH   rI   rD   r   r   r   )rE   r   rE   rL   r      r   r   r   rE   r   r   )	r   rD   rG   rH   rI   rD   r   r   r   )	rE   rE   r   r   r   r   rE   r   r   )
r   rD   rG   rM   rH   rI   rD   r   r   r   )
rE   rE   rE   r   r   r   r   rE   r   r   )	rE   rE   rL   r   r   r   rE   r   r   )	rE   rE   r   r   r   r   rE   r   r   )output_name_to_node)rD   r   r   r   r   )rE   rE   r   r   rE   rQ   rG   r   rL   zDfuse_rotary_attention: failed to match past/present concat in v path	   z-fuse_rotary_attention: failed to match v path)Softmaxr   Divr   rN   NNz/fuse_rotary_attention: failed to match qk nodes)r-   r-   rP   rR   )r   r   SubrS   r   rG   rG   )rE   r   rL   rE   r   r   r   )r   r   rS   r   rG   rG   )rE   rL   rE   r   r   r   )r   r   r   r   rS   r   rG   rG   )rE   r   r   rL   rE   r   r   r   )r   r   r   rS   r   rG   rG   )	r   rS   r   rS   r   rS   r   rG   rG   )	rE   r   r   r   r   rE   r   r   r   z;fuse_rotary_attention: failed to match attention mask nodes)r   r   rD   r   RotaryEmbeddingr   )r   r   r   r   r   )r   rD   r   r   r   r   )rE   r   rE   r   r   r   )	r   r   r   rG   rD   r   r   r   r   )r   r   r   r   r   r   rD   rG   rH   rI   rD   r   r   r   r   )rE   r   r   rE   r   r   r   r   r   r   r   rE   r   r   r   )r   r   r   r   r   rM   r   rI   r   rD   rG   rH   rI   rD   r   r   r   r   )rE   r   r   rE   r   rE   r   r   r   r   rE   r   r   r   rE   r   r   r   )r   r   r   r   r   rI   r   rD   rG   rH   rI   rD   r   r   r   r   )rE   r   r   rE   rE   r   r   r   r   r   r   r   rE   r   r   r   )r   r   r   r   r   rD   rG   rH   rI   rD   r   r   r   r   )rE   r   r   rE   rL   r   r   r   r   r   rE   r   r   r   )r   r   rD   rG   rH   rI   rD   r   r   r   r   )rE   r   rE   r   r   r   r   rE   r   r   r   )r   r   rD   rG   rM   rH   rI   rD   r   r   r   r   )rE   r   rE   rE   r   r   r   r   rE   r   r   r   )rE   r   rE   rL   r   r   r   rE   r   r   r   )rE   r   rE   r   r   r   r   rE   r   r   r   )	r   rD   rD   r   rQ   r   r   r   r   )	rE   r   rE   r   r   r   r   r   rE   zDfuse_rotary_attention: failed to match past/present concat in k pathz.fuse_rotary_attention: failed to match k nodes)r   r   r   r   )r   r   r   r   )rD   r   rQ   r   r   r   r   )r   r   r   r   r   r   rE   z.fuse_rotary_attention: failed to match q nodeszKfuse_rotary_attention: failed to find the same root_input for q, k, v pathsr-   z;fuse_rotary_attention: failed to verify runtime shape paths	_output_0rE   c                 .   
j                   j                  | dd      }|t        j                  d       y
j                   j	                  |j
                  d         }
j                   j	                  |j
                  d         }||t        j                  d       y|d   }|d   }||z  }
j                   j                  d	d
      }
j                   j                  |      &
j                  |t        j                  dg|gd       
j                   j                  dd      }t        j                  d|j
                  d   |j
                  d   |g|dz   g|      }	|	j                  j                  t        j                  dd      g       |	S )zDetect num_heads and hidden_size for ONNX model from phi-2
            Args:
                reshape_q (NodeProto): reshape node for q
            Returns:
                hidden_size_concat_node(NodeProto): Concat node to be used by reshape
            rD   rE   NzEfuse_rotary_attention: failed to trace the concat node from reshape_qrL   r   zMfuse_rotary_attention: failed to get constant nodes of num_heads or head_sizer   Initializerr   name_prefixF)r1   	data_typedimsvalsrawhidden_size_concatoutput_0r.   axis)r   match_parentr4   r5   get_constant_valuer   r6   get_initializeradd_initializerr   INT64r   r8   r:   r7   r;   )r   concatnum_head_constant_nodehead_size_constant_nodenum_head_valuehead_size_valuer   hidden_size_initilizerhidden_size_reshape_node_namehidden_size_concat_noder   s             r   create_hidden_size_concat_nodezBFusionRotaryAttention.fuse.<locals>.create_hidden_size_concat_node  s    ZZ,,Y!DF~de &*ZZ%B%B6<<PQ?%S"&*jj&C&CFLLQRO&T#%-1H1Plm3A6N5a8O(?:K%)ZZ%@%@\i%@%j"zz))*@AI$$/)//% %  -1JJ,G,G^r,G,s)&,&6&6LLOLLO*
 7CD2	'# $--44f6K6KFTU6V5WX**r   r   r.   perm)r   rL   rE   r   z?fuse_rotary_attention: failed to create hidden_size_concat_noder   concat_k_halfr   concat_q_halfzSfuse_rotary_attention: failed to create multi-head attention with rotary embeddingsr   r   T)op_typer   rU   r4   r5   match_parent_paths_allr   r   lenreshape_add_qkr   r   r1   r6   r   r8   r:   r7   r;   nodes_to_addappendthis_graph_namenode_name_to_graph_namerB   nodes_to_remove&add_nodes_to_remove_with_nodes_to_keepprune_graph)Wr   normalize_nodeinput_name_to_nodesr   	qkv_nodesqkv_nodes_1qkv_nodes_2qkv_nodes_3re   rV   rW   
matmul_qkvr   r&   r(   past_seq_lenv_nodesadd_v	v_nodes_1	v_nodes_2	v_nodes_3	v_nodes_4	v_nodes_5rZ   r   r[   matmul_vr   transpose_vr   qk_nodesr$   	matmul_qkr#   
add_qk_strattn_mask_nodes_1attn_mask_nodes_2attn_mask_nodes_3attn_mask_nodes_4attn_mask_nodes_5attn_mask_nodes_6attn_mask_nodes_7slice_mask_1slice_mask_2r%   r'   k_nodesslice_kr   	k_nodes_1	k_nodes_2	k_nodes_3	k_nodes_4	k_nodes_5rY   r   rotary_kmatmul_kr   shared_past_seq_lenr   q_nodesslice_qr   	q_nodes_1	q_nodes_2	q_nodes_3rX   rotary_qmatmul_qr   root_outputr   k_transpose_node_namek_tranpose_output_namek_transpose_nodeq_transpose_node_nameq_tranpose_output_nameq_transpose_noder   concat_k_reshape_node_nameconcat_k_reshape_nodeconcat_q_reshape_node_nameconcat_q_reshape_nodenew_nodenodes_to_keep	temp_pathsW   `                                                                                      r   fusezFusionRotaryAttention.fuseE  s   !!)nn
 	jj22C

 jj228

 jj22E

 "=H:A}a
#I$,7)A{Az#I$/:,Aq+q*#ILLKL +5'	<JJ00P
	
 JJ008
	
 JJ00.
	
 **;; c)
 <!&$ E',  ?#( 9$ v/
 3  v/
 v/OkX !%] < o
9a` JJ00?
	
  AJ>KHahG JJ88+&AM
 $cd"1%++A.F(,2215L *I"9B6Hk9hG^^A&F *I"/8,KHG#**1-I"s9~':9B1bc9J6Hk9hG^^A&F *I"@I=Hk9eXHG^^A&F *ILLHI::///

 '	&.#Avq)LLJK !'	: JJ88(

 !JJ880

 !JJ88O!

 !JJ88H

 !JJ88Y$

 !JJ88R!

 !JJ88a'

 (,=)A|\$++A.I*/@,Aq,$++A.I*,,->q-A-H-H-KLJ*,,->q-A-H-H-KLJ**1-44Q7J**1-44Q7J*,,->q-A-H-H-KLJLLVW
 #	JJ00X
	
 JJ00N
	
 JJ00X
	
 **;;
 0" B%*( K+0$ E',  ?#( 6" 9$ 6" 6gcH !%M < g
9aP JJ00r'
	
  >G;KHa8G JJ88+&AM
 $cd"1%++A.F"/"3"9"9!"< *I#6666"2;/AxIxG *I"<E9Ax1iG^^A&F *I"s9~':"+A,rs"3Ix!*1b!4HhG^^A&F *I"W`TAx'1iHG^^A&F *ILLIJ
 JJ00A
	
 JJ00A
	
 JJ00[!
	
  1:.KHhG"/8,HaHG"JSGM8WaAxGLLIJ>>!q 11hnnQ6G8>>Z[K\6\LLfg#>>q!	 Z['..q1K;44;;q! Z[%,,Q/K
 6=q 1(//RSBTHNN15<q 1(//RSBTHNN1 $%-]][%@"K'%abM	2	+j ]$(JJ$?$?$L!%:[%H"%//%,,Q/0/0*	  &&--v/D/DV\/Z.[\ %)JJ$?$?$L!%:[%H"%//%,,Q/0/0*	  &&--v/D/DV\/Z.[\&DY&O#&.^_ *.)D)DY\k)D)l&$*$4$4(//24K4R4RST4UV3kAB/	%! *.)D)DY\k)D)l&$*$4$4(//24K4R4RST4UV3kAB/	%! -H,H$$%<=$$%56$$%56$$%:;$$%:;IMI]I]D(()@)E)EFBFBVBVD(()9)>)>?BFBVBVD(()9)>)>?GKG[G[D(()>)C)CDGKG[G[D(()>)C)CD''NN1
 LLno  *6:6J6J$$X]]3##IabM2i  ''7SVTV<X$QZ^,M$	;;I}U % 	##H-i  ''5	!  ''
3  ''
3  ''
3	!  ''
3  ''
3  ''
3  ''
3	!  ''
3  ''
3	!$QZ^WQZ^<M$	;;I}U % i  ''5   	!  ''
3  ''
3r   )r-   r-   r-   r-   r-   r-   N)__name__
__module____qualname____doc__r
   intr   strr   r<   rB   r   r   r  __classcell__r   s   @r   r   r      s    

 
 	
6 "66 6 	6
 6 6 6 6 6 6 6 6 t|6 
T	6pZxGRK r   r   c            
       b     e Zd Zdef fdZdedefdZdefdZde	d	e	d
e	de	de	f
dZ
d Z xZS )FusionRotaryEmbeddingsr   c                 ~    d| _         t        | 	  || j                   | j                   | j                   dz   dg       y )Nr   z.1r   )	base_namer   r   )r   r   r   s     r   r   zFusionRotaryEmbeddings.__init__T  s5    *RVAVX]0^_r   rot_emb_nodefunctionc                 B   g g }}|j                   D ]  }|j                  dk(  s|j                  g k(  s#|j                  d   |j                  v s?|j	                  |       t        |j                        j                  |j                  d         }|j	                  |j                  |           g }|D ]q  }|j                  d   j                  }	| j                  j                  d      |	_        | j                  j                  |	       |j	                  |	j                         s t        ||d      D ]c  \  }
t        t        fd| j                  j                  j                  j                               }|D ]  }t!        j"                  ||
        e |S )NConstantr   F)strictc                      | j                   v S N)r   )entryextra_outputs    r   <lambda>z?FusionRotaryEmbeddings.reassign_extra_outputs.<locals>.<lambda>n  s    8Sr   )noder   r   r   r   listindexr:   tr   r6   r1   r   zipfiltergraphr
   replace_node_input)r   r%  r&  extra_constantsextra_outputsfn_nodeoutput_indexextra_initializersextra_constantconstant_tensorprotoextra_initializernodes_to_updatenode_to_updater-  s                @r   reassign_extra_outputsz-FusionRotaryEmbeddings.reassign_extra_outputs[  si   )+R}}G*,"1DXYIZ^f^m^mIm&&w/#HOO4::7>>!;LM$$\%8%8%FG	 %  -N#1#;#;A#>#@#@ (,

(C(CJ(O %JJ&&';<%%&:&?&?@	 . 03=BT]b/c+L+"6*SUYU_U_UeUeUkUkUpUp#qrO"1,,^\K\] #2 0d
 r   r/  c                    | j                   j                  | j                        }| j                   j                  ddgddg      }||\  }}nt        j                  d       y |j                  d   j                  d   g}t        t        fd| j                   j                   j                  j                              }t        t        fd| j                   j                   j                  j                              }d\  }	}
t        |      dk(  rt        |      dk(  r| j                   j                  |	      | j                   j                  |
      t        j                  |d   j                   d   j"                        j%                         }t        j                  |d   j                   d   j"                        j%                         }t'        j(                  |	t*        j,                  t        |j.                        |j1                         j3                         	      }| j                   j5                  || j6                         t'        j(                  |
t*        j,                  t        |j.                        |j1                         j3                         	      }| j                   j5                  || j6                         | j8                  j;                  |d   |d   g       |j;                  |	|
g       j                  }t        |      dkD  rt        t        fd
| j                   j                   j<                              }t        |      dk(  sJ | j?                  |d         t        t        fd|            }t        |      dk(  sJ t'        j@                  | j                  |||d      }d|_!        | j8                  jE                  |       |S )Nr   r   r   z.fuse_rotary_embeddings: failed to match MatMulrE   c                 B    | j                   d   j                  d   k(  S )Nr   rL   r   r   constantr/  s    r   r.  zOFusionRotaryEmbeddings.create_rotary_embeddings_from_function.<locals>.<lambda>      hooa6HDJJWXM6Yr   c                 B    | j                   d   j                  d   k(  S )Nr   r   rD  rE  s    r   r.  zOFusionRotaryEmbeddings.create_rotary_embeddings_from_function.<locals>.<lambda>  rG  r   	cos_cache	sin_cacher1   r   r   r   c                 6    | j                   j                  k(  S r+  )r1   r   )fnr/  s    r   r.  zOFusionRotaryEmbeddings.create_rotary_embeddings_from_function.<locals>.<lambda>  s    "''T\\*Ar   c                     | vS r+   )output_namer8  s    r   r.  zOFusionRotaryEmbeddings.create_rotary_embeddings_from_function.<locals>.<lambda>  s    TaAar   r/   r0   r1   interleavedr2   )#r   r6   r$  rU   r4   r5   r   r   r0  r4  r5  r/  r   r   r	   to_arrayr:   r2  squeezer   make_tensorr   FLOATshapeflattentolistr   r   r   r7   	functionsrA  r8   r9   r   )r   r/  rotary_emb_node_namematmul_pathreshape_nodematmul_noderotary_emb_inputscos_cache_nodesin_cache_nodecos_cache_namesin_cache_namerJ  rK  cos_cache_tensorsin_cache_tensorrotary_emb_outputsfuncrotary_emb_noder8  s    `                @r   &create_rotary_embeddings_from_functionz=FusionRotaryEmbeddings.create_rotary_embeddings_from_functiont  sH   #zz::4>>Jjj22!F

 "(3%L+LLIJ q!JJqM
 f%Y[_[e[e[k[k[q[q[v[vwxf%Y[_[e[e[k[k[q[q[v[vwx)A& 1$N#q(

**>:B

**>:B$--nQ.?.I.I!.L.N.NOWWYI$--nQ.?.I.I!.L.N.NOWWYI%11#%++)//*&&(//1	  JJ&&'79M9MN%11#%++)//*&&(//1	  JJ&&'79M9MN  ''):N1<M(NO  ..!AB![[!"Q&A4::CSCSC]C]^_Dt9>!> 77d1gFM!%f-acu&v!w)*a/// **NN$&%
 "1##L1r   r\   position_ids	cos_slice	sin_slicer   c                    | j                   j                  | j                        }t        t	        fd| j                   j                   j
                  j                              }t        t	        fd| j                   j                   j
                  j                              }d\  }	}
t        |      dk(  rt        |      dk(  r| j                   j                  |	      | j                   j                  |
      t        j                  |d   j                  d   j                        j                         }t        j                  |d   j                  d   j                        j                         }|j                  d   }|d d d |dz  f   }|d d d |dz  f   }t        j                   |	t"        j$                  t        |j                        |j'                         j)                               }| j                   j+                  || j,                         t        j                   |
t"        j$                  t        |j                        |j'                         j)                               }| j                   j+                  || j,                         | j.                  j1                  |d   |d   g       t        j2                  | j                  |||	|
g|g|d      }d	|_        |S )
Nc                 (    | j                   d   k(  S Nr   r   )rF  rl  s    r   r.  zLFusionRotaryEmbeddings.create_rotary_embeddings_from_nodes.<locals>.<lambda>      hooa6HI6Ur   c                 (    | j                   d   k(  S rp  rq  )rF  rm  s    r   r.  zLFusionRotaryEmbeddings.create_rotary_embeddings_from_nodes.<locals>.<lambda>  rr  r   rI  rE   r   rL   rL  rR  r2   )r   r6   r$  r0  r4  r5  r/  r   r   r	   rT  r:   r2  rU  rX  r   rV  r   rW  rY  rZ  r   r   r   r7   r8   r9   )r   r\   rk  rl  rm  r   r\  ra  rb  rc  rd  rJ  rK  	head_sizere  rf  ri  s      ``            r   #create_rotary_embeddings_from_nodesz:FusionRotaryEmbeddings.create_rotary_embeddings_from_nodes  s}     $zz::4>>J f%UW[WaWaWgWgWmWmWrWrstf%UW[WaWaWgWgWmWmWrWrst)A& 1$N#q(

**>:B

**>:B$--nQ.?.I.I!.L.N.NOWWYI$--nQ.?.I.I!.L.N.NOWWYI "*I!!%7	Q%7"78I!!%7	Q%7"78I%11#%++)//*&&(//1	  JJ&&'79M9MN%11#%++)//*&&(//1	  JJ&&'79M9MN  ''):N1<M(NO **NNnnMH%
 "1r   c                 &  % | j                   |j                  vr|j                  dk7  ry d %|j                  dk7  rt        |j                        dvs|j                  d   dvrt        j                  d       y | j                  |      %%t        j                  d       y | j                  j                  |       t        t        %fd| j                  j                  j                  j                              }t        |      dk(  sJ | j                  j                  j                  j                  j                  |d          n| j                  j                  |g d	g d
      }| j                  j                  |g dg d
      }|xs |}| j                  j                  |g dg d      }| j                  j                  |g dg d      }	|xs |	}
||
t        j                  d       y | j                  j                  |g dg d      }| j                  j                  |g dg d      }|xs |}| j                  j                  |g dg d      }| j                  j                  |g dg d      }|xs |}||t        j                  d       y |d   j                   |d   j                   k7  s]|d   j                   |
d   j                   k7  s>|d   j                   |d   j                   k7  s|d   j                   |
d   j                   k7  rt        j                  d       y | j                  j                  |ddgddg      }| j                  j                  |ddgddg      }|xs |}|t        j                  d       y d\  }}}| j                  j                  |g dg d      }| j                  j                  |g d g d!      }| j                  j                  |g d"g d#      }| j                  j                  |g d$g d%      }||}|d&   j                  d   }n||}|d'   j                  d   }nh|'|}|d&   j                  d   }|d(   j                  d   }n?|'|}|d'   j                  d   }|d(   j                  d   }nt        j                  d)       y d*\  }}| j                  j                  |g dg d+      }| j                  j                  |g d g d,      }| j                  j                  |g d"g d-      }| j                  j                  |g d$g d.      } ||}|d&   j                  d   }n||}|d'   j                  d   }nh|'|}|d&   j                  d   }|d(   j                  d   }n?| '| }|d'   j                  d   }|d(   j                  d   }nt        j                  d)       y |d/k(  r| j                  j                  |d(   d0gdg      }!| j                  j                  |d(   d0gdg      }"|!!|"|!d   j                   |"d   j                   k7  rt        j                  d1       y |"d   j                  d   }ng }!g }"d2\  }#}$||k(  r||k(  s
||k(  rZ||k(  rU|d3   j                   |d3   j                   k7  s |d   j                   |d   j                   k7  rt        j                  d4       y ||k(  r||k(  s
||k(  r|| k(  r|d   j                   |d   j                   k7  rt        j                  d5       y | j                  j                  |d   d6d7gddg      }#| j                  j                  |d   g d8g d9      }$|#?|$=| j                  j#                  |#d   j                  d         |$d   j                  dk7  r+t        j                  d:       y t        j                  d;       | j%                  |d   j&                  d   ||||j&                  d         %%t        j                  d       y | j)                  |g       | j)                  |d d        | j)                  |d d        | j)                  |d d        | j)                  |
d d        | j)                  |d d        | j)                  |       | j)                  |       | j)                  |!d d        | j)                  |"d d        |#;t        | j                  j+                  |#d               dk(  r| j)                  |#       |$| j)                  |$d d        | j-                  | j                          | j.                  | j0                  %j                   <   | j2                  j                  %       d<| _        y )=Nr   >   r      rE   >   pospos_idpos_idsposition_idrk  zLfuse_rotary_embeddings: failed to verify inputs for RotaryEmbedding functionz=fuse_rotary_embeddings: failed to create RotaryEmbedding nodec                 <    | j                   j                  d   k(  S rp  )r1   r   )r/  ri  s    r   r.  z-FusionRotaryEmbeddings.fuse.<locals>.<lambda>  s    DII1G1G1J$Jr   r   )rM   rD   NegrQ   r   r   )rM   rD   r}  rQ   rQ   )	rM   rD   r}  rQ   rG   r   rH   rI   r   )	rE   r   r   r   rE   r   r   r   r   )	rM   rD   r}  rQ   rG   r   rH   rI   rQ   z9fuse_rotary_embeddings: failed to match x2 in rotate_half)rM   rD   rQ   r   )rE   r   rE   r   )rM   rD   rQ   rQ   )rM   rD   rQ   rG   r   rH   rI   r   )rE   r   rE   rL   r   r   r   r   )rM   rD   rQ   rG   r   rH   rI   rQ   z9fuse_rotary_embeddings: failed to match x1 in rotate_halfr   zCfuse_rotary_embeddings: failed to match common input in rotate_halfrM   r   rQ   z8fuse_rotary_embeddings: failed to match x in rotate_half)Nr-   r-   )	rM   rG   rH   Squeezer~  rQ   rG   rH   rI   )	rE   rE   r   r   r   r   rL   r   r   )rM   rG   rH   r~  r~  rQ   rG   r   )rE   rE   r   r   r   r   rL   r   )rM   rG   rH   rQ   rG   rH   rI   )rE   rE   r   r   rL   r   r   )rM   rG   rH   rQ   rG   r   )rE   rE   r   r   rL   r   r   r   rL   z>fuse_rotary_embeddings: failed to match sin path in apply_rope)Nr-   )	r   rE   r   r   r   r   rL   r   r   )r   rE   r   r   r   r   rL   r   )r   rE   r   r   rL   r   r   )r   rE   r   r   rL   r   r-   r   zGfuse_rotary_embeddings: failed to match position ids path in apply_roper   r   zdfuse_rotary_embeddings: failed to match common Gather node and Shape node in sin cache and cos cachezRfuse_rotary_embeddings: failed to match common Add node in sin cache and cos cacherH   rI   )rH   rI   r   rJ   zKfuse_rotary_embeddings: failed to match past_seq_len and curr_seq_len pathsz:fuse_rotary_embeddings: failed to match common cache pathsT)r$  r   r   r   r4   r5   rj  r   r   r0  r4  r   r5  
value_inforemoverU   r1   find_graph_inputru  r   add_nodes_to_removeget_childrenr=   r   r   r   r   )&r   r/  r   r   old_shape_inferrotate_half_x2_path_1_1rotate_half_x2_path_1_2rotate_half_x2_path_1rotate_half_x2_path_2_1rotate_half_x2_path_2_2rotate_half_x2_path_2rotate_half_x1_path_1_1rotate_half_x1_path_1_2rotate_half_x1_path_1rotate_half_x1_path_2_1rotate_half_x1_path_2_2rotate_half_x1_path_2x_path_1x_path_2x_pathsin_pathrK  rk  
sin_path_1
sin_path_2
sin_path_3
sin_path_4cos_pathrJ  
cos_path_1
cos_path_2
cos_path_3
cos_path_4position_ids_from_sin_pathposition_ids_from_cos_pathpast_seq_len_pathcurr_seq_len_pathri  s&                                        @r   r  zFusionRotaryEmbeddings.fuse  s	   >>-$,,%2G <<5 4::f,

1 F 1 kl"II$OO&\]   ''- #JDJJL\L\LbLbLmLmnO '1,,,JJ""--44_Q5GH( '+jj&B&B>'# '+jj&B&B:'# %<$V?V!&*jj&B&Be+'# '+jj&B&Ba+'# %<$V?V!$,0E0MXY&*jj&B&B7'# '+jj&B&B3'# %<$V?V!&*jj&B&B^('# '+jj&B&BZ('# %<$V?V!$,0E0MXY &b)..2G2K2P2PP(,115J25N5S5SS(,115J25N5S5SS(,115J25N5S5SSbc zz33$AH zz33 AH )F~WX 1=-Hi55m+J
 55a(J
 55W%J
 55K"J
 %%$RL..q1	'%$RL..q1	'%$RL..q1	'{003'%$RL..q1	'{003]^ #+Hi55m+J
 55a(J
 55W%J
 55K"J
 %%$RL..q1	'%$RL..q1	'%$RL..q1	'{003'%$RL..q1	'{003]^ r!-1ZZ-I-IQKKC.*
 .2ZZ-I-IQKKC.* /6191!499=WXY=Z=_=__LL!jk9!<BB1E-/*-/*3=00J&8z+AJ&8z+AB<$$(9(99Xb\=N=NRZ[]R^RcRc=cLL~ j(X-CJ&8z+AB<$$(9(99LL!uv$(JJ$@$@RLw'F%! %)JJ$@$@RL4%! &-(0zz223DR3H3N3Nq3QRZ(,44CLL!noYZ"FF%b)003AO &\] $$dV,$$%:3B%?@$$%:3B%?@$$%:3B%?@$$%:3B%?@$$VCR[1$$X.$$X.$$%?%DE$$%?%DE ,TZZ5L5LM^_`Ma5b1cgh1h (():; ,(():3B)?@dnn-=A=Q=Q$$_%9%9:  1r   )r  r  r  r
   r   r   r   rA  rj  r  ru  r  r  r   s   @r   r"  r"  S  so    `i `9  2H9 HT66 6 	6
 6 6pA r   r"  )loggingfusion_attentionr   fusion_baser   onnxr   r   r   r   r	   
onnx_modelr
   	getLoggerr  r4   r   r"  rP  r   r   <module>r     sH   
  ,  L L  			8	$A O A H"d V d r   