
    Bvh=b                     z    d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZ d dlmZ  ee      Z G d de      Zy)	    )	getLoggerN)Fusion)FusionUtils)	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                   $    e Zd ZdZdef fdZd dedefdZdede	defd	Z
d
ededefdZdededz  fdZdededz  fdZdedefdZdedeeef   de	fdZdededz  fdZdededz  fdZdededz  fdZdedededededefdZd Z xZS )!FusionMultiHeadAttentionMMDitzO
    Fuse MultiHeadAttention for Multimodal Diffusion Transformer (MMDiT).
    modelc                 :    t         |   |ddg       i | _        y )NMultiHeadAttentionSoftmax)fused_op_typesearch_op_types)super__init__unsqueeze_update_map)selfr   	__class__s     S/RAG/venv/lib/python3.12/site-packages/onnxruntime/transformers/fusion_mha_mmdit.pyr   z&FusionMultiHeadAttentionMMDit.__init__   s$    .BU^T_`$&!    
start_nodereturnc                 0   | j                   j                  |g d|ddg|      }|y|d   }t        |j                        dk7  ry| j                   j	                  |j                  d         }|yt        |j
                        dk7  ryt        |d         S )a  
        Detect num_heads from Reshape & Transpose of q/k/v for both Stable Diffusion 3.x and Flux 1.x:

                MatMul    .. [-1] [24] ..
                 |        |  |  /   /
                Add     Concat(axis=0)
                  |      /
                  Reshape
                     |
                 Transpose(perm=0,1,3,2)
                     |
               (start_node)
        )	TransposeReshapeConcatr      output_name_to_node      )r   match_parent_pathleninputget_constant_valueshapeint)r   r   r"   input_indexnodesconcat_shapevalues          r   get_num_headsz+FusionMultiHeadAttentionMMDit.get_num_heads   s     

,,:[!Q<Oex - 
 =Ry|!!"a'

--l.@.@.CD=u{{q 58}r   transpose_kconcat_before_transposec                     |r;| j                   j                  |ddgddg|      }|r| j                  |d   |      S y| j                   j                  |dgdg|      }|r| j                  |d   |      S y)a  
                Detect num_heads from subgraph like the following (num_heads=24 in this example):
                               MatMu    .. [-1] [24] ..
                                 |       |  |  /   /
                                Add     Concat
                                  |      /
                                 Reshape
                                    |
                             Transpose(perm=0,2,1,3)
                                    |
                             SimplifiedLayerNormalization
                                    |
                            Transpose(perm=0,1,3,2)

                Another variant is to an extra Concat node to join two symmetrical subgraphs:

                           |              |
                          MatMul        MatMul   .. [-1] [24] ..
                           |              |       |  |  /   /
                          Add  Concat    Add      Concat
                            |  /          |      /
                          Reshape         Reshape
                            |              |
                         Transpose     Transpose(perm=0,2,1,3)
                            |              |
        SimplifiedLayerNormalization  SimplifiedLayerNormalization
                                |     /
                               Concat
                                 |
                            Transpose(perm=0,1,3,2)

                    Both patterns are used in stable diffusion 3.5 model.
        r   SimplifiedLayerNormalizationr   r    r!   )r   r&   r0   )r   r1   r"   r2   r-   s        r   get_num_heads_from_kz2FusionMultiHeadAttentionMMDit.get_num_heads_from_k:   s    D #JJ00h(FG!Qex 1 E ))%(4GHH  JJ00<=sXk 1 E ))%(4GHHr   
input_nameoutput_namec                    d}| j                   j                  |      }|Tt        j                  t	        j
                  g dd      |      }| j                   j                  || j                         t        j                  d||g|g| j                   j                  d            }| j                  j                  |       | j                  | j                  |j                  <   |j                  d   S )	a+  Add a Reshape node to convert 4D BxSxNxH to 3D BxSxD.

        Args:
            input_name (str): input name for the 4D tensor of shape BxSxNxH.
            output_name (str): output name for the 3D tensor of shape BxSxD, where D = N * H.

        Returns:
            str: the output name
        bsnh_to_bsd_reshape_dims)r   r   r#   int64)dtype)namer   inputsoutputsr<   r   )r   get_initializerr	   
from_arraynparrayadd_initializerthis_graph_namer   	make_nodecreate_node_namenodes_to_addappendnode_name_to_graph_namer<   output)r   r6   r7   new_dims_namenew_dims	reshape_qs         r   reshape_to_3dz+FusionMultiHeadAttentionMMDit.reshape_to_3dk   s     3::--m<#..rxx
'/RYfgHJJ&&x1E1EF$$. M,,Y7	
	 	  +7;7K7K$$Y^^4""r   mul_qNc                 F   | j                   j                  |ddgddg      }|y|\  }}t        j                  |dg d      sy|j                  d   |j                  d<   |j
                  d   }|dz   |j
                  d<   | j                  |j
                  d   |dz         S )	a  
        MultiHeadAttenion requires query in BSD format. This function adjusts query from BNSH to BSD format.

        Before:
                               MatMul
                                 |
                               Add      Concat
                                 |      /
                                 Reshape
                                  |
                               Transpose(perm=0,2,1,3)
                                  |
                       SimplifiedLayerNorm
                                  |
                                 Mul

        After:
                               MatMul
                                 |
                                Add      Concat
                                 |      /
                                 Reshape
                                   |
                           SimplifiedLayerNorm
                                   |
                        Reshape (shape=[0, 0, -1])
        r4   r   r   Npermr   r%   r       _BSNH_BSD)r   r&   r   check_node_attributer(   rK   rO   )r   rP   r"   pathsln_atranspose_a
sln_outputs          r   'adjust_query_from_bnsh_to_bsd_no_concatzEFusionMultiHeadAttentionMMDit.adjust_query_from_bnsh_to_bsd_no_concat   s    : zz+++[9F

 <!{//V\R %**1-A\\!_
$w.Q!!%,,q/:3FGGr   c                 |   | j                   j                  |g dg d      }|y|\  }}}t        |j                        dk7  ry| j                   j                  |ddgddg      }|y|\  }}t	        j
                  |d	g d
      syt	        j
                  |d	g d
      syt	        j
                  |dd      sy|j                  d   |j                  d<   |j                  d   |j                  d<   t        j                  d|j                  d   |j                  d   g|j                  d   dz   g| j                   j                  d      d      }	| j                  j                  |	       | j                  | j                  |	j                  <   | j                  |	j                  d   |j                  d   dz         S )a  
        MultiHeadAttenion requires query in BSD format. This function adjusts query from BNSH to BSD format.

            Before:
                      MatMul      MatMul
                        |            |
                        Add Concat  Add    Concat
                         |    /      |      /
                         Reshape     Reshape
                            |           |
        Transpose(perm=0,2,1,3)      Transpose(perm=0,2,1,3)
                            |           |
            SimplifiedLayerNorm  SimplifiedLayerNorm
                            |     /
                            Concat(axis=2)
                             |
                            Mul

            After:
                   MatMul        MatMul
                     |              |
                    Add Concat     Add     Concat
                     |    /         |     /
                     Reshape       Reshape
                        |            |
           SimplifiedLayerNorm  SimplifiedLayerNorm
                        |       /
                      Concat(axis=1)
                         |
                      Reshape (shape=[0, 0, -1])
        )r   r4   r   )r   r   r   Nr%   r4   r   r    r   rR   rS   axisr   rU   r>   r?   r<   r^   rV   )r   r&   r'   r(   r   rW   r   rF   rK   rG   rH   rI   rE   rJ   r<   rO   )
r   rP   r"   rX   concatrY   rZ   sln_btranspose_bnew_concat_nodes
             r   adjust_query_from_bnsh_to_bsdz;FusionMultiHeadAttentionMMDit.adjust_query_from_bnsh_to_bsd   s   B zz++C

 <%)"{v||!zz+++[9F

 <!{//V\R//V\R//B %**1-A$**1-A **LLOU\\!_5]]1%/0,,X6
 	  1=A=Q=Q$$_%9%9:!!/"8"8";V]]1=MPV=VWWr   	unsqueezec                 n   | j                   j                  |j                        }|t        |j                        dk(  rPt        j                  d|j                  |j                  d   dz   g| j                  j                  d      dg      }nd}| j                  j                  |      Ot        j                  |t        j                  dgdg      }| j                  j                  || j                         t        j                  d|j                  d   |g|j                  d   dz   g| j                  j                  d      	      }| j                   j#                  |       | j                  | j$                  |j                  <   |j                  d   }|| j                   |j                  <   |S )
Nr    	Unsqueezer   rU   r%   )r>   r?   r<   axesunsqueeze_axes_2)r<   	data_typedimsvalsr=   )r   getr<   r'   r(   r   rF   rK   r   rG   r@   make_tensorr   INT64rD   rE   rH   rI   rJ   )r   re   updated_unsqueeze_outputnew_nodeinitializer_nameri   s         r   update_unsqueeze_axes_1_to_2z:FusionMultiHeadAttentionMMDit.update_unsqueeze_axes_1_to_2  s   #'#<#<#@#@#P #+9??#q(!++$??&--a07:;44[A $6 ::--.>?G'-'9'9-"-"3"3SS	($ JJ../?AUAUV!++%OOA.0@A&--a07:;44[A	 $$X.:>:N:ND((7'/q'9$8PD%%inn5''r   addr"   c                 l   t        |j                        dk7  ry| j                  j                  |g dg d|      }|yt	        | j                        }|j                  |d         }||dgk7  ry|j                  |d         }||dgk7  ry| j                  j                  |g dg d|      }|y|j                  |d         }||dgk7  ry|j                  |d         }||dgk7  ry| j                  |d         |d   j                  d<   | j                  |d         |d   j                  d<   y)	a  
        Update axes of Unsqueeze from [1] to [2] in the following pattern:
                  Unsqueeze        Unsqueeze
                  (axes=[0])       (axes=[0])
                     |              |
                  Unsqueeze        Unsqueeze
              ... (axes=[1])  ...  (axes=[1])
                |     /        |   /
                   Mul         Mul
                    |       /
                     Add
        Args:
            add (NodeProto): the Add node
            output_name_to_node (Dict[str, NodeProto]): mapping from output name to node

        Returns:
            bool: True if the pattern is matched and updated successfully, False otherwise.
        r%   F)Mulrg   rg   )r    r    r   r    r   )r   r    r   T)r'   r(   r   r&   r   get_squeeze_or_unsqueeze_axesrs   )r   rt   r"   nodes_bfusion_utilsaxes_1axes_0nodes_as           r   update_unsqueeze_axesz3FusionMultiHeadAttentionMMDit.update_unsqueeze_axes(  sL   & syy>Q **..s4UW`buv?"4::.;;GAJG>Vs];;GAJG>Vs] **..s4UW`buv?;;GAJG>Vs];;GAJG>Vs]"??
K
"??
K
r   c                    | j                   j                  |g dg d      }|y|\  }}}}}t        |j                        dk7  ry| j                   j                  |ddgddg      }|y|\  }	}
t	        j
                  |d	g d
      syt	        j
                  |
d	g d
      syt	        j
                  |dd      sy| j                  ||      sy|j                  d   |j                  d<   |
j                  d   |	j                  d<   t        j                  d|j                  d   |	j                  d   g|j                  d   dz   g| j                   j                  d      d      }| j                  j                  |       | j                  | j                  |j                  <   | j                   j!                  |j                  d   |j                  d          | j#                  |j                  d   |j                  d   dz         S )a3  
        Adjust graph to change query format from BNSH to BSD for Flux model.
        Note that the graph pattern is complex, and we only do a shallow match here.

        Before:
                       |               |
        Transpose(perm=0,2,1,3)    Transpose(perm=0,2,1,3)
                        |              |
        SimplifiedLayerNorm  SimplifiedLayerNorm
                        |             /
                        Concat(axis=2)
                         |
                        Mul     Mul
                         |    /
                          Add
                           |
                          Mul

        After (Transpose nods are removed, and a Reshape is added):

                        |           |
            SimplifiedLayerNorm  SimplifiedLayerNorm
                        |         /
                    Concat(axis=1)
                        |
                        Mul    Mul
                         |    /
                          Add
                           |
                       Reshape (shape=[0, 0, -1])
        )Addrv   r   r4   r   )r   r   r   r   r   Nr%   r4   r   r    r   rR   rS   r^   r   rU   r_   rV   )r   r&   r'   r(   r   rW   r}   r   rF   rK   rG   rH   rI   rE   rJ   r<   replace_input_of_all_nodesrO   )r   rP   r"   rX   rt   _mul_ar`   rY   rZ   ra   rb   rc   s               r   "adjust_flux_query_from_bnsh_to_bsdz@FusionMultiHeadAttentionMMDit.adjust_flux_query_from_bnsh_to_bsd]  s   B zz++Q

 <26/VVUKv||!zz+++[9F

 <!{//V\R//V\R//B ))#/BC %**1-A$**1-A **LLOU\\!_5]]1%/0,,X6
 	  1=A=Q=Q$$_%9%9:

--fmmA.>@V@VWX@YZ!!#**Q-A1GHHr   c                    | j                   j                  |g dg d      }|y|\  }}}}t        j                  |dg d      sy| j	                  ||      sy|j
                  d   |j
                  d<   |j                  d   dz   |j                  d<   | j                  |j                  d   |j                  d   dz         S )	a0  
        Adjust graph to change query format from BNSH to BSD for Flux model.
        Note that the graph pattern is complex, and we only do a shallow match here.

        Before:
                      |
                    Transpose(perm=0,2,1,3)
                      |
                    SimplifiedLayerNorm
                      |
                     Mul     Mul
                       |   /
                       Add
                        |
                       Mul

        After (Transpose is removed, and a Reshape is added):

                        |
                      SimplifiedLayerNorm
                        |
                        Mul   Mul
                         |   /
                         Add
                          |
                       Reshape (shape=[0, 0, -1])
        )r   rv   r4   r   )r   r   r   r   NrR   rS   r   rU   rV   )r   r&   r   rW   r}   r(   rK   rO   )r   rP   r"   rX   rt   r   rY   rZ   s           r   )adjust_flux_single_query_from_bnsh_to_bsdzGFusionMultiHeadAttentionMMDit.adjust_flux_single_query_from_bnsh_to_bsd  s    : zz++G

 <*.'VUK//V\R ))#/BC %**1-A

1/

1!!#**Q-A1GHHr   qc           	      &   t        j                  d|g|dz   g| j                  j                  dd      g d      }| j                  j                  |       | j                  | j                  |j                  <   | j                  |dz   |dz         S )Nr   rU   Transpose_BNSH_to_BSNH)name_prefixrS   )r<   rR   rV   )
r   rF   r   rG   rH   rI   rE   rJ   r<   rO   )r   r   r"   transpose_qs       r   transpose_reshape_bnsh_to_bsdz;FusionMultiHeadAttentionMMDit.transpose_reshape_bnsh_to_bsd  s    &&C[M,,[F^,_
 	  -9=9M9M$$[%5%56!!!g+q6z::r   kvrK   	num_headsc                     |dkD  sJ |||g}|g}t        j                  d||| j                  j                  d            }d|_        |j
                  j                  t        j                  d|      g       |S )a~  
        Create a MultiHeadAttention node.

        Args:
            q (str): name of q
            k (str): name of k
            v (str): name of v
            output (str): output name of MHA
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.

        Returns:
            NodeProto: the node created.
        r   r   r=   zcom.microsoftr   )r   rF   r   rG   domain	attributeextendmake_attribute)	r   r   r   r   rK   r   
mha_inputsmha_outputsmha_nodes	            r   create_multihead_attention_nodez=FusionMultiHeadAttentionMMDit.create_multihead_attention_node  s    , 1}} AY
 h## ,,-AB	
 *!!6#8#8i#P"QR r   c                    |j                   dk(  sJ |}| j                  j                  |j                  d         ry | j                  j	                  |g dg d|      }|y |\  }}}t        j                  |dg d      sy | j                  j                  |g dg d      }	|	y |	\  }
}}}}}}}|j                  d   }||j                  d   k7  ry | j                  j                  |
d	d
gddg      }|y |\  }}|j                  d   }t        j                  |dg d      sy | j                  j                  |ddgddg      }|y |d   j                  d   |j                  d   k7  ry |j                  d   }| j                  j                  |dd|      }|x| j                  j                  |d
d|      }|y t        j                  |dg d      sy | j                  j                  |d
d|      }|y t        j                  |dg d      s=y | j                  j                  |d
d|      }|y t        j                  |dg d      sy |r| j                  ||      n| j                  ||d      }|dk(  r| j                  |||d u      }|dk  ry || j                  ||      }n| j                  ||      }|:| j                  ||      }|&| j                  ||      }|| j!                  ||      }| j#                  ||||j                  d   |      }| j$                  j'                  |       | j(                  | j*                  |j,                  <   | j.                  j1                  |||g       d| _        y )Nr   r   )MatMulr   r   )r   r   r   r   rR   rS   )r   rv   SqrtDivr   CastSliceShape)r   r   r    r   r    r   r   r   rv   r   r    )r   r    rT   r%   r   r   r   )r,   r"   )r,   )r   r   r   rK   r   T)op_typer   find_graph_outputrK   match_child_pathr   rW   r&   r(   match_parentr0   r5   rd   r\   r   r   r   r   rH   rI   rE   rJ   r<   nodes_to_remover   prune_graph)r   nodeinput_name_to_nodesr"   softmaxr-   
matmul_s_vtranspose_outreshape_outq_nodes	matmul_qkrP   sqrt_q_2div_qsqrt_q_shape_qq_bnshk_nodesmul_kr1   r   k_scale_nodesr   concat_vtranspose_1transpose_2r   queryrq   s                                 r   fusez"FusionMultiHeadAttentionMMDit.fuse  s   ||y((( ::''q(9:

++79QSf
 =16.
M;//v|T**..N$
 ?CJ@	5(E61aQW]]1%%**..y5+:NQRTUPVW?${a //V\R

44UVUOaQRVT !!!$q(99Q ::**:xQdw*x **11+1J] 2 K "33KV**11+1J] 2 K "33KV **11KQL_ 2 K "33KV
  x)<=##J0CQR#S 	 >11+?RT\dhThiIA~ 66u>QRE@@H[\E=;;ECVWE}FFuNab= !>>vGZ[E77%%a( 8 
 	  *6:6J6J$$X]]3##Z$LM  r   )r   )__name__
__module____qualname____doc__r
   r   r   r+   r0   boolr5   strrO   r\   rd   rs   dictr}   r   r   r   r   r   __classcell__)r   s   @r   r   r      sv   'i '	 Z] B/	 /im /ru /b# ## ## #4.HY .H`cfj`j .H`MX9 MXVY\`V` MX^"(i "(C "(H3 3cS\nI] 3bf 3jRI	 RI[^ae[e RIh1Iy 1Ibehlbl 1If;s ;CRVJ ;)) ) 	)
 ) ) 
)V r   r   )loggingr   numpyrB   fusion_baser   ry   r   onnxr   r   r   r	   
onnx_modelr
   r   loggerr    r   r   <module>r      s4   
    $ = =  	8	K
 F K
 r   