
    BvhO                         d dl mZ d dlmZ d dlmZmZ d dlmZ  ee	      Z
 G d de      Z G d de      Z G d	 d
e      Zy)    )	getLogger)Fusion)TensorProtohelper)	OnnxModelc                   >     e Zd Zddededef fdZdedefdZ xZS )	FusionLayerNormalizationmodelcheck_constant_and_dimensionforcec                 D    t         |   |dd       || _        || _        y NLayerNormalization
ReduceMean)super__init__r   r   )selfr
   r   r   	__class__s       S/RAG/venv/lib/python3.12/site-packages/onnxruntime/transformers/fusion_layernorm.pyr   z!FusionLayerNormalization.__init__   s$     4lC,H)
    input_name_to_nodesoutput_name_to_nodec           
      	   g }| j                   j                  ||      }t        |      dk(  st        |      dkD  ry|j                  d   }|d   j                  dk7  s|d   j                  d   |k7  ryt        |      dk(  r(|d   j                  dk7  s|d   j                  d   |k7  ryd}|D ]N  }| j                   j                  |d|d      }	|	|	} n)| j                   j                  |d	dg      }
|
I|
d
   } n |y| j                   j                  |g dg dfg dg dfg|      \  }}}|y|d
   }||vry|d   }| j                   j                  |      \  }}|
|dk  s|dkD  rt        j                  d|        y|d   }| j                   j                  |d      dk7  ry|j                  d   |vry||j                  d      }|D ]  }|j                  d	k(  r9|j                  |       |j                  d   |vr6||j                  d      d   }n|}|j                  dk7  r^|j                  d   |vrp||j                  d      d   }|j                  dk7  r|j                  |       |j                  |       |j                  |dd
        |j                  |||g       |j                  d	k7  r|n|}|j                  d| j                   j                  |j                  d   |      z
     }| j                   r| j                   j#                  |dd      sV|j                  d| j                   j                  |j                  d   |      z
     }| j                   r| j                   j#                  |dd      s|j                  d   }| j                   j%                  ||j                  ||      s+| j&                  rd| _        n2t        j                  d       | j*                  j                  |       t-        j.                  d|j                  d   ||g|g| j                   j1                  dd            }|j2                  j                  t-        j4                  dt7        |            g       | j8                  j                  |       | j:                  | j<                  |j>                  <    y)a  
        Fuse Layer Normalization subgraph into one node LayerNormalization:
              +----------------------+
              |                      |
              |                      v
          [Root] --> ReduceMean -->  Sub  --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul --> Add
                     (axis=2 or -1)  |      (Y=2)   (axis=2 or -1)  (B=E-6 or E-12)    ^
                                     |                                                 |
                                     +-------------------------------------------------+

         It also handles cases of duplicated sub nodes exported from older version of PyTorch:
              +----------------------+
              |                      v
              |           +-------> Sub-----------------------------------------------+
              |           |                                                           |
              |           |                                                           v
          [Root] --> ReduceMean -->  Sub  --> Pow --> ReduceMean --> Add --> Sqrt --> Div  --> Mul --> Add
              |                      ^
              |                      |
              +----------------------+
        r      NSub   DivF	recursiveCastSqrtAddr   Powr   r   r   r   r   r   )r#   r$   r   r%   r    r   )r   r   r   r   r   r   -C6?Hskip SkipLayerNormalization fusion since epsilon value is not expected:           @Mulr$   layernorm weightlayernorm biasT4It is not safe to fuse LayerNormalization node. Skipr   	LayerNormname_prefixinputsoutputsnameepsilon) r
   get_childrenleninputop_typefind_first_child_by_typematch_child_pathmatch_parent_pathsget_constant_inputloggerdebugfind_constant_inputoutputappendextendinput_indexr   $is_constant_with_specified_dimensionis_safe_to_fuse_nodesr   prune_graphnodes_to_remover   	make_nodecreate_node_name	attributemake_attributefloatnodes_to_addthis_graph_namenode_name_to_graph_namer5   )r   noder   r   subgraph_nodeschildren
root_inputdiv_nodechild
div_node_1
div_node_2_path_idparent_nodes_sub_nodeadd_eps_nodeir6   pow_nodediv_children	temp_nodemul_nodelast_add_nodenode_before_weightweight_input
bias_inputlayer_norm_outputnormalize_nodes                               r   fusezFusionLayerNormalization.fuse   s   , ::**41DEx=AX!2ZZ]
A;%'8A;+<+<Q+?:+Mx=A{""e+x{/@/@/Cz/QE<<UEK^jo<pJ%% "ZZ88P
))"~H  $(JJ$A$A<oNDFXY  %
!, #8##AZZ22<@
7?glg.>LLcdkclmn?::))(C8A=??1%88 +8??1+=>%I  F*%%i0##A&.AA.y/?/?/BCAF %5(q!)<</0BCAFM$$-!!$'!!(+!!,s"34!!=(H"EF-6->->&-Hi#>>!djj.D.DEWE^E^_`Eack.l*lmL009h9ha!3: &,,Q1G1GXYHZ\i1j-jkJ009h9hA/:  - 4 4Q 7::33$$##	 ::'+D$LL!WX$$++N;#--$

1|Z@*+ZZ001ES^0_	N $$++V-B-B9eT[n-],^_$$^4@D@T@TD(()<)<=A &r   )TF)	__name__
__module____qualname__r   boolr   dictrj   __classcell__r   s   @r   r	   r	      s7    i t [_ 
SUd SU SUr   r	   c                   R     e Zd Zdef fdZd Zd
dedee   fdZ	de
de
fd	Z xZS )FusionLayerNormalizationNCHWr
   c                 (    t         |   |dd       y r   r   r   r   r
   r   s     r   r   z%FusionLayerNormalizationNCHW.__init__   s     4lCr   c                 z   | j                   j                  |      }|t        j                  | d| d       y t	        |j
                        dk7  s$|j
                  d   dk7  s|j
                  d   dk7  r(t        j                  | d| d|j
                          y |j                  |j
                  d   g      S )N z is not initializer.r)   r   r   z* shall have 3 dimensions Cx1x1. Got shape r   )r
   get_constant_valuer?   r@   r8   shapereshape)r   output_namedescriptionvalues       r   get_weight_or_biasz/FusionLayerNormalizationNCHW.get_weight_or_bias   s    

--k:=LLK=+6JKLu{{q EKKNa$75;;q>Q;NLLK=+6`afalal`mno}}ekk!n-..r   
input_namepermc                     | j                   j                  d      }||dz   dz   |z   }t        j                  d|g|g|      }|j                  j                  t        j                  d|      g       |S )z&Append a Transpose node after an input	Transpose_out-r2   r   )r
   rK   r   rJ   rL   rD   rM   )r   r   r   r|   	node_nametranspose_nodes         r   create_transpose_nodez2FusionLayerNormalizationNCHW.create_transpose_node   sw    JJ//<	#f,s2Z?K))+zlU`Tahqr  '')>)>vt)L(MNr   r   r   c                 
   t        j                  |d      }t        |t              r|dgk7  ryg }| j                  j                  ||      }t        |      dk7  ry|j                  d   }|d   j                  dk7  s|d   j                  d   |k7  ry|d   }| j                  j                  |d|d      }	|	y| j                  j                  |	g d	g d
|      }
|
y|
\  }}}}}||k7  ry| j                  j                  |      \  }}|
|dk  s|dkD  rt        j                  d|        yt        j                  |d      }t        |t              sJ |dgk7  ry| j                  j                  |d      dk7  ry||	j                  d      d   }|}|j                  dk7  ry||j                  d      d   }|j                  dk7  ry|j!                  |       |j#                  |
       |j#                  |||	g       | j                  j%                  ||j                  ||      st        j                  d       y|j                  dk7  r|	n|}|j                  d| j                  j'                  |j                  d   |      z
     }| j)                  |d      }|y|j                  d| j                  j'                  |j                  d   |      z
     }| j)                  |d      }|yt+        j,                  |dz   t.        j0                  |j2                  |      }t+        j,                  |dz   t.        j0                  |j2                  |      }| j                  j5                  || j6                         | j                  j5                  || j6                         | j8                  j#                  |       | j;                  |j                  d   g d      }| j                  j=                  dd      }| j;                  |dz   g d|j                  d         }t+        j>                  d|j                  d   |dz   |dz   g|dz   g|      }|j@                  j#                  t+        jB                  dtE        |            g       | jF                  j!                  |       | jF                  j!                  |       | jF                  j!                  |       | j6                  | jH                  |jJ                  <   | j6                  | jH                  |jJ                  <   | j6                  | jH                  |jJ                  <   d} | jM                  |        y)a*  
        Fuse Layer Normalization subgraph into one node LayerNormalization:
              +----------------------+
              | NxCxHxW              |
              |                      v                                                     (Cx1x1)  (Cx1x1)
          [Root] --> ReduceMean -->  Sub --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul --> Add -->
                     (axes=1)        |      (Y=2)     (axes=1)     (E-6)             ^
                                     |                                               |
                                     +-----------------------------------------------+

        Fused subgraph:
                       (0,2,3,1)                            (0,3,1,2)
            [Root] --> Transpose --> LayerNormalization --> Transpose -->
        axesr   Nr   r   r   Fr   r"   r&   r'   r(   r*   r+   r$   r.   r    r,   r-   _NHWC)r   r   r)   r   r   r/   r0   	_out_nhwc)r   r)   r   r   r2   r6   zLayerNormalization(NHWC))'r   get_node_attribute
isinstancelistr
   r7   r8   r9   r:   r;   match_parent_pathr>   r?   r@   rA   rB   rC   rD   rG   rE   r   r   make_tensorr   FLOATrz   add_initializerrP   rI   r   rK   rJ   rL   rM   rN   rO   rQ   r5   increase_counter)!r   rR   r   r   r   rS   rT   rU   subrV   r[   
_sqrt_nodesecond_add_nodereduce_mean_noder`   r]   r_   r6   rb   rc   rd   re   rf   weightrg   biasweight_nhwc	bias_nhwctranspose_inputlayernorm_node_nametranspose_outputri   counter_names!                                    r   rj   z!FusionLayerNormalizationNCHW.fuse   s    ++D&94&4A3;::**41DEx=AZZ]
A;%'8A;+<+<Q+?:+Mqk::66sECVbg6hzz337	
 LXI
O%5x(?ZZ22?C
7?glg.>LLcdkclmn++,<fE$%%%A3;::))(C8A='(:;A>	u$+HOOA,>?B  E)d#l+}hABzz//  	
 LLOP)2):):f)DX)~~a$***@*@ASAZAZ[\A]_g*h&hi((7IJ>"((TZZ-C-CHOOTUDVXe-f)fg
&&z3CD<(()?ARARTZT`T`bhi&&zG';[=N=NPVP\P\^de	

"";0D0DE

""9d.B.BC##N344TZZ]LQ"jj99:N\g9h55+-|]=Q=QRS=T
  )) #**1-|g/EzT[G[\(;67$	
 	  '')>)>y%PW.)Y(Z[  1  0  !12=A=Q=Q$$_%9%9:<@<P<P$$^%8%89>B>R>R$$%5%:%:;1l+r   )N)rk   rl   rm   r   r   r   strr   intr   ro   rj   rp   rq   s   @r   rs   rs      sC    Di D
/
 
49 
},d }, },r   rs   c                   4     e Zd Zdef fdZdedefdZ xZS )FusionLayerNormalizationTFr
   c                 *    t         |   |ddd       y )Nr   r$   TFru   rv   s     r   r   z#FusionLayerNormalizationTF.__init__G  s     4eTBr   r   r   c                 ,   g }| j                   j                  |g dg dfg dg dfg|      \  }}}|yt        |      dk(  sJ |d   dv r|d	   dv r|d
   dv st        j	                  d       y|dd \  }}}	}
}}|dd \  }}}}d}t        |      dk(  r|d   }|j
                  dk(  sJ | j                   j                  |dd|      }|t        j	                  d       y| j                   j                  |d|      }||n| j                   j                  |d|      }|t        j	                  d       y| j                   j                  |      \  }}||dk  s|dkD  r|t        j	                  d       y|L|j                  d   |j                  vs|j                  d   |j                  vrt        j	                  d       y|L|j                  d   |j                  vs|j                  d   |j                  vrt        j	                  d       y|j                  d   |j                  d	   k7  rt        j	                  d       y||||	|
|||||||g}|J| j                   j                  |dd|      }|t        j	                  d       y|j                  |||g       | j                   j                  ||j                  | j                   j                         | j                   j                               st        j	                  d       y| j                  j                  |       |	j                  d	   }|j                  d   }t!        j"                  d|j                  d   ||g|j                  d   g| j                   j%                  dd            }|j&                  j                  t!        j(                  dt+        |            g       | j,                  j/                  |       | j0                  | j2                  |j4                  <   y)aU  
         Layer Norm from Tensorflow model(using keras2onnx or tf2onnx):
          +------------------------------------+
          |                                    |
          |                                    |
        (Cast_1)                               |
          |                                    |
          |                                    v                                           (B)                             (B)             (A)
         Add --> (Cast_1) --> ReduceMean -->  Sub  --> Mul --> ReduceMean --> (Cast_3) --> Add --> Sqrt --> Reciprocol --> Mul --> Mul --> Sub --> Add
          |                       |                                                                                         |       ^              ^
          |                       |                                                                                         |       |              |
          |                       +--------------------------------------------------(Cast_2)-------------------------------|-------+              |
          |                                                                                                                 v                      |
          +---------------------------------------------------------------------------------------------------------------> Mul--------------------+
        )
r   r+   r+   
Reciprocalr#   r$   r   r+   r   r   )
r   r   Nr   r   r   Nr   r   N)r   r+   r+   r   r#   r$   r    r   r+   r   r   )r   r   Nr   r   r   r   Nr   r   NNr)   r   )r   r   r   r   z=return indice is exepected in [0, 1], but got {return_indice}      r    r+   zmul_node_3 not foundzroot node is nonegh㈵>zepsilon is not matchedz;reduce_mean_node_1 and mul_node_3 shall link from root nodez%mul_node_2 shall have two same inputszcast_node_2 not foundz$not safe to fuse layer normalizationr   r/   r0   r2   r6   )r
   r=   r8   r?   r@   r:   match_parent
get_parentr>   r9   rD   rG   rB   r   r   rI   r   rJ   rK   rL   rM   rN   rO   rC   rP   rQ   r5   )r   rR   r   r   return_indicer\   r[   
sub_node_0
mul_node_0
mul_node_1reciprocol_node	sqrt_node
add_node_0reduce_mean_node_0
mul_node_2
sub_node_1reduce_mean_node_1cast_node_3
mul_node_3node_before_reduce	root_noder_   r6   rS   cast_node_2rf   rg   
fused_nodes                               r   rj   zFusionLayerNormalizationTF.fuseJ  sG     )-)F)F <  ?! B  G$*
&<L =!Q&&&a F*}Q/?6/Im\]N^bhNhLLXY !	
IUVXVYIZFJ
4F|"&q/K&&&000ZZ,,T5!=PQ
LL/0!ZZ223EqJ]^ " &&'91>QR 	
 LL,-ZZ22:>
7?glw/?KDWLL12$$Q'z/?/??CUC[C[\]C^fpfvfvCvLLVW"$$Q'z/?/??CUC[C[\]C^fpfvfvCvLLVWA*"2"21"55LL@A 
 "**11*faI\]K"45!!#5{K"PQzz//KKJJ**,JJ**,	
 LL?@##N3!''*%%a(
 %% $$Q'zB[[^$,,-A{,[	

 	##V%:%:9eGn%U$VW  ,8<8L8L$$Z__5r   )rk   rl   rm   r   r   ro   rj   rp   rq   s   @r   r   r   F  s*    Ci C_Md _M _Mr   r   N)loggingr   fusion_baser   onnxr   r   
onnx_modelr   rk   r?   r	   rs   r    r   r   <module>r      sM   
   $  	8	YUv YUxY,6 Y,xcM cMr   