
    Bvh                         d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ  e j                  e      Z G d d	e      Zy)
    N)FusionLayerNormalization)FusionMultiHeadAttentionMMDit)FusionOptions)is_installed)
ModelProto)BertOnnxModelc                   r     e Zd Zddededef fdZd Zd Zd Zdd	e	dz  d
e
fdZdd	e	dz  fdZd Z xZS )MmditOnnxModelmodel	num_headshidden_sizec                 \    |dk(  r|dk(  s|dkD  r||z  dk(  sJ t         |   |||       y)ak  Initialize Multimodal Diffusion Transformer (MMDiT) ONNX Model.

        Args:
            model (ModelProto): the ONNX model
            num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically).
            hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically).
        r   )r   r   N)super__init__)selfr   r   r   	__class__s       S/RAG/venv/lib/python3.12/site-packages/onnxruntime/transformers/onnx_model_mmdit.pyr   zMmditOnnxModel.__init__   sA     Q;!#3Q;YbKbfgKghh)M    c                 D    | j                          | j                          y N)prune_graphremove_unused_constant)r   s    r   postprocesszMmditOnnxModel.postprocess   s    ##%r   c                 p    d}t         j                  d       t        | | d      }|j                          y )NTzwThe optimized model requires LayerNormalization with broadcast support. Please use onnxruntime-gpu>=1.21 for inference.)check_constant_and_dimensionforce)loggerwarningr   apply)r   layernorm_support_broadcastfusions      r   fuse_layer_normzMmditOnnxModel.fuse_layer_norm"   s<    &*#>	
 *3N/NVZ
 	r   c                 :    t        |       }|j                          y r   )r   r   )r   r!   s     r   fuse_multi_head_attentionz(MmditOnnxModel.fuse_multi_head_attention-   s    .t4r   Noptionsadd_dynamic_axesc                     |rJ t        d      rLdd l}ddlm}  |       5  d}|j                  t	        |      dd      }| j                  ||       d d d        y t        j                  d       | j                  |d        y # 1 sw Y   y xY w)Ntqdmr   )logging_redirect_tqdm   r!   )initialdescz<tqdm is not installed. Run optimization without progress bar)r   r(   tqdm.contrib.loggingr)   range	_optimizer   info)r   r%   r&   r(   r)   stepsprogress_bars          r   optimizezMmditOnnxModel.optimize1   sx    ###B&(#yyuqxyPw5 )(
 KKVWNN7D) )(s   2BBc                 r   ||j                   s| j                          | j                  j                          |r|j	                  d       ||j
                  r | j                          | j                          |r|j	                  d       ||j                  r| j                          |r|j	                  d       ||j                  r| j                          |r|j	                  d       | j                          |r|j	                  d       t        j                  d| j                                 y )N   zopset version: )enable_shape_inferencedisable_shape_inferenceutilsremove_useless_cast_nodesupdateenable_layer_normr"   fuse_simplified_layer_normenable_gelu	fuse_geluenable_attentionr$   r   r   r0   get_opset_version)r   r%   r2   s      r   r/   zMmditOnnxModel._optimize@   s   )G)G((* 	

,,."O 9 9  "++-"O 3 3NN"O 8 8**,""od&<&<&>%?@Ar   c                     i }g d}|D ]!  }| j                  |      }t        |      ||<   # t        j                  d|        |S )z8
        Returns node count of fused operators.
        )FastGeluMultiHeadAttentionLayerNormalizationSimplifiedLayerNormalizationzOptimized operators:)get_nodes_by_op_typelenr   r0   )r   op_countopsopnodess        r   get_fused_operator_statisticsz,MmditOnnxModel.get_fused_operator_statistics_   sT     
 B--b1Eu:HRL  	*8*56r   )r   r   )NF)NN)__name__
__module____qualname__r   intr   r   r"   r$   r   boolr3   r/   rL   __classcell__)r   s   @r   r
   r
      s`    	Nj 	NS 	N3 	N&	* 4 *t *B!5 B>r   r
   )loggingfusion_layernormr   fusion_mha_mmditr   fusion_optionsr   import_utilsr   onnxr   onnx_model_bertr   	getLoggerrM   r   r
    r   r   <module>r\      s<     5 : ( %  )			8	$^] ^r   