
    Bvh/                         d dl Z d dlZd dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZ d dlmZmZ d d	lmZ  e j,                  e      Zg d
Zg dZ G d d      Zy)    N)Path)float_to_float16_max_diff)	OnnxModel)optimize_model)	T5DecoderT5DecoderHelper)T5EncoderDecoderInitT5EncoderDecoderInitHelper)MT5ForConditionalGenerationT5ForConditionalGeneration)InferenceSession)zt5-smallzt5-basezt5-largezt5-3bzt5-11b)zgoogle/mt5-smallzgoogle/mt5-basezgoogle/mt5-largezgoogle/mt5-xlzgoogle/mt5-xxlc                      e Zd Ze	 	 d$dededededef
d       Ze	 	 	 d%dededej                  d	ed
edede	ee
ez  f   fd       Ze	 	 	 	 d&dee
z  dej                  dededededefd       Ze	 	 	 d'dedee   dz  dedefd       Ze	 	 	 	 d(dedededededededed efd!       Zedee
z  d"edej                  defd#       Zy))T5Helper
output_dirmodel_name_or_pathsuffix
new_folderreturnc                 6   |}t         j                  j                  |      rt        |      j                  d   }n|j                  d      d    ||z  }|r t         j                  j                  | |      n| }t         j                  j                  ||dz         S )a  Build onnx path

        Args:
            output_dir (str): output directory
            model_name_or_path (str): pretrained model name, or path to the model checkpoint
            suffix (str, optional): suffix like "_encoder" or "_decoder_fp16" will be appended to file name. Defaults to None.
            new_folder (bool, optional): create a new directory for the model. Defaults to False.

        Returns:
            str: path of onnx model
        /z.onnx)ospathisdirr   partssplitjoin)r   r   r   r   
model_name	directorys         V/RAG/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/t5/t5_helper.pyget_onnx_pathzT5Helper.get_onnx_path!   s    $ (
77==+,0177;JS!"%f
<FBGGLLZ8J	ww||IzG';<<    	cache_dirdevice
model_typestate_dict_pathencoder_decoder_initc                    |dk(  rt        j                  | |      }n(|dk(  rt        j                  | |      }nt        d      |r$|j	                  t        j                  |             t        |j                  |j                  |j                        }|j                         j                  |       t        |j                  |j                  |j                  |j                  d|       }|rdnd}	|	|d	|iS )
a{  Load model given a pretrained name or path, then build models for ONNX conversion.

        Args:
            model_name_or_path (str): pretrained model name or path
            cache_dir (str): cache directory
            device (torch.device): device to run the model
            model_type (str, optional): model type "t5" or "mt5"
            state_dict_path(str, optional): state dictionary path
            encoder_decoder_init (bool, optional): combine encoder and decoder kv cache initialization into one model.
        Returns:
            Dict[str, torch.nn.Module]: mapping from name to modules for ONNX conversion.
        t5)r#   mt5z only support mode_type=t5 or mt5N)decoder_start_token_idoutput_cross_onlyr'   encoderdecoder)r   from_pretrainedr   
ValueErrorload_state_dicttorchloadr   r.   lm_headconfigevaltor	   r-   )
r   r#   r$   r%   r&   r'   modelr.   r-   encoder_names
             r    
load_modelzT5Helper.load_model>   s    * .>>?Q]fgE5 /??@R^ghE?@@!!%**_"=>EMM5==%,,G&!&MMMMMMLL#'"66
 2F-9gy'::r"   r8   onnx_model_pathverboseuse_external_data_formatuse_decoder_input_idsuse_int32_inputsc           	          t        | t              rt        j                  | ||||||       y t	        j                  | |||||       y )N)
isinstancer	   r
   export_onnxr   )r8   r$   r;   r<   r=   r>   r?   s          r    rB   zT5Helper.export_onnxl   sT     e12&22%(  ''( r"   N
onnx_modelop_block_listforce_fp16_logitsuse_symbolic_shape_inferc                 *   |g d}| j                         D ch c]  }|j                   }}t        |      }|j                  |      }t        j                  d| d|        | j                         j                  d   j                  }d}	| j                         }
||
v sJ |
|   }d}|j                  dk(  r|}t        j                  d|j                          d}|j                  D ]  }| j                  |      }| n t        |      }t        j                  d	|j                   d
|        |dk  }	n/t        j                  d|j                   d|j                          g }g }|	s||s|g}|j                  g}d|vr| j                         }d}d}| j!                  d      }|rd}|D ]  }|j                  |vs| j#                  ||
      }| j%                  ||      }|D cg c]  }|j                  |v s||v s| }}|D cg c]  }|j                  |v s||v s| }}t'        |      t'        |      z   dkD  s|j)                  |j                         |dz  }d} |rt'        |      |z
  }t        j                  d| d|        t        j                  d|        ||||	d}t        j                  d|        |r | j*                  dddi| |S ddlm}  || j.                  fddi| |S c c}w c c}w c c}w )a  Convert model to mixed precision.
           It detects whether original model has fp16 precision weights, and set parameters for float16 conversion automatically.
        Args:
            onnx_model (OnnxModel): optimized ONNX model
            op_block_list (List[str], optional): operators need to run in fp32.
            force_fp16_logits (bool, optional): force logits and last MatMul node to be in float16. Defaults to False.
            use_symbolic_shape_infer (bool, optional): use symbolic shape inference to convert float to float16. Defaults to True.
        Returns:
            parameters(dict): a dictionary of parameters used in float16 conversion
        N)SimplifiedLayerNormalization SkipSimplifiedLayerNormalizationReluAddz	fp32 op: z
 fp16 op: r   FMatMulz#Found last MatMul node for logits: z3max diff of converting weights in last MatMul node z: gư>z-Failed to find MatMul node for logits. Found z	 of node rK   T   z#node counter of Add operator: fp32=z fp16=znode_block_list: )keep_io_typesrD   node_block_listforce_fp16_initializersz!auto_mixed_precision parameters: rF   )convert_float_to_float16disable_shape_infer )nodesop_typeset
differenceloggerinfographoutputnameoutput_name_to_nodeinputget_initializerr   debugwarninginput_name_to_nodesget_nodes_by_op_typeget_parentsget_childrenlenappendrQ   float16r8   )rC   rD   rE   rF   nodeop_full_setfp32_op_setfp16_op_setlogits_output_nameis_weight_fp16_precisionr]   last_matmul_nodeinitializerr^   max_diffrN   rO   rb   fp32_addchanged	add_nodesparentschildrenchildblocked_childrenparentblocked_parentsfp16_add
parametersrQ   s                                 r    auto_mixed_precisionzT5Helper.auto_mixed_precision   s   "  M 1;0@0@0BC0Bt||0BC-(!,,[9i}J{mDE (--/66q9>> $) (<<>!%8888"#56<<8##KK=dii[IJK(88?* $ 1=HLLNtyykY[\d[efg'/$$NNJ4<<.Xabfbkbkalmn(/?/KUf/0M/445O%","@"@"BHG"77>I%Dyy7","8"8?R"S#-#:#:4AT#U/7,/7e5==M;Y]bfu]uEx ) , 29+18vFNNm<[_eix_xF ( +   01C4HHAM+22499=$MH&*G & " 9~0HKK=hZvhZXY''89: +*.'?	

 	7
|DE#/J//\\Q[\  9$  $(  m Db,+s   L/LLL)Loptimized_model_path
is_float16num_attention_headshidden_sizer}   use_gpuforce_fp16_ioc	           	          ddl m}	 d}
|r |	d      }
| |
_        t        | d||d|
|      }|r,|rt        j                  ||       n|j                  |       |j                  ||d	       y)
zHOptimize ONNX model with an option to convert it to use mixed precision.r   )FusionOptionsNr)   )r%   	num_headsr   	opt_leveloptimization_optionsr   )rE   )cast_input_outputT)all_tensors_to_one_file)fusion_optionsr   enable_skip_layer_normr   r   r}    convert_model_float32_to_float16save_model_to_file)r;   r~   r   r   r   r=   r}   r   r   r   r   ms               r    optimize_onnxzT5Helper.optimize_onnx   s     	1##0#6 >R:R 7)#!5
 #--a=-Q22]2S	13Keijr"   ort_sessionc                     t        | t              rt        j                  | |||      S t	        j                  | |||      S )zQCompare the result from PyTorch and OnnxRuntime to verify the ONNX model is good.)rA   r	   r
   verify_onnxr   )r8   r   r$   r?   s       r    r   zT5Helper.verify_onnx#  s?     e12-99%fVfgg**5+vGWXXr"   ) F)r)   r   F)TFTF)NFT)FTFF)__name__
__module____qualname__staticmethodstrboolr!   r2   r$   dictr	   r   r:   rB   r   listr}   intr   r   r   rS   r"   r    r   r       sM     	=== = 	=
 
= =8 
 !%*+;+;+; +; 	+;
 +; #+; 
c')33	4+; +;Z 
 ).&*!&//  	
 #'  $  :  +/"')-	nnCy4'n  n #'	n n`  */%)#%k%k!%k %k !	%k
 %k #'%k #%k %k %k %kN 
Y//
Y%
Y 
Y 	
Y 
Yr"   r   )loggingr   pathlibr   r2   rh   r   rC   r   	optimizerr   
t5_decoderr   r   t5_encoder_decoder_initr	   r
   transformersr   r   onnxruntimer   	getLoggerr   rX   PRETRAINED_T5_MODELSPRETRAINED_MT5_MODELSr   rS   r"   r    <module>r      sV     	   -   $ 1 T P (			8	$M  NY NYr"   