
    BvhoA                     `   d dl Z d dlZd dlZd dlmZ d dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ  e j.                  e      Z G d
 dej4                  j6                        Z G d dej4                  j6                        Z G d d      Z G d d      Zy)    N)Path)
TypeHelper)	OnnxModel)PastKeyValuesHelper)T5EncoderInputs)torch_onnx_export)	MT5ConfigT5Config)InferenceSessionc            
            e Zd ZdZ	 ddej
                  j                  dej
                  j                  deez  de	dz  f fdZ
dej                  d	ej                  d
ej                  fdZ xZS )T5DecoderInitz~A T5 decoder with LM head to create initial past key values.
    This model is only called once during starting decoding.
    Ndecoderlm_headconfigdecoder_start_token_idc                     t         |           || _        || _        || _        ||n| j                  j
                  | _        t        | j                  d      r| j                  j                  | _        y d| _        y Ntie_word_embeddingsT)super__init__r   r   r   r   hasattrr   )selfr   r   r   r   	__class__s        W/RAG/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/t5/t5_decoder.pyr   zT5DecoderInit.__init__    sr     	&<&H"dkkNpNp 	# 07t{{DY/ZDKK++ 	 `d 	     decoder_input_idsencoder_attention_maskencoder_hidden_statesc                    |N|j                   d   }t        j                  |dft        j                  |j                        | j
                  z  }| j                  |||dd      }|j                  }|j                  }| j                  r|| j                  j                  dz  z  }| j                  |      }t        j                  |      \  }	}
||	|
fS )Nr      dtypedeviceT)	input_idsr   r   	use_cachereturn_dict      )shapetorchoneslongr#   r   r   last_hidden_statepast_key_valuesr   r   d_modelr   r   group_by_self_or_cross)r   r   r   r   
batch_sizedecoder_outputssequence_outputpresent_key_values	lm_logits	past_self
past_crosss              r   forwardzT5DecoderInit.forward2   s     $/55a8J

O**188
 --.  ,,'"7#9 ' 
 *;;,<<##-1D1Dd1JKOLL1	 3 J JK] ^	:)Z//r   N)__name__
__module____qualname____doc__r)   nnModuler
   r	   intr   TensorFloatTensorr7   __classcell__r   s   @r   r   r      s     .2

 
 9$	

 !$d

$!0 <<!0 !&!0  %00	!0r   r   c                   (     e Zd ZdZ fdZd Z xZS )	T5Decoderz-A T5 decoder with LM head and past key valuesc                     t         |           || _        || _        || _        t        | j                  d      r| j                  j                  | _        y d| _        y r   )r   r   r   r   r   r   r   )r   r   r   r   r   s       r   r   zT5Decoder.__init__Y   sQ    /6t{{DY/ZDKK++ 	 `d 	 r   c                    | j                   j                  }t        j                  ||      }|j	                  d      }| j                  ||||dd      }|j                  }|j                  }	| j                  r|| j                   j                  dz  z  }| j                  |      }
t        j                  |	      \  }}|
|fS )N   T)r$   r-   r   r   r%   r&   r'   )r   num_decoder_layersr   group_by_layer	unsqueezer   r,   r-   r   r.   r   r/   )r   r   r   pastrI   r-   dummy_encoder_hidden_statesr1   r2   r3   r4   present_self_s                r   r7   zT5Decoder.forwardb   s    ![[;;-<<TCUV '=&F&Fq&I#,,'+"=#9 ' 
 *;;,<<##-1D1Dd1JKOLL1	-DDEWXa ,&&r   )r9   r:   r;   r<   r   r7   rB   rC   s   @r   rE   rE   V   s    7
'r   rE   c                   v    e Zd Z	 ddZe	 	 ddeez  dedededej                  de
d	e
fd
       ZdefdZd Zy)T5DecoderInputsNc                 .    || _         || _        || _        y r8   )r   r   r-   )r   r   r   r-   s       r   r   zT5DecoderInputs.__init__   s     4E8N#Xgr   r   r0   encode_sequence_lengthpast_decode_sequence_lengthr#   float16use_int32_inputsc                    | j                   }| j                  }| j                  }	| j                  }
d}t	        j
                  d|	dz
  ||f|rt        j                  nt        j                  |      }t        j                  |||	||      }|rt        j                  nt        j                  }|dkD  r||||
g}||||
g}g }t        d|z        D ])  }|j                  t	        j                  |||             + t        d|z        D ])  }|j                  t	        j                  |||             + nd}t        ||j                   |      S )aZ  Create dummy inputs for T5Decoder.

        Args:
            decoder: decoder
            batch_size (int): batch size
            encode_sequence_length (int): sequence length of input_ids for encoder
            past_decode_sequence_length (int): past sequence length of input_ids for decoder
            device (torch.device): device of output tensors
            float16 (bool): whether the model uses float32 or float16 in input
            use_int32_inputs(bool): whether use int32 instead of int64 for some inputs

        Returns:
            T5DecoderInputs: dummy inputs for decoder
        r    r   )lowhighsizer"   r#   )rV   rH   r!   N)	num_headsrI   
vocab_sized_kvr)   randintint32int64r   create_dummyrU   float32rangeappendrandrQ   attention_mask)r   r0   rS   rT   r#   rU   rV   num_attention_heads
num_layersr\   	head_sizesequence_lengthr   encoder_inputs
float_typeself_attention_past_shapecross_attention_past_shaperL   rO   s                      r   ra   zT5DecoderInputs.create_dummy   sR   0 $*#3#3 33
 ++
  	 !MMao."25;;
 )55"-
 '.U]]5==
&*#+	)% #&	*& D1z>*EJJ'@
[abc + 1z>*EJJ'A\bcd + D0.2O2OQUVVr   returnc                     | j                   | j                  g}| j                  r|j                  | j                         |S r8   )r   r   r-   extend)r   
input_lists     r   to_listzT5DecoderInputs.to_list   s@    ""''

 d223r   c                    | j                   r8| j                   D cg c]"  }|j                  t        j                        $ c}nd }t	        | j
                  j                         | j                  j                         |      S c c}w )N)r"   )r-   tor)   rb   rQ   r   cloner   )r   prL   s      r   to_fp32zT5DecoderInputs.to_fp32   sr    LPL`L`43G3GH3Ga5==)3GHfj""((*''--/
 	
 Is   'Br8   )FF)r9   r:   r;   r   staticmethodr
   r	   r?   r)   r#   boolra   listrs   rx    r   r   rQ   rQ   ~   s    
 	h  !&IW9$IWIW !$IW &)	IW
 IW IW IW IWV 
r   rQ   c                       e Zd Ze	 	 	 ddeez  dej                  dede	de	de	fd       Z
edefd	       Ze	 dd
eez  dedej                  de	def
d       Zy)T5DecoderHelperr   r#   onnx_model_pathverboseuse_external_data_formatrV   c                    t        | t        t        f      sJ t        j	                  | j
                  ddt        | t              rdnd||      }|j                         }| j
                  j                  }t        j                  |d      }	t        j                  |d      }
|
d	d|z   }t        | t              r|	ng }t        | t              r|n|
}d
g|}dg}|j                  d       |j                  |       ddiddddddddid}|D ]  }dd|v rdndd||<    |D ]/  }d|v r	ddd||<   t        | t              r	ddd||<   )ddi||<   1 t        |      j                  j                  dd       t        j                          5 }t"        j$                  j'                  |d      }t        |      j                  j                  dd       t)        | t+        |      |r|n|d|||dd||       |r0t-        j.                  |d      }t1        j2                  ||dd       d	d	d	       y	# 1 sw Y   y	xY w)a  Export decoder to ONNX

        Args:
            decoder (Union[T5Decoder, T5DecoderNoPastState]): decoder object
            device (torch.device): device of decoder object
            onnx_model_path (str): onnx path
            verbose (bool, optional): print verbose information. Defaults to True.
            use_external_data_format (bool, optional): use external data format or not. Defaults to False.
            use_int32_inputs (bool, optional): use int32 inputs
        rH         r   )r0   rS   rT   r#   rV   F)presentTNlogitsr$   r   r0   rS   )r   r    )r$   r   r   r   r   rT   )r   rH   crosszpast_decode_sequence_length + 1)parentsexist_okzdecoder.onnx   )
argsfexport_paramsinput_namesoutput_namesdynamic_axesopset_versiondo_constant_foldingr   r   )load_external_data)save_as_external_dataall_tensors_to_one_file)
isinstancerE   r   rQ   ra   r   rs   rI   r   get_past_namesrd   rq   r   parentmkdirtempfileTemporaryDirectoryospathjoinr   tupleonnx
load_modelr   save)r   r#   r   r   r   rV   inputsrr   rI   
past_namespresent_namespresent_self_namesinput_past_namesoutput_present_namesr   r   r   nametmp_dir_nametemp_onnx_model_pathmodels                        r   export_onnxzT5DecoderHelper.export_onnx   s~   & 'I}#=>>> --NN#$-7-KQR- . 
 ^^%
$^^>>(778JTYZ
+::;MW[\*+CQ1C-CD)3GY)G:R5?5S1Yf 8#78 #m34+, < +7;S&T)5:R%S<
 %D4:dN0H`"L % )D$)5:R%ST"gy1'<*L& <*L& ) 	_$$**4$*G((*l#%77<<n#M %&--33D43P:&*B&"')) $()A ((<QUV#*.,0	' +**s   BH77I r   c                    t         j                  d       t        j                  |j                  j                         j                               t        j                  |j                  j                         j                               d}|j                  rt        |j                        dz  dk(  sJ t        t        |j                        dz        }t        j                  |      }t        |j                        D ]<  \  }}t        j                  |j                         j                               |||   <   > | j                  d|      }|S )zRun inference of ONNX model.zstart onnxruntime_inference)r$   r      r   N)loggerdebugnumpyascontiguousarrayr   cpur   r-   lenr?   r   r   	enumeraterun)ort_sessionr   
ort_inputsrh   r   ipast_tensorort_outputss           r   onnxruntime_inferencez%T5DecoderHelper.onnxruntime_inference^  s    	23 001I1I1M1M1O1U1U1WX&+&=&=f>[>[>_>_>a>g>g>i&j


 !!v--.2a777S!7!781<=J,;;JGJ"+F,B,B"C;,1,C,CKOODUD[D[D],^
:a=) #D "oodJ7r   r   r   	max_casesc                 H   t        j                  |d      dk(  }g d}g }|d| D ]l  \  }}	}
t        | t              rd}
t        j                  | j                  ||	|
|||      }|j                         j                         }t        j                         5   | | }ddd       t        j                  ||      }| j                  j                  }t        j                  t        j                   d   j#                         j                         |d   z
              }|}t$        j'                  d|        t)        d|z        D ]|  }t        j                  t        j                   |d	   |   j#                         j                         |d	|z      z
              }t$        j'                  d
| d|        t+        ||      }~ t        | t              rt)        d|z        D ]  }t        j                  t        j                   |d   |   j#                         j                         |d	d|z  z   |z      z
              }t$        j'                  d| d|        t+        ||      } |j-                  |       t$        j/                  d||	|
|       o S # 1 sw Y   xY w)zQCompare the result from PyTorch and OnnxRuntime to verify the ONNX model is good.past_key_self_0ztensor(float16)))r      r   )r    rH   r   )r   r    r    )   r   rH   Nr   )r#   rU   rV   zlogits max_diff=rH   r    zself attention past state z
 max_diff=zcross attention past state zUbatch_size=%s, encode_sequence_length=%s, past_decode_sequence_length=%s, max_diff=%s)r   get_input_typer   r   rQ   ra   r   rx   rs   r)   no_gradr~   r   rI   r   amaxabsr   r   r   rc   maxrd   info)r   r   r#   rV   r   rU   
test_casestest_cases_max_diffr0   rS   rT   r   rr   torch_outputsr   rI   max_diffmax_diff_allr   s                      r   verify_onnxzT5DecoderHelper.verify_onnxr  s    #11+?PQUffB
 
 
#	
"'%/./+$11&+!1 2 F  )113J  %z 2 ! *??VTK!&!@!@zz%))M!,<,@,@,B,H,H,J[YZ^,["\]H#LLL+H:671112 ::eiia0@0C0G0G0I0O0O0QT_`ade`eTf0f&gh9!JxjQR"<: 3
 %/q#556A$zz		-"21"5"9"9";"A"A"CkRSVWZlVlRlopRpFq"qr H LL#>qcH:!VW#&|X#>L 7  &&|4KKg&+U $d A !s   JJ!	N)TFF)r   )r9   r:   r;   ry   rE   r   r)   r#   strrz   r   rQ   r   r   r?   r   r|   r   r   r~   r~      s    
 ).!&s]*ss s 	s
 #'s s sj ?  &  B=(B%B B 	B
 B Br   r~   ) loggingr   r   pathlibr   r   r   r)   io_binding_helperr   
onnx_modelr   past_helperr   
t5_encoderr   torch_onnx_export_helperr   transformersr	   r
   onnxruntimer   	getLoggerr9   r   r=   r>   r   rE   rQ   r~   r|   r   r   <module>r      s     	      (   + & 6 , (			8	$80EHHOO 80v%' %'Pf
 f
RN Nr   