
    BvhS                        d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dl	Z	d dl
Z
d dlmZ d dlmZ d dl	mZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZ d dlmZ  e j<                  e      Z  G d de
jB                  jD                        Z#y)    N)chain)Path)convert_float_to_float16)RepeatedCompositeFieldContainer)
ModelProtoValueInfoProto)	OnnxModel)PastKeyValuesHelper)WhisperConfig)convert_inputs_for_ortget_model_dynamic_axesget_sample_decoder_inputsgroup_past_key_values)InferenceSessionc                   b    e Zd ZdZd%dedej                  j                  dede	f fdZ
	 	 d&dej                  d	ej                  dz  d
eeej                        dz  fdZ	 	 d&dej                  d	ej                  dz  d
eeej                        dz  fdZ	 	 d&dej                  d	ej                  dz  d
eeej                        dz  fdZd Zd Zd Zd%de	de	de	fdZd'dede	de	fdZd%dede	fdZdefdZdede	fdZ	 	 	 	 	 	 d(dedede	d e	de	de	d!e	d"e	fd#Zdedede	de	fd$Z xZS ))WhisperDecoderz/A Whisper decoder with optional past key valuesconfigmodel
model_implno_beam_search_opc                    t         |           || _        |j                  | _        || _        || _        |dk(  rd n|j                  j                  | _        |dk(  rd n|j                  | _        |dk(  r|nd | _        | j                  j                  | _	        | j                  j                  | _        | j                  j                  | j                  z  | _        y Nopenai)super__init__r   devicer   r   r   decoderproj_outmax_source_positionsdecoder_attention_heads	num_headsd_model	head_size)selfr   r   r   r   	__class__s        a/RAG/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/whisper/whisper_decoder.pyr   zWhisperDecoder.__init__%   s    ll$!2)X5t5;;;N;N *h 6ENN(H4U$
$(KK$D$D!<<,,>    Ndecoder_input_idsencoder_hidden_statespast_key_valuesc                     | j                  |||d      }| j                  |j                        }|j                  }|||fS t	        j
                  |      \  }}||fS )NT)r)   	input_idsr*   	use_cache)r   r   last_hidden_stater*   r
   group_by_self_and_cross)	r$   r(   r)   r*   outputslogitspresent_key_valuespresent_selfpresent_crosss	            r&   
hf_forwardzWhisperDecoder.hf_forward4   s|     ,,"7'+	  
 w889$44"--- ':&Q&QRd&e#m |##r'   c                    i }|[t        |      \  }}|D cg c]  }|j                  dd       }}|D cg c]#  }|j                  |j                  d d dz         % }}|D cg c]  }|j                  dd       }}|D cg c]#  }|j                  |j                  d d dz         % }}t	        | j
                  j                  j                        D ]  \  }}	|d|z     ||	j                  j                  <   |d|z  dz      ||	j                  j                  <   |d|z     ||	j                  j                  <   |d|z  dz      ||	j                  j                  <    | j
                  j                         \  }
}| j
                  j                  |||      }|| j
                  j                  j                  D ]  }	t        j                  ||	j                  j                     |
|	j                  j                     gd      j                         |
|	j                  j                  <   t        j                  ||	j                  j                     |
|	j                  j                     gd      j                         |
|	j                  j                  <    g g }}| j
                  j                  j                  D ]  }	|j!                  |
|	j                  j                            |j!                  |
|	j                  j                            |V|j!                  |
|	j                  j                            |j!                  |
|	j                  j                             |D cg c]?  }|j                  |j                  d d d| j"                  fz         j                  dd      A }}|D cg c]?  }|j                  |j                  d d d| j"                  fz         j                  dd      A }}|D ]  }|j%                           |)t'        j(                  ||z   t+        |      dz        }||fS ||fS c c}w c c}w c c}w c c}w c c}w c c}w )N      ))xxakv_cache)dimr9   )r   	transposereshapeshape	enumerater   r   blocksattnkeyvalue
cross_attninstall_kv_cache_hookstorchcatdetachappendr#   remover
   group_by_layerlen)r$   r(   r)   r*   past_kv_cacheself_attn_kv_cachescross_attn_kv_cachespast_kvidxblockr<   hooksr1   r3   r4   
present_kvhookr2   s                     r&   oai_forwardzWhisperDecoder.oai_forwardP   s,    &8Mo8^5!5J]"^J]w7#4#4Q#:J]"^]p"q]pRY7??7==!3Du3L#M]p"qK_#`K_G$5$5a$;K_ #`^r#s^rSZGOOGMM"14E4M$N^r #s'

(:(:(A(AB
U0CAG0Lejjnn-2Ea#gPQk2Rejj../6J1s76Se..2238LQQTWWX[8Ye..445	 C **;;=% ##&7<Q\i#j &++22+099"5::>>2HUZZ^^4LMST,&( ( .3YY"5::#3#34huzz?O?O6PQWX.&( ))*	 3 ')"mZZ''..E 89)9)9 :;&$$Xe.>.>.B.B%CD$$Xe.>.>.D.D%EF / +
*
 z//3r4>>6JJKUUVWYZ[* 	 
 ,
+
 z//3r4>>6JJKUUVWYZ[+ 	 
 DKKM  "!4!C!C},c,.?1.D" --- |##] #_"q#`#sl

s$   Q(Q%Q(Q+AQ5AQc                 l    | j                   dk(  r| j                  |||      S | j                  |||      S r   )r   rX   r5   )r$   r(   r)   r*   s       r&   forwardzWhisperDecoder.forward   s<     ??h&##$57Lo^^02GYYr'   c           	          | j                   rddg}|S ddgt        t        j                  d t	        | j
                  j                        D                    }|S )Nr,   r)   c              3   @   K   | ]  }d | d| d| d| f  yw)past_key_self_past_value_self_past_key_cross_past_value_cross_N .0is     r&   	<genexpr>z-WhisperDecoder.input_names.<locals>.<genexpr>   sC      (!EA *!-1A!/EYZX[G\`qrsqt^uv!E   
first_passlistr   from_iterableranger   num_hidden_layers)r$   input_namess     r&   rm   zWhisperDecoder.input_names   sl    ??&(?@K  '	 '' (!&t{{'D'D!E( 	K r'   c           	      6   | j                   rGdgt        t        j                  d t	        | j
                  j                        D                    }|S dgt        t        j                  d t	        | j
                  j                        D                    }|S )Nr1   c              3   @   K   | ]  }d | d| d| d| f  yw)present_key_self_present_value_self_present_key_cross_present_value_cross_Nra   rb   s     r&   re   z.WhisperDecoder.output_names.<locals>.<genexpr>   sH      ( "FA 0s31!50421#6	 "Frf   c              3   0   K   | ]  }d | d| f  yw)rp   rq   Nra   rb   s     r&   re   z.WhisperDecoder.output_names.<locals>.<genexpr>   s,      (!EA -QC04Gs2KL!Es   rg   )r$   output_namess     r&   ru   zWhisperDecoder.output_names   s    ??'' ( "'t{{'D'D!E( 
L0  '' (!&t{{'D'D!E( L r'   c                 `    t        | j                  ||      }d|v r| j                  s|d   d= |S )Nr,   r7   )r   r   r   )r$   rm   ru   dynamic_axess       r&   rw   zWhisperDecoder.dynamic_axes   s7    -dkk;U,&t/E/E[)!,r'   use_fp16_inputsuse_int32_inputsreturn_dictc           	          t        | j                  | j                  d| j                  rdnd| j                  rdnd||      }|r| j                  r|d= |S | j                  r
|d   |d   fS |d   |d   |d   fS )	Nr8   r      r7   )
batch_sizepast_sequence_lengthsequence_lengthuse_fp16	use_int32r*   r(   r)   )r   r   r   rh   )r$   rx   ry   rz   inputss        r&   r   zWhisperDecoder.inputs   s    *KKKK'+!A"&//Qq$&
 ,-M??*+./ 
 &'*+$%
 	
r'   iois_cross	is_outputc                 J   |j                   j                  j                  j                  d   }d|j                  v r!|j                          | j                  |_        |j                   j                  j                  j                  d   }d|j                  v r/|j                          |r| j                  |_        n|rdnd|_        |j                   j                  j                  j                  d   }d|j                  v r!|j                          | j                  |_        |S )Nr7   _dim_r8   total_sequence_lengthr~      )
typetensor_typer@   r=   	dim_paramClearr!   	dim_valuer   r#   )r$   r   r   r   r!   r   r#   s          r&   fix_key_value_cache_dimsz'WhisperDecoder.fix_key_value_cache_dims  s     GG''--11!4	i)))OO"&..I''--3377:o///!!#,0,E,E)GP,CVl)GG''--11!4	i)))OO"&..I	r'   io_listc                    g }g }g }|D ]  }d|j                   vr d|j                   vr|j                  |       1d|j                   v rD| j                  |d|      }| j                  r|j                  |       q|j                  |       | j                  |d|      }| j                  r|j                  |       |j                  |        | j                  s|||z   z  }|S )Npastpresentr$   F)r   r   T)namerK   r   r   )r$   r   r   reordered_iorP   rQ   r   new_ios           r&   fix_iozWhisperDecoder.fix_io  s     !BRWW$"'')A##B'277"66rEU^6_)) ''/'..v6 66rDT]6^)) ''/(//7! $ %%/2FFFLr'   c                    | j                  |j                  j                  d      }t        |j                  j                        dkD  rG|j                  j                  j	                          t        |j                  j                        dkD  rG|j                  j                  j                  |       | j                  |j                  j                  d      }t        |j                  j                        dkD  rG|j                  j                  j	                          t        |j                  j                        dkD  rG|j                  j                  j                  |       |S )NF)r   r   T)r   graphinputrN   popextendoutput)r$   r   reordered_inputsreordered_outputss       r&   fix_inputs_and_outputsz%WhisperDecoder.fix_inputs_and_outputs1  s     ;;u{{'8'8E;J%++##$q(KK!!# %++##$q(  !12 KK(:(:dKK%++$$%)KK""$ %++$$%)!!"34r'   c                 >    | j                   dk(  r|rt        |      }|S r   )r   r   )r$   r   rx   s      r&   fix_layernorm_weightsz$WhisperDecoder.fix_layernorm_weights?  s!    ??h&? -U3Er'   onnx_model_pathproviderverboseuse_external_data_formatuse_encoder_hidden_statesuse_kv_cache_inputsc	                 R   |xr | | _         | xr || _        | j                   s| j                  sJ d       | j                  ||      }	| j                         }
| j	                         }| j                  |
|      }t        |      j                  j                  dd       t        j                         5 }t        j                  j                  |d      }t        |      j                  j                  dd       |r|n|}t        j                  j!                  | |	|d|
||dd|
       t        j"                  ||      }| j%                  |      }| j'                  ||      }t)        j*                  |||d	       d
d
d
       | j-                  ||||       y
# 1 sw Y   xY w)al  Export decoder to ONNX

        Args:
            onnx_model_path (str): path to save ONNX model
            provider (str): provider to use for verifying parity on ONNX model
            verbose (bool, optional): print verbose information. Defaults to True.
            use_external_data_format (bool, optional): use external data format or not. Defaults to False.
            use_fp16_inputs (bool, optional): use float16 inputs for the KV caches. Defaults to False.
            use_int32_inputs (bool, optional): use int32 inputs for the decoder_input_ids. Defaults to True.
            use_encoder_hidden_states (bool, optional): use encoder_hidden_states as model input for decoder-init/decoder-without-past models. Defaults to False.
            use_kv_cache_inputs (bool, optional): use KV caches as model inputs for decoder-with-past models. Defaults to True.
        zVOnly one of `use_encoder_hidden_states` and `use_kv_cache_inputs` can be true at once.)rx   ry   T)parentsexist_okzdecoder.onnx   )	argsfexport_paramsrm   ru   rw   opset_versiondo_constant_foldingr   )load_external_data)save_as_external_dataall_tensors_to_one_fileN)rh   
later_passr   rm   ru   rw   r   parentmkdirtempfileTemporaryDirectoryospathjoinrH   onnxexport
load_modelr   r   r	   saveverify_onnx)r$   r   r   r   r   rx   ry   r   r   r   rm   ru   rw   tmp_dir_nametemp_onnx_model_pathout_pathr   s                    r&   export_onnxzWhisperDecoder.export_onnxK  s   H 4O<O8O 87O<O$// 	
d	
1 _O_`&&(((*((lC_$$**4$*G((*l#%77<<n#M %&--33D43P/G+_HJJ"')) $(   OOHAYZE//6E..uoFENN&>(,	+ +8 	(OEUV9 +*s   7C	FF&c                    | j                  ||d      }g }| j                  r | j                  d
i |}|j                  |d   j	                         j                         j                                |d   D ]D  }|D ]=  }	|j                  |	j	                         j                         j                                ? F n | j                  d
i |}|j                  |d   j	                         j                         j                                |d   D ]=  }
|j                  |
j	                         j                         j                                ? t        ||g      }|j                  dt        ||            }	 t        | j                               D ]g  \  }}t        j                  ||   ||   z
        }t        j                  d| d       t        j                  d	t        j                   |              i y#  Y yxY w)aw  Verify ONNX model outputs and PyTorch model outputs match

        Args:
            onnx_model_path (str): path to save ONNX model
            provider (str): execution provider for ONNX model
            use_fp16_inputs (bool, optional): use float16 inputs for the KV caches
            use_int32_inputs (bool, optional): use int32 inputs for the decoder_input_ids
        T)rx   ry   rz   r   r7   )	providersNz
Comparing z...z
Max diff: ra   )r   rh   rZ   rK   rJ   cpunumpyr   runr   rA   ru   npabsloggerwarningmax)r$   r   r   rx   ry   r   
pt_outputsoutpresent_key_value_layerpresent_key_valuepresent_self_key_valuesessort_outputsrd   output_namediffs                   r&   r   zWhisperDecoder.verify_onnx  s   8 _O_mqr
??$,,((Cc!fmmo11399;<+.q6')@%%%&7&>&>&@&D&D&F&L&L&NO *A ,2 $,,((Cc!fmmo11399;<*-a&&!!"8"?"?"A"E"E"G"M"M"OP +1  H:Fhht%;FD%IJ	"+D,=,=,?"@;vvjmk!n<=K=<=BFF4L>:; #A	s    BH H)F)NN)FF)TFFTFT)__name__
__module____qualname____doc__r   rH   nnModulestrboolr   Tensorri   tupler5   rX   rZ   rm   ru   rw   r   r   r   r   r   r   r   r   r   r   __classcell__)r%   s   @r&   r   r   "   s2   9?} ?UXX__ ?RU ?jn ?$ 6:<@	$ <<$  %||d2$ eELL12T9	$> 6:<@	X$ <<X$  %||d2X$ eELL12T9	X$z 6:<@	Z <<Z  %||d2Z eELL12T9	Z 8
d 
d 
QU 
4> T ^b (= $ 8J 
: 
 
  ). %!%*/$(OWOW OW 	OW
 #'OW OW OW $(OW "OWb55 5 	5
 5r'   r   )$loggingr   r   	itertoolsr   pathlibr   r   r   r   rH   float16r   #google.protobuf.internal.containersr   r   r   
onnx_modelr	   past_helperr
   transformersr   whisper_inputsr   r   r   r   onnxruntimer   	getLoggerr   r   r   r   r   ra   r'   r&   <module>r      sh     	       , O +   + &  )			8	$oUXX__ or'   