
    BvhS                         d dl Z d dlZd dlmZ d dlZd dlZd dlmZm	Z	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ  e j6                  e      Zg dZ G d d      Zy)    N)Path)add_cache_indirection_to_mhaadd_output_qk_to_mhafix_past_sequence_length)optimize_model)WhisperConfigWhisperForConditionalGenerationWhisperProcessor)WhisperDecoder)WhisperEncoder)WhisperEncoderDecoderInit)WhisperJumpTimes)InferenceSession)zwhisper-tinyzwhisper-tiny.enzwhisper-basezwhisper-base.enzwhisper-smallzwhisper-small.enzwhisper-mediumzwhisper-medium.enzwhisper-largezwhisper-large-v2zwhisper-large-v3zwhisper-large-v3-turboc                   n   e Zd Ze	 	 d.dededededef
d       Ze	 	 	 d/dededed	ej                  d
ej                  dededede
eej                  j                  f   fd       Zedeez  ez  dedededededededefd       Ze	 	 	 	 	 	 d0dedededededededededededefd        Ze	 	 d1d!ed"ej                  j                  d	ej                  d#ed$ef
d%       Zed#ed$efd&       Zed'ej0                  d(ej0                  d)efd*       Ze	 	 d1deded+ed	ej                  d#ed$efd,       Zy-)2WhisperHelper
output_dirmodel_name_or_pathsuffix
new_folderreturnc                 6   |}t         j                  j                  |      rt        |      j                  d   }n|j                  d      d   }||z  }|r t         j                  j                  | |      n| }t         j                  j                  ||dz         S )a  Build onnx path

        Args:
            output_dir (str): output directory
            model_name_or_path (str): pretrained model name, or path to the model checkpoint
            suffix (str, optional): suffix like "_encoder" or "_decoder_fp16" will be appended to file name. Defaults to None.
            new_folder (bool, optional): create a new directory for the model. Defaults to False.
        Returns:
            str: path of onnx model
        /z.onnx)ospathisdirr   partssplitjoin)r   r   r   r   
model_name	directorys         `/RAG/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/whisper/whisper_helper.pyget_onnx_pathzWhisperHelper.get_onnx_path*   s    " (
77==+,0177;J#))#.r2Jf
<FBGGLLZ8J	ww||IzG';<<    
model_impl	cache_dirdevicedtypemerge_encoder_and_decoder_initno_beam_search_op	output_qkc                    |dk(  rt        j                  | |d      }nRddl}	t        j                  j                  |       s| j                  d      d   dd }
n| }
|	j                  |
||d	
      }|j                         j                  |       |dk(  r|j                  |       t        j                  | |      }t        ||||      j                         }d|i}|r0t        ||||      j                         }|j                  d|i       n/t        |||      j                         }|j                  ||d       |r.t        |||      j                         }|j                  d|i       |S )a|  Load model given a pretrained name or path, then build models for ONNX conversion.

        Args:
            model_name_or_path (str): pretrained model name or path
            model_impl (str): library to load model from
            cache_dir (str): cache directory
            device (torch.device): device to run the model
            dtype (torch.dtype): dtype to run the model
            merge_encoder_and_decoder_init (bool, optional): Whether merge encoder and decoder initialization into one ONNX model. Defaults to True.
            no_beam_search_op (bool, optional): Whether to use beam search op or not. Defaults to False.
            output_qk (bool, optional): Whether to output QKs to calculate batched jump times for word-level timestamps. Defaults to False.
        Returns:
            Dict[str, torch.nn.Module]: mapping from name to modules for ONNX conversion.
        hfeagerr&   attn_implementationr   Nr   r      T)download_root	in_memoryr'   r(   r&   decoderencoder)r8   decoder_init
jump_times)r	   from_pretrainedwhisperr   r   existsr   
load_modelevaltor   r   r   updater   r   )r   r%   r&   r'   r(   r)   r*   r+   modelr<   name_or_pathconfigr7   
componentsencoder_decoder_initr8   batched_jump_timess                    r"   r>   zWhisperHelper.load_modelF   sj   4 3CC"iWE
 77>>"45177<R@D1&&|V9`d&eE 	

v&HH5H!../AYW !
<MNSSU)
)#<VUJXi#j#o#o#q y*>?@$VUJ?DDFG'7KL!1&&)!L!Q!Q!S|-?@Ar$   rB   onnx_model_pathproviderverboseuse_external_data_formatuse_fp16_inputsuse_int32_inputsuse_encoder_hidden_statesuse_kv_cache_inputsc	           
      j   t        | t              r| j                  |||||       yt        | t              r| j                  ||||||       yt        | t              r| j                  ||||||||       yt        | t
              r| j                  ||||||       yt        dt        |              )a/  Export model component to ONNX

        Args:
            model (class): PyTorch class to export
            onnx_model_path (str): path to save ONNX model
            provider (str): provider to use for verifying parity on ONNX model
            verbose (bool): print verbose information.
            use_external_data_format (bool): use external data format or not.
            use_fp16_inputs (bool): use float16 inputs for the audio_features, encoder_hidden_states, logits, and KV caches.
            use_int32_inputs (bool): use int32 inputs for the decoder_input_ids.
            use_encoder_hidden_states (bool): use encoder_hidden_states as model input for decoder-init/decoder-without-past models.
            use_kv_cache_inputs (bool): use KV caches as model inputs for decoder-with-past models.
        z%Unknown instance for model detected: N)
isinstancer   export_onnxr   r   r   
ValueErrortype)	rB   rH   rI   rJ   rK   rL   rM   rN   rO   s	            r"   rR   zWhisperHelper.export_onnx   s    2 e^,( 89(  ~.( )#	 /0(  DT%[MRSSr$   optimized_model_path
is_float16num_attention_headshidden_size
num_layersuse_gpu
is_decoderc           
      "   ddl m}  |d      }d|_        |dk(  |_        t	        | d||d||d      }|	rG|
rE|dk(  rt        |      \  }}t        ||      }|r$t        |t        t        dd	|z  d	            
      }|j                  ||d       y)zHOptimize ONNX model with an option to convert it to use mixed precision.r   )FusionOptionsbartTrocmF)
model_type	num_headsrX   	opt_leveloptimization_optionsrZ   only_onnxruntimecuda   )skip_node_idxs)all_tensors_to_one_fileN)fusion_optionsr]   use_multi_head_attention!disable_multi_head_attention_biasr   r   r   r   listrangesave_model_to_file)rH   rU   rV   rW   rX   rY   rK   rZ   rI   r[   r*   r+   r]   rc   mpast_seq_len_names                   r"   optimize_onnxzWhisperHelper.optimize_onnx   s    " 	1,V48<5AIVAS>)#!5"	
 +6! (@'B$$04EF(4aZYZ@[;\]	13Keijr$   	processorpt_model
batch_sizeprompt_modec           
      ,   	 ddl m} ddl m}  |d	d
d      }g }	|dk(  r | |d   d   d   gd      j                  }
ni | |d   d   d   gd      j                   | |d   d   d   gd      j                  g}	t        |	      |k(  sJ t        j                  |	d   |	d   f      }
d\  }}}}d\  }}|
j                  |      ||||||ddd	}|rddg}|D cg c]  }| j                  |       }}g }g }t        |      D ]  }t        j                  ||         j                  |      |d<   |	|   j                  |      |d<    |j                   di |j#                         j%                         j'                         }|j)                  |       |j)                  | j+                  |d      d           |
|d<   |d= n`g } |j                   di |j#                         j%                         j'                         }| j+                  |d      d   g}t-        |      }|d= |d= ||||fS # t        $ rU}t        j	                  d| d       d}t        j                  d| d       t        j                  |       Y d }~d }~ww xY wc c}w )Nr   )load_datasetz.An error occurred while importing `datasets`: T)exc_infozpip install datasetszCCould not import `datasets`. Attempting to install `datasets` via `z`.z)hf-internal-testing/librispeech_asr_dummyclean
validation)r      audioarraypt)return_tensors   )   r   r{   r{   )      ?r   )	input_features
max_length
min_length	num_beamsnum_return_sequenceslength_penaltyrepetition_penaltyearly_stopping	use_cachezJohn has doubtszMaria has grave doubtsr4   
prompt_idsr   skip_special_tokensr   r    )datasetsrw   	Exceptionloggererrorwarningr   systemr   lentorchcatr@   get_prompt_idsrm   
from_numpygeneratedetachcpunumpyappendbatch_decoderl   )rr   rs   r'   rt   ru   rw   einstall_cmddsinput_features_r   r   r   r   r   r   r   inputspromptspr   pt_transcription
pt_outputsi	pt_outputs                            r"    pt_transcription_for_verify_onnxz.WhisperHelper.pt_transcription_for_verify_onnx   s   	#- 	*EwVbc?&1gw(?'@QUVeeN 2a5>'23DIXX2a5>'23DIXXO ':555"YY(:OA<N'OPNBM?
J	+?-5**,//7$$"$8,"4"

 (*BCG?FGw!)2215wJG!J :&','7'7
1'F'I'IQW'I'X|$+:1+=+@+@+H'(-H--77>>@DDFLLN	!!), ''	(>(>y^b(>(cde(fg ' (6F#$|$J***4V4;;=AACIIKJ ) 6 6zW[ 6 \]^ _`j)J#$;'Z??s  	#LLI!MX\L]0KNN`al`mmopqIIk""		#H Hs   H0 J0	J9A
J		Jc                 J    | dkD  r|rd}d}d}d}||||h}|S d}d}d}	|||	h}|S )	Nr{   z{ John has doubts whether Sir Frederick Layton's work is really Greek after all and can discover in it but little of Rocky Izy John has doubts whether Sir Frederick Latins work is really Greek after all and can discover in it but little of Rocky Iz Maria has grave doubts whether Sir Frederick Layton's work is really Greek after all and can discover in it but little of Rockyz Maria has grave doubts whether Sir Frederick Latins work is really Greek after all and can discover in it but little of Rocky IzX Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.zY Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel.zZ "Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel.r   )
rt   ru   'expected_transcription_no_comma_prompt1)expected_transcription_misspelled_prompt1'expected_transcription_no_comma_prompt2)expected_transcription_misspelled_prompt2expected_transcription_optionsexpected_transcription_no_comma!expected_transcription_with_comma+expected_transcription_with_quote_and_commas
             r"   select_transcription_optionsz*WhisperHelper.select_transcription_options;  s    
 >k 7t3 9t5 7y3 9{57799	.** .- k , l . m 8 01;.*
 .-r$   r   ort_outputsr   c                 X   | |   ||   }}|j                   |j                   }}||k7  ret        |      dkD  r|d   }|j                   }t        |      dkD  r|d   }|j                   }|d   |d   k7  rt        |d   |d         }|d| }|d| }|j                   |j                   k(  sJ ||fS )z8Get PyTorch and ONNX Runtime output token ids at index ir{   r   N)shaper   min)r   r   r   r   
ort_outputpt_shape	ort_shapemin_lens           r"   get_outputszWhisperHelper.get_outputs\  s     !+1{1~:	'ooz/?/?) y 8}q %aL	$??9~!']
&,,	{il*hqk9Q<8%hw/	'1
*"2"2222*$$r$   ort_sessionc                 	   t        j                  | |d      j                  |      }t        j                  | |      }t	        j                  | |      }t
        j                  |||||      \  }	}
}}|j                  g}|j                  dd      }|D cg c]  }|d   	 }}||z   }|j                         D cg c]  }|j                   }}|j                         D cg c]  }|j                   }}t        j                  t        j                  t        j                  t        j                   t        j"                  t        j$                  d	}d
|v }t'        ||d      D ]5  \  }}|dk(  r3|	|   j)                         j+                         j-                         |	|<   ?|dk(  r(t        j.                  |j0                  ||         |	|<   l|dk(  r*t        j.                  ||j0                  f||         |	|<   |dk(  r|s&|r|gn|g}t        j2                  |||         |	|<   g }t5        |      D ]$  }|j7                  ||   j9                                & t;        d |D              }g }|D ]6  }g ||j<                  g|t?        |      z
  z  }|j7                  ||z          8 t        j2                  |||         |	|<   j|dk(  r t        j2                  dg||         |	|<   |dk(  r"t        j2                  ddgg||         |	|<   |d
k(  r5t        j@                  t        j2                  |g||         |d      |	|<   |dk(  r t        j2                  dg||         |	|<   t        j2                  |	|   g||         |	|<   8 |jC                  d|	      d   dddddf   }|jE                  |d      } t
        jG                  ||      }!d}"t5        |      D ]H  }t
        jI                  |||      \  }#}$|"t        jJ                  |#|$      z  }"|"|
|   |!v xr | |   |!v z  }"J d}%|"sit5        |      D ][  }t
        jI                  |||      \  }#}$|#|$z
  }&t;        |&jM                         |&j;                         tN              }'t;        |%|'      }%] |%dk7  r0tP        jS                  d|
        tP        jS                  d|         yc c}w c c}w c c}w )zRCompare the result from PyTorch and ONNX Runtime to verify the ONNX model is good.r.   r/   r6   )rt   ru   english
transcribe)languagetaskr{   )ztensor(float)ztensor(float16)ztensor(int64)ztensor(int32)ztensor(int8)ztensor(uint8)extra_decoding_idsF)strictr   
vocab_maskr5   prefix_vocab_maskdecoder_input_idsc              3   2   K   | ]  }t        |        y w)N)r   ).0r   s     r"   	<genexpr>z,WhisperHelper.verify_onnx.<locals>.<genexpr>  s     !>+Q#a&+s   logits_processorcross_qk_layer_headr   temperaturer   NTr   )keyzPyTorch outputs: zONNX Runtime outputs: )*r	   r;   r@   r
   r   r   r   decoder_start_token_idget_decoder_prompt_ids
get_inputsnamerT   npfloat32float16int64int32int8uint8zipr   r   r   ones
vocab_sizer}   rm   r   tolistmaxpad_token_idr   repeatrunr   r   r   allcloser   absr   r   )(r   r&   r   r'   rt   ru   rs   rr   rD   r   r   r   decoder_prompt_idsstart_idr   tokenforced_decoder_idsentry	ort_names
ort_dtypes	ort_to_npuse_extra_decoding_idsr   r(   raw_input_idsort_promptsr   max_lenpadded_promptsr   padded_promptr   ort_transcriptionr   parityr   r   max_diffdiff
max_diff_is(                                           r"   verify_onnxzWhisperHelper.verify_onnxw  s    3BB)

"V* 	 %445GS\]	../AYWCPCqCq!# Dr D
@ *.@ 11255y|5\
,67J5eAhJ
7%
2-8-C-C-EF-EEUZZ-E	F.9.D.D.FG.FUejj.F
GZZ!zzXXXXGGXX
	 "6!By*UCCKD%''%d|22488:@@Bt%!wwv'8'8	%@PQt,,!ww
F4E4E'FiX]N^_t,,"2HXJOaNbM#%88M5AQ#RF4L #%K":.#**+=a+@+G+G+IJ /!!>+!>>G%'N(([!([0C0C/DRUVWRXHX/Y([&--m>P.PQ ) $&88N)EBR#SF4L++!xx9U3CDt..!xx!Q	%8HIt--!yy:,iPUFV)WYcefgt&!xxYu5EFt!xxi>NOtG DJ "oodF3A6q!Qw?%22;TX2Y)6)S)ST^`k)l&z"A$1$=$=j+WX$Y!Iz bkk)Z88F  #'EE K%a(,JJF # :&(5(A(A*k[\(]%	: :- TXXZSA
x4 ' q=NN./?.@ABNN34E3FGHe 8 GGs   S%>S*%S/N) F)TFF)FFr   FFF)r{   F)__name__
__module____qualname__staticmethodstrboolr#   r   r'   r(   dictnnModuler>   r   r   r   rR   intrq   r
   r   r   r   ndarrayr   r   r   r   r$   r"   r   r   )   s&     	=== = 	=
 
= =6  04"';;; ; 	;
 {{; )-;  ; ; 
c588??"	#; ;z >T 99NJ>T>T >T 	>T
 #'>T >T >T $(>T ">T >T@  */ "'-k-k!-k -k !	-k
 -k -k #'-k -k -k -k  -k -k -k^ 
 !C@#C@((//C@ C@ 	C@
 C@ C@J ... .@ %JJ%ZZ% % %4  !kkk &k 	k
 k k kr$   r   ) loggingr   pathlibr   r   r   r   convert_generationr   r   r   	optimizerr   transformersr   r	   r
   whisper_decoderr   whisper_encoderr   whisper_encoder_decoder_initr   whisper_jump_timesr   onnxruntimer   	getLoggerr   r   PRETRAINED_WHISPER_MODELSr   r   r$   r"   <module>r     sZ     	    k k $ Y Y * * B / (			8	$  z zr$   