
    BvhN                       d dl mZ d dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZmZ ddZ	 	 d	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd	Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd
ZdddZddZ	 	 	 d	 	 	 	 	 	 	 ddZd dZd!dZ	 	 	 	 	 	 	 	 	 	 	 	 d"dZ	 	 	 	 	 	 	 	 	 	 d#dZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d$dZy)%    )annotationsN)
AutoConfigAutoTokenizer)DynamicCache)InferenceSessionOrtValuec                    | j                         j                  d      dz
  }|j                  | dk(  d       |r|d d df   j                  d      }|S )N   r   )longcumsummasked_fill_	unsqueeze)attention_maskuse_past_kvposition_idss      \/RAG/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/llama/llama_inputs.pyget_position_idsr      sY    !&&(//3a7Ln115#ArE*44R8     c                   t        j                  d| j                  ||ft         j                        }t        j                  ||t         j                        }t        |d      }|dk(  r|j                         n|j                  |      }|dk(  r|j                         n|j                  |      }|dk(  r|j                         n|j                  |      }|s|||fS |||d}	|	S )Nr   lowhighsizedtyper   Fr   ort	input_idsr   r   )torchrandint
vocab_sizeint64onesr   numpyto)
configdevice
batch_sizeseq_lenenginereturn_dictr    r   r   inputss
             r   get_sample_inputsr/       s     !&*;*;:wBW_d_j_jkIZZ
G5;;GN#NFL &,u_	!),,v:NI/5^))+NDUDUV\D]N+1U?<%%'PV@WL><88 ($F
 Mr   c                4   t        j                  d| j                  |dft         j                        }t        j                  ||dz   t         j                        }	t        |	d      }
t        | ||||      }|dk(  r|j                         n|j                  |      }|dk(  r|	j                         n|	j                  |      }	|dk(  r|
j                         n|
j                  |      }
|dk(  rt        |      n5|D cg c]*  }|d   j                  |      |d   j                  |      f, c}}|st        |t              sJ ||	|
|fS ||	|
d	}|dk(  r%t        |t              sJ |j                  |       |S t        |t              sJ ||d
<   |S c c}w )Nr   r   r   r   Tr   
world_sizer   r   past_key_values)r!   r"   r#   r$   r%   r   get_past_kv_inputsr&   r'   flatten_past_kv_inputs
isinstancelistdictupdate)r(   r)   r*   past_seq_lenuse_fp16r,   r-   r2   r    r   r   past_kvkvr.   s                 r   get_sample_with_past_kv_inputsr>   C   s    !&*;*;:q/Y^YdYdeIZZ
L1,<EKKPN#NEL \8XbcG &,u_	!),,v:NI/5^))+NDUDUV\D]N+1U?<%%'PV@WL+1U?w'pw@xpwjl"Q%((6BRTVWXTYT\T\]cTdAepw@x  '4(((><AA ($F
 '4(((g
 M '4((($+ !M) Ays    /Fc                X   t        j                  d| j                  ||ft         j                        }t        j                  |||z   t         j                        }t        ||dk7        }t        | ||||
      }|dk(  r|j                         n|j                  |      }|dk(  r|j                         n|j                  |      }|dk(  r|j                         n|j                  |      }|dk(  rt        |      n5|D cg c]*  }|d   j                  |      |d   j                  |      f, c}}|	st        |t              sJ ||||fS |||d}|dk(  r4t        |t              sJ |j                  |       |rt        |||      }|S t        |t              sJ ||d	<   |S c c}w )
Nr   r   r   r   r1   r   r   r   r3   )r!   r"   r#   r$   r%   r   r4   r&   r'   r5   r6   r7   r8   r9    enable_past_present_share_buffer)r(   r)   r*   r+   r:   max_seq_lenr;   use_buffer_sharer,   r-   r2   r    r   r   r<   r=   r.   s                    r   %get_merged_sample_with_past_kv_inputsrC   y   s    !&*;*;:wBW_d_j_jkIZZ
L7,B%++VN#NQRARTL \8XbcG &,u_	!),,v:NI/5^))+NDUDUV\D]N+1U?<%%'PV@WL+1U?w'pw@xpwjl"Q%((6BRTVWXTYT\T\]cTdAepw@x  '4(((><AA ($F
 '4(((g5flKXF M '4((($+ !M1 Ays   /F'c                   |rt         j                  nt         j                  }| j                  | j                  z  }	|s*t         j
                  j                  ||| j                        j                  |      dt        j                  t        j                  |||f      d      z  j                  |      t         j
                  j                  || j                  || j                  |	      j                  |      t         j
                  j                  || j                  || j                  |	      j                  |      t        j                  |t         j                        d}
|
S t         j
                  j                  ||| j                        j                  |      t        j                  t        j                  |||ft         j                        d      dz
  j                  t         j                        t        j                  |t         j                        d}
t        | j                        D ]  }|
j                  d| dt         j
                  j                  || j                  ||	      j                  |      d	| dt         j
                  j                  || j                  ||	      j                  |      i        |rt!        |
||      }
|
S )
Ng     r   )kr   )x	attn_maskk_cachev_cachepos)rF   rG   rJ   k__cachev_)npfloat16float32hidden_sizenum_attention_headsrandomrandastypetriur%   num_hidden_layersarrayr$   int32ranger9   r@   )r(   r*   r:   r+   rA   r;   rB   split_kvnp_dtype	head_size
ort_inputsis               r   get_msft_sample_inputsr`      s7    &rzz2::H""f&@&@@I
GV5G5GHOOPXY"RWWRWWj+{5[-\`a%bbjjkstyy~~F44lFD^D^`ifXyy~~F44lFD^D^`ifX88L9


D + 
GV5G5GHOOPXY''"'':{K*PXZX`X`"aefgjkkss 88L9

 v//0A6NBIINN"F$>$>i%fX&6NBIINN"F$>$>i%fX&	 1 9*lT_`Jr   c                |   | j                   |z  }t        | d      r| j                  n| j                  | j                  z  }|rt
        j                  nt
        j                  }t        | j                        D cg c]6  }t        j                  |||||      t        j                  |||||      f8 }	}|	S c c}w )Nhead_dimr   )num_key_value_headshasattrrb   rQ   rR   r!   rO   rP   rZ   rW   rT   )
r(   r*   r:   r;   r2   	num_headsr]   torch_dtype_r<   s
             r   r4   r4      s    **j8I#*6:#>FDVDVZ`ZtZtDtI#+%--K v//0
 1A JJz9lI[YJJz9lI[Y	
 1   Ns   :;B9c                   i }t        |       D ]  \  }\  }}t        | t              re|j                         j	                         j                         |d| <   |j                         j	                         j                         |d| <   ~|j                         j	                         j                         |d| d<   |j                         j	                         j                         |d| d<    |S )Npast_key_values_key_cache_past_key_values_value_cache_past_key_values..key.value)	enumerater6   r   detachcpur&   )r3   r<   r_   past_kpast_vs        r   r5   r5      s    G(9FFo|48>8K8K8M8S8S8UG045:@--/:M:M:O:U:U:WG21#6728--/2E2E2G2M2M2OG&qc./4:MMO4G4G4I4O4O4QG&qc01  : Nr   c                6   i }| j                         D ]t  \  }}t        |t        j                        r|||<   &|dk(  r|j	                  t        |             F|j                         j                         j                         ||<   v |rt        |||      }|S )Nr3   )
itemsr6   rN   ndarrayr9   r5   ro   rp   r&   r@   )	pt_inputsrB   r:   rA   r^   rE   vs          r   convert_inputs_for_ortrx      s     J!1a$JqM##4Q78HHJNN,224JqM " 5j,P[\
r   c                    | j                         D ]Z  \  }}d|v sd|v s|j                  \  }}}}t        j                  ||||f|j                        }	||	d |d |d |d |f<   |	| |<   \ | S )Ncacher3   r   )rt   shaperN   zerosr   )
r^   r:   rA   rE   rw   r*   re   rg   r]   new_vs
             r   r@   r@     s      "1a<,123''/J	1iHHj)[)LTUT[T[\EHIE+:+z	z=L=*9*DE!JqM # r   c                   | j                         D ch c]  }|j                   }}t        |j                               }||z
  }t	        |      rt        d|        t        d      ||z
  }t	        |      r
|D ]  }||=  |S c c}w )Nz(The following model inputs are missing: zEThere are missing inputs to the model. Please add them and try again.)
get_inputsnamesetkeyslenprint	Exception)modelr^   model_inputmodel_inputsuser_inputsmissing_inputsunnecessary_inputsunnecessary_inputs           r   verify_ort_inputsr   #  s    8=8H8H8JK8JK$$8JLKjoo'(K!K/N
>88HIJ_`` %|3
!3,- "4  Ls   Bc                   | j                         }| j                         D ch c]  }|j                   }}|j                         D ]  \  }	}
|	|vr|rfd|	v sd|	v r^|	|vr0t	        j
                  |
||      }|j                  |	|       |||	<   I||	   j                  |
       |j                  |	||	          st	        j
                  |
||      }|j                  |	|        | j                         D ]d  }|j                  }|r@d|v sd|v r8|j                  dd      j                  dd      }|j                  |||          Q|j                  |||       f ||fS c c}w )Nrz   r3   )device_type	device_idoutpresent)
io_bindingr   r   rt   r   ortvalue_from_numpybind_ortvalue_inputupdate_inplaceget_outputsreplacebind_ortvalue_outputbind_output)r   r^   r)   r   rB   kv_cache_ortvaluesr   r_   r   rE   rw   v_deviceoutputr   
input_names                  r   add_io_bindings_as_ortvaluesr   7  s}    !!#J$)$4$4$67$6qAFF$6L7  "1 L  A1Ba1G**#77vYbc..q(;(0"1%"1%44Q7..q2DQ2GH33A6U^_H**1h7% #( ##%{{$)t2CeW5==iIZ[J++D2DZ2PQ""4Vy"Q & )))= 8s   E"c           
        t        | |      }d }t        j                  t        j                  t        j                  t        j
                  d}| j                         }|j                         D ]  \  }}	|j                  ||	j                  j                  |	j                  j                  dk(  rdn|	j                  j                  |t        |	j                           t        |	j                        |	j!                                |	j                  } | j#                         D ]  }
|
j$                  }|rd|v r||j'                  dd         n||   }	|j)                  ||j                  |j                  dk(  rdn|j                  |rt        j                  nt        j
                  t        |	j                        |	j!                                 |S )N)ztorch.int32ztorch.int64ztorch.float16ztorch.float32rp   r   )r   r   r   element_typer{   
buffer_ptrr   r3   )r   rN   rY   r$   rO   rP   r   rt   
bind_inputr)   typeindexreprr   tupler{   data_ptrr   r   r   r   )r   r.   outputsr;   rB   r)   pt_to_npr   rE   rw   r   r   s               r   add_io_bindings_as_tensorsr   d  sr    uf-FFxxxx	H !!#J188==E1aqxx~~!$qww-0.zz| 	 	
   ##%{{  I$5 4<<	+<=> 	

 	!;;%/aV\\(0"**bjj.zz| 	 	
 &" r   c           	     V   |j                   |_        |j                  |d      }|rt        j                  nt        j
                  }	t        j                  |d   |t        j                        }
t        j                  |d   |t        j                        }t        |d      }|
j                  d   }||kD  r$|
d d d |f   }
|d d d |f   }t        |d      }n||k  r|
d d d	f   j                  d	      j                  }|d d d	f   j                  d	      j                  }t        ||z
        D ]0  }t        j                  ||
f      }
t        j                  ||f      }2 t        |d      }|
j                  d   }||k(  sJ |d
k(  r|
j                         n|
|d
k(  r|j                         n||d
k(  r|j                         n|d}|d
k7  rg |d<   |
j                  \  }}| j                  }| j                   }t#        | d      r| j$                  n| j&                  | j(                  z  }t        | j*                        D ]  }t        j,                  |||r|nd	|||	      }t        j,                  |||r|nd	|||	      }|d
k(  r:|j/                  d| d|j                         d| d|j                         i       |d   j1                  ||f        d }|d
k(  rt        j,                  ||| j2                  ||	      }d|j                         i}|st        | j*                        D ]q  }t        j,                  ||||||	      }t        j,                  ||||||	      }|j/                  d| d|j                         d| d|j                         i       s ||fS )NT)paddingr    )r)   r   r   Fr   r
   r   r   r   r3   rb   rk   rl   rm   logitszpresent.)	eos_token	pad_tokenbatch_encode_plusr!   rO   rP   tensorr$   r   r{   r   TrZ   hstack
contiguousmax_position_embeddingsrc   rd   rb   rQ   rR   rW   r|   r9   appendr#   )r(   	tokenizerrequested_lengthpromptr)   r;   rB   r,   encodings_dictrf   r    r   r   tokenized_lengthinput_ids_first_colattention_mask_first_colrg   r.   r*   sequence_lengthmax_sequence_lengthre   r]   r_   past_key
past_valuer   r   present_keypresent_values                                 r   get_initial_inputs_and_outputsr     s    $--I000FN#+%--K
 ^K8u{{[I\\.1A"B6Y^YdYdeN#NFL !r***a!2"2!223	'+<,<+<(<='EJ	,	,'1o77:<<#1!Q$#7#A#A!#D#F#F '*::;A&99%EFI"\\+C^*TUN < (EJ r*//// 06Y))+I9?5.335n5;u_//1,F
 $& ! #,//J 88**I#*6:#>FDVDVZ`ZtZtDtI 6++,;;#3
 [[#3

 U?MM&qc.0C0C0E&qc0*2G2G2I $%,,h
-CD3 -6 GZ&:K:KTZbmnV..016334#kk	?If\g !&	?If\g! s$')?)?)AXaSPVCWYfYqYqYst 5 7?r   )r   ztorch.Tensorr   bool)ptF)r(   r   r)   torch.devicer*   intr+   r   r,   strr-   r   )Fr   Fr   )r(   r   r)   r   r*   r   r:   r   r;   r   r,   r   r-   r   r2   r   )FFr   Fr   )r(   r   r)   r   r*   r   r+   r   r:   r   rA   r   r;   r   rB   r   r,   r   r-   r   r2   r   )r(   r   r*   r   r:   r   r+   r   rA   r   r;   r   rB   r   r[   r   )r   )
r(   r   r*   r   r:   r   r;   r   r2   r   )r3   z'list[tuple[torch.Tensor, torch.Tensor]])Fr   i   )rv   r8   rB   r   r:   r   rA   r   )r^   r8   r:   r   rA   r   )r   r   r^   r8   )r   r   r^   r8   r)   r   r   r   rB   r   r   r8   )
r   r   r.   r8   r   r8   r;   r   rB   r   )r(   r   r   r   r   r   r   z	list[str]r)   r   r;   r   rB   r   r,   r   )
__future__r   r&   rN   r!   transformersr   r   transformers.cache_utilsr   onnxruntimer   r   r   r/   r>   rC   r`   r4   r5   rx   r@   r   r   r   r    r   r   <module>r      s   #   2 1 2(   	
  P ))) ) 	)
 ) ) ) )z "000 0 	0
 0 0 0 0 0 0 0h000 0 	0
 0 0 0 0j	 #	  	0	((*(*(* (* 	(*
 (* (*Z,,%),48,DH,\`,`ddd d 	d
 d d d dr   