o
    ©S˜h<=  ã                   @   s@  d dl mZ d dlmZ d dlZd dlZd dlZd dlZd dl	Z
d dlmZ d dlmZ ejejdd eƒ  edƒZd	Zd
d„ Zd)defdd„Zdd„ Zd*dd„Zdd„ Zdd„ Zdd„ Zd+dd„Zd+dd„Zedkržejejd  ed!ƒ ed"ƒ 	 e d$ƒ !¡ Z"e" #¡ d%v rŽed&ƒ dS e"s‘q{ee"ƒZ$ed'e$› d(ƒ q|dS ),é    )ÚQdrantClient)ÚmodelsN)Úload_dotenv)ÚSentenceTransformerz)%(asctime)s - %(levelname)s - %(message)s)ÚlevelÚformatz%paraphrase-multilingual-mpnet-base-v2i   c              
   C   s   d}d|› ddœ}ddddœd	| dœgd
dœ}zt j|||d}| ¡  | ¡ d d d d W S  t jyG } z	t d|› ¡ ‚ d}~ww )uC   Gá»i API OpenRouter Ä‘á»ƒ xá»­ lÃ½ prompt vá»›i mÃ´ hÃ¬nh Llama-3.z-https://openrouter.ai/api/v1/chat/completionszBearer zapplication/json)ÚAuthorizationzContent-Typezmeta-llama/llama-3-8b-instructÚsystemu¥   Báº¡n lÃ  trá»£ lÃ½ AI chuyÃªn tÆ° váº¥n vá» cÃ¡c thÃ´ng tin cá»§a TrÆ°á»ng Äáº¡i há»c Y DÆ°á»£c Cáº§n ThÆ¡, tráº£ lá»i báº±ng tiáº¿ng Viá»‡t tá»‘i Ä‘a 40 tá»«.)ZroleÚcontentÚuserçš™™™™™É?)ZmodelÚmessagesZtemperature)ÚheadersÚjsonÚchoicesr   Úmessager
   u    Lá»—i khi gá»i OpenRouter API: N)ÚrequestsÚpostZraise_for_statusr   ZRequestExceptionÚloggingÚerrorÚlenZquestionÚsplitÚminÚcall_openrouter_llama3ÚwarningÚjoinÚinfoÚ	Exception)ÚpromptÚapi_keyÚurlr   ÚdataÚresponseÚeZ
word_countZ	max_wordsZrefinedZrefined_word_count© r$   ú/RAG/ai_agent.pyr      s&   þþú€þr   úconfig.jsonÚcollection_namec                 C   sl   t |ddd}t |¡}W d  ƒ n1 sw   Y  | dg ¡D ]}| d¡| kr3| dg ¡  S q"g S )uH   Táº£i danh sÃ¡ch keyword tÆ°Æ¡ng á»©ng vá»›i collection tá»« config.jsonÚrúutf-8©ÚencodingNÚcollectionsr'   Úkeywords)Úopenr   ÚloadÚget)r'   Zconfig_pathÚfÚconfigZ
collectionr$   r$   r%   Úload_keywords_from_configA   s   ÿÿr3   c                 C   s>   t  | ¡t  |¡} }tt  | |¡t j | ¡t j |¡  ƒS ©N)ÚnpÚarrayÚfloatÚdotZlinalgÚnorm)Zvec1Zvec2r$   r$   r%   Úcosine_similarityK   s   (r:   é   c                    s^   ‡ fdd„ˆD ƒ}‡fdd„|D ƒ‰t ttˆƒƒ‡fdd„ddd|… }‡‡fd	d„|D ƒS )
u4   TÃ¬m tá»« khÃ³a gáº§n nháº¥t vá»›i vector cÃ¢u há»ic                    s   g | ]}ˆ |ƒ‘qS r$   r$   )Ú.0Úkw)Úembedding_funcr$   r%   Ú
<listcomp>R   s    z(find_closest_keyword.<locals>.<listcomp>c                    s   g | ]}t ˆ |ƒ‘qS r$   )r:   )r<   Zvec)Úquery_vectorr$   r%   r?   S   s    c                    s   ˆ |  S r4   r$   )Úi)Úsimilaritiesr$   r%   Ú<lambda>T   s    z&find_closest_keyword.<locals>.<lambda>T)ÚkeyÚreverseNc                    s   g | ]
}ˆ | ˆ| f‘qS r$   r$   )r<   rA   )r-   rB   r$   r%   r?   U   s    )ÚsortedÚranger   )r@   r-   r>   Útop_kZkeyword_vectorsZtop_indicesr$   )r>   r-   r@   rB   r%   Úfind_closest_keywordP   s   &rI   c              
   C   sÐ   zR|   ¡ }|dd„ |jD ƒvr,| j|tjttjjdd t 	d|› dt› d¡ W dS |  
|¡}|jjjjtkrGtd|jjjj› d	t› ƒ‚t 	d
|› d¡ W dS  tyg } z	t d|› ¡ ‚ d}~ww )u=   Khá»Ÿi táº¡o collection trong Qdrant náº¿u chÆ°a tá»“n táº¡i.c                 S   s   g | ]}|j ‘qS r$   )Úname)r<   Úcr$   r$   r%   r?   \   s    z0initialize_qdrant_collection.<locals>.<listcomp>)ÚsizeZdistance)r'   Zvectors_configu   [RAG] ÄÃ£ táº¡o collection 'u   ' vá»›i vector size Ú.u.   Collection tá»“n táº¡i nhÆ°ng cÃ³ vector size u    khÃ¡c vá»›i yÃªu cáº§u z[RAG] Collection 'u0   ' Ä‘Ã£ tá»“n táº¡i vá»›i vector size phÃ¹ há»£p.u#   Lá»—i khi khá»Ÿi táº¡o collection: N)Zget_collectionsr,   Zcreate_collectionÚqdrant_modelsZVectorParamsÚVECTOR_SIZEZDistanceZCOSINEr   r   Zget_collectionr2   ÚparamsZvectorsrL   Ú
ValueErrorr   r   )Úclientr'   r,   Zcollection_infor#   r$   r$   r%   Úinitialize_qdrant_collectionX   s(   þþ
€þrS   c                 C   sX   | sdS |   d¡}tt |¡ƒ}d |¡ ¡ }g d¢}|D ]}| |d¡}q|r*|S | S )uA   LÃ m sáº¡ch vÄƒn báº£n: loáº¡i bá» láº·p láº¡i vÃ  chuáº©n hÃ³a.Ú ú. )u   Web - Tháº» Span: u   Web - Äoáº¡n vÄƒn: u   Web - Danh sÃ¡ch: u   Web - TiÃªu Ä‘á»: )r   ÚlistÚdictÚfromkeysr   ÚstripÚreplace)ÚtextZ	sentencesZunique_sentencesZcleaned_textZcleaning_patternsÚpatternr$   r$   r%   Ú
clean_texto   s   
r]   c              
   C   sT   zt  | ¡ ¡ }t dt|ƒ› ¡ |W S  ty) } z	t d|› ¡ ‚ d}~ww )uC   Táº¡o embedding cho vÄƒn báº£n sá»­ dá»¥ng model Ä‘Ã£ khá»Ÿi táº¡o.z[RAG] Embedding dimension: u   Lá»—i khi táº¡o embedding: N)Ú_embedding_modelÚencodeÚtolistr   r   r   r   r   )r[   Zembr#   r$   r$   r%   Úget_embedding‡   s   €þra   c           $   
   C   sf  | sddiS t j dd¡}t j d¡}t j d¡}|du r!tdƒ‚ztd	d
dd}t |¡}W d  ƒ n1 s9w   Y  W n tyL   t 	d¡ ‚  tj
yY   t 	d¡ ‚ w z| dg ¡}|rht|tƒsltdƒ‚|d }|d }|d }	W n ttfy }
 z	t 	d|
› ¡ ‚ d}
~
ww t d|› ¡ zX|r£t||d}nt|d}t||ƒ ddlm} z|j|d|jd t d¡ W n tyÝ }
 zt d|
› ¡ W Y d}
~
nd}
~
ww |j|d}t d|› d|j› ¡ W n ty }
 z	t 	d |
› ¡ ‚ d}
~
ww z|  ¡ }| ¡ d!v rW d"S t d#|› ¡ t|ƒ}t|ƒ}dd$lm}m}m} d%}d}|r…t ||td&d'}|d \}}t d(|› d)|d*›d+¡ |d,krqd-}||d||d.d/gd0}t d1|› ¡ nd-}||d||d.d/gd0}t d2¡ z'|j!|||r|ndd3d-d%d4}|s«|r«t d5¡ |j!||d6d-d%d7}W n tyÉ }
 zt 	d8|
› ¡ g }W Y d}
~
nd}
~
ww |D ]6}t"d9|j#› d:|j$d*›ƒ t"d;|j% d<i ¡ d=d>¡ƒ t"d?|j% d<i ¡ d@d>¡ƒ t"dA|j% dd>¡ƒ qÌ|sFt dB|› dC¡ zt&||ƒ}t dD¡ dE|dFœW W S  tyE }
 zt 	dG|
› ¡ ddHt'|
ƒ› iW  Y d}
~
W S d}
~
ww |rL|}ndIdJ„ |D ƒ}|s’t dK¡ zt&||ƒ}t dL¡ dE|dFœW W S  ty‘ }
 zt 	dG|
› ¡ ddHt'|
ƒ› iW  Y d}
~
W S d}
~
ww g }d>}dM}|D ](}t(|j% dd>¡ƒ} t)|ƒt)| ƒ |k r¸|dN|  7 }qš| *| ¡ ¡ | }qš|rÍ| *| ¡ ¡ dOdJ„ t+|ƒD ƒ}!dP ,|!¡}"|	› dQ| › dR|"› dS}#z	t&|#|ƒ}|W W S  ty }
 zt 	dT|
› ¡ W Y d}
~
W dUS d}
~
ww  ty2 }
 ztj	dV|
› d-dW ddXt'|
ƒ› iW  Y d}
~
S d}
~
ww )Yu9   Xá»­ lÃ½ cÃ¢u há»i sá»­ dá»¥ng RAG vá»›i Qdrant vÃ  LLM.r   zInput text is required.Z
QDRANT_URLzhttp://localhost:6333ZOPENROUTER_API_KEYZQDRANT_API_KEYNu:   OPENROUTER_API_KEY khÃ´ng Ä‘Æ°á»£c cáº¥u hÃ¬nh trong .env!r&   r(   r)   r*   u$   KhÃ´ng tÃ¬m tháº¥y file config.json!u6   File config.json cÃ³ Ä‘á»‹nh dáº¡ng khÃ´ng há»£p lá»‡!r,   u;   Cáº¥u trÃºc collections trong config.json khÃ´ng há»£p lá»‡r   r'   r   u*   Cáº¥u hÃ¬nh collection khÃ´ng há»£p lá»‡: u   [RAG] Sá»­ dá»¥ng collection: )r    r   )r    )ÚPayloadSchemaTyper
   )r'   Ú
field_nameZfield_schemau)   [RAG] ÄÃ£ báº­t text index cho 'content'uB   [RAG] KhÃ´ng thá»ƒ táº¡o text index (cÃ³ thá»ƒ Ä‘Ã£ tá»“n táº¡i): )r'   u.   [RAG] Sá»‘ lÆ°á»£ng Ä‘iá»ƒm trong collection 'z': u   Lá»—i khi káº¿t ná»‘i Qdrant: )ZhelloÚhiu	   xin chÃ ou   chÃ ou   chÃ o báº¡nu§   Xin chÃ o! TÃ´i lÃ  trá»£ lÃ½ AI cá»§a TrÆ°á»ng Äáº¡i há»c Y DÆ°á»£c Cáº§n ThÆ¡. TÃ´i cÃ³ thá»ƒ giÃºp báº¡n tÃ¬m kiáº¿m thÃ´ng tin vá» trÆ°á»ng Ä‘áº¡i há»c nÃ y.u)   [RAG] CÃ¢u há»i dÃ¹ng Ä‘á»ƒ truy váº¥n: )ÚFilterÚFieldConditionÚ	MatchTextFr;   )rH   u   [RAG] Tá»« khÃ³a gáº§n nháº¥t: z (score=z.3fú)r   T)r[   )rD   Úmatch)Zmustu0   [RAG] Sá»­ dá»¥ng filter MatchText cho keyword: uZ   [RAG] KhÃ´ng tÃ¬m tháº¥y keyword phÃ¹ há»£p, dÃ¹ng MatchText trá»±c tiáº¿p tá»« cÃ¢u há»ié¾   )r'   r@   Úquery_filterÚlimitÚwith_payloadÚwith_vectorsuP   [RAG] KhÃ´ng cÃ³ káº¿t quáº£ vá»›i filter, fallback sang truy váº¥n vector gá»‘céÈ   )r'   r@   rl   rm   rn   u#   [RAG] Lá»—i khi truy váº¥n Qdrant: zID: z	, Score: zTitle:ÚmetadataÚtitlerT   zSection:ÚsectionzContent:u;   [RAG] Qdrant tráº£ vá» káº¿t quáº£ rá»—ng cho collection 'ú'uH   [RAG] ÄÃ£ gá»i LLM trá»±c tiáº¿p do khÃ´ng cÃ³ káº¿t quáº£ tá»« Qdrantu*   KhÃ´ng tÃ¬m tháº¥y tÃ i liá»‡u liÃªn quan.)r   Úansweru#   Lá»—i khi gá»i LLM trá»±c tiáº¿p: zLLM error: c                 S   s   g | ]	}|j d kr|‘qS )r   )Úscore)r<   Úpointr$   r$   r%   r?   $  s    zRAG_agent.<locals>.<listcomp>u?   [RAG] KhÃ´ng cÃ³ tÃ i liá»‡u nÃ o Ä‘áº¡t ngÆ°á»¡ng score >= 0.2uS   [RAG] ÄÃ£ gá»i LLM trá»±c tiáº¿p do khÃ´ng cÃ³ tÃ i liá»‡u Ä‘áº¡t ngÆ°á»¡ng scoreiÜ  ú c                 S   s"   g | ]\}}|d  › d|› ‘qS )r;   rU   r$   )r<   rA   Údocr$   r$   r%   r?   ?  s    ÿÚ
u    

CÃ¢u há»i cá»§a quÃ½ khÃ¡ch: u!   


        ThÃ´ng tin cÃ³ sáºµn:
u   


        Tráº£ lá»i:u'   Lá»—i khi gá»i LLM Ä‘á»ƒ tráº£ lá»i: u@   Xin lá»—i, tÃ´i gáº·p lá»—i khi xá»­ lÃ½ cÃ¢u há»i cá»§a báº¡n.u   Lá»—i trong quÃ¡ trÃ¬nh RAG: )Úexc_infozRAG error: )-ÚosÚenvironr0   rQ   r.   r   r/   ÚFileNotFoundErrorr   r   ÚJSONDecodeErrorÚ
isinstancerV   ÚKeyErrorÚ
IndexErrorr   r   rS   Zqdrant_client.http.modelsrb   Zcreate_payload_indexZTEXTr   r   ÚcountrY   Úlowerra   r3   re   rf   rg   rI   ÚsearchÚprintÚidru   Úpayloadr   Ústrr]   r   ÚappendÚ	enumerater   )$Ú
input_textZ
qdrant_urlr   Zqdrant_api_keyr1   r2   r,   Zcollection_configr'   Zprompt_templater#   rR   rb   Zpoints_countZrefined_inputr@   r-   re   rf   rg   Z
use_filterrk   Zclosest_keywordsZbest_keywordZ
best_scoreZsearch_resultrv   rt   Zfiltered_pointsZ	documentsZtemp_docÚ
max_lengthr
   Úcontext_linesÚcontextr   r$   r$   r%   Ú	RAG_agent‘   s\  ÿ€

þ€þ

ý€ÿ€þ
þÿÿþÿÿ
ú	
û€€þ

 €þ


 €þÿ
ÿ
€þ€þr   c              
   C   sJ   zt | ƒ}|W S  ty$ } zt dt|ƒ› ¡ W Y d}~dS d}~ww )uâ   
    Xá»­ lÃ½ vÄƒn báº£n Ä‘áº§u vÃ o báº±ng AI Agent vá»›i Qdrant.
    Args:
        input_text (str): VÄƒn báº£n Ä‘áº§u vÃ o cáº§n xá»­ lÃ½
    Returns:
        Káº¿t quáº£ xá»­ lÃ½ tá»« AI Agent hoáº·c thÃ´ng bÃ¡o lá»—i
    u+   Lá»—i trong quÃ¡ trÃ¬nh xá»­ lÃ½ AI Agent: N)r   r   r   r   rˆ   )r‹   r"   r#   r$   r$   r%   Úprocess_with_ai_agentS  s   €þr   Ú__main__)r   z=== AI Agent Qdrant CLI ===u,   Nháº­p cÃ¢u há»i (gÃµ 'exit' Ä‘á»ƒ thoÃ¡t):Tu   Báº¡n: )ÚexitÚquitÚqu   Káº¿t thÃºc.zAI: ry   )r&   )r;   r4   )%Zqdrant_clientr   Zqdrant_client.httpr   rN   r   r{   r   r   Znumpyr5   Údotenvr   Zsentence_transformersr   ÚbasicConfigÚINFOr^   rO   r   rˆ   r3   r:   rI   rS   r]   ra   r   r   Ú__name__ÚDEBUGr…   ÚinputrY   Z
user_inputrƒ   Úresultr$   r$   r$   r%   Ú<module>   sL    ,



 
Cøü