# HF-LLM-Intent-Detection / src / OLD_PAGE_Hugging Face.py
# georgeek's picture
# history blame
# 4.61 kB
import streamlit as st
#st.title('Watson Assistant VDF TOBi improvement')
# Inject the custom page CSS. Raw CSS is not Python; Streamlit only forwards
# <style> markup to the browser when it is passed through st.markdown with
# unsafe_allow_html=True. The string is kept flush-left so Markdown does not
# interpret the rules as an indented code block.
st.markdown(
    """
<style>
.stTextInput > div > div > input {
    background-color: #d3d3d3;
}
body {
    background-color: #f0f0f0;
}
</style>
""",
    unsafe_allow_html=True,
)

# Page heading and a short description of what the model was trained on.
st.header('Watson Assistant VDF TOBi improvement')
st.write('The model is trained on the TOBi 🤖 intents in Romanian language.')
import os
import pandas as pd
import re
from time import time
from src.E_Model_utils import load_model, train_model, get_embeddings
from src.E_Faiss_utils import load_embeddings_and_index, normalize_embeddings
from src.A_Preprocess import load_data, clean_text
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
# Sidebar selector for the embedding model. "other" is the placeholder entry
# meaning that no model has been chosen yet.
model_name = st.sidebar.radio(
    "Selectează modelul 👇",
    [
        "other",
        "e5_small_fine_tuned_model",
        "multilingual-e5-small",
        "all-MiniLM-L6-v2",
        "all-distilroberta-v1",
    ],
)

# Radio labels whose text is not itself a loadable identifier, mapped to the
# identifier handed to load_model. Labels missing from this table are passed
# to load_model unchanged.
_MODEL_IDENTIFIERS = {
    "multilingual-e5-small": "intfloat/multilingual-e5-small",
    # NOTE(review): the fine-tuned entry currently loads the base checkpoint;
    # loading from `model_path` was disabled in the original script. Confirm
    # whether this is intentional.
    "e5_small_fine_tuned_model": "intfloat/multilingual-e5-small",
}

if model_name == "other":
    # Nothing below can work without a loaded model, so end this script run
    # here instead of failing later on an undefined `model`.
    st.stop()

# future improvement: add a loading spinner
model_path = "output/fine-tuned-model"
st.write("Model path:", model_path)

# Exactly one load per run: the mapped identifier when there is one, the radio
# label itself otherwise.
model = load_model(_MODEL_IDENTIFIERS.get(model_name, model_name))
st.write(f"Modelul selectat: {model_name}")
st.write("Model loaded successfully!")
# Upload the intents data. The CSV is expected to provide an 'intent' column
# whose row order matches the order of the vectors in the saved index
# (assumption — TODO confirm against the code that builds the index).
uploaded_file = st.file_uploader("Încarcă fișierul cu intenții", type="csv")
if uploaded_file is not None:
    # Keep the parsed frame in session state so it survives Streamlit reruns.
    st.session_state.data = pd.read_csv(uploaded_file)
    st.write("CSV file successfully uploaded!")

# Use the frame uploaded in this or an earlier run, if there is one.
data = st.session_state.data if 'data' in st.session_state else None

if data is not None:
    intents = data['intent'].tolist()

    user_text = st.text_input("Te rog introdu un text.")
    if user_text:
        if st.button("Identifică Intenția"):
            start = time()
            st.write("Procesare text...")

            # Embed the cleaned query and normalise it before searching.
            cleaned_text = clean_text(user_text)
            input_embedding = get_embeddings(model, [cleaned_text])
            normalized_embedding = normalize_embeddings(input_embedding)

            # Only the index is needed here; the stored embeddings are unused.
            _, index = load_embeddings_and_index(f"embeddings/{model_name}_vector_db.index")
            D, I = index.search(normalized_embedding, 1)  # nearest neighbour

            position = int(I[0][0])
            if 0 <= position < len(intents):
                intent = intents[position]
                distance = D[0][0]
                similarity = 1 / (1 + distance)
                st.write(f"Intenția identificată: {intent}")
                st.write(f"Nivel de încredere: {similarity:.4f}")
                st.write(f"Timp de răspuns: {time() - start:.4f} secunde")
            else:
                # A negative position would silently select the last intent and
                # a too-large one would raise IndexError. Presumably the index
                # reports -1 when it has no neighbour to return — verify.
                st.error(
                    "The vector index returned no usable match for the uploaded "
                    "intents file; check that the index and the CSV belong together."
                )
    else:
        st.write("Te rog introdu un text.")
# Endpoint for intent identification: a free-text box plus a button that looks
# up the nearest stored intent for the entered text.
input_text = st.text_input("Introdu mai jos textul! 👇", label_visibility="visible")

# An explicit key keeps this button distinct from any other button in the
# script that carries the same label (identical label and no key would give
# both widgets the same ID).
if st.button("Identifică Intenția", key="intent_endpoint_button"):
    if not input_text:
        st.write("Te rog introdu un text.")
    elif data is None:
        # No intents file has been uploaded yet, so there is nothing to map
        # the search result onto.
        st.warning("Încarcă fișierul cu intenții")
    else:
        start = time()

        cleaned_text = clean_text(input_text)
        input_embedding = get_embeddings(model, [cleaned_text])
        normalized_embedding = normalize_embeddings(input_embedding)

        # The index has to be loaded in this scope before it can be searched.
        _, index = load_embeddings_and_index(f"embeddings/{model_name}_vector_db.index")
        D, I = index.search(normalized_embedding, 1)  # nearest neighbour

        intents = data['intent'].tolist()
        intent = intents[I[0][0]]
        distance = D[0][0]
        similarity = 1 / (1 + distance)
        st.write(f"Intenția identificată: {intent}")
        st.write(f"Nivel de încredere: {similarity:.4f}")
        st.write(f"Timp de răspuns: {time() - start:.4f} secunde")