# Spaces: Runtime error
import streamlit as st | |
import pandas as pd | |
import json | |
import numpy as np | |
import faiss | |
from sentence_transformers import SentenceTransformer | |
import time | |
from concurrent.futures import ThreadPoolExecutor | |
def process_string(s):
    """Normalize an ingredient name before it is embedded.

    The text is lower-cased and every ``&`` is replaced by the word ``and``.

    Args:
        s: Raw ingredient string.

    Returns:
        The normalized string.
    """
    lowered = s.lower()
    return lowered.replace('&', 'and')
# NOTE(review): a Streamlit cache decorator (`#@st.cache`) was left disabled
# above this function, so every call constructs a new model instance.
def load_model():
    """Build the SentenceTransformer used to embed ingredient names.

    Returns:
        A ``SentenceTransformer`` created from the ``finetiuned_model`` path.
    """
    # NOTE(review): "finetiuned_model" looks like a misspelling of
    # "finetuned_model" — confirm it matches the directory that actually exists.
    model_path = r"finetiuned_model"
    return SentenceTransformer(model_path)
def process_embedding(ingredient, model):
    """Embed a single ingredient name.

    Args:
        ingredient: Raw ingredient string; it is normalized with
            ``process_string`` before encoding.
        model: Object exposing ``encode(list_of_str)`` whose result has a
            ``tolist()`` method.

    Returns:
        ``model.encode([...]).tolist()`` for the one-element batch
        (presumably a nested list holding a single embedding row).
    """
    batch = [process_string(ingredient)]
    vectors = model.encode(batch)
    return vectors.tolist()
def faiss_query(xq, index, top_k=1):
    """Search ``index`` and return the results for the first query vector.

    Args:
        xq: Query embedding(s) as an array-like. Either a 2-D batch (one row
            per query) or a single 1-D vector, which is treated as a batch of
            one.
        index: Object exposing ``search(queries, k)`` that returns a
            ``(distances, indices)`` pair of 2-D arrays.
        top_k: Number of neighbours requested per query.

    Returns:
        Tuple ``(distances, indices)`` holding only the first query's row of
        each array returned by ``index.search``.
    """
    # One conversion to a C-contiguous float32 array; a lone vector is
    # promoted to a single-row batch so ``search`` always receives 2-D input.
    queries = np.atleast_2d(np.ascontiguousarray(xq, dtype='float32'))
    distances, indices = index.search(queries, top_k)
    return distances[0], indices[0]
def get_top_matches(ingredients_flat, ingredients, loaded_model, index):
    """Find the closest known ingredient for each input ingredient.

    All ingredients are normalized with ``process_string``, embedded in one
    batched ``encode`` call and looked up with one batched ``search`` call
    (top-1 neighbour per ingredient).

    Args:
        ingredients_flat: Sequence of known ingredient names, addressed by the
            row ids that ``index.search`` returns.
        ingredients: Iterable of raw ingredient strings to match.
        loaded_model: Object exposing ``encode(list_of_str)`` that returns one
            embedding row per input string.
        index: Object exposing ``search(queries, k)`` that returns a
            ``(distances, indices)`` pair of 2-D arrays.

    Returns:
        Tuple ``(matches, scores)`` of equally long lists. ``matches`` holds
        the matched names and ``scores`` holds ``1 - distance / 2`` rounded to
        two decimals as plain Python floats. Queries for which the index
        reports no neighbour are omitted from both lists.
    """
    queries = [process_string(ingredient) for ingredient in ingredients]
    if not queries:
        # Nothing to embed; also avoids handing an empty batch to the index.
        return [], []

    # Both the model and the index accept batches, so a single call each
    # replaces the former per-ingredient thread pools.
    embeddings = np.asarray(loaded_model.encode(queries), dtype='float32')
    distances, indices = index.search(embeddings, 1)

    matches = []
    scores = []
    for row_distances, row_indices in zip(distances, indices):
        # FAISS reports "no result" with the label -1; indexing a list with -1
        # would silently pick the last ingredient, so such rows are skipped.
        if row_indices.size == 0 or row_indices[0] < 0:
            continue
        matches.append(ingredients_flat[int(row_indices[0])])
        # NOTE(review): 1 - d/2 reads as a cosine similarity only if the index
        # returns squared L2 distances between unit-length vectors — confirm.
        scores.append(round(1 - float(row_distances[0]) / 2, 2))
    return matches, scores
# Load the Faiss index from disk.
# NOTE(review): Streamlit re-executes the script on each interaction, so these
# module-level loads presumably repeat on every rerun — consider caching them.
index = faiss.read_index('faiss_index.bin')

# Load the metadata from the JSON file. The encoding is pinned to UTF-8 so
# non-ASCII ingredient names do not depend on the platform's default encoding.
with open('metadata_faiss.json', 'r', encoding='utf-8') as f:
    metadata = json.load(f)

# Flat list of ingredient names.
# NOTE(review): assumed to be in the same order as the vectors stored in
# `index`, since search results are used as positions in this list — confirm.
ingredients_flat = [item["Ingredient"] for item in metadata]

loaded_model = load_model()
def _menu_items_error(data):
    """Return an error message if ``data`` is not a list of menu items, else None."""
    if not isinstance(data, list):
        return "Invalid input: expected a JSON array of menu items."
    for position, item in enumerate(data):
        if not isinstance(item, dict):
            return f"Invalid input: menu item {position} is not a JSON object."
        ingredients = item.get("ingredients", [])
        if not isinstance(ingredients, list) or not all(
            isinstance(name, str) for name in ingredients
        ):
            return (
                f'Invalid input: "ingredients" of menu item {position} '
                "must be an array of strings."
            )
    return None


def main():
    """Render the ingredient-matching page and handle a "Process" click.

    The user pastes a JSON array of menu-item objects, each optionally
    carrying an ``"ingredients"`` list of strings. On "Process", every item
    gets the keys ``"Ingradients_matched"`` and ``"scores"`` (filled from
    ``get_top_matches``) and the result is displayed as a DataFrame, followed
    by the elapsed processing time. Malformed JSON or JSON of the wrong shape
    is reported with ``st.error`` instead of raising.
    """
    st.title("Ingredients name matching App :smiley:")
    st.header("Matches using embeddings (semantic search)")
    st.write("Enter the JSON input:")
    json_input = st.text_area("")

    if not st.button("Process"):
        return

    # perf_counter is a monotonic clock intended for measuring durations.
    start_time = time.perf_counter()
    with st.spinner("Processing..."):
        # Only json.loads can raise JSONDecodeError, so the try covers just it.
        try:
            input_data = json.loads(json_input)
        except json.JSONDecodeError:
            input_data = None
            problem = "Invalid JSON input. Please check and try again."
        else:
            # Valid JSON can still have the wrong shape (e.g. a bare object or
            # a null ingredient list); report that instead of crashing.
            problem = _menu_items_error(input_data)

        if problem is not None:
            st.error(problem)
        else:
            for menu_item in input_data:
                ing_list = menu_item.get("ingredients", [])
                matches, scores = get_top_matches(
                    ingredients_flat, ing_list, loaded_model, index
                )
                # NOTE(review): key spelling "Ingradients_matched" is kept
                # as-is because it is the column name shown to users.
                menu_item["Ingradients_matched"] = matches
                menu_item["scores"] = scores
            output_df = pd.DataFrame(input_data)
            st.write("Processed Data:")
            st.write(output_df)

    # The elapsed time is shown for failed attempts as well.
    elapsed = time.perf_counter() - start_time
    st.write(f"Processing time: {elapsed:.2f} seconds")
# Entry point: call main() only when this file is run as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()