Spaces:
Sleeping
Sleeping
import streamlit as st | |
import requests | |
import json | |
import os | |
import pandas as pd | |
from sentence_transformers import CrossEncoder | |
import numpy as np | |
# Initialize the HHEM model
@st.cache_resource
def _load_hhem_model():
    """Load the Vectara HHEM cross-encoder a single time.

    Streamlit re-executes this script from top to bottom on every widget
    interaction; caching the model as a resource keeps one shared instance
    instead of re-instantiating it on each rerun.
    """
    return CrossEncoder('vectara/hallucination_evaluation_model')


model = _load_hhem_model()
# Function to compute HHEM scores
def compute_hhem_scores(texts, summary):
    """Score each source text against the summary with the HHEM model.

    Args:
        texts: Iterable of source passages.
        summary: Summary text; it is paired with every passage.

    Returns:
        Whatever ``model.predict`` returns for the ``[text, summary]`` pairs,
        in the same order as ``texts``. When ``texts`` is empty an empty
        NumPy array is returned and the model is not called.
    """
    pairs = [[text, summary] for text in texts]
    if not pairs:
        # Nothing to score. Skip the model call: predict() is not guaranteed
        # to accept an empty batch (some sentence-transformers versions index
        # the first pair and raise on an empty list).
        return np.array([])
    return model.predict(pairs)
# Define the Vectara query function
def vectara_query(query: str, config: dict):
    """Query the Vectara v1 API and return the matches plus the summary.

    Args:
        query: The search text sent to Vectara.
        config: Request settings. ``customer_id``, ``corpus_id`` and
            ``api_key`` are required. ``lambda_val`` (default 0.5),
            ``top_k`` (default 10) and ``timeout`` in seconds (default 30)
            are optional.

    Returns:
        A ``(results, summary)`` tuple: ``results`` is a list of
        ``[text, score]`` pairs and ``summary`` is the summary text (``""``
        when the response carries no summary). On any failure an error is
        shown with ``st.error`` and ``([], "")`` is returned.
    """
    corpus_key = [{
        "customerId": config["customer_id"],
        "corpusId": config["corpus_id"],
        "lexicalInterpolationConfig": {"lambda": config.get("lambda_val", 0.5)},
    }]
    data = {
        "query": [{
            "query": query,
            "start": 0,
            "numResults": config.get("top_k", 10),
            "contextConfig": {
                "sentencesBefore": 2,
                "sentencesAfter": 2,
            },
            "corpusKey": corpus_key,
            "summary": [{
                "responseLang": "eng",
                "maxSummarizedResults": 5,
            }]
        }]
    }
    headers = {
        "x-api-key": config["api_key"],
        "customer-id": config["customer_id"],
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(
            headers=headers,
            url="https://api.vectara.io/v1/query",
            data=json.dumps(data),
            # Without a timeout a stalled connection would hang the app.
            timeout=config.get("timeout", 30),
        )
    except requests.RequestException as exc:
        # Report network-level failures the same way as HTTP errors below.
        st.error(f"Query failed (could not reach Vectara: {exc})")
        return [], ""

    if response.status_code != 200:
        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
        return [], ""

    try:
        response_set = response.json()["responseSet"][0]
        responses = response_set.get("response") or []
        res = [[r['text'], r['score']] for r in responses]
        # The summary list may be absent or empty; fall back to "".
        summaries = response_set.get("summary") or []
        summary = summaries[0].get("text", "") if summaries else ""
    except (ValueError, KeyError, IndexError, TypeError, AttributeError):
        # ValueError covers a body that is not valid JSON; the others cover
        # a JSON body that does not have the expected shape.
        st.error("Query failed (unexpected response format from Vectara)")
        return [], ""

    return res, summary
# Streamlit UI setup
st.title("Vectara Content Query Interface")

# User inputs
query = st.text_input("Enter your query here", "")
lambda_val = st.slider("Lambda Value", min_value=0.0, max_value=1.0, value=0.5)
top_k = st.number_input("Top K Results", min_value=1, max_value=50, value=10)

if st.button("Query Vectara"):
    # Credentials and corpus selection come from the environment.
    config = {
        "api_key": os.environ.get("VECTARA_API_KEY", ""),
        "customer_id": os.environ.get("VECTARA_CUSTOMER_ID", ""),
        "corpus_id": os.environ.get("VECTARA_CORPUS_ID", ""),
        "lambda_val": lambda_val,
        "top_k": top_k,
    }
    missing = [
        env_name
        for key, env_name in (
            ("api_key", "VECTARA_API_KEY"),
            ("customer_id", "VECTARA_CUSTOMER_ID"),
            ("corpus_id", "VECTARA_CORPUS_ID"),
        )
        if not config[key]
    ]

    if not query.strip():
        # The text box defaults to "", so avoid sending a blank query.
        st.warning("Please enter a query before searching.")
    elif missing:
        # Fail early with an actionable message instead of a rejected request.
        st.error(f"Missing required environment variable(s): {', '.join(missing)}")
    else:
        results, summary = vectara_query(query, config)
        if results:
            st.subheader("Summary")
            st.write(summary)

            st.subheader("Top Results")
            # Extract texts from results; the cap of 5 matches the
            # maxSummarizedResults value requested in vectara_query.
            texts = [r[0] for r in results[:5]]
            # Compute HHEM scores
            scores = compute_hhem_scores(texts, summary)
            # Prepare and display the dataframe
            df = pd.DataFrame({'Fact': texts, 'HHEM Score': scores})
            st.dataframe(df)
        else:
            st.write("No results found.")
# Earlier revision of this app (without HHEM scoring), preserved inside a bare
# triple-quoted string. The string is never assigned or executed, so nothing
# between the quotes runs.
""" | |
import streamlit as st | |
import requests | |
import json | |
import os | |
import pandas as pd | |
# Assuming the environment variables are already set, we directly use them. | |
# However, in a Streamlit app, you might want to set them up within the script for demonstration purposes | |
# or securely use secrets management for API keys and other sensitive information. | |
# Define the Vectara query function | |
def vectara_query(query: str, config: dict): | |
# Query Vectara and return the results. | |
corpus_key = [{ | |
"customerId": config["customer_id"], | |
"corpusId": config["corpus_id"], | |
"lexicalInterpolationConfig": {"lambda": config.get("lambda_val", 0.5)}, | |
}] | |
data = { | |
"query": [{ | |
"query": query, | |
"start": 0, | |
"numResults": config.get("top_k", 10), | |
"contextConfig": { | |
"sentencesBefore": 2, | |
"sentencesAfter": 2, | |
}, | |
"corpusKey": corpus_key, | |
"summary": [{ | |
"responseLang": "eng", | |
"maxSummarizedResults": 5, | |
}] | |
}] | |
} | |
headers = { | |
"x-api-key": config["api_key"], | |
"customer-id": config["customer_id"], | |
"Content-Type": "application/json", | |
} | |
response = requests.post( | |
headers=headers, | |
url="https://api.vectara.io/v1/query", | |
data=json.dumps(data), | |
) | |
if response.status_code != 200: | |
st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})") | |
return [], "" | |
result = response.json() | |
responses = result["responseSet"][0]["response"] | |
summary = result["responseSet"][0]["summary"][0]["text"] | |
res = [[r['text'], r['score']] for r in responses] | |
return res, summary | |
# Streamlit interface | |
st.title("Vectara Content Query Interface") | |
# User inputs | |
query = st.text_input("Enter your query here", "What does Vectara do?") | |
lambda_val = st.slider("Lambda Value", min_value=0.0, max_value=1.0, value=0.5) | |
top_k = st.number_input("Top K Results", min_value=1, max_value=50, value=10) | |
if st.button("Query Vectara"): | |
config = { | |
"api_key": os.environ.get("VECTARA_API_KEY", ""), | |
"customer_id": os.environ.get("VECTARA_CUSTOMER_ID", ""), | |
"corpus_id": os.environ.get("VECTARA_CORPUS_ID", ""), | |
"lambda_val": lambda_val, | |
"top_k": top_k, | |
} | |
results, summary = vectara_query(query, config) | |
if results: | |
st.subheader("Summary") | |
st.write(summary) | |
st.subheader("Top Results") | |
df = pd.DataFrame(results, columns=['Text', 'Score']) | |
st.dataframe(df) | |
else: | |
st.write("No results found.") | |
# Note: The integration of the model for HHEM scores is omitted as it requires the specific model details and implementation. | |
""" | |