hhem

Sleeping

App Files Files Community

eaglelandsonce committed on Feb 10

Commit

e84a43c

•

1 Parent(s): 37d3b4e

Update app.py

Browse files

Files changed (1) hide show

app.py +106 -0

app.py CHANGED Viewed

@@ -3,6 +3,110 @@ import requests
 import json
 import os
 import pandas as pd
 # Assuming the environment variables are already set, we directly use them.
 # However, in a Streamlit app, you might want to set them up within the script for demonstration purposes
@@ -84,3 +188,5 @@ if st.button("Query Vectara"):
         st.write("No results found.")
 # Note: The integration of the model for HHEM scores is omitted as it requires the specific model details and implementation.

 import json
 import os
 import pandas as pd
+from sentence_transformers import CrossEncoder
+import numpy as np
+# Initialize the HHEM model
+# Streamlit re-executes this script on every widget interaction, so the model
+# is loaded through st.cache_resource to avoid re-instantiating it per rerun.
+@st.cache_resource
+def load_hhem_model():
+    """Load the Vectara HHEM cross-encoder once and reuse it across reruns."""
+    return CrossEncoder('vectara/hallucination_evaluation_model')
+model = load_hhem_model()
+# Score every retrieved text against the generated summary with the HHEM model
+def compute_hhem_scores(texts, summary):
+    """Return the HHEM model's prediction for each (text, summary) pair.
+
+    Every entry of ``texts`` is paired with the same ``summary``; the pairs
+    are passed to ``model.predict`` as one batch and its result is returned
+    unchanged.
+    """
+    text_summary_pairs = []
+    for passage in texts:
+        text_summary_pairs.append([passage, summary])
+    return model.predict(text_summary_pairs)
+# Define the Vectara query function
+def vectara_query(query: str, config: dict):
+    """Run a query against the Vectara v1 query endpoint.
+
+    Args:
+        query: The search text sent to Vectara.
+        config: Settings dict. Must contain "customer_id", "corpus_id" and
+            "api_key"; may contain "lambda_val" (default 0.5), "top_k"
+            (default 10) and "timeout" in seconds (default 30).
+
+    Returns:
+        A ``(results, summary)`` tuple where ``results`` is a list of
+        ``[text, score]`` pairs and ``summary`` is the summary text (an
+        empty string when the response carries no summary). On any failure
+        an error is shown with ``st.error`` and ``([], "")`` is returned.
+    """
+    corpus_key = [{
+        "customerId": config["customer_id"],
+        "corpusId": config["corpus_id"],
+        "lexicalInterpolationConfig": {"lambda": config.get("lambda_val", 0.5)},
+    }]
+    data = {
+        "query": [{
+            "query": query,
+            "start": 0,
+            "numResults": config.get("top_k", 10),
+            "contextConfig": {
+                "sentencesBefore": 2,
+                "sentencesAfter": 2,
+            },
+            "corpusKey": corpus_key,
+            "summary": [{
+                "responseLang": "eng",
+                "maxSummarizedResults": 5,
+            }]
+        }]
+    }
+    headers = {
+        "x-api-key": config["api_key"],
+        "customer-id": config["customer_id"],
+        "Content-Type": "application/json",
+    }
+    try:
+        response = requests.post(
+            headers=headers,
+            url="https://api.vectara.io/v1/query",
+            data=json.dumps(data),
+            # Without a timeout a stalled connection would block the app indefinitely.
+            timeout=config.get("timeout", 30),
+        )
+    except requests.RequestException as exc:
+        # Connection errors and timeouts are reported the same way as HTTP failures.
+        st.error(f"Query failed (could not reach Vectara: {exc})")
+        return [], ""
+    if response.status_code != 200:
+        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
+        return [], ""
+    try:
+        response_set = response.json()["responseSet"][0]
+        responses = response_set.get("response") or []
+        # The summary list may be missing or empty; fall back to an empty string.
+        summaries = response_set.get("summary") or []
+        summary = summaries[0].get("text", "") if summaries else ""
+        res = [[r['text'], r['score']] for r in responses]
+    except (ValueError, LookupError, TypeError, AttributeError):
+        # ValueError covers a non-JSON body; the others cover an unexpected shape.
+        st.error("Query failed (unexpected response format from Vectara)")
+        return [], ""
+    return res, summary
+# Streamlit UI setup
+st.title("Vectara Content Query Interface")
+# User inputs
+query = st.text_input("Enter your query here", "")
+lambda_val = st.slider("Lambda Value", min_value=0.0, max_value=1.0, value=0.5)
+top_k = st.number_input("Top K Results", min_value=1, max_value=50, value=10)
+if st.button("Query Vectara"):
+    if not query.strip():
+        # A blank query cannot return anything useful, so skip the API call.
+        st.warning("Please enter a query first.")
+    else:
+        # Credentials come from the environment; missing values become "".
+        config = {
+            "api_key": os.environ.get("VECTARA_API_KEY", ""),
+            "customer_id": os.environ.get("VECTARA_CUSTOMER_ID", ""),
+            "corpus_id": os.environ.get("VECTARA_CORPUS_ID", ""),
+            "lambda_val": lambda_val,
+            "top_k": top_k,
+        }
+        results, summary = vectara_query(query, config)
+        if results:
+            st.subheader("Summary")
+            st.write(summary)
+            st.subheader("Top Results")
+            # Extract texts from the first five results
+            # (presumably chosen to match maxSummarizedResults=5 in the request - confirm).
+            texts = [r[0] for r in results[:5]]
+            # Compute HHEM scores of each text against the summary
+            scores = compute_hhem_scores(texts, summary)
+            # Prepare and display the dataframe
+            df = pd.DataFrame({'Fact': texts, 'HHEM Score': scores})
+            st.dataframe(df)
+        else:
+            st.write("No results found.")
+"""
+import streamlit as st
+import requests
+import json
+import os
+import pandas as pd
 # Assuming the environment variables are already set, we directly use them.
 # However, in a Streamlit app, you might want to set them up within the script for demonstration purposes
         st.write("No results found.")
 # Note: The integration of the model for HHEM scores is omitted as it requires the specific model details and implementation.
+"""