Spaces:

SemViQA
/

semviqa-demo

Running

App Files Files Community

xuandin committed 4 days ago

Commit

f4d5aab

verified ·

1 Parent(s): e813be9

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -80

app.py CHANGED Viewed

@@ -1,80 +1,71 @@
-import streamlit as st
-import torch
-from transformers import AutoTokenizer
-from semviqa.SER.qatc_model import QATCForQuestionAnswering
-# Load QATC Model
-@st.cache_resource()
-def load_qatc_model():
-    tokenizer = AutoTokenizer.from_pretrained("xuandin/semviqa-qatc-vimrc-viwikifc")
-    model = QATCForQuestionAnswering.from_pretrained("xuandin/semviqa-qatc-vimrc-viwikifc")
-    return tokenizer, model
-# Streamlit UI Configuration
-st.set_page_config(page_title="SemViQA Demo", layout="wide")
-# Improved UI Design
-st.markdown("""
-    <style>
-        .big-title {
-            font-size: 36px;
-            font-weight: bold;
-            color: #4A90E2;
-            text-align: center;
-        }
-        .sub-title {
-            font-size: 20px;
-            color: #666;
-            text-align: center;
-        }
-        .stButton>button {
-            background-color: #4CAF50;
-            color: white;
-            font-size: 16px;
-            width: 100%;
-            border-radius: 8px;
-            padding: 10px;
-        }
-        .stTextArea textarea {
-            font-size: 16px;
-        }
-        .result-box {
-            background-color: #f9f9f9;
-            padding: 20px;
-            border-radius: 10px;
-            box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1);
-        }
-    </style>
-""", unsafe_allow_html=True)
-st.markdown("<p class='big-title'>🔍 SemViQA: A Semantic Question Answering System for Vietnamese Information Fact-Checking</p>", unsafe_allow_html=True)
-st.markdown("<p class='sub-title'>Enter a claim and context to verify its accuracy</p>", unsafe_allow_html=True)
-# Sidebar - Configuration Settings
-st.sidebar.header("⚙️ Settings")
-tfidf_threshold = st.sidebar.slider("🔧 TF-IDF Threshold", 0.0, 1.0, 0.5, 0.01)
-length_ratio_threshold = st.sidebar.slider("📏 Length Ratio Threshold", 0.1, 1.0, 0.5, 0.01)
-qatc_model = st.sidebar.selectbox("🤖 Select QATC Model", ["xuandin/semviqa-qatc-vimrc-viwikifc"])
-# User Input Fields
-claim = st.text_area("✍️ Enter Claim", "Vietnam is a country in Southeast Asia.")
-context = st.text_area("📖 Enter Context", "Vietnam is a country located in Southeast Asia, covering an area of over 331,000 km² with a population of more than 98 million people.")
-if st.button("🔎 Verify"):
-    tokenizer, model = load_qatc_model()
-    inputs = tokenizer(claim, context, return_tensors="pt", truncation=True, max_length=512)
-    with torch.no_grad():
-        outputs = model(**inputs)
-    start_idx = torch.argmax(outputs.start_logits)
-    end_idx = torch.argmax(outputs.end_logits)
-    tokens = inputs["input_ids"][0][start_idx : end_idx + 1]
-    evidence_result = tokenizer.decode(tokens, skip_special_tokens=True)
-    st.markdown("""
-        <div class='result-box'>
-            <h3>📌 Result</h3>
-            <p><strong>🔍 Evidence:</strong> {}</p>
-        </div>
-    """.format(evidence_result), unsafe_allow_html=True)

+import html
+import streamlit as st
+import torch
+from transformers import AutoTokenizer
+from semviqa.ser.qatc_model import QATCForQuestionAnswering
+from semviqa.tvc.model import ClaimModelForClassification
+from semviqa.ser.ser_eval import extract_evidence_tfidf_qatc
+from semviqa.tvc.tvc_eval import classify_claim
+# Cached loader shared by every model used in the app
+@st.cache_resource()
+def load_model(model_name, model_class):
+    """Return the ``(tokenizer, model)`` pair for ``model_name``.
+
+    The tokenizer is obtained from ``AutoTokenizer`` and the model from
+    ``model_class``; both are fetched with ``from_pretrained(model_name)``.
+    """
+    return (
+        AutoTokenizer.from_pretrained(model_name),
+        model_class.from_pretrained(model_name),
+    )
+# Page setup: browser title and wide layout, followed by the custom CSS used by
+# the title, subtitle, buttons, text areas and the result box rendered below.
+st.set_page_config(page_title="SemViQA Demo", layout="wide")
+st.markdown("""
+    <style>
+        .big-title { font-size: 36px; font-weight: bold; color: #4A90E2; text-align: center; }
+        .sub-title { font-size: 20px; color: #666; text-align: center; }
+        .stButton>button { background-color: #4CAF50; color: white; font-size: 16px; width: 100%; border-radius: 8px; padding: 10px; }
+        .stTextArea textarea { font-size: 16px; }
+        .result-box { background-color: #f9f9f9; padding: 20px; border-radius: 10px; box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1); }
+    </style>
+""", unsafe_allow_html=True)
+st.markdown("<p class='big-title'>🔍 SemViQA: Vietnamese Fact-Checking System</p>", unsafe_allow_html=True)
+st.markdown("<p class='sub-title'>Enter a claim and context to verify its accuracy</p>", unsafe_allow_html=True)
+# Sidebar settings, inside an expander that starts collapsed (expanded=False):
+# two threshold sliders and one selectbox per model; each selectbox currently
+# offers a single checkpoint name.
+with st.sidebar.expander("⚙️ Settings", expanded=False):
+    tfidf_threshold = st.slider("🔧 TF-IDF Threshold", 0.0, 1.0, 0.5, 0.01)
+    length_ratio_threshold = st.slider("📏 Length Ratio Threshold", 0.1, 1.0, 0.5, 0.01)
+    qatc_model_name = st.selectbox("🤖 QATC Model", ["xuandin/semviqa-qatc-vimrc-viwikifc"])
+    bc_model_name = st.selectbox("🏷️ Binary Classification Model", ["xuandin/semviqa-bc"])
+    tc_model_name = st.selectbox("📊 Three-Class Model", ["xuandin/semviqa-tc"])
+# Load the tokenizer/model pair for each selected checkpoint through load_model,
+# which is wrapped in st.cache_resource. The binary and three-class checkpoints
+# are both loaded with ClaimModelForClassification.
+tokenizer_qatc, model_qatc = load_model(qatc_model_name, QATCForQuestionAnswering)
+tokenizer_bc, model_bc = load_model(bc_model_name, ClaimModelForClassification)
+tokenizer_tc, model_tc = load_model(tc_model_name, ClaimModelForClassification)
+# User input fields, pre-filled with an English example claim and context
+claim = st.text_area("✍️ Enter Claim", "Vietnam is a country in Southeast Asia.")
+context = st.text_area("📖 Enter Context", "Vietnam is a country located in Southeast Asia, covering an area of over 331,000 km² with a population of more than 98 million people.")
+# Run the pipeline when the button is clicked: evidence extraction, then
+# claim classification, then rendering of the result box.
+if st.button("🔎 Verify"):
+    # Resolve the inference device once and reuse it for every model call.
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    # Extract evidence; the two sidebar sliders are passed through as thresholds.
+    evidence = extract_evidence_tfidf_qatc(
+        claim, context, model_qatc, tokenizer_qatc, device,
+        confidence_threshold=tfidf_threshold, length_ratio_threshold=length_ratio_threshold
+    )
+    # Claim classification: the three-class model runs first, and a prediction
+    # of 0 keeps the default "NEI" verdict without consulting the binary model.
+    verdict = "NEI"
+    prob3class, pred_tc = classify_claim(claim, evidence, model_tc, tokenizer_tc, device)
+    if pred_tc != 0:
+        prob2class, pred_bc = classify_claim(claim, evidence, model_bc, tokenizer_bc, device)
+        # Checks are applied in this order: binary class 0 wins outright; otherwise
+        # the binary model decides only when its score beats the three-class score;
+        # otherwise the three-class prediction is mapped to its label.
+        if pred_bc == 0:
+            verdict = "SUPPORTED"
+        elif prob2class > prob3class:
+            verdict = "REFUTED"
+        else:
+            verdict = ["NEI", "SUPPORTED", "REFUTED"][pred_tc]
+    # Display results. The evidence originates from user-entered text and this
+    # markup is rendered with unsafe_allow_html=True, so it is HTML-escaped to
+    # keep characters such as "<", ">" and "&" literal instead of being parsed as markup.
+    safe_evidence = html.escape(str(evidence))
+    st.markdown(f"""
+        <div class='result-box'>
+            <h3>📌 Result</h3>
+            <p><strong>🔍 Evidence:</strong> {safe_evidence}</p>
+            <p><strong>✅ Verdict:</strong> {verdict}</p>
+        </div>
+    """, unsafe_allow_html=True)