Spaces:

SemViQA
/

semviqa-demo

Running

App Files Community

xuandin committed 4 days ago

Commit

8a052ba

verified ·

1 Parent(s): 35de67c

Update app.py

Browse files

Files changed (1) hide show

app.py +168 -82

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ from semviqa.ser.qatc_model import QATCForQuestionAnswering
 from semviqa.tvc.model import ClaimModelForClassification
 from semviqa.ser.ser_eval import extract_evidence_tfidf_qatc
 from semviqa.tvc.tvc_eval import classify_claim
 # Load models with caching
 @st.cache_resource()
@@ -14,19 +15,26 @@ def load_model(model_name, model_class, is_bc=False):
     model.eval()
     return tokenizer, model
-# Page Configuration
 st.set_page_config(page_title="SemViQA Demo", layout="wide")
-# Custom CSS for improved UI
 st.markdown("""
     <style>
-        body {
-            font-family: 'Arial', sans-serif;
         }
         .big-title {
             font-size: 36px;
             font-weight: bold;
-            color: #0078D4;
             text-align: center;
             margin-top: 20px;
         }
@@ -43,10 +51,10 @@ st.markdown("""
             width: 100%;
             border-radius: 8px;
             padding: 10px;
-            transition: 0.3s;
         }
-        .stButton>button:hover {
-            background-color: #45a049;
         }
         .result-box {
             background-color: #f9f9f9;
@@ -58,6 +66,7 @@ st.markdown("""
         .verdict {
             font-size: 24px;
             font-weight: bold;
             display: flex;
             align-items: center;
         }
@@ -67,87 +76,164 @@ st.markdown("""
     </style>
 """, unsafe_allow_html=True)
-# Page Header
-st.markdown("<p class='big-title'>SemViQA: Vietnamese Fact-Checking System</p>", unsafe_allow_html=True)
-st.markdown("<p class='sub-title'>Enter a claim and context to verify its accuracy</p>", unsafe_allow_html=True)
-# Sidebar: Settings
-with st.sidebar.expander("⚙️ Settings", expanded=True):
-    tfidf_threshold = st.slider("TF-IDF Threshold", 0.0, 1.0, 0.5, 0.01)
-    length_ratio_threshold = st.slider("Length Ratio Threshold", 0.1, 1.0, 0.5, 0.01)
-    qatc_model_name = st.selectbox("QATC Model", [
-        "SemViQA/qatc-infoxlm-viwikifc",
-        "SemViQA/qatc-infoxlm-isedsc01",
-        "SemViQA/qatc-vimrc-viwikifc",
-        "SemViQA/qatc-vimrc-isedsc01"
-    ])
-    bc_model_name = st.selectbox("Binary Classification Model", [
-        "SemViQA/bc-xlmr-viwikifc",
-        "SemViQA/bc-xlmr-isedsc01",
-        "SemViQA/bc-infoxlm-viwikifc",
-        "SemViQA/bc-infoxlm-isedsc01",
-        "SemViQA/bc-erniem-viwikifc",
-        "SemViQA/bc-erniem-isedsc01"
-    ])
-    tc_model_name = st.selectbox("Three-Class Classification Model", [
-        "SemViQA/tc-xlmr-viwikifc",
-        "SemViQA/tc-xlmr-isedsc01",
-        "SemViQA/tc-infoxlm-viwikifc",
-        "SemViQA/tc-infoxlm-isedsc01",
-        "SemViQA/tc-erniem-viwikifc",
-        "SemViQA/tc-erniem-isedsc01"
-    ])
-    show_details = st.checkbox("Show probability details", value=False)
-# Load Models
-tokenizer_qatc, model_qatc = load_model(qatc_model_name, QATCForQuestionAnswering)
-tokenizer_bc, model_bc = load_model(bc_model_name, ClaimModelForClassification, is_bc=True)
-tokenizer_tc, model_tc = load_model(tc_model_name, ClaimModelForClassification)
-# Define verdict icons
-verdict_icons = {
-    "SUPPORTED": "✅",
-    "REFUTED": "❌",
-    "NEI": "⚠️"
-}
-# Tabs for functionalities
-tabs = st.tabs(["Verify", "History", "About"])
-# --- Verify Tab ---
-with tabs[0]:
-    st.subheader("Verify a Claim")
-    claim = st.text_area("Enter Claim", "Vietnam is a country in Southeast Asia.")
-    context = st.text_area("Enter Context", "Vietnam is a country located in Southeast Asia.")
-    if st.button("Verify", key="verify_button"):
-        with st.spinner("Verifying..."):
-            with torch.no_grad():
-                evidence = extract_evidence_tfidf_qatc(
-                    claim, context, model_qatc, tokenizer_qatc,
-                    "cuda" if torch.cuda.is_available() else "cpu",
-                    confidence_threshold=tfidf_threshold,
-                    length_ratio_threshold=length_ratio_threshold
-                )
-                verdict = "NEI"
-                prob3class, pred_tc = classify_claim(claim, evidence, model_tc, tokenizer_tc, "cuda" if torch.cuda.is_available() else "cpu")
-                if pred_tc != 0:
-                    prob2class, pred_bc = classify_claim(claim, evidence, model_bc, tokenizer_bc, "cuda" if torch.cuda.is_available() else "cpu")
-                    verdict = "SUPPORTED" if pred_bc == 0 else "REFUTED" if prob2class > prob3class else ["NEI", "SUPPORTED", "REFUTED"][pred_tc]
-                # Display result
                 st.markdown(f"""
                     <div class='result-box'>
-                        <h3>Result</h3>
-                        <p><strong>Evidence:</strong> {evidence}</p>
-                        <p class='verdict'><span class='verdict-icon'>{verdict_icons.get(verdict, '')}</span>{verdict}</p>
                     </div>
                 """, unsafe_allow_html=True)
-                if torch.cuda.is_available():
-                    torch.cuda.empty_cache()
-# --- About Tab ---
-with tabs[2]:
-    st.subheader("About SemViQA")
-    st.markdown("""SemViQA is a semantic fact-checking system for Vietnamese information verification.""")

 from semviqa.tvc.model import ClaimModelForClassification
 from semviqa.ser.ser_eval import extract_evidence_tfidf_qatc
 from semviqa.tvc.tvc_eval import classify_claim
+import io
 # Load models with caching
 @st.cache_resource()
     model.eval()
     return tokenizer, model
+# Set up page configuration
 st.set_page_config(page_title="SemViQA Demo", layout="wide")
+# Custom CSS: constrain the page height, support the two-column layout, and style the result box
 st.markdown("""
     <style>
+        html, body {
+            height: 100%;
+            margin: 0;
+            overflow: hidden;
+        }
+        .main-container {
+            height: 100vh;
+            overflow-y: auto;
+            padding: 20px;
         }
         .big-title {
             font-size: 36px;
             font-weight: bold;
+            color: #4A90E2;
             text-align: center;
             margin-top: 20px;
         }
             width: 100%;
             border-radius: 8px;
             padding: 10px;
         }
+        .stTextArea textarea {
+            font-size: 16px;
+            min-height: 120px;
         }
         .result-box {
             background-color: #f9f9f9;
         .verdict {
             font-size: 24px;
             font-weight: bold;
+            margin: 0;
             display: flex;
             align-items: center;
         }
     </style>
 """, unsafe_allow_html=True)
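+# Wrap the whole page content in a single container.
+# NOTE(review): st.container() is called without a height and the .main-container CSS class is not referenced in the visible code, so no fixed height is actually applied here — confirm intent.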
+with st.container():
+    st.markdown("<p class='big-title'>SemViQA: Hệ thống Kiểm chứng Thông tin bằng Semantic QA cho Tiếng Việt</p>", unsafe_allow_html=True)
+    st.markdown("<p class='sub-title'>Nhập thông tin cần kiểm chứng và ngữ cảnh để xác minh độ chính xác</p>", unsafe_allow_html=True)
+    # Sidebar: Global Settings
+    with st.sidebar.expander("⚙️ Cài đặt", expanded=True):
+        tfidf_threshold = st.slider("Ngưỡng TF-IDF", 0.0, 1.0, 0.5, 0.01)
+        length_ratio_threshold = st.slider("Ngưỡng Tỉ lệ độ dài", 0.1, 1.0, 0.5, 0.01)
+        qatc_model_name = st.selectbox("Mô hình QATC", [
+            "SemViQA/qatc-infoxlm-viwikifc",
+            "SemViQA/qatc-infoxlm-isedsc01",
+            "SemViQA/qatc-vimrc-viwikifc",
+            "SemViQA/qatc-vimrc-isedsc01"
+        ])
+        bc_model_name = st.selectbox("Mô hình Phân loại Nhị phân", [
+            "SemViQA/bc-xlmr-viwikifc",
+            "SemViQA/bc-xlmr-isedsc01",
+            "SemViQA/bc-infoxlm-viwikifc",
+            "SemViQA/bc-infoxlm-isedsc01",
+            "SemViQA/bc-erniem-viwikifc",
+            "SemViQA/bc-erniem-isedsc01"
+        ])
+        tc_model_name = st.selectbox("Mô hình Phân loại 3 lớp", [
+            "SemViQA/tc-xlmr-viwikifc",
+            "SemViQA/tc-xlmr-isedsc01",
+            "SemViQA/tc-infoxlm-viwikifc",
+            "SemViQA/tc-infoxlm-isedsc01",
+            "SemViQA/tc-erniem-viwikifc",
+            "SemViQA/tc-erniem-isedsc01"
+        ])
+        show_details = st.checkbox("Hiển thị chi tiết xác suất", value=False)
+    # Initialise session state for the verification history and the latest result
+    if 'history' not in st.session_state:
+        st.session_state.history = []
+    if 'latest_result' not in st.session_state:
+        st.session_state.latest_result = None
+    # Load the selected models
+    tokenizer_qatc, model_qatc = load_model(qatc_model_name, QATCForQuestionAnswering)
+    tokenizer_bc, model_bc = load_model(bc_model_name, ClaimModelForClassification, is_bc=True)
+    tokenizer_tc, model_tc = load_model(tc_model_name, ClaimModelForClassification)
+    # Icons for each verdict
+    verdict_icons = {
+        "SUPPORTED": "✅",
+        "REFUTED": "❌",
+        "NEI": "⚠️"
+    }
+    # Create the tabs: Verify, History, About
+    tabs = st.tabs(["Verify", "History", "About"])
+    # --- Tab Verify ---
+    with tabs[0]:
+        st.subheader("Kiểm chứng một thông tin")
+        # Two-column layout: inputs on the left, result panel on the right
+        col_input, col_result = st.columns([2, 1])
+        with col_input:
+            claim = st.text_area("Nhập Claim", "Vietnam is a country in Southeast Asia.")
+            context = st.text_area("Nhập Context", "Vietnam is a country located in Southeast Asia, covering an area of over 331,000 km² with a population of more than 98 million people.")
+            if st.button("Kiểm chứng", key="verify_button"):
+                with st.spinner("Đang kiểm chứng..."):
+                    with torch.no_grad():
+                        # Extract the evidence, then classify the claim
+                        evidence = extract_evidence_tfidf_qatc(
+                            claim, context, model_qatc, tokenizer_qatc,
+                            "cuda" if torch.cuda.is_available() else "cpu",
+                            confidence_threshold=tfidf_threshold,
+                            length_ratio_threshold=length_ratio_threshold
+                        )
+                        verdict = "NEI"
+                        details = ""
+                        prob3class, pred_tc = classify_claim(
+                            claim, evidence, model_tc, tokenizer_tc,
+                            "cuda" if torch.cuda.is_available() else "cpu"
+                        )
+                        if pred_tc != 0:
+                            prob2class, pred_bc = classify_claim(
+                                claim, evidence, model_bc, tokenizer_bc,
+                                "cuda" if torch.cuda.is_available() else "cpu"
+                            )
+                            if pred_bc == 0:
+                                verdict = "SUPPORTED"
+                            elif prob2class > prob3class:
+                                verdict = "REFUTED"
+                            else:
+                                verdict = ["NEI", "SUPPORTED", "REFUTED"][pred_tc]
+                            if show_details:
+                                details = f"<p><strong>3-Class Probability:</strong> {prob3class.item():.2f} - <strong>2-Class Probability:</strong> {prob2class.item():.2f}</p>"
+                        # Append this run to the history and store it as the latest result
+                        st.session_state.history.append({
+                            "claim": claim,
+                            "evidence": evidence,
+                            "verdict": verdict
+                        })
+                        st.session_state.latest_result = {
+                            "claim": claim,
+                            "evidence": evidence,
+                            "verdict": verdict,
+                            "details": details
+                        }
+                        if torch.cuda.is_available():
+                            torch.cuda.empty_cache()
+        # Show the result in the right-hand column
+        with col_result:
+            st.markdown("<h3>Kết quả kiểm chứng</h3>", unsafe_allow_html=True)
+            if st.session_state.latest_result is not None:
+                res = st.session_state.latest_result
                 st.markdown(f"""
                     <div class='result-box'>
+                        <p><strong>Claim:</strong> {res['claim']}</p>
+                        <p><strong>Bằng chứng:</strong> {res['evidence']}</p>
+                        <p class='verdict'><span class='verdict-icon'>{verdict_icons.get(res['verdict'], '')}</span>{res['verdict']}</p>
+                        {res['details']}
                     </div>
                 """, unsafe_allow_html=True)
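+                # Offer the verification result as a plain-text download.
+                # NOTE(review): res['details'] is built as an HTML fragment, so the downloaded file will contain tags when probability details are enabled.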
+                result_text = f"Claim: {res['claim']}\nEvidence: {res['evidence']}\nVerdict: {res['verdict']}\nDetails: {res['details']}"
+                st.download_button("Tải xuống kết quả", data=result_text, file_name="ketqua_kiemchung.txt", mime="text/plain")
+            else:
+                st.info("Chưa có kết quả kiểm chứng nào.")
+    # --- Tab History ---
+    with tabs[1]:
+        st.subheader("Lịch sử kiểm chứng")
+        if st.session_state.history:
+            for idx, record in enumerate(reversed(st.session_state.history), 1):
+                st.markdown(f"**{idx}. Claim:** {record['claim']}  \n**Kết quả:** {verdict_icons.get(record['verdict'], '')} {record['verdict']}")
+        else:
+            st.write("Chưa có lịch sử kiểm chứng nào.")
+    # --- Tab About ---
+    with tabs[2]:
+        st.subheader("Giới thiệu")
+        st.markdown("""
+            <p align="center">
+                <a href="https://arxiv.org/abs/2503.00955">
+                    <img src="https://img.shields.io/badge/arXiv-2503.00955-red?style=flat&label=arXiv">
+                </a>
+                <a href="https://huggingface.co/SemViQA">
+                    <img src="https://img.shields.io/badge/Hugging%20Face-Model-yellow?style=flat">
+                </a>
+                <a href="https://pypi.org/project/SemViQA">
+                    <img src="https://img.shields.io/pypi/v/SemViQA?color=blue&label=PyPI">
+                </a>
+                <a href="https://github.com/DAVID-NGUYEN-S16/SemViQA">
+                    <img src="https://img.shields.io/github/stars/DAVID-NGUYEN-S16/SemViQA?style=social">
+                </a>
+            </p>
+        """, unsafe_allow_html=True)
+        st.markdown("""
+            **Mô tả:**
+            SemViQA là hệ thống Semantic QA được thiết kế để kiểm chứng thông tin trong tiếng Việt.
+            Hệ thống trích xuất bằng chứng từ ngữ cảnh được cung cấp và phân loại thông tin là **SUPPORTED**, **REFUTED**, hoặc **NEI** (Not Enough Information) dựa trên các mô hình tiên tiến.
+        """)