"""Streamlit demo for SemViQA: Vietnamese fact-checking (evidence + verdict)."""

import streamlit as st
import torch
from transformers import AutoTokenizer

from semviqa.ser.qatc_model import QATCForQuestionAnswering
from semviqa.ser.ser_eval import extract_evidence_tfidf_qatc
from semviqa.tvc.model import ClaimModelForClassification
from semviqa.tvc.tvc_eval import classify_claim

# Resolve the inference device once instead of re-computing it at every call site.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


@st.cache_resource()
def load_model(model_name, model_class):
    """Load and cache a tokenizer/model pair from the Hugging Face hub.

    Cached via ``st.cache_resource`` so reruns of the Streamlit script do not
    re-download or re-instantiate the weights.

    Args:
        model_name: Hub identifier of the pretrained checkpoint.
        model_class: Model class exposing ``from_pretrained``.

    Returns:
        Tuple of ``(tokenizer, model)``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = model_class.from_pretrained(model_name)
    return tokenizer, model


# --- UI configuration --------------------------------------------------------
st.set_page_config(page_title="SemViQA Demo", layout="wide")
# NOTE(review): the HTML markup of these markdown blocks appears to have been
# stripped during extraction; the visible text is preserved verbatim.
st.markdown(""" """, unsafe_allow_html=True)
st.markdown("\n\n🔍 SemViQA: Vietnamese Fact-Checking System\n\n", unsafe_allow_html=True)
st.markdown("\n\nEnter a claim and context to verify its accuracy\n\n", unsafe_allow_html=True)

# --- Sidebar: configuration settings -----------------------------------------
with st.sidebar.expander("⚙️ Settings", expanded=False):
    tfidf_threshold = st.slider("🔧 TF-IDF Threshold", 0.0, 1.0, 0.5, 0.01)
    length_ratio_threshold = st.slider("📏 Length Ratio Threshold", 0.1, 1.0, 0.5, 0.01)
    qatc_model_name = st.selectbox("🤖 QATC Model", ["xuandin/semviqa-qatc-vimrc-viwikifc"])
    bc_model_name = st.selectbox("🏷️ Binary Classification Model", ["xuandin/semviqa-bc"])
    tc_model_name = st.selectbox("📊 Three-Class Model", ["xuandin/semviqa-tc"])

# Load the selected checkpoints (cached across reruns by load_model).
tokenizer_qatc, model_qatc = load_model(qatc_model_name, QATCForQuestionAnswering)
tokenizer_bc, model_bc = load_model(bc_model_name, ClaimModelForClassification)
tokenizer_tc, model_tc = load_model(tc_model_name, ClaimModelForClassification)

# --- User input ---------------------------------------------------------------
claim = st.text_area("✍️ Enter Claim", "Vietnam is a country in Southeast Asia.")
context = st.text_area("📖 Enter Context", "Vietnam is a country located in Southeast Asia, covering an area of over 331,000 km² with a population of more than 98 million people.")

if st.button("🔎 Verify"):
    # Step 1: extract the evidence sentence most relevant to the claim.
    evidence = extract_evidence_tfidf_qatc(
        claim,
        context,
        model_qatc,
        tokenizer_qatc,
        DEVICE,
        confidence_threshold=tfidf_threshold,
        length_ratio_threshold=length_ratio_threshold,
    )

    # Step 2: classify. Three-class model first (prediction 0 == NEI); only
    # consult the binary model when the claim looks verifiable.
    verdict = "NEI"
    prob3class, pred_tc = classify_claim(claim, evidence, model_tc, tokenizer_tc, DEVICE)
    if pred_tc != 0:
        prob2class, pred_bc = classify_claim(claim, evidence, model_bc, tokenizer_bc, DEVICE)
        # Binary model: prediction 0 == SUPPORTED. On disagreement, trust
        # whichever classifier reported the higher confidence.
        if pred_bc == 0:
            verdict = "SUPPORTED"
        elif prob2class > prob3class:
            verdict = "REFUTED"
        else:
            verdict = ["NEI", "SUPPORTED", "REFUTED"][pred_tc]

    # Step 3: render the result.
    st.markdown(
        f"\n\n📌 Result\n\n🔍 Evidence: {evidence}\n\n✅ Verdict: {verdict}\n\n",
        unsafe_allow_html=True,
    )