import re
from collections import Counter

import gradio as gr
import requests
import shap
import torch
import torch.nn.functional as F
from bs4 import BeautifulSoup
from deep_translator import DeeplTranslator
from keybert import KeyBERT
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          TextClassificationPipeline)

api_key_deepl = "69f73328-5f95-4eda-813a-16af8c688404:fx"

# Set your English model here
model = AutoModelForSequenceClassification.from_pretrained("OsBaran/Roberta-Classification-Model")
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # pick the device
model.to(device)  # keep the model on the same device as its inputs


def predict_with_roberta(model, tokenizer, input_text):
    # Tokenize the input text and convert it to tensors
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)

    # Run the model
    with torch.no_grad():
        outputs = model(**inputs)

    # Take the logits and derive the prediction
    logits = outputs.logits
    prediction = torch.argmax(logits, dim=-1).item()  # 0: fake, 1: real
    return prediction


def explain_roberta_prediction(model, tokenizer, input_text):
    # Tokenize
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)

    # Run the model
    with torch.no_grad():
        outputs = model(**inputs)

    # Take the logits and class probabilities
    logits = outputs.logits
    probabilities = torch.softmax(logits, dim=-1).cpu().numpy()[0]

    # Predicted class and its probability
    predicted_class = torch.argmax(logits, dim=-1).item()
    result = "Real" if predicted_class == 1 else "Fake"
    explanation = f"Model prediction: {result} (probability: {probabilities[predicted_class]:.2f})\n"

    # Surface a few salient tokens (as an example, simply the first 10)
    tokenized_input = tokenizer.tokenize(tokenizer.decode(inputs['input_ids'][0]))
    important_tokens = tokenized_input[:10]
    explanation += "The model's decision is based on the following key tokens:\n" + ', '.join(important_tokens)
    return explanation


pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True, device=device)


def score_and_visualize(text):
    prediction = pipe([text])
    print(prediction[0])

    explainer = shap.Explainer(pipe)
    shap_values = explainer([text])
    shap.plots.text(shap_values)


api_key = '764e3b45715b449a8aedb8cd8018dfed'


def fetch_news_from_api(api_key, query, page_size=100):
    url = f'https://newsapi.org/v2/everything?q={query}&pageSize={page_size}&apiKey={api_key}'
    response = requests.get(url)

    # Check the API response
    if response.status_code == 200:
        articles = response.json().get('articles', [])
        return articles
    else:
        print(f"Error: {response}")
        return []


def extract_keywords(text, top_n=5):
    # 1. Clean the text: strip punctuation and lowercase
    text = re.sub(r'[^\w\s]', '', text.lower())
    # 2. Tokenize
    words = text.split()
    # 3. Remove stop words
    keywords = [word for word in words if word not in ENGLISH_STOP_WORDS]
    # 4. Count the keywords and keep the most frequent ones
    keyword_counts = Counter(keywords)
    most_common_keywords = keyword_counts.most_common(top_n)
    return [keyword for keyword, _ in most_common_keywords]
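
# Illustrative usage of the pieces above, kept as comments so nothing runs at
# import time. The sample headline is made up; the label mapping (0 = fake,
# 1 = real) follows predict_with_roberta:
#
#   sample = "Scientists claim chocolate cures all known diseases."
#   print(predict_with_roberta(model, tokenizer, sample))  # 0 or 1
#   print(extract_keywords(sample, top_n=3))  # e.g. ['scientists', 'claim', 'chocolate']
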
kw_model = KeyBERT('all-mpnet-base-v2')  # load KeyBERT on top of an SBERT model


def extract_keywords_keybert(text, num_keywords=2):
    keywords = kw_model.extract_keywords(text, keyphrase_ngram_range=(1, 2), stop_words='english', top_n=num_keywords)
    return [kw[0] for kw in keywords]


def filter_trusted_sources(articles, trusted_sources):
    trusted_articles = []
    for article in articles:
        source_name = article['source']['name'].lower()  # lowercase the source name
        if any(trusted_source in source_name for trusted_source in trusted_sources):
            trusted_articles.append(article)
    return trusted_articles


def fetch_news_content(link):
    response = requests.get(link)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        # Extract the title and the body text
        title = soup.find('h1').get_text() if soup.find('h1') else "No title found"
        content = ' '.join([p.get_text() for p in soup.find_all('p')])
        return title, content
    else:
        print(f"Error fetching content: {response.status_code}")
        return "", ""


def compare_with_trusted(input_text, bbc_articles):
    texts = [input_text] + [article[1] for article in bbc_articles]
    vectors = TfidfVectorizer().fit_transform(texts).toarray()
    similarities = cosine_similarity(vectors[0:1], vectors[1:]).flatten()
    return similarities


def sbert_similarity(input_text, bbc_articles):
    # Load the SBERT model
    sbert_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

    # Embed the user text and the articles fetched from the web
    input_embedding = sbert_model.encode(input_text, convert_to_tensor=True)
    news_embeddings = sbert_model.encode([news[1] for news in bbc_articles], convert_to_tensor=True)

    # Compute the similarities
    cosine_scores = util.pytorch_cos_sim(input_embedding, news_embeddings)

    # Highest similarity score and the corresponding article
    max_score, most_similar_news = cosine_scores.max(), bbc_articles[cosine_scores.argmax().item()]
    print(f"Most similar article score: {max_score:.2f}")


def translate_text(text, source_lang='tr', target_lang='en'):
    translated = DeeplTranslator(api_key=api_key_deepl, source=source_lang, target=target_lang).translate(text)
    return translated


# Load the Turkish model
# model_tr_name = "dbmdz/bert-base-turkish-cased"  # set your Turkish model here
# model_tr = AutoModelForSequenceClassification.from_pretrained(model_tr_name)
# tokenizer_tr = AutoTokenizer.from_pretrained(model_tr_name)
# classifier_tr = pipeline("sentiment-analysis", model=model_tr, tokenizer=tokenizer_tr)
tokenizer_tr = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")
model_tr = AutoModelForSequenceClassification.from_pretrained("OsBaran/Bert-Classification-Model-Tr-3", num_labels=2)
model_tr.to(device)  # keep the Turkish model on the same device as its inputs


def trModelPredictAlgo(input_news):
    # Tokenize with the Turkish tokenizer and run the Turkish model
    inputs = tokenizer_tr(input_news, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # Run the model
    with torch.no_grad():
        outputs = model_tr(**inputs)
    logits = outputs.logits

    # Apply softmax to get class probabilities
    probabilities = F.softmax(logits, dim=-1)

    # Find the most likely class and its probability
    predicted_class = torch.argmax(probabilities, dim=-1)
    predicted_probability = probabilities[0, predicted_class].item()
    sonuc = "Fake" if predicted_class.item() == 0 else "Real"

    # Print the result
    print(f"Predicted class: {predicted_class.item()}")
    print(f"Prediction probability: {predicted_probability * 100:.2f}%")
    return f"Prediction: {sonuc}, probability: {predicted_probability * 100:.2f}%"
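
# Illustrative check of the similarity helpers, again as comments. The URLs are
# placeholders, not real articles; fetch_news_content returns (title, content)
# tuples, which is the shape compare_with_trusted and sbert_similarity expect:
#
#   articles = [fetch_news_content(url) for url in [
#       "https://example.com/story-1",
#       "https://example.com/story-2",
#   ]]
#   tfidf_scores = compare_with_trusted("Claim to verify.", articles)
#   sbert_similarity("Claim to verify.", articles)  # prints the best SBERT score
#   print(tfidf_scores.max())
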
def enModelPredictAlgo(input_news):
    keywords = extract_keywords_keybert(input_news)
    search_query = ' '.join(keywords)
    news_articles = fetch_news_from_api(api_key, search_query)

    trusted_sources = [
        "bbc news",
        "cnn",
        "reuters.com",
        "theguardian.com",
        "time",
        # other trusted sources...
    ]
    trusted_articles = filter_trusted_sources(news_articles, trusted_sources)
    trusted_articles_urls = [article["url"] for article in trusted_articles]

    if trusted_articles:
        print("\nArticles found from trusted sources:\n")
        print(trusted_articles_urls)

        bbc_articles = [fetch_news_content(link) for link in trusted_articles_urls]
        similarities = compare_with_trusted(input_news, bbc_articles)
        sbert_similarity(input_news, bbc_articles)
        print(similarities)

        max_similarity = max(similarities)
        threshold = 0.8
        if max_similarity > threshold:
            print(f"Result: Real (similarity: {max_similarity:.2f})")
            return f"Result: Real (similarity to a trusted source: {max_similarity:.2f})"
        else:
            # No sufficiently similar article: fall back to the model and explain
            prediction = predict_with_roberta(model, tokenizer, input_news)
            explanation = explain_roberta_prediction(model, tokenizer, input_news)
            print(explanation)
            return explanation
    else:
        print("No articles found from trusted sources.")
        prediction = predict_with_roberta(model, tokenizer, input_news)
        explanation = explain_roberta_prediction(model, tokenizer, input_news)

        # Print the prediction result
        result = "Real" if prediction == 1 else "Fake"
        print(f"Article status: {result}")
        print("Explanation:")
        print(explanation)
        return explanation


# Build the API with Gradio
def predict(input_news, language):
    if language == "en":
        result = enModelPredictAlgo(input_news=input_news)
        return {"Result": result}
    elif language == "tr":
        input_news_en = translate_text(input_news)
        result1 = enModelPredictAlgo(input_news_en)
        result2 = trModelPredictAlgo(input_news=input_news)
        return {"English model result": result1, "Turkish model result": result2}
    else:
        return {"error": "Unsupported language"}


# Interface
gr.Interface(
    fn=predict,
    inputs=[gr.Textbox(label="Text Input"), gr.Dropdown(["en", "tr"], label="Language")],
    outputs="json",
).launch()
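
# Once launched, the interface can also be queried programmatically. A minimal
# sketch with gradio_client (assumes the default local URL and the endpoint
# name Gradio auto-generates for `predict`; adjust both for your deployment):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   out = client.predict("Some news text to verify.", "en", api_name="/predict")
#   print(out)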