OsBaran committed on
Commit
dc61da1
1 Parent(s): 53c07ae

Add application

Browse files
Files changed (1) hide show
  1. app.py +81 -48
app.py CHANGED
@@ -13,6 +13,10 @@ from sklearn.feature_extraction.text import TfidfVectorizer
13
  from sklearn.metrics.pairwise import cosine_similarity
14
  from keybert import KeyBERT
15
  import torch
 
 
 
 
16
  # Buraya İngilizce modelinizi yazın
17
  model = AutoModelForSequenceClassification.from_pretrained("OsBaran/Roberta-Classification-Model")
18
  tokenizer = AutoTokenizer.from_pretrained("roberta-base")
@@ -140,71 +144,100 @@ def sbert_similarity(input_text, bbc_articles):
140
  # En yüksek benzerlik skoru ve karşılık gelen haber
141
  max_score, most_similar_news = cosine_scores.max(), bbc_articles[cosine_scores.argmax().item()]
142
  print(f"En benzer haber skoru: {max_score:.2f}")
 
 
 
 
143
  # Türkçe modelini yükle
144
- model_tr_name = "dbmdz/bert-base-turkish-cased" # Buraya Türkçe modelinizi yazın
145
- model_tr = AutoModelForSequenceClassification.from_pretrained(model_tr_name)
146
- tokenizer_tr = AutoTokenizer.from_pretrained(model_tr_name)
147
- classifier_tr = pipeline("sentiment-analysis", model=model_tr, tokenizer=tokenizer_tr)
 
 
 
 
 
 
 
 
 
 
 
148
 
149
- # Gradio ile API oluştur
150
- def predict(input_news, language):
151
- if language == "en":
152
- keywords = extract_keywords_keybert(input_news)
153
- search_query = ' '.join(keywords)
154
- news_articles = fetch_news_from_api(api_key, search_query)
 
 
 
 
 
 
 
 
 
155
 
156
- trusted_sources = [
157
  "bbc news",
158
  "cnn",
159
  "reuters.com",
160
  "theguardian.com",
161
  "time",
162
  # Diğer güvenilir kaynaklar...
163
- ]
164
 
165
- trusted_articles = filter_trusted_sources(news_articles, trusted_sources)
166
  # # Sonuçları yazdır
167
- trusted_articles_urls = []
168
- for i in trusted_articles:
169
- trusted_articles_urls.append(i["url"])
170
-
171
- if trusted_articles:
172
- print(f"\nGüvenilir kaynaklardan bulunan haberler:\n")
173
- print(trusted_articles_urls)
174
- bbc_articles = [fetch_news_content(link) for link in trusted_articles_urls]
175
- similarities = compare_with_thrusted(input_news, bbc_articles)
176
- sbert_similarity(input_news, bbc_articles)
177
- print(similarities)
178
- max_similarity = max(similarities)
179
- threshold = 0.8
180
- if max_similarity > threshold:
181
- print(f"Sonuç: Doğru (Benzerlik: {max_similarity:.2f})")
182
- else:
183
- # Benzerlik bulunmazsa tahmin algoritmasını kullanın ve açıklama sağlayın
184
- prediction = predict_with_roberta(model, tokenizer, input_news)
185
- explanation = explain_roberta_prediction(model, tokenizer, input_news)
186
- # Tahmin sonucunu yazdır
187
- # result = "Doğru" if prediction == 1 else "Yanlış"
188
- # print(f"Haberin durumu: {result}")
189
- print(explanation)
190
- return {explanation}
191
-
192
-
193
-
194
-
195
-
196
  else:
197
- print("Güvenilir kaynaklardan hiç haber bulunamadı.")
198
  prediction = predict_with_roberta(model, tokenizer, input_news)
199
  explanation = explain_roberta_prediction(model, tokenizer, input_news)
200
  # Tahmin sonucunu yazdır
201
- result = "Doğru" if prediction == 1 else "Yanlış"
202
- print(f"Haberin durumu: {result}")
203
- print("Haberin açıklaması:")
204
  print(explanation)
205
- return {explanation}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  elif language == "tr":
207
- result = classifier_tr(text)
 
 
 
 
208
  else:
209
  result = {"error": "Unsupported language"}
210
  # return result
 
13
  from sklearn.metrics.pairwise import cosine_similarity
14
  from keybert import KeyBERT
15
  import torch
16
+ from deep_translator import DeeplTranslator
17
+ import torch
18
+ import torch.nn.functional as F
19
# NOTE(review): hard-coded credential committed to source control. Rotate it and
# load it from an environment variable or secret store instead.
# NOTE(review): this same value is passed to DeeplTranslator (translate_text) and
# to fetch_news_from_api (enModelPredictAlgo) -- confirm both services really
# share one key; they presumably need separate credentials.
+ api_key = "69f73328-5f95-4eda-813a-16af8c688404:fx"
20
  # Buraya İngilizce modelinizi yazın
21
  model = AutoModelForSequenceClassification.from_pretrained("OsBaran/Roberta-Classification-Model")
22
  tokenizer = AutoTokenizer.from_pretrained("roberta-base")
 
144
  # En yüksek benzerlik skoru ve karşılık gelen haber
145
  max_score, most_similar_news = cosine_scores.max(), bbc_articles[cosine_scores.argmax().item()]
146
  print(f"En benzer haber skoru: {max_score:.2f}")
147
+
148
def translate_text(text, source_lang='tr', target_lang='en'):
    """Translate ``text`` with DeepL and return the translated string.

    Args:
        text: The text to translate.
        source_lang: Language code of ``text`` (defaults to Turkish).
        target_lang: Language code to translate into (defaults to English).

    Returns:
        Whatever ``DeeplTranslator.translate`` returns for ``text``.
    """
    # A new translator is built on every call, configured with the
    # module-level ``api_key``.
    translator = DeeplTranslator(
        api_key=api_key,
        source=source_lang,
        target=target_lang,
    )
    return translator.translate(text)
151
  # Türkçe modelini yükle
152
+ # model_tr_name = "dbmdz/bert-base-turkish-cased" # Buraya Türkçe modelinizi yazın
153
+ # model_tr = AutoModelForSequenceClassification.from_pretrained(model_tr_name)
154
+ # tokenizer_tr = AutoTokenizer.from_pretrained(model_tr_name)
155
+ # classifier_tr = pipeline("sentiment-analysis", model=model_tr, tokenizer=tokenizer_tr)
156
+
157
# Turkish pipeline: the tokenizer is loaded from the "dbmdz/bert-base-turkish-cased"
# checkpoint, while the classifier weights are loaded from a separate checkpoint
# configured with two output labels.
# NOTE(review): presumably the classifier was fine-tuned from that same base
# checkpoint so the vocabularies match -- confirm.
+ tokenizer_tr = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")
158
+ model_tr = AutoModelForSequenceClassification.from_pretrained("OsBaran/Bert-Classification-Model-Tr-3", num_labels=2)
159
def trModelPredictAlgo(input_news):
    """Classify a Turkish news text with the Turkish model.

    Args:
        input_news: The Turkish text to classify.

    Returns:
        A string of the form
        ``"Predicted class: <index>, Prediction probability: <pct>%"`` where
        ``<index>`` is the arg-max class and ``<pct>`` its softmax probability
        as a percentage with two decimals.
    """
    # Use the Turkish tokenizer/model pair; the English ``tokenizer``/``model``
    # globals belong to the RoBERTa pipeline and must not be used here.
    inputs = tokenizer_tr(
        input_news,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )

    # Put the inputs on whatever device the Turkish model lives on, so the
    # forward pass cannot hit a device mismatch.
    target_device = model_tr.device
    inputs = {key: value.to(target_device) for key, value in inputs.items()}

    # Inference only: no gradients needed.
    with torch.no_grad():
        logits = model_tr(**inputs).logits

    # Softmax turns the logits into class probabilities.
    probabilities = F.softmax(logits, dim=-1)

    # Highest probability and its class index for the (single) input row.
    top_probability, top_class = torch.max(probabilities[0], dim=-1)
    predicted_class = top_class.item()
    predicted_probability = top_probability.item()

    class_text = f"Predicted class: {predicted_class}"
    probability_text = f"Prediction probability: {predicted_probability * 100:.2f}%"
    print(class_text)
    print(probability_text)
    # Separate the two parts so the returned message is readable.
    return f"{class_text}, {probability_text}"
179
def enModelPredictAlgo(input_news):
    """Assess an English news text against trusted sources, falling back to RoBERTa.

    Keywords extracted from ``input_news`` are used to search for news
    articles; articles from the trusted-source list are fetched and compared
    with the input. If the best similarity exceeds the threshold, the text is
    reported as confirmed. Otherwise (or when no trusted article is found) the
    RoBERTa explanation is returned.

    Args:
        input_news: The English news text to assess.

    Returns:
        The confirmation message when a trusted article is similar enough,
        otherwise the value returned by ``explain_roberta_prediction``.
    """
    keywords = extract_keywords_keybert(input_news)
    search_query = ' '.join(keywords)
    # NOTE(review): ``api_key`` is the same module-level key translate_text
    # hands to DeepL -- confirm the news API accepts it.
    news_articles = fetch_news_from_api(api_key, search_query)

    trusted_sources = [
        "bbc news",
        "cnn",
        "reuters.com",
        "theguardian.com",
        "time",
        # Other trusted sources...
    ]

    trusted_articles = filter_trusted_sources(news_articles, trusted_sources)
    trusted_articles_urls = [article["url"] for article in trusted_articles]

    if not trusted_articles:
        # Nothing to compare against: rely on the classifier alone.
        print("Güvenilir kaynaklardan hiç haber bulunamadı.")
        prediction = predict_with_roberta(model, tokenizer, input_news)
        explanation = explain_roberta_prediction(model, tokenizer, input_news)
        result = "Doğru" if prediction == 1 else "Yanlış"
        print(f"Haberin durumu: {result}")
        print("Haberin açıklaması:")
        print(explanation)
        return explanation

    print("\nGüvenilir kaynaklardan bulunan haberler:\n")
    print(trusted_articles_urls)
    bbc_articles = [fetch_news_content(link) for link in trusted_articles_urls]
    similarities = compare_with_thrusted(input_news, bbc_articles)
    sbert_similarity(input_news, bbc_articles)
    print(similarities)

    max_similarity = max(similarities)
    threshold = 0.8
    if max_similarity > threshold:
        # Return the verdict as well as printing it, so callers receive a
        # result on this path too.
        verdict = f"Sonuç: Doğru (Benzerlik: {max_similarity:.2f})"
        print(verdict)
        return verdict

    # No trusted article is similar enough: use the classifier and explain it.
    # The prediction value itself is unused on this path; the call is kept
    # because the helper is not visible here and may have side effects.
    predict_with_roberta(model, tokenizer, input_news)
    explanation = explain_roberta_prediction(model, tokenizer, input_news)
    print(explanation)
    return explanation
230
+ # Gradio ile API oluştur
231
def predict(input_news, language):
    """Entry point for the app: route a news text to the model(s) for its language.

    Args:
        input_news: The news text to assess.
        language: ``"en"`` or ``"tr"``.

    Returns:
        A dict. For ``"en"``: ``{"Sonuç": <English pipeline result>}``.
        For ``"tr"``: the English pipeline result on the translated text plus
        the Turkish model result, under ``"İngilizce Model Sonucu"`` and
        ``"Türkçe Model Sonucu"``. For any other language:
        ``{"error": "Unsupported language"}``.
    """
    if language == "en":
        result = enModelPredictAlgo(input_news=input_news)
        return {"Sonuç": result}

    if language == "tr":
        # The English pipeline needs English text, so translate first; the
        # Turkish model runs on the original text.
        input_news_en = translate_text(input_news)
        result1 = enModelPredictAlgo(input_news_en)
        result2 = trModelPredictAlgo(input_news=input_news)
        return {"İngilizce Model Sonucu": result1, "Türkçe Model Sonucu": result2}

    # Report unsupported languages explicitly instead of returning None.
    return {"error": "Unsupported language"}