Spaces:
Sleeping
Sleeping
File size: 1,765 Bytes
58d4ef5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
from sys import argv
import nltk
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib
# Restore the persisted classifier and the TF-IDF vectorizer it is used with.
# NOTE: joblib.load unpickles its input, so these files must come from a
# trusted source.
loaded_classifier = joblib.load("text_identification_model.pkl")
vectorizer = joblib.load("text_identification_vectorizer.pkl")

# Human-readable names for the class ids, keyed by id (0, 1, 2).
categories = dict(enumerate(('Other', 'Bible', 'Talmud')))
def parse_text(new_text):
    """Classify *new_text* and print the predicted category with its confidence.

    The text is vectorized with the module-level ``vectorizer`` and scored by
    the module-level ``loaded_classifier``. The printed confidence is the
    probability the classifier assigns to the class it actually predicted.

    Args:
        new_text: The raw text to classify.

    Returns:
        None. The result is written to standard output.
    """
    # Transform the new text using the TF-IDF vectorizer.
    new_text_tfidf = vectorizer.transform([new_text])

    # Predict the class and get the per-class probabilities for the one sample.
    prediction = loaded_classifier.predict(new_text_tfidf)
    probabilities = loaded_classifier.predict_proba(new_text_tfidf)
    predicted_label = prediction[0]

    # predict_proba columns are ordered like ``classes_``, so look up the
    # column of the predicted label instead of hard-coding column 1 ("Bible"),
    # which reported the wrong probability for every other prediction.
    class_index = list(loaded_classifier.classes_).index(predicted_label)
    confidence_score = probabilities[0, class_index]

    # Determine the predicted category label.
    predicted_category = categories[predicted_label]

    # Print the prediction and the confidence score.
    print(f"Text: {new_text} | Prediction: {predicted_category} | Confidence Score: {confidence_score:.4f}")
# Built-in sample inputs, classified one by one when the script is run
# without a command-line argument.
# NOTE(review): these literals look mis-encoded (mojibake) rather than
# readable text — presumably Hebrew that was decoded with the wrong charset
# somewhere along the way. Confirm against the original file before relying
# on them as meaningful samples.
text_list = [
'ืืื ืืคื ืื ืื ืืฉืฉืืืขืื ืืฉืืจื ืฉืืื',
'ืืืฉืืช ืืขืจื: ืฉืืืฉื ืื ืฉืื ื ืฆืื ืืืืขืื ืืืื ืจืช',
'ืืืื ืืขืช ืืืื ืืืคืฉ ืืช ืืจืืฉืืื ืื ืจืืช ืืืืืขืชืื ืืช ืื ืชืืขืืืชืื',
'ืืืืืจ ืืฉื ืื ืื ื ืืฉืจืื',
'ืืืืจ ื ืฉืื ืืืื ืฉืขืืจ ืชื ืื ืืชื ื',
'ืืืจ ืืื ืืืื ืืจื ืืขืืจื',
'ืืืืื ืื ืงื ืืืื ืฉืขืืจื ืืืฉืื',]
# Classify the first command-line argument when one is supplied; otherwise
# run through the built-in sample texts.
_inputs = [argv[1]] if len(argv) > 1 else text_list
for new_text in _inputs:
    parse_text(new_text)
|