File size: 1,765 Bytes
58d4ef5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from sys import argv
import nltk
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib

# Restore the persisted artifacts from disk: first the trained classifier,
# then the TF-IDF vectorizer that was used when training it.
# NOTE(review): joblib.load unpickles its input, so only point these paths at
# files from a trusted source.
loaded_classifier = joblib.load("text_identification_model.pkl")
vectorizer = joblib.load("text_identification_vectorizer.pkl")

# Human-readable names for the numeric labels the classifier predicts.
categories = {
    0: 'Other',
    1: 'Bible',
    2: 'Talmud',
}

def parse_text(new_text):
    """Classify ``new_text`` and print its category and confidence score.

    The text is transformed with the module-level ``vectorizer``, classified
    with ``loaded_classifier``, and a one-line summary is printed to stdout.
    The reported confidence is the probability the classifier assigns to the
    class it actually predicted.

    Args:
        new_text: The text to classify, as a single string.

    Returns:
        None. The result is printed, not returned.

    Raises:
        KeyError: If the predicted label has no entry in ``categories``.
    """
    # transform() is given a one-element list so the single text is treated
    # as one document.
    new_text_tfidf = vectorizer.transform([new_text])

    # Label predicted for the (only) document in the batch.
    predicted_label = loaded_classifier.predict(new_text_tfidf)[0]

    # The columns of predict_proba follow the order of classes_, so look up
    # the column of the predicted label rather than hard-coding index 1
    # (which would always report the "Bible" probability, whatever the
    # prediction was).
    probabilities = loaded_classifier.predict_proba(new_text_tfidf)[0]
    class_index = list(loaded_classifier.classes_).index(predicted_label)
    confidence_score = probabilities[class_index]

    # Map the numeric label to its display name.
    predicted_category = categories[predicted_label]

    # Print the prediction and the confidence score
    print(f"Text: {new_text} | Prediction: {predicted_category} | Confidence Score: {confidence_score:.4f}")


# Built-in sample inputs; these are classified one by one when the script is
# run without a command-line argument.
# NOTE(review): the literals below look mis-encoded (possibly UTF-8 text that
# was decoded with the wrong codec) — confirm against the original file
# before relying on them.
text_list = [
'ื›ืžื” ื™ืคื” ื•ื ืื” ื›ืฉืฉื•ืžืขื™ื ื”ืฉื™ืจื” ืฉืœื”ื',
'ื—ื“ืฉื•ืช ื”ืขืจื‘: ืฉืœื•ืฉื” ืื ืฉื™ื ื ืฆืื• ื˜ื•ื‘ืขื™ื ื‘ื›ื™ื ืจืช',
'ื•ื”ื™ื” ื‘ืขืช ื”ื”ื™ื ืื—ืคืฉ ืืช ื™ืจื•ืฉืœื™ื ื‘ื ืจื•ืช ื•ื”ื•ื“ืขืชื™ื” ืืช ื›ืœ ืชื•ืขื‘ื•ืชื™ื”',
'ื•ื™ืืžืจ ืžืฉื” ืืœ ื‘ื ื™ ื™ืฉืจืืœ',
'ื“ืืžืจ ื ืฉื™ื ืžื‘ื™ื ืฉืขื™ืจ ืชื• ื”ื ื“ืชื ืŸ',
'ืืžืจ ืœื™ื” ืื‘ื™ื™ ืœืจื‘ ื–ืขื™ืจื',
'ื•ืื™ื”ื• ืœื ืงื ื™ื”ื™ื‘ ืฉืขื•ืจื ื‘ืžืฉื›ื',]


# Classify the first command-line argument when one is given; otherwise fall
# back to the built-in samples. argv[1:2] is a one-element list when an
# argument exists and an empty (falsy) list when it does not.
for new_text in (argv[1:2] or text_list):
    parse_text(new_text)