Marroco93 committed
Commit abb61e1 · 1 Parent(s): 3717137

no message

Files changed (1)
  1. main.py +15 -9
main.py CHANGED
@@ -11,7 +11,7 @@ import nltk
 import os
 import google.protobuf  # This line should execute without errors if protobuf is installed correctly
 import sentencepiece
-from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 import spacy
 
 
@@ -139,7 +139,12 @@ def segment_text(text: str, max_tokens=500): # Setting a conservative limit bel
     return segments
 
 
-tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
+# Load the tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
+model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased")
+
+# Set up the pipeline
+classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
 
 def robust_segment_text(text: str, max_tokens=510):
     doc = nlp(text)
@@ -165,16 +170,17 @@ def robust_segment_text(text: str, max_tokens=510):
     return segments
 
 
-classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
-
 def classify_segments(segments):
-    labels = ["Coverage Details", "Exclusions", "Premiums", "Claims Process",
-              "Policy Limits", "Legal and Regulatory Information", "Renewals and Cancellations",
-              "Discounts and Incentives", "Duties and Responsibilities", "Contact Information"]
+    labels = [
+        "Coverage Details", "Exclusions", "Premiums", "Claims Process",
+        "Policy Limits", "Legal and Regulatory Information", "Renewals and Cancellations",
+        "Discounts and Incentives", "Duties and Responsibilities", "Contact Information"
+    ]
     classified_segments = []
     for segment in segments:
-        result = classifier(segment, candidate_labels=labels, multi_label=True)
-        classified_segments.append(result)
+        # Note: Adjust the input here based on how your model was trained
+        predictions = classifier(segment)
+        classified_segments.append(predictions)
     return classified_segments
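For reference, a minimal usage sketch of the pipeline this commit sets up; it is not part of the commit. The sample string is invented for illustration, and the setup lines simply repeat the ones added in the diff. Since distilbert-base-uncased ships without a fine-tuned classification head, the pipeline emits generic LABEL_0/LABEL_1 scores until the model is trained on the insurance labels listed in classify_segments.

from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

# Same setup as in the diff above; the classification head is randomly
# initialized, and transformers will print a warning saying so.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased")
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

# Invented sample text for illustration only.
sample = "Premiums are due on the first of each month. Late payments may incur a fee."

# With an untrained head, expect output like [{'label': 'LABEL_0', 'score': 0.53}],
# where the label mapping and score carry no meaning yet.
print(classifier(sample))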