Spaces:

cdb24
/

Milestone3

Sleeping

App Files Files Community

cdb24 committed on Apr 25, 2023

Commit

e324a14

1 Parent(s): 075edf9

Upload 3 files

Browse files

Files changed (4) hide show

.gitattributes +2 -0
test.csv +3 -0
train.csv +3 -0
train.py +47 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ test.csv filter=lfs diff=lfs merge=lfs -text
2	+ train.csv filter=lfs diff=lfs merge=lfs -text

test.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c2513ce4abb98c4d1d216e3ca0d4377d57589a0989aa8c06a840509a16c786e8
+size 60354593

train.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bd4084611bd27c939ba98e5e63bc3e5a2c1a4e99477dcba46c829e4c986c429d
+size 68802655

train.py ADDED Viewed

	@@ -0,0 +1,47 @@

+# pip install transformers
+from transformers import pipeline
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import torch.nn.functional as F
+model_name = "distilbert-base-uncased-finetuned-sst-2-english"
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
+res = classifier(["We are very happy to show you the 🤗 Transformers Library", "We hope you don't hate it."])
+#for result in res:
+ #   print(res)
+tokens = tokenizer.tokenize("We are very happy to show you the 🤗 Transformers Library")
+token_ids = tokenizer.convert_tokens_to_ids(tokens)
+input_ids = tokenizer("We are very happy to show you the 🤗 Transformers Library");
+#print(f' Tokens: {tokens}')
+#print(f'Token IDs: {token_ids}')
+#print(f'Input IDs: {input_ids}')
+x_train = ["We are very happy to show you the 🤗 Transformers Library",
+           "We hope you don't hate it."]
+batch = tokenizer(x_train, padding=True, truncation=True, max_length=512, return_tensors="pt")
+with torch.no_grad():
+    outputs = model(**batch, labels=torch.tensor([1,0]))
+    print(outputs)
+    predictions = F.softmax(outputs.logits, dim=1)
+    print(predictions)
+    labels = torch.argmax(predictions, dim=1)
+    print(labels)
+    labels = [model.config.id2label[label_id] for label_id in labels.tolist()]
+    print(labels)
+save_directory = "saved"
+tokenizer.save_pretrained(save_directory)
+model.save_pretrained(save_directory)
+tokenizer = AutoTokenizer.from_pretrained(save_directory)
+model = AutoModelForSequenceClassification.from_pretrained(save_directory)