# Feliks Zaslavskiy
# make it baseline
# d75ae5b
from sentence_transformers import LoggingHandler, SentenceTransformer, evaluation
from sentence_transformers.readers import InputExample
import csv
import logging
#### Logging setup: show timestamped messages at INFO level and above,
#### emitted through the LoggingHandler imported from sentence_transformers
logging.basicConfig(
    handlers=[LoggingHandler()],
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s - %(message)s",
)
logger = logging.getLogger(__name__)
#### /logging setup
# Checkpoint to evaluate. The active value is a published model id; the
# commented-out alternatives below are local training output directories that
# can be swapped in to score a fine-tuned run instead.
#model_name='output/training_OnlineConstrativeLoss-2023-03-11_23-47-34'
#model_name= 'output/training_OnlineConstrativeLoss-2023-03-14_01-24-44'
#86% so far
# model_name = 'output/training_OnlineConstrativeLoss-2023-03-17_16-10-39'
model_name = "sentence-transformers/paraphrase-albert-base-v2"

# Load the selected checkpoint once; the evaluation below reuses this object.
model_sbert = SentenceTransformer(model_name)
# Read the labelled dev pairs. The file is pipe-delimited and its header row
# must provide the ADDRESS1, ADDRESS2 and ARE_SAME columns used below.
dev_sentences1 = []
dev_sentences2 = []
dev_labels = []
# newline='' hands line-ending handling to the csv module, as its
# documentation requires for file objects passed to a reader.
with open("dev_set_training.csv", encoding='utf8', newline='') as fIn:
    # QUOTE_NONE: quote characters in the data are kept as ordinary text.
    reader = csv.DictReader(fIn, delimiter='|', quoting=csv.QUOTE_NONE)
    for row in reader:
        dev_sentences1.append(row['ADDRESS1'])
        dev_sentences2.append(row['ADDRESS2'])
        # ARE_SAME is parsed as an integer label for the pair.
        dev_labels.append(int(row['ARE_SAME']))

binary_acc_evaluator = evaluation.BinaryClassificationEvaluator(dev_sentences1, dev_sentences2, dev_labels)

# Keep what the evaluator call returns and print that: printing the evaluator
# object itself only shows its repr, not the evaluation outcome.
# NOTE(review): the return type (single score vs. dict of metrics) depends on
# the installed sentence-transformers version -- confirm against its docs.
dev_score = binary_acc_evaluator(model_sbert)
print(dev_score)