thankrandomness committed
Commit • 5c01f6c • 1 Parent(s): ef72046

remove similarity_threshold

app.py CHANGED
@@ -78,7 +78,7 @@ def upsert_data(dataset_split):
 upsert_data(dataset['train'])
 
 # Define retrieval function with similarity threshold
-def retrieve_relevant_text(input_text, similarity_threshold=1.0): # Lower thres…
+def retrieve_relevant_text(input_text):
     input_embedding = embed_text([input_text])[0]
     results = collection.query(
         query_embeddings=[input_embedding],
@@ -90,20 +90,20 @@ def retrieve_relevant_text(input_text, similarity_threshold=1.0): # Lower thres
     #print("Retrieved items and their similarity scores:")
     for metadata, distance in zip(results['metadatas'][0], results['distances'][0]):
         #print(f"Code: {metadata['code']}, Similarity Score: {distance}")
-        if distance <= similarity_threshold:
-            output.append({
-                "similarity_score": distance,
-                "code": metadata['code'],
-                "code_system": metadata['code_system'],
-                "description": metadata['description']
-            })
+        #if distance <= similarity_threshold:
+        output.append({
+            "similarity_score": distance,
+            "code": metadata['code'],
+            "code_system": metadata['code_system'],
+            "description": metadata['description']
+        })
 
-    if not output:
-        print("No results met the similarity threshold.")
+    # if not output:
+    #     print("No results met the similarity threshold.")
     return output
 
 # Evaluate retrieval efficiency on the validation/test set
-def evaluate_efficiency(dataset_split, similarity_threshold=1.0):
+def evaluate_efficiency(dataset_split):
     y_true = []
     y_pred = []
     total_similarity = 0
@@ -115,7 +115,7 @@ def evaluate_efficiency(dataset_split, similarity_threshold=1.0):
         annotations_list = [annotation['code'] for annotation in note.get('annotations', []) if 'code' in annotation]
 
         if text and annotations_list:
-            retrieved_results = retrieve_relevant_text(text…
+            retrieved_results = retrieve_relevant_text(text)
             retrieved_codes = [result['code'] for result in retrieved_results]
 
             # Sum up similarity scores for average calculation
@@ -153,7 +153,7 @@ def evaluate_efficiency(dataset_split, similarity_threshold=1.0):
     return precision, recall, f1, avg_similarity
 
 # Calculate retrieval efficiency metrics
-precision, recall, f1, avg_similarity = evaluate_efficiency(dataset['validation']…
+precision, recall, f1, avg_similarity = evaluate_efficiency(dataset['validation'])
 
 # Gradio interface
 def gradio_interface(input_text):
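
For context, a minimal runnable sketch of how the reshaped retrieve_relevant_text behaves after this commit: every query hit is returned, with no similarity_threshold filter. The ChromaDB setup, the toy embed_text stub, and the sample records below are illustrative assumptions standing in for the Space's real embedding model and dataset; they are not code from app.py.

# Minimal sketch, not the actual Space code: embed_text is a toy stand-in
# for the app's real embedding function, and the records are made-up
# examples that only reuse the metadata keys seen in app.py.
import chromadb

def embed_text(texts):
    # Toy 2-dimensional "embedding" so the sketch runs without a model.
    return [[float(len(t)), float(sum(map(ord, t)) % 97)] for t in texts]

client = chromadb.Client()
collection = client.create_collection(name="demo_codes")

records = [
    {"code": "A01", "code_system": "ICD-10", "description": "example condition A"},
    {"code": "B02", "code_system": "ICD-10", "description": "example condition B"},
]
collection.add(
    ids=[r["code"] for r in records],
    embeddings=embed_text([r["description"] for r in records]),
    metadatas=records,
)

def retrieve_relevant_text(input_text):
    # As in the commit: every hit is returned; the similarity_threshold
    # filter that used to drop distant matches is gone.
    input_embedding = embed_text([input_text])[0]
    results = collection.query(query_embeddings=[input_embedding], n_results=2)
    output = []
    for metadata, distance in zip(results["metadatas"][0], results["distances"][0]):
        output.append({
            "similarity_score": distance,
            "code": metadata["code"],
            "code_system": metadata["code_system"],
            "description": metadata["description"],
        })
    return output

print(retrieve_relevant_text("example condition A"))

One consequence of the change: callers that previously relied on an empty result list to mean "no sufficiently close match" now have to inspect similarity_score themselves, since every query returns its nearest neighbours.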