# Semantic search with FAISS (TensorFlow)

In [None]:
!pip install datasets evaluate transformers[sentencepiece]
!pip install faiss-cpu

In [None]:
import pandas as pd
from datasets import load_from_disk
from transformers import AutoTokenizer, TFAutoModel

# Alphabetical list of category labels. The entries read like conditions /
# reasons for use rather than product names -- presumably the distinct values
# of the dataset's 'Reason' column; TODO confirm against the data.
# Not referenced by the code visible in this notebook.
Drugs = [
    'Acne',
    'Adhd',
    'Allergies',
    'Anaemia',
    'Angina',
    'Appetite',
    'Arthritis',
    'Constipation',
    'Contraception',
    'Dandruff',
    'Diabetes',
    'Digestion',
    'Fever',
    'Fungal',
    'General',
    'Glaucoma',
    'Gout',
    'Haematopoiesis',
    'Haemorrhoid',
    'Hyperpigmentation',
    'Hypertension',
    'Hyperthyroidism',
    'Hypnosis',
    'Hypothyroidism',
    'Infection',
    'Migraine',
    'Osteoporosis',
    'Pain',
    'Psychosis',
    'Schizophrenia',
    'Supplement',
    'Thrombolysis',
    'Viral',
    'Wound',
]

# Checkpoint used for both the tokenizer and the encoder, so queries are
# tokenized and embedded consistently.
# NOTE(review): the stored "embeddings" column is presumably produced with this
# same checkpoint -- confirm, otherwise query and index vectors are not comparable.
model_ckpt = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
# from_pt=True asks transformers to load the PyTorch weights of the checkpoint
# into the TensorFlow model class.
model = TFAutoModel.from_pretrained(model_ckpt, from_pt=True)

def cls_pooling(model_output):
    """Pool a model output by keeping only index 0 along the second axis.

    Args:
        model_output: Object exposing a ``last_hidden_state`` attribute that
            supports ``[:, 0]`` indexing (presumably shaped
            ``(batch, sequence, hidden)`` -- TODO confirm).

    Returns:
        ``last_hidden_state[:, 0]``: for every entry on the first axis, the
        slice at position 0 of the second axis.
    """
    hidden_states = model_output.last_hidden_state
    first_position = hidden_states[:, 0]
    return first_position

def get_embeddings(text_list):
    """Embed a list of texts with the module-level tokenizer and model.

    Args:
        text_list: Texts to encode; passed straight to ``tokenizer``.

    Returns:
        The result of ``cls_pooling`` applied to the model output, i.e. one
        pooled vector per input text.
    """
    # Pad/truncate so the whole batch can be returned as TensorFlow tensors.
    batch = tokenizer(
        text_list,
        padding=True,
        truncation=True,
        return_tensors="tf",
    )
    # Copy the tokenizer output into a plain dict before unpacking it as kwargs.
    model_inputs = dict(batch.items())
    return cls_pooling(model(**model_inputs))


# Load the previously saved dataset from a hard-coded Google Drive path.
# NOTE(review): this path only exists when Drive is mounted at /content/drive
# (looks like a Colab session) -- adjust when running elsewhere.
embeddings_dataset = load_from_disk("/content/drive/MyDrive/Drugs")
# Build a FAISS index over the "embeddings" column so nearest-neighbour lookups
# can be run against it; the dataset must therefore already contain that column.
embeddings_dataset.add_faiss_index(column="embeddings")

def recommendations(question, k=5):
    """Return the ``k`` nearest dataset rows for a free-text ``question``.

    The question is embedded with ``get_embeddings`` and looked up in the
    FAISS index attached to the ``"embeddings"`` column of
    ``embeddings_dataset``.

    Args:
        question: Query text to embed and search for.
        k: Number of nearest examples to retrieve (defaults to 5).

    Returns:
        A pandas DataFrame with the columns ``Drug_Name``, ``Reason`` and
        ``scores``, ordered by ``scores`` from highest to lowest.
    """
    # FAISS works on NumPy arrays, so convert the TensorFlow tensor first.
    question_embedding = get_embeddings([question]).numpy()
    scores, samples = embeddings_dataset.get_nearest_examples(
        "embeddings", question_embedding, k=k
    )
    samples_df = pd.DataFrame.from_dict(samples)
    samples_df["scores"] = scores
    # The sort key has to be exactly the "scores" column created above;
    # any other spelling makes sort_values raise KeyError.
    # NOTE(review): whether a higher score means a better match depends on the
    # metric of the FAISS index (distance vs. inner product) -- confirm that
    # descending order really puts the best match first.
    samples_df = samples_df.sort_values("scores", ascending=False)
    return samples_df[["Drug_Name", "Reason", "scores"]]

In [None]:
# Example query: look up the nearest dataset entries for a free-text complaint.
question = "moderate acne"
# As the last expression of the cell, the returned DataFrame is displayed by the notebook.
recommendations(question)