MindfulMedia_Mentor / llm_response_generator.py
#---
#- Author: Jaelin Lee
#- Date: Mar 16, 2024
#- Description: Calls the HuggingFace API to generate a natural-language response.
#- Credit: The initial code is from Abhishek Dutta.
#          Most of the code is kept as he created it.
#          I only modified it to convert it into a class,
#          and tweaked the prompt so it can be fed into the `streamlit_app.py` file.
#---
import os
from langchain_community.llms import HuggingFaceHub
from langchain_community.llms import OpenAI
# from langchain.llms import HuggingFaceHub, OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import warnings
warnings.filterwarnings("ignore")


class LLLResponseGenerator():

    def __init__(self):
        print("initialized")

    def llm_inference(
        self,
        model_type: str,
        question: str,
        prompt_template: str,
        context: str,
        ai_tone: str,
        questionnaire: str,
        user_text: str,
        openai_model_name: str = "",
        # hf_repo_id: str = "tiiuae/falcon-7b-instruct",
        hf_repo_id: str = "mistralai/Mistral-7B-Instruct-v0.2",
        temperature: float = 0.5,
        max_length: int = 128 * 4,
    ) -> str:
"""Call HuggingFace/OpenAI model for inference
Given a question, prompt_template, and other parameters, this function calls the relevant
API to fetch LLM inference results.
Args:
model_str: Denotes the LLM vendor's name. Can be either 'huggingface' or 'openai'
question: The question to be asked to the LLM.
prompt_template: The prompt template itself.
context: Instructions for the LLM.
ai_tone: Can be either empathy, encouragement or suggest medical help.
questionnaire: Can be either depression, anxiety or adhd.
user_text: Response given by the user.
hf_repo_id: The Huggingface model's repo_id
temperature: (Default: 1.0). Range: Float (0.0-100.0). The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.
max_length: Integer to define the maximum length in tokens of the output summary.
Returns:
A Python string which contains the inference result.
HuggingFace repo_id examples:
- google/flan-t5-xxl
- tiiuae/falcon-7b-instruct
"""
        prompt = PromptTemplate(
            template=prompt_template,
            input_variables=[
                "context",
                "ai_tone",
                "questionnaire",
                "question",
                "user_text",
            ],
        )
if model_type == "openai":
# https://api.python.langchain.com/en/stable/llms/langchain.llms.openai.OpenAI.html#langchain.llms.openai.OpenAI
llm = OpenAI(
model_name=openai_model_name, temperature=temperature, max_tokens=max_length
)
llm_chain = LLMChain(prompt=prompt, llm=llm)
return llm_chain.run(
context=context,
ai_tone=ai_tone,
questionnaire=questionnaire,
question=question,
user_text=user_text,
)
elif model_type == "huggingface":
# https://python.langchain.com/docs/integrations/llms/huggingface_hub
llm = HuggingFaceHub(
repo_id=hf_repo_id,
model_kwargs={"temperature": temperature, "max_length": max_length},
)
llm_chain = LLMChain(prompt=prompt, llm=llm)
response = llm_chain.run(
context=context,
ai_tone=ai_tone,
questionnaire=questionnaire,
question=question,
user_text=user_text,
)
print(response)
# Extracting only the response part from the output
response_start_index = response.find("Response;")
return response[response_start_index + len("Response;"):].strip()
        else:
            print(
                "Please use a valid value for the model_type parameter: it must be either 'openai' or 'huggingface'."
            )
if __name__ == "__main__":
# Please ensure you have a .env file available with 'HUGGINGFACEHUB_API_TOKEN' and 'OPENAI_API_KEY' values.
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
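    # Note (assumption, not in the original script): os.getenv only reads the process
    # environment. If the values live in a .env file, something like python-dotenv's
    # load_dotenv() would need to run before this point for them to be picked up.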
context = "You are a mental health supporting non-medical assistant. DO NOT PROVIDE any medical advice with conviction."
ai_tone = "EMPATHY"
questionnaire = "ADHD"
question = (
"How often do you find yourself having trouble focusing on tasks or activities?"
)
user_text = "I feel distracted all the time, and I am never able to finish"
# The user may have signs of {questionnaire}.
template = """INSTRUCTIONS: {context}
Respond to the user with a tone of {ai_tone}.
Question asked to the user: {question}
Response by the user: {user_text}
Provide some advice and ask a relevant question back to the user.
Response;
"""
    temperature = 0.5
    max_length = 128 * 4

    model = LLLResponseGenerator()
    llm_response = model.llm_inference(
        model_type="huggingface",
        question=question,
        prompt_template=template,
        context=context,
        ai_tone=ai_tone,
        questionnaire=questionnaire,
        user_text=user_text,
        temperature=temperature,
        max_length=max_length,
    )
    print(llm_response)
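
    # --- Optional sketch (not part of the original script): the same request routed
    # through the OpenAI branch of llm_inference. The model name below is only an
    # illustrative assumption, and OPENAI_API_KEY must be set in the environment.
    # llm_response_openai = model.llm_inference(
    #     model_type="openai",
    #     question=question,
    #     prompt_template=template,
    #     context=context,
    #     ai_tone=ai_tone,
    #     questionnaire=questionnaire,
    #     user_text=user_text,
    #     openai_model_name="gpt-3.5-turbo-instruct",
    #     temperature=temperature,
    #     max_length=max_length,
    # )
    # print(llm_response_openai)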