Spaces:

pvanand
/

generate-subtopics

Running

App Files Files Community

generate-subtopics / main.py

pvanand

Update main.py

895ffc7 verified 10 months ago

raw

history blame

15.9 kB

	import asyncio
	import json
	import os
	import re
	import time

	from dotenv import load_dotenv
	from duckduckgo_search import DDGS
	from fastapi import FastAPI
	from fastapi import Query
	from fastapi.middleware.cors import CORSMiddleware
	from fuzzy_json import loads
	from half_json.core import JSONFixer
	from openai import OpenAI
	from pydantic import BaseModel
	from retry import retry

	from helper_functions_api import md_to_html

	# Retrieve environment variables
	# NOTE(review): load_dotenv is imported at the top of the file but is not
	# called anywhere in the visible code, so a local .env file is not read here.
	TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
	# The "gsk_" prefix is prepended in code (presumably the stored secret omits
	# it -- confirm). Default to "" so an unset variable no longer raises
	# TypeError at import time; together_response can still fall back to the
	# Together client when the Groq call fails.
	GROQ_API_KEY = "gsk_" + os.getenv("GROQ_API_KEY", "")
	HELICON_API_KEY = os.getenv("HELICON_API_KEY")

	# System prompts passed as the `SysPrompt` argument of together_response.
	# Default system prompt (used when the caller does not pass SysPrompt).
	SysPromptDefault = "You are an expert AI, complete the given task. Do not add any additional comments."
	# Asks the model to answer with a Python list only; used by the topic,
	# subtopic and recommendation generators.
	SysPromptList = "You are now in the role of an expert AI who can extract structured information from user request. All elements must be in double quotes. You must respond ONLY with a valid python List. Do not add any additional comments."
	# Asks for a JSON-only answer; only referenced from commented-out code in this file.
	SysPromptJson = "You are now in the role of an expert AI who can extract structured information from user request. Both key and value pairs must be in double quotes. You must respond ONLY with a valid JSON file. Do not add any additional comments."
	# Markdown report built from supplied context; not referenced in the visible code.
	SysPromptMd = "You are an expert AI who can create a structured report using information provided in the context from user request.The report should be in markdown format consists of markdown tables structured into subtopics. Do not add any additional comments."
	# Markdown report built from the model's own knowledge; used by generate_report.
	SysPromptMdOffline = "You are an expert AI who can create a structured report using your knowledge on user request.The report should be in markdown format consists of markdown tables/lists/paragraphs as needed, structured into subtopics. Do not add any additional comments."


	# User-prompt template for generate_topics. It is filled with str.format,
	# which substitutes the {num_topics} and {user_input} placeholders.
	prompt_topics = """
	Your task is to create a list of subtasks/subtopics along with their descriptions based on the given USER_QUERY.

	Follow these steps to complete the task:

	1. Identify and list 2 to {num_topics} subtasks/subtopics for {user_input}.
	2. Provide a detailed explanation for each subtask/subtopic including all the context required to perform the subtask.
	3. Format your response as a valid Python list of lists, where each sub-list contains the subtask/subtopic and its description.

	Make sure your response is well-organized and follows a logical flow.

	Take a deep breath and work on this problem step-by-step.

	output format
	[
	["Subtask Title 1", "Detailed contextual description of subtask 1."],
	["Subtask Title 2", "Detailed contextual description of subtask 2."],
	...
	]
	YOUR OUTPUT SHOULD CONSIST ONLY A VALID PYTHON LIST, DO NOT ADD ADDITIONAL COMMENTS
	"""

	# User-prompt template for generate_subtopics. It is filled with str.format,
	# which substitutes {num_topics}, {main_task}, {user_input} and
	# {excluded_topics} (the latter is passed as a comma-joined string).
	prompt_subtopics = """You are a professional prompt engineer.

	Objective: Help create 2 to {num_topics} subtasks for an LLM to perform the specified task in the context of the given user query. to achieve each subtopic add a detailed instruction that will be used as a LLM promt, hence keep it contextual. Exclude the specified topics from the subtasks.

	Steps to complete the task:

	1. Understand the Main Task and User Query:
	- Read the main task and user query carefully to grasp the core objectives and context.
	- Identify any specific requirements or constraints mentioned in the query.

	2. Identify Key Components:
	- Break down the main task into its fundamental components.
	- Ensure each component is essential to the overall goal and can be clearly defined.

	3. Create Subtasks and Detailed Instructions:
	- Create 2 to {num_topics} of subtasks.
	- For each subtask provide clear and contextual instructions on what needs to be done.
	- Maintain logical order and ensure that the completion of each subtask contributes to the overall objective.

	4. Exclude Specified Topics:
	- Review the list of topics to be excluded.
	- Ensure that none of the generated subtasks include these topics.

	5. Format the Response:
	- Present the subtasks in a structured Python list of lists format.
	- Each sub-list should contain the subtask title and its detailed description.

	Output Format:
	[
	["Subtask Title 1", "Detailed contextual instruction to complete subtask 1."],
	["Subtask Title 2", "Detailed contextual instruction to complete subtask 2."],
	...
	]

	MAIN TASK: {main_task}
	USER QUERY:{user_input}
	TOPICS TO BE EXCLUDED:{excluded_topics}
	YOUR OUTPUT SHOULD CONSIST ONLY A VALID PYTHON LIST, DO NOT ADD ADDITIONAL COMMENTS
	"""


	# prompt_subtopics = f"""List 2-3 subtopics for each topic in the list of '{topics}' covering all aspects to generate a report, within the context of ###{user_query_keywords}###. to achieve each subtopic add a detailed instruction that will be used as a LLM promt, hence keep it contextual."""\
	# +""" Respond in the following format:
	# {
	# "Topic 1": [
	# ["Subtopic","Instuction"]
	# ],
	# "Topic 2": [
	# ["Subtopic","Instuction"]
	# ]
	# }, RETURN A VALID JSON FILE"""

	# response_subtopics = together_response(prompt_subtopics, model = llm_default_medium, SysPrompt = SysPromptJson)
	# subtopics = json_from_text(response_subtopics)

	# return subtopics

	# System prompt for the first pass of clean_subtopics_main: reorder the
	# titles/descriptions and merge redundant ones.
	prompt_clean_subtopics = """
	You are a professional editor for comprehensive reports.

	Follow the steps
	1 Review the provided titles and description.
	Arrange them in a sequence that follows a logical flow for a comprehensive report.

	2 Analyze for Redundancy:
	Check the entire set of descriptions for overlapping or repetitive content.
	Identify descriptions or topics that are repeated or very similar.

	3 Remove or Combine Redundant Descriptions:
	For each identified redundancy, combine similar descriptions to form a single description.

	4 Output the Cleaned List:
	Provide a list of the cleaned and reorganized titles and descriptions.

	Take a deep breath and do it step by step, Your output should only contain the final list"""
	# System prompt for the second pass of clean_subtopics_main: convert the
	# cleaned text into a [[title, description], ...] list.
	prompt_format_subtopics = """
	Convert the given cleaned list in the format [["title1","description1"],["title2","description2"]..],OUTPUT ONLY THE LIST"""

	### ------LLM CONFIG-------- ###

	# OpenAI-compatible client pointed at the Together endpoint on hconeai.com;
	# every request carries the Helicone auth header.
	together_client = OpenAI(
	    api_key=TOGETHER_API_KEY,
	    base_url="https://together.hconeai.com/v1",
	    default_headers={"Helicone-Auth": f"Bearer {HELICON_API_KEY}"},
	)

	# OpenAI-compatible client pointed at the Groq endpoint on hconeai.com.
	groq_client = OpenAI(
	    api_key=GROQ_API_KEY,
	    base_url="https://groq.hconeai.com/openai/v1",
	    default_headers={"Helicone-Auth": f"Bearer {HELICON_API_KEY}"},
	)

	# Groq model names
	llm_default_small = "llama3-8b-8192"
	llm_default_medium = "llama3-70b-8192"

	# Together Model names (fallback)
	llm_fallback_small = "meta-llama/Llama-3-8b-chat-hf"
	llm_fallback_medium = "meta-llama/Llama-3-70b-chat-hf"

	### ------END OF LLM CONFIG-------- ###

	def together_response(message, model = llm_default_small, SysPrompt = SysPromptDefault, temperature=0.2, frequency_penalty =0.1, max_tokens= 2000):
	    """Send one system + user exchange to the LLM and return the reply text.

	    The Groq client is tried first. If that call (or reading its result)
	    raises, the error is printed and the request is repeated once against the
	    Together client; errors from that second call propagate to the caller.

	    Args:
	        message: Content of the user message.
	        model: Groq model name; also selects the Together fallback model.
	        SysPrompt: Content of the system message.
	        temperature, frequency_penalty, max_tokens: Forwarded unchanged to
	            both chat-completion calls.

	    Returns:
	        The ``message.content`` of the first returned choice.
	    """
	    chat = [
	        {"role": "system", "content": SysPrompt},
	        {"role": "user", "content": message},
	    ]
	    # Sampling options are identical for the primary and the fallback call.
	    sampling = {
	        "temperature": temperature,
	        "frequency_penalty": frequency_penalty,
	        "max_tokens": max_tokens,
	    }

	    try:
	        primary = groq_client.chat.completions.create(model=model, messages=chat, **sampling)
	        return primary.choices[0].message.content
	    except Exception as e:
	        print(f"Error calling GROQ API: {e}")

	        # Only the small default maps to the small fallback; any other model
	        # name is served by the medium fallback.
	        if model == llm_default_small:
	            fallback_model = llm_fallback_small
	        else:
	            fallback_model = llm_fallback_medium

	        secondary = together_client.chat.completions.create(model=fallback_model, messages=chat, **sampling)
	        return secondary.choices[0].message.content


	def json_from_text(text):
	    """Parse a JSON object or array out of LLM output.

	    Parsing is attempted in three steps, stopping at the first success:

	    1. Strict ``json.loads`` on the whole text.
	    2. Strict ``json.loads`` on the outermost ``{...}`` or ``[...]`` span found
	       in the text (whichever opens first), so commentary around the payload
	       is ignored. Arrays are matched as well as objects because the callers
	       in this file ask the model for lists.
	    3. Fuzzy parsing (``fuzzy_json.loads``) of that span, or of the whole text
	       when no bracketed span exists.

	    Args:
	        text: Raw model output.

	    Returns:
	        The decoded Python object.

	    Raises:
	        Whatever the fuzzy loader raises when the text cannot be repaired.
	    """
	    try:
	        return json.loads(text)
	    except ValueError:
	        # json.JSONDecodeError is a ValueError; fall through to extraction.
	        pass

	    # Leftmost match wins, and each alternative is greedy up to its last
	    # closing bracket, so the outermost structure is selected.
	    match = re.search(r'\{[\s\S]*\}|\[[\s\S]*\]', text)
	    candidate = match.group(0) if match else text

	    try:
	        return json.loads(candidate)
	    except ValueError:
	        # Use Fuzzy JSON loading as the last resort
	        return loads(candidate)

	@retry(tries=3, delay=0.5)
	def generate_topics(user_input, num_topics, previous_queries):
	    """Ask the medium model for a list of topics and parse the reply.

	    Args:
	        user_input: Query substituted into ``prompt_topics``.
	        num_topics: Upper bound substituted into ``prompt_topics``.
	        previous_queries: Accepted but not used by this function.

	    Returns:
	        Whatever ``json_from_text`` decodes from the model reply.
	    """
	    filled_prompt = prompt_topics.format(user_input=user_input, num_topics=num_topics)
	    raw_reply = together_response(
	        filled_prompt,
	        model=llm_default_medium,
	        SysPrompt=SysPromptList,
	        temperature=1,
	    )
	    return json_from_text(raw_reply)

	@retry(tries=3, delay=0.5)
	def generate_subtopics(main_task,user_input,num_topics,excluded_topics):
	    """Ask the medium model for subtasks of ``main_task`` and parse the reply.

	    Args:
	        main_task: Task substituted into ``prompt_subtopics``.
	        user_input: User query substituted into ``prompt_subtopics``.
	        num_topics: Upper bound substituted into ``prompt_subtopics``.
	        excluded_topics: Iterable of strings; joined with "," for the prompt.

	    Returns:
	        Whatever ``json_from_text`` decodes from the model reply.
	    """
	    filled_prompt = prompt_subtopics.format(
	        main_task=main_task,
	        user_input=user_input,
	        num_topics=num_topics,
	        excluded_topics=",".join(excluded_topics),
	    )
	    raw_reply = together_response(
	        filled_prompt,
	        model=llm_default_medium,
	        SysPrompt=SysPromptList,
	        temperature=1,
	    )
	    return json_from_text(raw_reply)

	## V2
	###############################################
	import re
	def parse_topics_regex(text):
	    """Extract ``{title: description}`` pairs from tagged model output.

	    Each pair is expected as ``<subtopic> TITLE <description>DESC</description>``.
	    Matching is non-greedy and spans newlines (``re.DOTALL``).

	    Args:
	        text: Raw model output containing the tags.

	    Returns:
	        Dict mapping each stripped title to its stripped description; empty
	        when nothing matches. A repeated title keeps its last description.
	    """
	    # The quantifiers are required: with a bare `.?` each group could capture
	    # at most one character, so real titles/descriptions never matched.
	    pattern = r'<subtopic>\s*(.*?)\s*<description>(.*?)</description>'
	    matches = re.findall(pattern, text, re.DOTALL)
	    return {title.strip(): desc.strip() for title, desc in matches}

	from bs4 import BeautifulSoup
	def parse_subtopics(xml_text):
	    """Parse ``<task>`` elements and their numbered subtasks from model output.

	    Args:
	        xml_text: Markup containing ``<task>`` elements, each optionally
	            holding a ``<subtasks>`` element whose entries look like
	            ``1. Name<prompt>...</prompt>``.

	    Returns:
	        A list with one dict per ``<task>``:
	        ``{'task': <first line of the task text>, 'subtasks': [{'name', 'prompt'}, ...]}``.
	        ``'subtasks'`` is empty when the task has no ``<subtasks>`` element or
	        no entry matches.
	    """
	    soup = BeautifulSoup(xml_text, 'html.parser')
	    result = []

	    for task in soup.find_all('task'):
	        task_data = {
	            'task': task.text.strip().split('\n')[0],
	            'subtasks': []
	        }

	        subtasks = task.find('subtasks')
	        if subtasks:
	            # Match against the serialized markup so the <prompt> tags are
	            # still present for the regex.
	            subtask_prompts = re.findall(r'(\d+\.\s*[^<]+)<prompt>([^<]+)</prompt>', str(subtasks))
	            task_data['subtasks'] = [
	                {
	                    # Drop the leading "N." numbering from the subtask name.
	                    'name': re.sub(r'^\d+\.\s*', '', name.strip()),
	                    'prompt': prompt.strip()
	                }
	                for name, prompt in subtask_prompts
	            ]

	        result.append(task_data)
	    return result

	@retry(tries=3, delay=0.5)
	def generate_topics_v2(user_input, num_topics,num_sub_topics):
	    """Generate topics, then subtopics for those topics, with two LLM calls.

	    Args:
	        user_input: Task text substituted as ``TASK`` in the topics prompt.
	        num_topics: Substituted as ``NUM_TASKS`` in the topics prompt.
	        num_sub_topics: Substituted as ``NUM_SUBTASKS`` in the subtopics prompt.

	    Returns:
	        The dict produced by ``parse_topics_regex`` from the second reply.
	    """
	    # NOTE(review): `topics_prompt`, `sub_topics_prompt` and `parse_topics` are
	    # not defined anywhere in the visible file; they must be provided before
	    # this function can run.
	    # `str.format` is called once; the previous `.format.format(...)` looked up
	    # an attribute on the bound method and raised AttributeError.
	    prompt = topics_prompt.format(TASK=user_input, NUM_TASKS=num_topics)
	    response_topics = together_response(prompt, model=llm_default_medium, SysPrompt=SysPromptDefault, temperature=1)
	    print(response_topics)
	    topics = parse_topics(response_topics)
	    print(topics)
	    prompt = sub_topics_prompt.format(TASK_LIST = topics,NUM_SUBTASKS = num_sub_topics)
	    response_subtopics = together_response(prompt, model=llm_default_medium, SysPrompt=SysPromptDefault, temperature=1)
	    subtopics = parse_topics_regex(response_subtopics)
	    return subtopics

	class TopicInputv2(BaseModel):
	    # Request body for the v2 topic generation endpoint.
	    user_input: str = Query(
	        default="market research",
	        description="input query to generate subtopics",
	    )
	    num_topics: int = Query(
	        default=5,
	        description="Number of topics to generate (default: 5)",
	    )
	    num_subtopics: int = Query(
	        default=5,
	        description="Number of subtopics to generate (default: 5)",
	    )

	##############################


	@retry(tries=3, delay=0.5)
	def clean_subtopics_main(subtopics):
	    """De-duplicate and re-format subtopics with two LLM passes.

	    The first pass uses ``prompt_clean_subtopics`` as system prompt to reorder
	    and merge the entries; the second uses ``prompt_format_subtopics`` to turn
	    that answer into a list, which is parsed with ``json_from_text``. Both
	    intermediate results are printed.

	    Args:
	        subtopics: Subtopics to clean; stringified with ``str`` for the prompt.

	    Returns:
	        Whatever ``json_from_text`` decodes from the formatted reply.
	    """
	    prompt = "remove redundancy if present and output the list ###"
	    cleaned_subtopics = together_response(
	        prompt + str(subtopics),
	        model=llm_default_medium,
	        SysPrompt=prompt_clean_subtopics,
	        temperature=0.5,
	        frequency_penalty=0,
	    )
	    print(cleaned_subtopics)
	    formated_subtopics = together_response(
	        cleaned_subtopics,
	        model=llm_default_medium,
	        SysPrompt=prompt_format_subtopics,
	        temperature=1,
	    )
	    subtopics = json_from_text(formated_subtopics)
	    print(subtopics)
	    return subtopics


	@retry(tries=3, delay=0.5)
	def generate_report(topic, description):
	    """Generate a report for ``topic`` and return it converted by ``md_to_html``.

	    Args:
	        topic: Report subject inserted into the prompt.
	        description: Instructions inserted into the prompt.
	    """
	    prompt = f"""create a detailed report on: {topic} by following the instructions: {description}"""
	    md_report = together_response(prompt, model = llm_default_medium, SysPrompt = SysPromptMdOffline)
	    return md_to_html(md_report)


	@retry(tries=3, delay=0.5)
	def get_images(query, num_results):
	    """Run a DuckDuckGo image search (safe search on) and return its result.

	    Args:
	        query: Search keywords.
	        num_results: Passed as ``max_results`` to the search.
	    """
	    # Short pause before every attempt, including retries.
	    time.sleep(0.5)
	    ddgs = DDGS()
	    imgs = ddgs.images(keywords=query, safesearch="on", max_results=num_results)
	    return imgs


	# Define the app
	app = FastAPI()

	# NOTE(review): wildcard origins combined with allow_credentials=True is very
	# permissive; confirm this is intended before exposing authenticated routes.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_credentials=True,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)


	# V2 topics endpoint. It is registered here, after `app` exists: decorating
	# with @app.post before `app = FastAPI()` raised NameError at import time.
	# The body model is TopicInputv2 (the only model that carries num_subtopics)
	# and the worker is generate_topics_v2 (the name actually defined above).
	# The function name is kept; a later handler in this file reuses it, which
	# only rebinds the module-level name and does not affect route registration.
	@app.post("/generate_topicsv2")
	async def create_topics(input: TopicInputv2):
	    # generate_topics_v2 performs blocking network calls, so it is run in a
	    # worker thread to keep the event loop free.
	    topics = await asyncio.to_thread(
	        generate_topics_v2, input.user_input, input.num_topics, input.num_subtopics
	    )
	    return {"topics": topics}

	# Create a Pydantic model to handle the input data
	class TopicInput(BaseModel):
	    # Request body for POST /generate_topics.
	    user_input: str = Query(
	        default="market research",
	        description="input query to generate subtopics",
	    )
	    num_topics: int = Query(
	        default=5,
	        description="Number of subtopics to generate (default: 5)",
	    )
	    # Forwarded to generate_topics, which does not use it.
	    previous_queries: list[str] = Query(
	        default=[],
	        description="Deprecated: Use /generate_subtopics instead for subtopics",
	    )

	class SubTopicInput(BaseModel):
	    # Request body for POST /generate_subtopics.
	    main_task: str = Query(
	        default="detailed market research",
	        description="Main task to be completed",
	    )
	    user_input: str = Query(
	        default="I want to start a business in retail",
	        description="input query to generate subtopics",
	    )
	    num_topics: int = Query(
	        default=3,
	        description="Number of max subtopics to generate (default: 3)",
	    )
	    excluded_topics: list[str] = Query(
	        default=[],
	        description="List all other main tasks to exclude",
	    )

	class CleanSubTopicInput(BaseModel):
	    # Request body for POST /clean_subtopics.
	    subtopics: list = Query(
	        default=[],
	        description="List of selected subtopics and descriptions",
	    )

	class imageInput(BaseModel):
	    # Request body for POST /get_images.
	    # The field descriptions were copied from the subtopic models; they now
	    # describe what the image search actually does with each value.
	    user_input: str = Query(
	        default="market research",
	        description="input query to search images for",
	    )
	    num_images: int = Query(
	        default=5,
	        description="Number of images to return (default: 5)",
	    )

	class ReportInput(BaseModel):
	    # Request body for POST /generate_report.
	    topic: str = Query(
	        default="market research",
	        description="The main topic for the report",
	    )
	    description: str = Query(
	        default="",
	        description="A brief description of the topic",
	    )

	class RecommendationInput(BaseModel):
	    # Request body for POST /get_recommendations.
	    user_input: str = Query(
	        default="",
	        description="Input query to generate follow-up questions",
	    )
	    num_recommendations: int = Query(
	        default=5,
	        description="Number of recommendations to generate",
	    )

	@app.get("/", tags=["Home"])
	def api_home():
	    # Landing route: returns a static welcome message pointing at the docs page.
	    welcome = 'Welcome to FastAPI Subtopics API! Visit https://pvanand-generate-subtopics.hf.space/docs to test'
	    return {'detail': welcome}

	# POST /generate_topics: returns {"topics": <result of generate_topics>}.
	@app.post("/generate_topics")
	async def create_topics(input: TopicInput):
	    # generate_topics does blocking network I/O (plus retry delays); run it in
	    # a worker thread so the event loop keeps serving other requests.
	    topics = await asyncio.to_thread(
	        generate_topics, input.user_input, input.num_topics, input.previous_queries
	    )
	    return {"topics": topics}

	# POST /generate_subtopics: returns {"subtopics": <result of generate_subtopics>}.
	@app.post("/generate_subtopics")
	async def create_subtopics(input: SubTopicInput):
	    # generate_subtopics does blocking network I/O (plus retry delays); run it
	    # in a worker thread so the event loop keeps serving other requests.
	    topics = await asyncio.to_thread(
	        generate_subtopics,
	        input.main_task,
	        input.user_input,
	        input.num_topics,
	        input.excluded_topics,
	    )
	    return {"subtopics": topics}

	# POST /clean_subtopics: returns {"subtopics": <result of clean_subtopics_main>}.
	@app.post("/clean_subtopics")
	async def clean_subtopics(input: CleanSubTopicInput):
	    # clean_subtopics_main makes two blocking LLM calls; run it in a worker
	    # thread so the event loop keeps serving other requests.
	    subtopics = await asyncio.to_thread(clean_subtopics_main, input.subtopics)
	    return {"subtopics": subtopics}

	# POST /generate_report: returns {"report": <result of generate_report>}.
	@app.post("/generate_report")
	async def create_report(input: ReportInput):
	    # generate_report makes a blocking LLM call; run it in a worker thread so
	    # the event loop keeps serving other requests.
	    report = await asyncio.to_thread(generate_report, input.topic, input.description)
	    return {"report": report}

	# POST /get_images: returns {"images": <result of get_images>}.
	@app.post("/get_images")
	async def fetch_images(input: imageInput):
	    # get_images sleeps and performs a blocking search; run it in a worker
	    # thread so the event loop keeps serving other requests.
	    images = await asyncio.to_thread(get_images, input.user_input, input.num_images)
	    return {"images": images}

	# POST /get_recommendations: returns {"recommendations": <parsed model reply>}.
	# With a non-empty user_input the model is asked for follow-up questions to
	# it; otherwise it is asked for a mixed set of starter questions.
	@app.post("/get_recommendations")
	async def generate_recommendations(input: RecommendationInput):

	    if input.user_input:
	        prompt = f"""create a list of {input.num_recommendations} questions that a user might ask following the question: {input.user_input}:"""
	    else:
	        prompt = f"""create a list of mixed {input.num_recommendations} questions to create a report or plan or course on any of the topics product,market,research topic """

	    # The LLM call is blocking; run it in a worker thread so the event loop
	    # keeps serving other requests. Parsing the reply is cheap and stays here.
	    raw_reply = await asyncio.to_thread(
	        together_response,
	        prompt,
	        model=llm_default_small,
	        SysPrompt=SysPromptList,
	        temperature=1,
	    )
	    response_topics = json_from_text(raw_reply)
	    return {"recommendations": response_topics}