Spaces:

Underground-Digital
/

cc-api

Sleeping

App Files Files Community

cc-api / response_formatter.py

Severian

Update response_formatter.py

37259c3 verified 3 months ago

raw

history blame

8.48 kB

	from typing import Dict, Optional, Tuple, List, Any
	import re
	import xml.etree.ElementTree as ET
	from datetime import datetime
	import json
	import logging

	# Module-level logger for this file, emitting INFO and above.
	logger = logging.getLogger(__name__)
	logger.setLevel(logging.INFO)

	# Attach a console handler only when the logger has none yet, so running
	# this module-level code more than once does not stack duplicate handlers.
	if not logger.handlers:
	    ch = logging.StreamHandler()
	    ch.setLevel(logging.INFO)
	    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
	    ch.setFormatter(formatter)
	    logger.addHandler(ch)

	class ToolType:
	    """String constants naming the known tools.

	    ``ResponseFormatter._create_tool_element`` compares a tool name against
	    some of these values to decide which XML layout to produce.
	    """

	    DUCKDUCKGO = "duckduckgo_search"
	    REDDIT_NEWS = "reddit_x_gnews_newswire_crunchbase"
	    PUBMED = "pubmed_search"
	    CENSUS = "get_census_data"
	    HEATMAP = "heatmap_code"
	    MERMAID = "mermaid_diagram"
	    WISQARS = "wisqars"
	    WONDER = "wonder"
	    NCHS = "nchs"
	    ONESTEP = "onestep"
	    DQS = "dqs_nhis_adult_summary_health_statistics"

	class ResponseFormatter:
	    """Render agent output as a ``(terminal, xml)`` pair of strings.

	    Every method is static; the class acts purely as a namespace. The XML
	    documents are rooted at ``<agent_response>`` and serialized with
	    ``xml.etree.ElementTree``.
	    """

	    @staticmethod
	    def format_thought(
	        thought: str,
	        observation: str,
	        citations: Optional[List[Dict]] = None,
	        metadata: Optional[Dict] = None,
	        tool_outputs: Optional[List[Dict]] = None
	    ) -> Tuple[str, str]:
	        """Format an agent thought for both terminal and XML output.

	        Args:
	            thought: Text of the thought; stored under ``content`` in the
	                terminal payload and as ``<thought>`` in the XML.
	            observation: Optional observation text; a falsy value omits the
	                ``<observation>`` element.
	            citations: Optional list of dicts; each becomes a ``<citation>``
	                element whose attributes are the dict's items (stringified).
	            metadata: Optional dict copied into the terminal payload
	                (``{}`` when omitted or empty).
	            tool_outputs: Optional list of dicts with ``type`` and
	                ``content`` keys; included verbatim in the terminal payload
	                and as ``<tool_output type=...>`` elements in the XML.

	        Returns:
	            ``(json_string, xml_string)``.

	        Raises:
	            TypeError: If the terminal payload is not JSON-serializable.
	        """
	        # Terminal format
	        terminal_output = {
	            "type": "agent_thought",
	            "content": thought,
	            "metadata": metadata or {}
	        }

	        if tool_outputs:
	            terminal_output["tool_outputs"] = tool_outputs

	        # XML format
	        root = ET.Element("agent_response")
	        thought_elem = ET.SubElement(root, "thought")
	        thought_elem.text = thought

	        if observation:
	            obs_elem = ET.SubElement(root, "observation")
	            obs_elem.text = observation

	        if tool_outputs:
	            tools_elem = ET.SubElement(root, "tool_outputs")
	            for tool_output in tool_outputs:
	                tool_elem = ET.SubElement(tools_elem, "tool_output")
	                # ElementTree can only serialize str attributes/text, so
	                # coerce here instead of failing later inside ET.tostring().
	                tool_elem.attrib["type"] = str(tool_output.get("type", ""))
	                tool_elem.text = ResponseFormatter._as_text(
	                    tool_output.get("content", "")
	                )

	        if citations:
	            cites_elem = ET.SubElement(root, "citations")
	            for citation in citations:
	                cite_elem = ET.SubElement(cites_elem, "citation")
	                for key, value in citation.items():
	                    cite_elem.attrib[str(key)] = str(value)

	        xml_output = ET.tostring(root, encoding='unicode')
	        return json.dumps(terminal_output), xml_output

	    @staticmethod
	    def _create_tool_element(parent: ET.Element, tool_name: str, tool_data: Dict) -> ET.Element:
	        """Append a ``<tool name=...>`` child to *parent* and populate it.

	        The layout depends on *tool_name*: census, mermaid and health tools
	        get dedicated structures; anything else gets a single ``<content>``
	        element holding the markdown-cleaned ``str(tool_data)``.

	        Returns:
	            The newly created ``<tool>`` element.
	        """
	        tool_elem = ET.SubElement(parent, "tool")
	        tool_elem.set("name", tool_name)

	        # Handle different tool types
	        if tool_name == ToolType.CENSUS:
	            ResponseFormatter._format_census_data(tool_elem, tool_data)
	        elif tool_name == ToolType.MERMAID:
	            ResponseFormatter._format_mermaid_data(tool_elem, tool_data)
	        elif tool_name in (ToolType.WISQARS, ToolType.WONDER, ToolType.NCHS):
	            ResponseFormatter._format_health_data(tool_elem, tool_data)
	        else:
	            # Generic tool output format
	            content_elem = ET.SubElement(tool_elem, "content")
	            content_elem.text = ResponseFormatter._clean_markdown(str(tool_data))

	        return tool_elem

	    @staticmethod
	    def _format_census_data(tool_elem: ET.Element, data: Dict) -> None:
	        """Add a ``<census_tracts>`` structure for census data to *tool_elem*.

	        ``data["llm_result"]``, when present, is parsed as JSON and each entry
	        of its ``tracts`` list becomes a ``<tract id=...>`` element with one
	        child per remaining key (underscores removed from the tag name).

	        If anything goes wrong, *tool_elem* instead receives a single
	        ``<content>`` element with the markdown-cleaned ``str(data)``.
	        """
	        try:
	            # Build detached so that a failure half-way through does not leave
	            # a partial <census_tracts> next to the fallback <content>.
	            tracts_elem = ET.Element("census_tracts")

	            # Parse the llm_result to extract structured data
	            if "llm_result" in data:
	                result = json.loads(data["llm_result"])
	                for tract_data in result.get("tracts", []):
	                    tract_elem = ET.SubElement(tracts_elem, "tract")
	                    tract_elem.set("id", str(tract_data.get("tract", "")))

	                    # Add tract details.
	                    # NOTE(review): keys are used verbatim as tag names; a key
	                    # that is not a valid XML name would yield malformed XML.
	                    for key, value in tract_data.items():
	                        if key != "tract":
	                            detail_elem = ET.SubElement(tract_elem, key.replace("_", ""))
	                            detail_elem.text = str(value)
	        except Exception:
	            # Fallback to simple format if parsing fails
	            content_elem = ET.SubElement(tool_elem, "content")
	            content_elem.text = ResponseFormatter._clean_markdown(str(data))
	        else:
	            tool_elem.append(tracts_elem)

	    @staticmethod
	    def _format_mermaid_data(tool_elem: ET.Element, data: Dict) -> None:
	        """Add a ``<diagram>`` element holding cleaned mermaid source.

	        *data* may be a dict (``content`` is preferred, then
	        ``mermaid_diagram``) or a plain string. Code fences, a leading
	        ``tool response:`` prefix before the first ``{`` and a trailing
	        ``.`` after a closing ``}`` are stripped.

	        On any error the failure is logged and *tool_elem* receives a
	        ``<content>`` element with a fixed error message instead.
	        """
	        try:
	            # Extract content from data
	            content = ""
	            if isinstance(data, dict):
	                content = data.get("content", data.get("mermaid_diagram", ""))
	            elif isinstance(data, str):
	                content = data

	            # Clean any remaining markdown/JSON formatting.
	            # The alternation removes an opening "```mermaid" fence and any
	            # closing "```" fence together with the adjacent whitespace.
	            content = re.sub(r'```mermaid\s*|\s*```', '', content)
	            content = re.sub(r'tool response:.*?{', '{', content)
	            content = re.sub(r'}\s*\.$', '}', content)

	            # Create the element only once cleaning succeeded, so the error
	            # path does not leave an empty <diagram> behind.
	            diagram_elem = ET.SubElement(tool_elem, "diagram")
	            diagram_elem.text = content.strip()

	        except Exception as e:
	            logger.error(f"Error formatting mermaid data: {e}")
	            content_elem = ET.SubElement(tool_elem, "content")
	            content_elem.text = "Error formatting diagram"

	    @staticmethod
	    def _format_health_data(tool_elem: ET.Element, data: Dict) -> None:
	        """Add one element per top-level key of health data to *tool_elem*.

	        Used for WISQARS, WONDER and NCHS tool output. Dict values become
	        nested child elements; other values become the element's text.
	        Underscores are removed from keys to form tag names. Non-dict *data*
	        produces no output.

	        If anything goes wrong, *tool_elem* instead receives a single
	        ``<content>`` element with the markdown-cleaned ``str(data)``.
	        """
	        try:
	            # Collect detached elements first; attach only if all succeed so
	            # the fallback is never mixed with partial output.
	            built: List[ET.Element] = []
	            if isinstance(data, dict):
	                for key, value in data.items():
	                    category_elem = ET.Element(key.replace("_", ""))
	                    if isinstance(value, dict):
	                        for sub_key, sub_value in value.items():
	                            sub_elem = ET.SubElement(category_elem, sub_key.replace("_", ""))
	                            sub_elem.text = str(sub_value)
	                    else:
	                        category_elem.text = str(value)
	                    built.append(category_elem)
	        except Exception:
	            content_elem = ET.SubElement(tool_elem, "content")
	            content_elem.text = ResponseFormatter._clean_markdown(str(data))
	        else:
	            tool_elem.extend(built)

	    @staticmethod
	    def _extract_tool_outputs(observation: str) -> Dict[str, Any]:
	        """Extract tool outputs embedded in a JSON *observation* string.

	        The observation is parsed as a JSON object. Every string value that
	        contains the text ``llm_result`` is itself parsed as JSON and stored
	        under its key; if that inner parse fails the raw string is kept.

	        Returns:
	            A dict of extracted outputs; empty when *observation* is not a
	            string, is not valid JSON, or is not a JSON object.
	        """
	        tool_outputs: Dict[str, Any] = {}
	        if not isinstance(observation, str):
	            return tool_outputs

	        try:
	            data = json.loads(observation)
	        except (ValueError, RecursionError):
	            # Best effort: an unparseable observation simply has no outputs.
	            return tool_outputs

	        if not isinstance(data, dict):
	            return tool_outputs

	        for key, value in data.items():
	            if isinstance(value, str) and "llm_result" in value:
	                try:
	                    tool_outputs[key] = json.loads(value)
	                except (ValueError, RecursionError):
	                    tool_outputs[key] = value
	        return tool_outputs

	    @staticmethod
	    def format_message(message: str) -> Tuple[str, str]:
	        """Format an agent message for both terminal and XML output.

	        Returns:
	            ``(stripped_message, xml_string)`` where the XML wraps the
	            stripped message in ``<agent_response><message>``.
	        """
	        # Terminal format
	        terminal_output = message.strip()

	        # XML format
	        root = ET.Element("agent_response")
	        msg_elem = ET.SubElement(root, "message")
	        msg_elem.text = terminal_output

	        xml_output = ET.tostring(root, encoding='unicode')
	        return terminal_output, xml_output

	    @staticmethod
	    def format_error(error: str) -> Tuple[str, str]:
	        """Format an error message for both terminal and XML output.

	        *error* is converted with ``str()`` so that an exception object can
	        be passed directly; without this, ``ET.tostring`` rejects non-string
	        element text.

	        Returns:
	            ``("Error: <text>", xml_string)`` where the XML wraps the text in
	            ``<agent_response><error>``.
	        """
	        error_text = str(error)

	        # Terminal format
	        terminal_output = f"Error: {error_text}"

	        # XML format
	        root = ET.Element("agent_response")
	        error_elem = ET.SubElement(root, "error")
	        error_elem.text = error_text

	        xml_output = ET.tostring(root, encoding='unicode')
	        return terminal_output, xml_output

	    @staticmethod
	    def _clean_markdown(text: str) -> str:
	        """Strip markdown formatting from *text*.

	        Removes fenced code blocks entirely, deletes the characters
	        ``* _ ` #`` wherever they occur, trims surrounding whitespace and
	        collapses runs of three or more newlines into two.
	        """
	        text = re.sub(r'```.*?```', '', text, flags=re.DOTALL)
	        text = re.sub(r'[*_`#]', '', text)
	        return re.sub(r'\n{3,}', '\n\n', text.strip())

	    @staticmethod
	    def _as_text(value: Any) -> Optional[str]:
	        """Return *value* unchanged if it is ``None`` or a str, else ``str(value)``."""
	        if value is None or isinstance(value, str):
	            return value
	        return str(value)