Severian committed on
Commit
896eafc
·
verified ·
1 Parent(s): c5a4eb2

Update json_parser.py

Browse files
Files changed (1) hide show
  1. json_parser.py +55 -19
json_parser.py CHANGED
@@ -60,7 +60,7 @@ class SSEParser:
60
  self.logger = setup_logger("sse_parser")
61
 
62
  def parse_sse_event(self, data: str) -> Optional[Dict]:
63
- """Parse SSE event data and return cleaned dictionary"""
64
  self.logger.debug("Parsing SSE event")
65
 
66
  try:
@@ -71,19 +71,32 @@ class SSEParser:
71
  # Parse JSON data
72
  parsed_data = json.loads(data)
73
 
74
- # Clean tool outputs if present
75
  if "observation" in parsed_data:
76
  try:
77
  observation = parsed_data["observation"]
78
  if observation and isinstance(observation, str):
79
- tool_data = json.loads(observation)
80
- # Extract relevant tool output
81
- for key, value in tool_data.items():
82
- if isinstance(value, str) and "llm_result" in value:
83
- tool_result = json.loads(value)["llm_result"]
84
- parsed_data["observation"] = self.clean_tool_output(tool_result)
85
- except:
86
- pass # Keep original observation if parsing fails
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  return parsed_data
89
 
@@ -94,12 +107,35 @@ class SSEParser:
94
  self.logger.error(f"Parse error: {str(e)}")
95
  return None
96
 
97
- def clean_tool_output(self, output: str) -> str:
98
- """Clean tool output by removing markdown and other formatting"""
99
- # Remove markdown code blocks
100
- output = re.sub(r'```.*?```', '', output, flags=re.DOTALL)
101
- # Remove other markdown formatting
102
- output = re.sub(r'[*_`#]', '', output)
103
- # Clean up whitespace
104
- output = re.sub(r'\n{3,}', '\n\n', output.strip())
105
- return output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  self.logger = setup_logger("sse_parser")
61
 
62
  def parse_sse_event(self, data: str) -> Optional[Dict]:
63
+ """Parse SSE event data with improved mermaid handling"""
64
  self.logger.debug("Parsing SSE event")
65
 
66
  try:
 
71
  # Parse JSON data
72
  parsed_data = json.loads(data)
73
 
74
+ # Enhanced mermaid diagram handling
75
  if "observation" in parsed_data:
76
  try:
77
  observation = parsed_data["observation"]
78
  if observation and isinstance(observation, str):
79
+ if "mermaid_diagram" in observation:
80
+ try:
81
+ tool_data = json.loads(observation)
82
+ if isinstance(tool_data, dict):
83
+ mermaid_content = tool_data.get(
84
+ "mermaid_diagram", ""
85
+ )
86
+ if mermaid_content:
87
+ # Clean and format mermaid content
88
+ cleaned_content = self.clean_mermaid_content(
89
+ mermaid_content
90
+ )
91
+ parsed_data["observation"] = json.dumps({
92
+ "mermaid_diagram": cleaned_content
93
+ })
94
+ except json.JSONDecodeError:
95
+ self.logger.warning(
96
+ "Failed to parse mermaid diagram content"
97
+ )
98
+ except Exception as e:
99
+ self.logger.error(f"Error processing observation: {str(e)}")
100
 
101
  return parsed_data
102
 
 
107
  self.logger.error(f"Parse error: {str(e)}")
108
  return None
109
 
110
def clean_mermaid_content(self, content: str) -> str:
    """Clean and format mermaid diagram content.

    Unwraps a ``{"mermaid_diagram": ...}`` JSON envelope (checked both
    before and after decoration is stripped), removes Markdown code fences
    and a leading ``tool response:`` prefix, then normalises whitespace.

    Args:
        content: Raw diagram text, possibly fenced and/or JSON-wrapped.

    Returns:
        The cleaned diagram source. Line breaks and per-line indentation
        are kept; blank lines, trailing whitespace and repeated interior
        whitespace are removed. Non-string input is returned unchanged.
    """
    if not isinstance(content, str):
        # Nothing textual to clean; hand the value back untouched rather
        # than letting the regex calls below fail on it.
        self.logger.warning(
            "Mermaid content is not a string (%s); leaving it unchanged",
            type(content).__name__,
        )
        return content

    def _unwrap(text: str) -> str:
        """Return the "mermaid_diagram" string inside a JSON object, else *text*."""
        if not text.strip().startswith('{'):
            return text
        try:
            parsed = json.loads(text)
        except (ValueError, RecursionError):
            # Looks like JSON but is not (json.JSONDecodeError is a
            # ValueError): treat it as plain diagram text and keep cleaning.
            return text
        inner = parsed.get("mermaid_diagram") if isinstance(parsed, dict) else None
        return inner if isinstance(inner, str) else text

    # If content is a JSON string, pull the diagram out of it
    content = _unwrap(content)

    # Remove markdown code blocks
    content = re.sub(r'```mermaid\s*|\s*```', '', content)

    # Remove "tool response:" and any JSON wrapper
    content = re.sub(r'tool response:.*?{', '{', content)
    content = re.sub(r'}\s*\.$', '}', content)

    # If still JSON after stripping the decoration, extract the diagram
    content = _unwrap(content)

    # Final cleanup. Whitespace is normalised line by line instead of being
    # collapsed globally: mermaid separates statements with newlines, so
    # flattening the text into a single line makes the diagram unparsable.
    cleaned_lines = []
    for line in content.strip().splitlines():
        body = line.strip()
        if not body:
            continue
        indent = line[:len(line) - len(line.lstrip())]
        cleaned_lines.append(indent + re.sub(r'\s+', ' ', body))
    return '\n'.join(cleaned_lines)