Severian committed on
Commit
a47d087
·
verified ·
1 Parent(s): 1ea16aa

Update json_parser.py

Browse files
Files changed (1) hide show
  1. json_parser.py +92 -52
json_parser.py CHANGED
@@ -4,6 +4,7 @@ from dataclasses import dataclass
4
  from enum import Enum
5
  import json
6
  import re
 
7
 
8
  logger = setup_logger()
9
 
@@ -39,133 +40,172 @@ class SSEParser:
39
  def _clean_mermaid_content(self, content: str) -> Optional[str]:
40
  """Clean and extract mermaid diagram content"""
41
  try:
 
 
 
42
  # Handle tool output format
43
  if isinstance(content, dict) and "tool_output" in content:
 
44
  content = content["tool_output"]
 
45
 
46
  # Remove tool response prefix/suffix if present
47
  if isinstance(content, str):
 
48
  if "tool response:" in content:
 
49
  content = content.split("tool response:", 1)[1].strip()
50
  if content.endswith('.'):
51
  content = content[:-1]
 
52
 
53
  # Parse JSON if present
54
  try:
55
  if isinstance(content, str):
 
56
  data = json.loads(content)
 
57
  else:
58
  data = content
 
59
 
60
  # Handle different mermaid output formats
61
  if "mermaid_output" in data:
 
62
  content = data["mermaid_output"]
63
  elif "mermaid_diagram" in data:
 
64
  content = data["mermaid_diagram"]
65
 
66
  # If content is still JSON string, parse again
67
  if isinstance(content, str) and content.startswith('{'):
 
68
  try:
69
  data = json.loads(content)
70
  if "mermaid_output" in data:
71
  content = data["mermaid_output"]
 
72
  elif "mermaid_diagram" in data:
73
  content = data["mermaid_diagram"]
74
- except:
75
- pass
 
76
 
77
- except json.JSONDecodeError:
78
- pass
79
 
80
  # Clean up markdown formatting
81
  if isinstance(content, str):
 
82
  content = content.replace("```mermaid\n", "").replace("\n```", "")
83
  content = content.strip()
84
 
85
  # Remove any remaining JSON artifacts
86
  if content.startswith('{'):
 
87
  try:
88
  data = json.loads(content)
89
  if isinstance(data, dict):
90
  content = next(iter(data.values()))
91
- except:
92
- pass
 
93
 
 
94
  return content
95
 
 
96
  return None
97
 
98
  except Exception as e:
99
  self.logger.error(f"Error cleaning mermaid content: {str(e)}")
100
- self.logger.debug(f"Original content: {content}")
 
101
  return None
102
 
103
  def parse_sse_event(self, data: str) -> Optional[Dict]:
104
  """Parse SSE event data and format for frontend consumption"""
105
  try:
 
 
 
 
 
106
  # Extract JSON content from SSE data
107
  json_content = self._extract_json_content(data)
108
  if not json_content:
 
109
  return None
110
 
111
- # Parse JSON content
112
- parsed_data = json.loads(json_content)
 
 
 
 
 
 
 
113
 
114
- # Get event details
115
- event_type = parsed_data.get("event")
116
- message_id = parsed_data.get("message_id")
117
 
118
- # Format based on event type
119
- if event_type == "agent_message":
120
- return {
121
- "type": "message",
122
- "content": parsed_data.get("answer", ""),
123
- "message_id": message_id
124
- }
125
-
126
- elif event_type == "agent_thought":
127
- thought = parsed_data.get("thought", "")
128
- observation = parsed_data.get("observation", "")
129
- tool = parsed_data.get("tool", "")
130
- tool_input = parsed_data.get("tool_input", "")
131
 
132
- # Handle tool-specific formatting
133
- if tool == "mermaid_diagrams" or "mermaid" in str(tool_input).lower():
134
- try:
135
- cleaned_content = self._clean_mermaid_content(observation)
136
- if cleaned_content:
137
- return {
138
- "type": "tool_output",
139
- "tool": "mermaid",
140
- "content": cleaned_content,
141
- "message_id": message_id
142
- }
143
- except Exception as e:
144
- self.logger.error(f"Failed to parse mermaid data: {str(e)}")
145
- self.logger.debug(f"Raw observation: {observation}")
146
 
 
 
147
  return {
148
- "type": "thought",
149
- "content": {
150
- "thought": thought,
151
- "observation": observation,
152
- "tool": tool
153
- },
154
- "message_id": message_id
155
  }
156
 
157
- elif event_type == "message_end":
 
 
 
 
 
 
 
 
 
 
 
158
  return {
159
- "type": "end",
160
- "message_id": message_id,
161
- "metadata": parsed_data.get("metadata", {})
162
  }
163
 
 
 
 
 
 
 
 
 
 
 
 
164
  return None
165
 
166
  except Exception as e:
167
- self.logger.error(f"Parse error: {str(e)}")
168
- self.logger.debug(f"Raw data: {data}")
169
  return None
170
 
171
  def _process_observation(self, data: Dict) -> Dict:
 
4
  from enum import Enum
5
  import json
6
  import re
7
+ import traceback
8
 
9
  logger = setup_logger()
10
 
 
40
  def _clean_mermaid_content(self, content: str) -> Optional[str]:
41
  """Clean and extract mermaid diagram content"""
42
  try:
43
+ self.logger.debug(f"Starting mermaid content cleaning. Input type: {type(content)}")
44
+ self.logger.debug(f"Initial content: {content[:200]}...") # Log first 200 chars
45
+
46
  # Handle tool output format
47
  if isinstance(content, dict) and "tool_output" in content:
48
+ self.logger.debug("Found tool_output in dict, extracting...")
49
  content = content["tool_output"]
50
+ self.logger.debug(f"Extracted tool_output: {content[:200]}...")
51
 
52
  # Remove tool response prefix/suffix if present
53
  if isinstance(content, str):
54
+ self.logger.debug("Processing string content...")
55
  if "tool response:" in content:
56
+ self.logger.debug("Found 'tool response:' prefix, removing...")
57
  content = content.split("tool response:", 1)[1].strip()
58
  if content.endswith('.'):
59
  content = content[:-1]
60
+ self.logger.debug(f"After prefix/suffix removal: {content[:200]}...")
61
 
62
  # Parse JSON if present
63
  try:
64
  if isinstance(content, str):
65
+ self.logger.debug("Attempting to parse content as JSON...")
66
  data = json.loads(content)
67
+ self.logger.debug(f"JSON parsed successfully. Keys: {data.keys()}")
68
  else:
69
  data = content
70
+ self.logger.debug(f"Using content as data directly. Type: {type(data)}")
71
 
72
  # Handle different mermaid output formats
73
  if "mermaid_output" in data:
74
+ self.logger.debug("Found mermaid_output format")
75
  content = data["mermaid_output"]
76
  elif "mermaid_diagram" in data:
77
+ self.logger.debug("Found mermaid_diagram format")
78
  content = data["mermaid_diagram"]
79
 
80
  # If content is still JSON string, parse again
81
  if isinstance(content, str) and content.startswith('{'):
82
+ self.logger.debug("Content still appears to be JSON, attempting second parse...")
83
  try:
84
  data = json.loads(content)
85
  if "mermaid_output" in data:
86
  content = data["mermaid_output"]
87
+ self.logger.debug("Extracted mermaid_output from second parse")
88
  elif "mermaid_diagram" in data:
89
  content = data["mermaid_diagram"]
90
+ self.logger.debug("Extracted mermaid_diagram from second parse")
91
+ except Exception as e:
92
+ self.logger.debug(f"Second JSON parse failed: {str(e)}")
93
 
94
+ except json.JSONDecodeError as e:
95
+ self.logger.debug(f"Initial JSON parse failed: {str(e)}")
96
 
97
  # Clean up markdown formatting
98
  if isinstance(content, str):
99
+ self.logger.debug("Cleaning markdown formatting...")
100
  content = content.replace("```mermaid\n", "").replace("\n```", "")
101
  content = content.strip()
102
 
103
  # Remove any remaining JSON artifacts
104
  if content.startswith('{'):
105
+ self.logger.debug("Attempting to clean remaining JSON artifacts...")
106
  try:
107
  data = json.loads(content)
108
  if isinstance(data, dict):
109
  content = next(iter(data.values()))
110
+ self.logger.debug("Extracted value from remaining JSON")
111
+ except Exception as e:
112
+ self.logger.debug(f"Final JSON cleanup failed: {str(e)}")
113
 
114
+ self.logger.debug(f"Final cleaned content: {content[:200]}...")
115
  return content
116
 
117
+ self.logger.warning("Content not in string format after processing")
118
  return None
119
 
120
  except Exception as e:
121
  self.logger.error(f"Error cleaning mermaid content: {str(e)}")
122
+ self.logger.error(f"Original content: {content}")
123
+ self.logger.error(f"Stack trace: {traceback.format_exc()}")
124
  return None
125
 
126
  def parse_sse_event(self, data: str) -> Optional[Dict]:
127
  """Parse SSE event data and format for frontend consumption"""
128
  try:
129
+ self.logger.debug(f"Parsing SSE event. Raw data length: {len(data)}")
130
+
131
+ # Clean up the data format - remove extra data: prefixes
132
+ data = data.replace('data: data:', 'data:').replace('\r\n', '\n')
133
+
134
  # Extract JSON content from SSE data
135
  json_content = self._extract_json_content(data)
136
  if not json_content:
137
+ self.logger.debug("No JSON content found in SSE data")
138
  return None
139
 
140
+ # Handle text-wrapped JSON
141
+ if json_content.startswith('{"text":'):
142
+ try:
143
+ wrapper = json.loads(json_content)
144
+ json_content = wrapper.get("text", "")
145
+ except:
146
+ pass
147
+
148
+ self.logger.debug(f"Cleaned JSON content: {json_content[:200]}...")
149
 
150
+ # Parse XML content if present
151
+ if '<agent_response>' in json_content:
152
+ return self._parse_xml_response(json_content)
153
 
154
+ # Parse JSON content
155
+ try:
156
+ parsed_data = json.loads(json_content)
157
+ self.logger.debug(f"Parsed data keys: {parsed_data.keys()}")
 
 
 
 
 
 
 
 
 
158
 
159
+ # Handle tool outputs
160
+ if any(key in parsed_data for key in ['mermaid_output', 'mermaid_diagram']):
161
+ return {
162
+ "type": "tool_output",
163
+ "tool": "mermaid",
164
+ "content": self._clean_mermaid_content(json_content)
165
+ }
166
+
167
+ return parsed_data
 
 
 
 
 
168
 
169
+ except json.JSONDecodeError:
170
+ self.logger.debug("Failed to parse as JSON, treating as raw content")
171
  return {
172
+ "type": "message",
173
+ "content": json_content
 
 
 
 
 
174
  }
175
 
176
+ except Exception as e:
177
+ self.logger.error(f"Parse error: {str(e)}")
178
+ self.logger.error(f"Raw data: {data}")
179
+ self.logger.error(f"Stack trace: {traceback.format_exc()}")
180
+ return None
181
+
182
+ def _parse_xml_response(self, content: str) -> Optional[Dict]:
183
+ """Parse XML response format"""
184
+ try:
185
+ # Extract message content
186
+ message_match = re.search(r'<message>(.*?)</message>', content, re.DOTALL)
187
+ if message_match:
188
  return {
189
+ "type": "message",
190
+ "content": message_match.group(1).strip()
 
191
  }
192
 
193
+ # Extract tool output content
194
+ tool_match = re.search(r'<tool_output.*?>(.*?)</tool_output>', content, re.DOTALL)
195
+ if tool_match:
196
+ tool_content = tool_match.group(1)
197
+ if 'mermaid' in content.lower():
198
+ return {
199
+ "type": "tool_output",
200
+ "tool": "mermaid",
201
+ "content": self._clean_mermaid_content(tool_content)
202
+ }
203
+
204
  return None
205
 
206
  except Exception as e:
207
+ self.logger.error(f"XML parse error: {str(e)}")
208
+ self.logger.error(f"Content: {content}")
209
  return None
210
 
211
  def _process_observation(self, data: Dict) -> Dict: