pvanand committed on
Commit
db1b7b5
·
verified ·
1 Parent(s): 3c03878

Update document_generator.py

Browse files
Files changed (1) hide show
  1. document_generator.py +74 -68
document_generator.py CHANGED
@@ -40,29 +40,28 @@ FORMAT YOUR OUTPUT AS MARKDOWN ENCLOSED IN <response></response> tags
40
  DOCUMENT_SECTION_PROMPT_USER = """<prompt>Output the content for the section "{section_or_subsection_title}" formatted as markdown. Follow this instruction: {content_instruction}</prompt>"""
41
 
42
  # File: app.py
43
- import os
44
  import json
45
  import re
46
- import time
47
  import asyncio
48
  from typing import List, Dict, Optional, Any, Callable
49
  from openai import OpenAI
50
  import logging
51
  import functools
52
- from fastapi import APIRouter, HTTPException
 
53
  from pydantic import BaseModel
54
  from fastapi_cache.decorator import cache
55
- from starlette.responses import StreamingResponse
56
 
57
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
58
  logger = logging.getLogger(__name__)
59
 
60
  def log_execution(func: Callable) -> Callable:
61
  @functools.wraps(func)
62
- async def wrapper(*args: Any, **kwargs: Any) -> Any:
63
  logger.info(f"Executing {func.__name__}")
64
  try:
65
- result = await func(*args, **kwargs)
66
  logger.info(f"{func.__name__} completed successfully")
67
  return result
68
  except Exception as e:
@@ -78,7 +77,7 @@ class AIClient:
78
  )
79
 
80
  @log_execution
81
- async def generate_response(
82
  self,
83
  messages: List[Dict[str, str]],
84
  model: str = "openai/gpt-4o-mini",
@@ -86,14 +85,12 @@ class AIClient:
86
  ) -> Optional[str]:
87
  if not messages:
88
  return None
89
- loop = asyncio.get_event_loop()
90
- response = await loop.run_in_executor(None, functools.partial(
91
- self.client.chat.completions.create,
92
  model=model,
93
  messages=messages,
94
  max_tokens=max_tokens,
95
  stream=False
96
- ))
97
  return response.choices[0].message.content
98
 
99
  class DocumentGenerator:
@@ -123,14 +120,14 @@ class DocumentGenerator:
123
  return content.lstrip()
124
 
125
  @log_execution
126
- async def generate_document_outline(self, query: str, max_retries: int = 3) -> Optional[Dict]:
127
  messages = [
128
  {"role": "system", "content": DOCUMENT_OUTLINE_PROMPT_SYSTEM},
129
  {"role": "user", "content": DOCUMENT_OUTLINE_PROMPT_USER.format(query=query)}
130
  ]
131
 
132
  for attempt in range(max_retries):
133
- outline_response = await self.ai_client.generate_response(messages, model="openai/gpt-4o")
134
  outline_json_text = self.extract_between_tags(outline_response, "output")
135
 
136
  try:
@@ -145,7 +142,7 @@ class DocumentGenerator:
145
  return None
146
 
147
  @log_execution
148
- async def generate_content(self, title: str, content_instruction: str, section_number: str) -> str:
149
  self.content_messages.append({
150
  "role": "user",
151
  "content": DOCUMENT_SECTION_PROMPT_USER.format(
@@ -153,7 +150,7 @@ class DocumentGenerator:
153
  content_instruction=content_instruction
154
  )
155
  })
156
- section_response = await self.ai_client.generate_response(self.content_messages)
157
  content = self.extract_between_tags(section_response, "response")
158
  content = self.remove_duplicate_title(content, title, section_number)
159
  self.content_messages.append({
@@ -162,44 +159,6 @@ class DocumentGenerator:
162
  })
163
  return content
164
 
165
- @log_execution
166
- async def generate_full_document(self, document_outline: Dict, query: str):
167
- self.document_outline = document_outline
168
-
169
- overall_objective = query
170
- document_layout = json.dumps(self.document_outline, indent=2)
171
-
172
- self.content_messages = [
173
- {
174
- "role": "system",
175
- "content": DOCUMENT_SECTION_PROMPT_SYSTEM.format(
176
- overall_objective=overall_objective,
177
- document_layout=document_layout
178
- )
179
- }
180
- ]
181
-
182
- for section in self.document_outline["Document"].get("Sections", []):
183
- section_title = section.get("Title", "")
184
- section_number = section.get("SectionNumber", "")
185
- content_instruction = section.get("Content", "")
186
- logger.info(f"Generating content for section: {section_title}")
187
- section["Content"] = await self.generate_content(section_title, content_instruction, section_number)
188
- yield json.dumps({"type": "document_section", "content": section}) + "\n"
189
-
190
- for subsection in section.get("Subsections", []):
191
- subsection_title = subsection.get("Title", "")
192
- subsection_number = subsection.get("SectionNumber", "")
193
- subsection_content_instruction = subsection.get("Content", "")
194
- logger.info(f"Generating content for subsection: {subsection_title}")
195
- subsection["Content"] = await self.generate_content(subsection_title, subsection_content_instruction, subsection_number)
196
- yield json.dumps({"type": "document_subsection", "content": subsection}) + "\n"
197
-
198
- # Generate the complete markdown document
199
- full_document = self.document_outline
200
- markdown_document = MarkdownConverter.convert_to_markdown(full_document["Document"])
201
- yield json.dumps({"type": "complete_document", "content": markdown_document}) + "\n"
202
-
203
  class MarkdownConverter:
204
  @staticmethod
205
  def slugify(text: str) -> str:
@@ -225,17 +184,14 @@ class MarkdownConverter:
225
 
226
  @classmethod
227
  def convert_to_markdown(cls, document: Dict) -> str:
228
- # First page with centered content
229
  markdown = "<div style='text-align: center; padding-top: 33vh;'>\n\n"
230
  markdown += f"<h1 style='color: #2c3e50; border-bottom: 2px solid #3498db; padding-bottom: 10px; display: inline-block;'>{document['Title']}</h1>\n\n"
231
  markdown += f"<p style='color: #7f8c8d;'><em>By {document['Author']}</em></p>\n\n"
232
  markdown += f"<p style='color: #95a5a6;'>Version {document['Version']} | {document['Date']}</p>\n\n"
233
  markdown += "</div>\n\n"
234
 
235
- # Table of Contents on the second page
236
  markdown += cls.generate_toc(document['Sections'])
237
 
238
- # Main content
239
  markdown += "<div style='max-width: 800px; margin: 0 auto; font-family: \"Segoe UI\", Arial, sans-serif; line-height: 1.6;'>\n\n"
240
 
241
  for section in document['Sections']:
@@ -266,8 +222,56 @@ class MarkdownDocumentRequest(BaseModel):
266
  json_document: Dict
267
  query: str
268
 
269
- class MarkdownDocumentResponse(BaseModel):
270
- markdown_document: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
  @cache(expire=600*24*7)
273
  @router.post("/generate-document/json", response_model=JsonDocumentResponse)
@@ -276,8 +280,7 @@ async def generate_document_outline_endpoint(request: DocumentRequest):
276
  document_generator = DocumentGenerator(ai_client)
277
 
278
  try:
279
- # Generate the document outline
280
- json_document = await document_generator.generate_document_outline(request.query)
281
 
282
  if json_document is None:
283
  raise HTTPException(status_code=500, detail="Failed to generate a valid document outline")
@@ -286,20 +289,23 @@ async def generate_document_outline_endpoint(request: DocumentRequest):
286
  except Exception as e:
287
  raise HTTPException(status_code=500, detail=str(e))
288
 
289
- @router.post("/generate-document/markdown")
290
- async def generate_markdown_document_endpoint(request: MarkdownDocumentRequest):
291
  ai_client = AIClient()
292
  document_generator = DocumentGenerator(ai_client)
293
 
294
- async def event_stream():
295
  try:
296
- # Generate the full document content and stream it
297
- async for section in document_generator.generate_full_document(request.json_document, request.query):
298
- yield section
299
  except Exception as e:
300
- yield json.dumps({"type": "error", "message": str(e)}) + "\n"
 
 
301
 
302
- return StreamingResponse(event_stream(), media_type="application/json")
 
 
303
 
304
  @router.post("/generate-document-test", response_model=MarkdownDocumentResponse)
305
  async def test_generate_document_endpoint(request: DocumentRequest):
 
40
  DOCUMENT_SECTION_PROMPT_USER = """<prompt>Output the content for the section "{section_or_subsection_title}" formatted as markdown. Follow this instruction: {content_instruction}</prompt>"""
41
 
42
  # File: app.py
43
+ import os
44
  import json
45
  import re
 
46
  import asyncio
47
  from typing import List, Dict, Optional, Any, Callable
48
  from openai import OpenAI
49
  import logging
50
  import functools
51
+ from fastapi import APIRouter, HTTPException, Request
52
+ from fastapi.responses import StreamingResponse
53
  from pydantic import BaseModel
54
  from fastapi_cache.decorator import cache
 
55
 
56
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
57
  logger = logging.getLogger(__name__)
58
 
59
  def log_execution(func: Callable) -> Callable:
60
  @functools.wraps(func)
61
+ def wrapper(*args: Any, **kwargs: Any) -> Any:
62
  logger.info(f"Executing {func.__name__}")
63
  try:
64
+ result = func(*args, **kwargs)
65
  logger.info(f"{func.__name__} completed successfully")
66
  return result
67
  except Exception as e:
 
77
  )
78
 
79
  @log_execution
80
+ def generate_response(
81
  self,
82
  messages: List[Dict[str, str]],
83
  model: str = "openai/gpt-4o-mini",
 
85
  ) -> Optional[str]:
86
  if not messages:
87
  return None
88
+ response = self.client.chat.completions.create(
 
 
89
  model=model,
90
  messages=messages,
91
  max_tokens=max_tokens,
92
  stream=False
93
+ )
94
  return response.choices[0].message.content
95
 
96
  class DocumentGenerator:
 
120
  return content.lstrip()
121
 
122
  @log_execution
123
+ def generate_document_outline(self, query: str, max_retries: int = 3) -> Optional[Dict]:
124
  messages = [
125
  {"role": "system", "content": DOCUMENT_OUTLINE_PROMPT_SYSTEM},
126
  {"role": "user", "content": DOCUMENT_OUTLINE_PROMPT_USER.format(query=query)}
127
  ]
128
 
129
  for attempt in range(max_retries):
130
+ outline_response = self.ai_client.generate_response(messages, model="openai/gpt-4o")
131
  outline_json_text = self.extract_between_tags(outline_response, "output")
132
 
133
  try:
 
142
  return None
143
 
144
  @log_execution
145
+ def generate_content(self, title: str, content_instruction: str, section_number: str) -> str:
146
  self.content_messages.append({
147
  "role": "user",
148
  "content": DOCUMENT_SECTION_PROMPT_USER.format(
 
150
  content_instruction=content_instruction
151
  )
152
  })
153
+ section_response = self.ai_client.generate_response(self.content_messages)
154
  content = self.extract_between_tags(section_response, "response")
155
  content = self.remove_duplicate_title(content, title, section_number)
156
  self.content_messages.append({
 
159
  })
160
  return content
161
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  class MarkdownConverter:
163
  @staticmethod
164
  def slugify(text: str) -> str:
 
184
 
185
  @classmethod
186
  def convert_to_markdown(cls, document: Dict) -> str:
 
187
  markdown = "<div style='text-align: center; padding-top: 33vh;'>\n\n"
188
  markdown += f"<h1 style='color: #2c3e50; border-bottom: 2px solid #3498db; padding-bottom: 10px; display: inline-block;'>{document['Title']}</h1>\n\n"
189
  markdown += f"<p style='color: #7f8c8d;'><em>By {document['Author']}</em></p>\n\n"
190
  markdown += f"<p style='color: #95a5a6;'>Version {document['Version']} | {document['Date']}</p>\n\n"
191
  markdown += "</div>\n\n"
192
 
 
193
  markdown += cls.generate_toc(document['Sections'])
194
 
 
195
  markdown += "<div style='max-width: 800px; margin: 0 auto; font-family: \"Segoe UI\", Arial, sans-serif; line-height: 1.6;'>\n\n"
196
 
197
  for section in document['Sections']:
 
222
  json_document: Dict
223
  query: str
224
 
225
async def generate_document_stream(document_generator: DocumentGenerator, document_outline: Dict, query: str):
    """Stream a document's generated content as newline-delimited JSON events.

    Walks the outline's sections, and each section's subsections, in order.
    For every item it asks ``document_generator.generate_content`` for the
    text, stores that text back into the outline under ``"Content"``, and
    yields one JSON line. After the last item a final ``complete_document``
    event carries the result of ``MarkdownConverter.convert_to_markdown``.

    Args:
        document_generator: Generator whose ``document_outline`` and
            ``content_messages`` attributes are overwritten by this call and
            whose ``generate_content`` method produces each item's text.
        document_outline: Outline dict with a top-level ``"Document"`` entry.
            Its section and subsection dicts are updated in place.
        query: The user's request; inserted into the system prompt as the
            overall objective.

    Yields:
        Newline-terminated JSON strings: one ``document_section`` event of the
        form ``{"section_number", "section_title", "content"}`` per section
        and per subsection, then one ``complete_document`` event.

    Raises:
        KeyError: If the outline has no ``"Document"`` entry.
    """
    document_generator.document_outline = document_outline
    document_generator.content_messages = [
        {
            "role": "system",
            "content": DOCUMENT_SECTION_PROMPT_SYSTEM.format(
                overall_objective=query,
                document_layout=json.dumps(document_generator.document_outline, indent=2),
            ),
        }
    ]

    loop = asyncio.get_running_loop()

    async def write_item(item: Dict, label: str) -> str:
        """Generate content for one outline item and return its JSON event line."""
        title = item.get("Title", "")
        number = item.get("SectionNumber", "")
        instruction = item.get("Content", "")
        logger.info("Generating content for %s: %s", label, title)
        # generate_content is a synchronous, blocking API call; run it in the
        # default executor so the event loop stays free while it is in flight.
        # Items are still awaited one at a time, so content_messages is only
        # ever touched by a single call.
        content = await loop.run_in_executor(
            None,
            functools.partial(document_generator.generate_content, title, instruction, number),
        )
        item["Content"] = content
        # NOTE(review): subsections are also reported as "document_section";
        # confirm with stream consumers that no separate subsection type is expected.
        return json.dumps({
            "type": "document_section",
            "content": {
                "section_number": number,
                "section_title": title,
                "content": content,
            },
        }) + "\n"

    for section in document_generator.document_outline["Document"].get("Sections", []):
        yield await write_item(section, "section")
        for subsection in section.get("Subsections", []):
            yield await write_item(subsection, "subsection")

    markdown_document = MarkdownConverter.convert_to_markdown(document_generator.document_outline["Document"])
    yield json.dumps({"type": "complete_document", "content": markdown_document}) + "\n"
275
 
276
  @cache(expire=600*24*7)
277
  @router.post("/generate-document/json", response_model=JsonDocumentResponse)
 
280
  document_generator = DocumentGenerator(ai_client)
281
 
282
  try:
283
+ json_document = document_generator.generate_document_outline(request.query)
 
284
 
285
  if json_document is None:
286
  raise HTTPException(status_code=500, detail="Failed to generate a valid document outline")
 
289
  except Exception as e:
290
  raise HTTPException(status_code=500, detail=str(e))
291
 
292
@router.post("/generate-document/markdown-stream")
async def generate_markdown_document_stream_endpoint(request: MarkdownDocumentRequest):
    """Stream generated document content for an outline as NDJSON.

    Builds a fresh ``AIClient`` and ``DocumentGenerator`` for the request and
    returns a ``StreamingResponse`` that relays every event produced by
    ``generate_document_stream`` for ``request.json_document`` and
    ``request.query``.

    Args:
        request: Body carrying the outline (``json_document``) and the
            original ``query``.

    Returns:
        A ``StreamingResponse`` with media type ``application/x-ndjson``.
        If generation fails, the stream ends with a single
        ``{"type": "error", "content": <message>}`` event.
    """
    ai_client = AIClient()
    document_generator = DocumentGenerator(ai_client)

    async def stream_generator():
        try:
            async for chunk in generate_document_stream(document_generator, request.json_document, request.query):
                yield chunk
        except Exception as e:
            # The failure is reported to the client in-band as a final event;
            # log it too so the failure and its traceback are visible server-side.
            logger.exception("Document streaming failed")
            yield json.dumps({"type": "error", "content": str(e)}) + "\n"

    return StreamingResponse(stream_generator(), media_type="application/x-ndjson")
305
 
306
+ ###########################################
307
+ class MarkdownDocumentResponse(BaseModel):
308
+ markdown_document: str
309
 
310
  @router.post("/generate-document-test", response_model=MarkdownDocumentResponse)
311
  async def test_generate_document_endpoint(request: DocumentRequest):