whispersound committed on
Commit
d72cac0
·
verified ·
1 Parent(s): 8ec6c05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -85
app.py CHANGED
@@ -9,6 +9,9 @@ from datetime import datetime
9
  from zoneinfo import ZoneInfo
10
  from sklearn.feature_extraction.text import CountVectorizer
11
  from weasyprint import HTML, CSS
 
 
 
12
 
13
  # OpenAI API 클라이언트 설정
14
  openai.api_key = os.getenv("OPENAI_API_KEY")
@@ -200,40 +203,37 @@ def generate_blog_post(category, style, references1, references2, references3, t
200
 
201
  # HTML๋กœ ๋ณ€ํ™˜
202
  html_post = convert_to_html(filtered_post)
203
-
204
  return html_post
205
- except Exception as e:
206
- print(f"๊ธ€ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
207
- return ""
208
 
209
  def convert_to_html(text):
 
 
 
210
  lines = text.split('\n')
211
- html_lines = []
212
  for line in lines:
213
  line = line.strip()
214
- if line.startswith('####'):
215
- html_lines.append(f"<h4>{line[4:].strip()}</h4>")
216
- elif line.startswith('###'):
217
- html_lines.append(f"<h3>{line[3:].strip()}</h3>")
218
- elif line.startswith('##'):
219
- html_lines.append(f"<h2>{line[2:].strip()}</h2>")
220
- elif line.startswith('#'):
221
- html_lines.append(f"<h1>{line[1:].strip()}</h1>")
222
- elif line.startswith('- '): # ๋ฆฌ์ŠคํŠธ ์•„์ดํ…œ
223
- html_lines.append(f"<li>{line[2:]}</li>")
224
- elif line: # ์ผ๋ฐ˜ ํ…์ŠคํŠธ (๋นˆ ์ค„ ์ œ์™ธ)
225
- # '**'๋กœ ๊ฐ์‹ธ์ง„ ๋ถ€๋ถ„์„ <strong> ํƒœ๊ทธ๋กœ ๋ณ€ํ™˜
226
- line = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', line)
227
- html_lines.append(f"<p>{line}</p>")
228
- else: # ๋นˆ ์ค„
229
- html_lines.append("<br>")
 
 
 
230
 
231
- html_content = f"""
232
- <div style="font-family: Arial, sans-serif; line-height: 1.6; color: #333;">
233
- {"".join(html_lines)}
234
- </div>
235
- """
236
- return html_content
237
 
238
  def remove_unwanted_phrases(text):
239
  unwanted_phrases = [
@@ -495,65 +495,38 @@ def format_filename(text):
495
 
496
  def save_to_pdf(blog_post, user_topic):
497
  try:
498
- pdf = PDF()
499
- pdf.add_page()
500
- pdf.set_auto_page_break(auto=True, margin=15)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
 
502
- # HTML 태그를 파싱하기 위한 정규표현식
503
- tag_pattern = re.compile(r'<(/?)(\w+)([^>]*)>')
504
-
505
- # 현재 날짜와 시간을 가져옵니다 (대한민국 시간 기준)
506
  now = datetime.now(ZoneInfo("Asia/Seoul"))
507
- date_str = now.strftime("%y%m%d")
508
- time_str = now.strftime("%H%M")
509
-
510
- # 첫 번째 제목을 찾아 파일명으로 사용
511
- title_match = re.search(r'<h[1-3][^>]*>(.*?)</h[1-3]>', blog_post)
512
- title = title_match.group(1) if title_match else "Untitled"
513
- filename = f"{date_str}_{time_str}_{format_filename(title)}.pdf"
514
-
515
- # HTML ๋‚ด์šฉ์„ ์ˆœํšŒํ•˜๋ฉฐ PDF์— ์ž‘์„ฑ
516
- current_tag = ''
517
- buffer = ''
518
- is_bold = False
519
-
520
- for part in re.split(tag_pattern, blog_post):
521
- if part in ['h1', 'h2', 'h3', 'p', 'strong', 'li', 'br']:
522
- if buffer:
523
- if current_tag in ['h1', 'h2', 'h3']:
524
- pdf.set_font("NanumGothic", 'B', 16 if current_tag == 'h1' else 14)
525
- pdf.multi_cell(0, 10, buffer.strip(), align='L')
526
- pdf.ln(5)
527
- elif current_tag == 'p':
528
- pdf.set_font("NanumGothic", '', 11)
529
- pdf.multi_cell(0, 6, buffer.strip(), align='J')
530
- pdf.ln(5)
531
- elif current_tag == 'li':
532
- pdf.set_font("NanumGothic", '', 11)
533
- pdf.multi_cell(0, 6, "โ€ข " + buffer.strip(), align='J')
534
- elif current_tag == 'br':
535
- pdf.ln(5)
536
- buffer = ''
537
- current_tag = part
538
- elif part == 'strong':
539
- is_bold = True
540
- pdf.set_font("NanumGothic", 'B', 11)
541
- elif part == '/strong':
542
- is_bold = False
543
- pdf.set_font("NanumGothic", '', 11)
544
- elif part.startswith('/') or part == 'div':
545
- continue
546
- elif not tag_pattern.match(part) and part.strip():
547
- pdf.write(6, part.strip() + ' ')
548
-
549
- # 마지막 버퍼 처리
550
- if buffer:
551
- pdf.set_font("NanumGothic", '', 11)
552
- pdf.multi_cell(0, 6, buffer.strip(), align='J')
553
 
554
- # PDF 저장
555
- print(f"Saving PDF as: {filename}")
556
- pdf.output(filename, 'F')
557
  return filename
558
  except Exception as e:
559
  print(f"PDF ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
@@ -650,8 +623,8 @@ with gr.Blocks() as demo:
650
  pdf_output = gr.File(label="์ƒ์„ฑ๋œ PDF ํŒŒ์ผ")
651
 
652
  save_pdf_btn.click(
653
- fn=save_content_to_pdf,
654
- inputs=[output],
655
  outputs=[pdf_output],
656
  show_progress=True
657
  )
 
9
  from zoneinfo import ZoneInfo
10
  from sklearn.feature_extraction.text import CountVectorizer
11
  from weasyprint import HTML, CSS
12
+ from weasyprint.fonts import FontConfiguration
13
+ import tempfile
14
+ from bs4 import BeautifulSoup
15
 
16
  # OpenAI API 클라이언트 설정
17
  openai.api_key = os.getenv("OPENAI_API_KEY")
 
203
 
204
  # HTML๋กœ ๋ณ€ํ™˜
205
  html_post = convert_to_html(filtered_post)
206
+
207
  return html_post
 
 
 
208
 
209
  def convert_to_html(text):
210
+ soup = BeautifulSoup('<div class="blog-post"></div>', 'html.parser')
211
+ main_div = soup.find('div', class_='blog-post')
212
+
213
  lines = text.split('\n')
 
214
  for line in lines:
215
  line = line.strip()
216
+ if line.startswith('# '):
217
+ main_div.append(soup.new_tag('h1'))
218
+ main_div.h1.string = line[2:]
219
+ elif line.startswith('## '):
220
+ main_div.append(soup.new_tag('h2'))
221
+ main_div.h2.string = line[3:]
222
+ elif line.startswith('### '):
223
+ main_div.append(soup.new_tag('h3'))
224
+ main_div.h3.string = line[4:]
225
+ elif line.startswith('- '):
226
+ if not main_div.find_all('ul'):
227
+ main_div.append(soup.new_tag('ul'))
228
+ li = soup.new_tag('li')
229
+ li.string = line[2:]
230
+ main_div.ul.append(li)
231
+ else:
232
+ p = soup.new_tag('p')
233
+ p.string = line
234
+ main_div.append(p)
235
 
236
+ return str(soup)
 
 
 
 
 
237
 
238
  def remove_unwanted_phrases(text):
239
  unwanted_phrases = [
 
495
 
496
  def save_to_pdf(blog_post, user_topic):
497
  try:
498
+ # 임시 HTML 파일 생성
499
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False) as temp_html:
500
+ temp_html.write(blog_post)
501
+ temp_html_path = temp_html.name
502
+
503
+ # CSS 설정
504
+ font_config = FontConfiguration()
505
+ css = CSS(string='''
506
+ @import url('https://fonts.googleapis.com/css2?family=Nanum+Gothic:wght@400;700&display=swap');
507
+ body {
508
+ font-family: 'Nanum Gothic', Arial, sans-serif;
509
+ line-height: 1.6;
510
+ color: #333;
511
+ padding: 20px;
512
+ }
513
+ h1 { font-size: 24px; margin-bottom: 15px; }
514
+ h2 { font-size: 20px; margin-bottom: 10px; }
515
+ h3 { font-size: 18px; margin-bottom: 10px; }
516
+ p { margin-bottom: 10px; }
517
+ ul { margin-bottom: 10px; padding-left: 20px; }
518
+ ''', font_config=font_config)
519
+
520
+ # HTML์„ PDF๋กœ ๋ณ€ํ™˜
521
+ html = HTML(filename=temp_html_path)
522
+ pdf_bytes = html.write_pdf(stylesheets=[css], font_config=font_config)
523
 
524
+ # ํŒŒ์ผ ์ €์žฅ
 
 
 
525
  now = datetime.now(ZoneInfo("Asia/Seoul"))
526
+ filename = f"{now.strftime('%y%m%d_%H%M')}_{format_filename(user_topic)}.pdf"
527
+ with open(filename, 'wb') as f:
528
+ f.write(pdf_bytes)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
529
 
 
 
 
530
  return filename
531
  except Exception as e:
532
  print(f"PDF ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
 
623
  pdf_output = gr.File(label="์ƒ์„ฑ๋œ PDF ํŒŒ์ผ")
624
 
625
  save_pdf_btn.click(
626
+ fn=save_to_pdf,
627
+ inputs=[output, blog_title], # blog_title์„ user_topic์œผ๋กœ ์‚ฌ์šฉ
628
  outputs=[pdf_output],
629
  show_progress=True
630
  )