Spaces:
Running
Running
whispersound
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,9 @@ from datetime import datetime
|
|
9 |
from zoneinfo import ZoneInfo
|
10 |
from sklearn.feature_extraction.text import CountVectorizer
|
11 |
from weasyprint import HTML, CSS
|
|
|
|
|
|
|
12 |
|
13 |
# OpenAI API 클라이언트 설정
|
14 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
@@ -200,40 +203,37 @@ def generate_blog_post(category, style, references1, references2, references3, t
|
|
200 |
|
201 |
# HTML๋ก ๋ณํ
|
202 |
html_post = convert_to_html(filtered_post)
|
203 |
-
|
204 |
return html_post
|
205 |
-
except Exception as e:
|
206 |
-
print(f"๊ธ ์์ฑ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}")
|
207 |
-
return ""
|
208 |
|
209 |
def convert_to_html(text):
|
|
|
|
|
|
|
210 |
lines = text.split('\n')
|
211 |
-
html_lines = []
|
212 |
for line in lines:
|
213 |
line = line.strip()
|
214 |
-
if line.startswith('
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
elif line.startswith('
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
|
|
|
|
|
|
230 |
|
231 |
-
|
232 |
-
<div style="font-family: Arial, sans-serif; line-height: 1.6; color: #333;">
|
233 |
-
{"".join(html_lines)}
|
234 |
-
</div>
|
235 |
-
"""
|
236 |
-
return html_content
|
237 |
|
238 |
def remove_unwanted_phrases(text):
|
239 |
unwanted_phrases = [
|
@@ -495,65 +495,38 @@ def format_filename(text):
|
|
495 |
|
496 |
def save_to_pdf(blog_post, user_topic):
|
497 |
try:
|
498 |
-
|
499 |
-
|
500 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
501 |
|
502 |
-
#
|
503 |
-
tag_pattern = re.compile(r'<(/?)(\w+)([^>]*)>')
|
504 |
-
|
505 |
-
# ํ์ฌ ๋ ์ง์ ์๊ฐ์ ๊ฐ์ ธ์ต๋๋ค (๋ํ๋ฏผ๊ตญ ์๊ฐ ๊ธฐ์ค)
|
506 |
now = datetime.now(ZoneInfo("Asia/Seoul"))
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
# ์ฒซ ๋ฒ์งธ ์ ๋ชฉ์ ์ฐพ์ ํ์ผ๋ช
์ผ๋ก ์ฌ์ฉ
|
511 |
-
title_match = re.search(r'<h[1-3][^>]*>(.*?)</h[1-3]>', blog_post)
|
512 |
-
title = title_match.group(1) if title_match else "Untitled"
|
513 |
-
filename = f"{date_str}_{time_str}_{format_filename(title)}.pdf"
|
514 |
-
|
515 |
-
# HTML ๋ด์ฉ์ ์ํํ๋ฉฐ PDF์ ์์ฑ
|
516 |
-
current_tag = ''
|
517 |
-
buffer = ''
|
518 |
-
is_bold = False
|
519 |
-
|
520 |
-
for part in re.split(tag_pattern, blog_post):
|
521 |
-
if part in ['h1', 'h2', 'h3', 'p', 'strong', 'li', 'br']:
|
522 |
-
if buffer:
|
523 |
-
if current_tag in ['h1', 'h2', 'h3']:
|
524 |
-
pdf.set_font("NanumGothic", 'B', 16 if current_tag == 'h1' else 14)
|
525 |
-
pdf.multi_cell(0, 10, buffer.strip(), align='L')
|
526 |
-
pdf.ln(5)
|
527 |
-
elif current_tag == 'p':
|
528 |
-
pdf.set_font("NanumGothic", '', 11)
|
529 |
-
pdf.multi_cell(0, 6, buffer.strip(), align='J')
|
530 |
-
pdf.ln(5)
|
531 |
-
elif current_tag == 'li':
|
532 |
-
pdf.set_font("NanumGothic", '', 11)
|
533 |
-
pdf.multi_cell(0, 6, "โข " + buffer.strip(), align='J')
|
534 |
-
elif current_tag == 'br':
|
535 |
-
pdf.ln(5)
|
536 |
-
buffer = ''
|
537 |
-
current_tag = part
|
538 |
-
elif part == 'strong':
|
539 |
-
is_bold = True
|
540 |
-
pdf.set_font("NanumGothic", 'B', 11)
|
541 |
-
elif part == '/strong':
|
542 |
-
is_bold = False
|
543 |
-
pdf.set_font("NanumGothic", '', 11)
|
544 |
-
elif part.startswith('/') or part == 'div':
|
545 |
-
continue
|
546 |
-
elif not tag_pattern.match(part) and part.strip():
|
547 |
-
pdf.write(6, part.strip() + ' ')
|
548 |
-
|
549 |
-
# ๋ง์ง๋ง ๋ฒํผ ์ฒ๋ฆฌ
|
550 |
-
if buffer:
|
551 |
-
pdf.set_font("NanumGothic", '', 11)
|
552 |
-
pdf.multi_cell(0, 6, buffer.strip(), align='J')
|
553 |
|
554 |
-
# PDF ์ ์ฅ
|
555 |
-
print(f"Saving PDF as: {filename}")
|
556 |
-
pdf.output(filename, 'F')
|
557 |
return filename
|
558 |
except Exception as e:
|
559 |
print(f"PDF ์์ฑ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}")
|
@@ -650,8 +623,8 @@ with gr.Blocks() as demo:
|
|
650 |
pdf_output = gr.File(label="์์ฑ๋ PDF ํ์ผ")
|
651 |
|
652 |
save_pdf_btn.click(
|
653 |
-
fn=
|
654 |
-
inputs=[output],
|
655 |
outputs=[pdf_output],
|
656 |
show_progress=True
|
657 |
)
|
|
|
9 |
from zoneinfo import ZoneInfo
|
10 |
from sklearn.feature_extraction.text import CountVectorizer
|
11 |
from weasyprint import HTML, CSS
|
12 |
+
from weasyprint.fonts import FontConfiguration
|
13 |
+
import tempfile
|
14 |
+
from bs4 import BeautifulSoup
|
15 |
|
16 |
# OpenAI API 클라이언트 설정
|
17 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
|
|
203 |
|
204 |
# HTML๋ก ๋ณํ
|
205 |
html_post = convert_to_html(filtered_post)
|
206 |
+
|
207 |
return html_post
|
|
|
|
|
|
|
208 |
|
209 |
def convert_to_html(text):
    """Convert lightly marked-up plain text into an HTML fragment.

    Each input line is stripped and mapped to one element inside a
    ``<div class="blog-post">`` wrapper:

    * ``# ``, ``## ``, ``### `` prefixes become ``<h1>``, ``<h2>``, ``<h3>``.
    * ``- `` prefixed lines become ``<li>`` items; a run of list lines shares
      one ``<ul>``, and a new ``<ul>`` is started once other content
      intervenes.
    * Any other non-blank line becomes a ``<p>``.

    Blank lines are skipped (they neither emit an empty ``<p>`` nor break a
    list). Text is assigned through ``.string`` so BeautifulSoup escapes it
    on output.

    Args:
        text: The source text; ``None`` or ``""`` yields an empty wrapper.

    Returns:
        The serialized HTML of the wrapper ``<div>`` and its children.
    """
    soup = BeautifulSoup('<div class="blog-post"></div>', 'html.parser')
    main_div = soup.find('div', class_='blog-post')

    heading_prefixes = (('### ', 'h3'), ('## ', 'h2'), ('# ', 'h1'))
    # The <ul> currently being filled; None when the previous element was
    # not a list item, so the next "- " line opens a fresh list in place.
    current_list = None

    for raw_line in (text or '').split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith('- '):
            if current_list is None:
                current_list = soup.new_tag('ul')
                main_div.append(current_list)
            item = soup.new_tag('li')
            item.string = line[2:]
            current_list.append(item)
            continue

        current_list = None
        for prefix, tag_name in heading_prefixes:
            if line.startswith(prefix):
                # Keep a reference to the tag just created: attribute
                # access such as main_div.h2 returns the FIRST <h2> in the
                # tree, which would overwrite an earlier heading instead.
                element = soup.new_tag(tag_name)
                element.string = line[len(prefix):]
                break
        else:
            element = soup.new_tag('p')
            element.string = line
        main_div.append(element)

    return str(soup)
|
|
|
|
|
|
|
|
|
|
|
237 |
|
238 |
def remove_unwanted_phrases(text):
|
239 |
unwanted_phrases = [
|
|
|
495 |
|
496 |
def save_to_pdf(blog_post, user_topic):
|
497 |
try:
|
498 |
+
# 임시 HTML 파일 생성
|
499 |
+
with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False) as temp_html:
|
500 |
+
temp_html.write(blog_post)
|
501 |
+
temp_html_path = temp_html.name
|
502 |
+
|
503 |
+
# CSS 설정
|
504 |
+
font_config = FontConfiguration()
|
505 |
+
css = CSS(string='''
|
506 |
+
@import url('https://fonts.googleapis.com/css2?family=Nanum+Gothic:wght@400;700&display=swap');
|
507 |
+
body {
|
508 |
+
font-family: 'Nanum Gothic', Arial, sans-serif;
|
509 |
+
line-height: 1.6;
|
510 |
+
color: #333;
|
511 |
+
padding: 20px;
|
512 |
+
}
|
513 |
+
h1 { font-size: 24px; margin-bottom: 15px; }
|
514 |
+
h2 { font-size: 20px; margin-bottom: 10px; }
|
515 |
+
h3 { font-size: 18px; margin-bottom: 10px; }
|
516 |
+
p { margin-bottom: 10px; }
|
517 |
+
ul { margin-bottom: 10px; padding-left: 20px; }
|
518 |
+
''', font_config=font_config)
|
519 |
+
|
520 |
+
# HTML์ PDF๋ก ๋ณํ
|
521 |
+
html = HTML(filename=temp_html_path)
|
522 |
+
pdf_bytes = html.write_pdf(stylesheets=[css], font_config=font_config)
|
523 |
|
524 |
+
# 파일 저장
|
|
|
|
|
|
|
525 |
now = datetime.now(ZoneInfo("Asia/Seoul"))
|
526 |
+
filename = f"{now.strftime('%y%m%d_%H%M')}_{format_filename(user_topic)}.pdf"
|
527 |
+
with open(filename, 'wb') as f:
|
528 |
+
f.write(pdf_bytes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
529 |
|
|
|
|
|
|
|
530 |
return filename
|
531 |
except Exception as e:
|
532 |
print(f"PDF ์์ฑ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}")
|
|
|
623 |
pdf_output = gr.File(label="์์ฑ๋ PDF ํ์ผ")
|
624 |
|
625 |
save_pdf_btn.click(
|
626 |
+
fn=save_to_pdf,
|
627 |
+
inputs=[output, blog_title], # blog_title์ user_topic์ผ๋ก ์ฌ์ฉ
|
628 |
outputs=[pdf_output],
|
629 |
show_progress=True
|
630 |
)
|