Spaces:

whispersound
/

kmkm_1

Running

App Files Files Community

whispersound committed on Oct 11, 2024

Commit

d72cac0

verified ·

1 Parent(s): 8ec6c05

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -85

app.py CHANGED Viewed

@@ -9,6 +9,9 @@ from datetime import datetime
 from zoneinfo import ZoneInfo
 from sklearn.feature_extraction.text import CountVectorizer
 from weasyprint import HTML, CSS
 # OpenAI API 클라이언트 설정
 openai.api_key = os.getenv("OPENAI_API_KEY")
@@ -200,40 +203,37 @@ def generate_blog_post(category, style, references1, references2, references3, t
         # HTML로 변환
         html_post = convert_to_html(filtered_post)
         return html_post
-    except Exception as e:
-        print(f"글 생성 중 오류 발생: {str(e)}")
-        return ""
 def convert_to_html(text):
     lines = text.split('\n')
-    html_lines = []
     for line in lines:
         line = line.strip()
-        if line.startswith('####'):
-            html_lines.append(f"<h4>{line[4:].strip()}</h4>")
-        elif line.startswith('###'):
-            html_lines.append(f"<h3>{line[3:].strip()}</h3>")
-        elif line.startswith('##'):
-            html_lines.append(f"<h2>{line[2:].strip()}</h2>")
-        elif line.startswith('#'):
-            html_lines.append(f"<h1>{line[1:].strip()}</h1>")
-        elif line.startswith('- '):  # 리스트 아이템
-            html_lines.append(f"<li>{line[2:]}</li>")
-        elif line:  # 일반 텍스트 (빈 줄 제외)
-            # '**'로 감싸진 부분을 <strong> 태그로 변환
-            line = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', line)
-            html_lines.append(f"<p>{line}</p>")
-        else:  # 빈 줄
-            html_lines.append("<br>")
-    html_content = f"""
-    <div style="font-family: Arial, sans-serif; line-height: 1.6; color: #333;">
-        {"".join(html_lines)}
-    </div>
-    """
-    return html_content
 def remove_unwanted_phrases(text):
     unwanted_phrases = [
@@ -495,65 +495,38 @@ def format_filename(text):
 def save_to_pdf(blog_post, user_topic):
     try:
-        pdf = PDF()
-        pdf.add_page()
-        pdf.set_auto_page_break(auto=True, margin=15)
-        # HTML 태그를 파싱하기 위한 정규표현식
-        tag_pattern = re.compile(r'<(/?)(\w+)([^>]*)>')
-        # 현재 날짜와 시간을 가져옵니다 (대한민국 시간 기준)
         now = datetime.now(ZoneInfo("Asia/Seoul"))
-        date_str = now.strftime("%y%m%d")
-        time_str = now.strftime("%H%M")
-        # 첫 번째 제목을 찾아 파일명으로 사용
-        title_match = re.search(r'<h[1-3][^>]*>(.*?)</h[1-3]>', blog_post)
-        title = title_match.group(1) if title_match else "Untitled"
-        filename = f"{date_str}_{time_str}_{format_filename(title)}.pdf"
-        # HTML 내용을 순회하며 PDF에 작성
-        current_tag = ''
-        buffer = ''
-        is_bold = False
-        for part in re.split(tag_pattern, blog_post):
-            if part in ['h1', 'h2', 'h3', 'p', 'strong', 'li', 'br']:
-                if buffer:
-                    if current_tag in ['h1', 'h2', 'h3']:
-                        pdf.set_font("NanumGothic", 'B', 16 if current_tag == 'h1' else 14)
-                        pdf.multi_cell(0, 10, buffer.strip(), align='L')
-                        pdf.ln(5)
-                    elif current_tag == 'p':
-                        pdf.set_font("NanumGothic", '', 11)
-                        pdf.multi_cell(0, 6, buffer.strip(), align='J')
-                        pdf.ln(5)
-                    elif current_tag == 'li':
-                        pdf.set_font("NanumGothic", '', 11)
-                        pdf.multi_cell(0, 6, "• " + buffer.strip(), align='J')
-                    elif current_tag == 'br':
-                        pdf.ln(5)
-                buffer = ''
-                current_tag = part
-            elif part == 'strong':
-                is_bold = True
-                pdf.set_font("NanumGothic", 'B', 11)
-            elif part == '/strong':
-                is_bold = False
-                pdf.set_font("NanumGothic", '', 11)
-            elif part.startswith('/') or part == 'div':
-                continue
-            elif not tag_pattern.match(part) and part.strip():
-                pdf.write(6, part.strip() + ' ')
-        # 마지막 버퍼 처리
-        if buffer:
-            pdf.set_font("NanumGothic", '', 11)
-            pdf.multi_cell(0, 6, buffer.strip(), align='J')
-        # PDF 저장
-        print(f"Saving PDF as: {filename}")
-        pdf.output(filename, 'F')
         return filename
     except Exception as e:
         print(f"PDF 생성 중 오류 발생: {str(e)}")
@@ -650,8 +623,8 @@ with gr.Blocks() as demo:
     pdf_output = gr.File(label="생성된 PDF 파일")
     save_pdf_btn.click(
-        fn=save_content_to_pdf,
-        inputs=[output],
         outputs=[pdf_output],
         show_progress=True
     )

 from zoneinfo import ZoneInfo
 from sklearn.feature_extraction.text import CountVectorizer
 from weasyprint import HTML, CSS
+from weasyprint.fonts import FontConfiguration
+import tempfile
+from bs4 import BeautifulSoup
 # OpenAI API 클라이언트 설정
 openai.api_key = os.getenv("OPENAI_API_KEY")
         # HTML로 변환
         html_post = convert_to_html(filtered_post)
         return html_post
 def convert_to_html(text):
+    soup = BeautifulSoup('<div class="blog-post"></div>', 'html.parser')
+    main_div = soup.find('div', class_='blog-post')
     lines = text.split('\n')
     for line in lines:
         line = line.strip()
+        if line.startswith('# '):
+            main_div.append(soup.new_tag('h1'))
+            main_div.h1.string = line[2:]
+        elif line.startswith('## '):
+            main_div.append(soup.new_tag('h2'))
+            main_div.h2.string = line[3:]
+        elif line.startswith('### '):
+            main_div.append(soup.new_tag('h3'))
+            main_div.h3.string = line[4:]
+        elif line.startswith('- '):
+            if not main_div.find_all('ul'):
+                main_div.append(soup.new_tag('ul'))
+            li = soup.new_tag('li')
+            li.string = line[2:]
+            main_div.ul.append(li)
+        else:
+            p = soup.new_tag('p')
+            p.string = line
+            main_div.append(p)
+    return str(soup)
 def remove_unwanted_phrases(text):
     unwanted_phrases = [
 def save_to_pdf(blog_post, user_topic):
     try:
+        # 임시 HTML 파일 생성
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False) as temp_html:
+            temp_html.write(blog_post)
+            temp_html_path = temp_html.name
+        # CSS 설정
+        font_config = FontConfiguration()
+        css = CSS(string='''
+            @import url('https://fonts.googleapis.com/css2?family=Nanum+Gothic:wght@400;700&display=swap');
+            body {
+                font-family: 'Nanum Gothic', Arial, sans-serif;
+                line-height: 1.6;
+                color: #333;
+                padding: 20px;
+            }
+            h1 { font-size: 24px; margin-bottom: 15px; }
+            h2 { font-size: 20px; margin-bottom: 10px; }
+            h3 { font-size: 18px; margin-bottom: 10px; }
+            p { margin-bottom: 10px; }
+            ul { margin-bottom: 10px; padding-left: 20px; }
+        ''', font_config=font_config)
+        # HTML을 PDF로 변환
+        html = HTML(filename=temp_html_path)
+        pdf_bytes = html.write_pdf(stylesheets=[css], font_config=font_config)
+        # 파일 저장
         now = datetime.now(ZoneInfo("Asia/Seoul"))
+        filename = f"{now.strftime('%y%m%d_%H%M')}_{format_filename(user_topic)}.pdf"
+        with open(filename, 'wb') as f:
+            f.write(pdf_bytes)
         return filename
     except Exception as e:
         print(f"PDF 생성 중 오류 발생: {str(e)}")
     pdf_output = gr.File(label="생성된 PDF 파일")
     save_pdf_btn.click(
+        fn=save_to_pdf,
+        inputs=[output, blog_title],  # blog_title을 user_topic으로 사용
         outputs=[pdf_output],
         show_progress=True
     )