"""Streamlit front end that lays out a weekly report (.docx) from an Excel sheet."""

import asyncio
import configparser
import io
from datetime import datetime, timedelta
from io import BytesIO

import docx
import gradio as gr
import pandas as pd
import streamlit as st
from docx import Document
from docx.enum.dml import MSO_THEME_COLOR_INDEX
from docx.enum.style import WD_STYLE_TYPE
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.text import WD_COLOR_INDEX
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.opc.constants import RELATIONSHIP_TYPE as RT
from docx.oxml.ns import qn
from docx.oxml.shared import OxmlElement
from docx.shared import Cm, Inches
from docx.shared import Pt
from docx.shared import RGBColor
from docxtpl import DocxTemplate
from requests_toolbelt import MultipartEncoder

# Font family names used by the report styles.
_FONT_REGULAR = "思源黑体 Regular"
_FONT_BOLD = "思源黑体 Bold"
_FONT_LIGHT = "思源黑体 Light"


def count_values(df, col_name):
    """Count how often each value occurs in one column.

    :param df: DataFrame to inspect.
    :param col_name: Name of the column whose values are counted.
    :return: DataFrame with two columns, ``col_name`` and ``'count'``, in the
        order produced by ``Series.value_counts``.
    """
    counts = df[col_name].value_counts()
    # Naming the index and the value column explicitly yields the same two
    # columns regardless of how value_counts() labels its result.
    return counts.rename_axis(col_name).reset_index(name='count')


def add_hyperlink(paragraph, url, text):
    """Place a hyperlink within a paragraph object.

    :param paragraph: The paragraph we are adding the hyperlink to.
    :param url: A string containing the required url.
    :param text: The text displayed for the url.
    :return: A Run object containing the hyperlink.
    """
    # Register the URL in the part's relationships and get the new relation id.
    part = paragraph.part
    r_id = part.relate_to(url, RT.HYPERLINK, is_external=True)

    # Create the w:hyperlink tag and add the needed attributes.
    hyperlink = OxmlElement('w:hyperlink')
    hyperlink.set(qn('r:id'), r_id)
    hyperlink.set(qn('w:history'), '1')

    # Build the inner run (w:r) with run properties (w:rPr).
    new_run = OxmlElement('w:r')
    rPr = OxmlElement('w:rPr')

    # Reference the 'Hyperlink' character style. This has no visible effect
    # unless the template in use actually defines that style.
    rStyle = OxmlElement('w:rStyle')
    rStyle.set(qn('w:val'), 'Hyperlink')

    # Join the XML elements together and add the display text to the w:r.
    rPr.append(rStyle)
    new_run.append(rPr)
    new_run.text = text
    hyperlink.append(new_run)

    # Create a new Run object and put the hyperlink element into it.
    r = paragraph.add_run()
    r._r.append(hyperlink)

    # Workaround for templates without the hyperlink style: colour and
    # underline the run directly (it will not turn purple after being visited).
    r.font.color.theme_color = MSO_THEME_COLOR_INDEX.HYPERLINK
    r.font.underline = True

    return r


def _format_overview_cell(cell, text, alignment, bold=None):
    """Set a cell's text and apply the 8 pt regular font to its first run."""
    cell.text = text
    paragraph = cell.paragraphs[0]
    paragraph.alignment = alignment
    run = paragraph.runs[0]
    run.font.name = _FONT_REGULAR
    # The East Asian font has to be set on the XML element as well.
    run._element.rPr.rFonts.set(qn('w:eastAsia'), _FONT_REGULAR)
    run.font.size = Pt(8)
    if bold is not None:
        run.font.bold = bold


def create_table(document, count_df1):
    """Append a 2x2 overview table listing item counts per category.

    :param document: python-docx document the table is appended to.
    :param count_df1: Indexable sequence of at least eight counts; items 0-6
        fill the left column and item 7 fills the right column.
    """
    table = document.add_table(rows=2, cols=2)
    # Column widths.
    table.columns[0].width = Inches(3.7)
    table.columns[1].width = Inches(3.7)
    # Table borders.
    table.style = 'Table Grid'
    # Row height of the header row.
    table.rows[0].height = Pt(9)

    # Header row: centred, bold captions.
    header_cells = table.rows[0].cells
    _format_overview_cell(header_cells[0], "技术进展",
                          WD_PARAGRAPH_ALIGNMENT.CENTER, bold=True)
    _format_overview_cell(header_cells[1], "业内动态",
                          WD_PARAGRAPH_ALIGNMENT.CENTER, bold=True)

    # Second row: left-aligned counts.
    tech_summary = (
        "\t图像理解与生成 \t{0}项\n\t计算光学 \t{1}项\n\t图像处理 \t{2}项\n"
        "\t机器学习前沿 \t{3}项\n\t自然语言交互 \t{4}项\n\t量子计算 \t{5}项\n"
        "\t计算机视觉前沿 \t{6}项"
    ).format(count_df1[0], count_df1[1], count_df1[2], count_df1[3],
             count_df1[4], count_df1[5], count_df1[6])
    industry_summary = "\t大厂动态 \t{0}项\n".format(count_df1[7])

    body_cells = table.rows[1].cells
    _format_overview_cell(body_cells[0], tech_summary,
                          WD_PARAGRAPH_ALIGNMENT.LEFT)
    _format_overview_cell(body_cells[1], industry_summary,
                          WD_PARAGRAPH_ALIGNMENT.LEFT)


def _ensure_style(document, name, font_name, size_pt, bold, *,
                  style_type=WD_STYLE_TYPE.PARAGRAPH, base=None, color=None,
                  underline=None, highlight=None):
    """Fetch the named style, creating it if missing, and apply the font setup.

    Only the lookup is guarded: a missing style raises ``KeyError`` and is
    created, while errors during configuration propagate to the caller.
    """
    styles = document.styles
    try:
        style = styles[name]
    except KeyError:
        style = styles.add_style(name, style_type)

    if base is not None:
        style.base_style = styles[base]
    font = style.font
    # font.name must be assigned before rFonts is touched, because that
    # assignment is what creates the rPr/rFonts elements.
    font.name = font_name
    style._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
    if color is not None:
        font.color.rgb = RGBColor(*color)
    font.bold = bold
    font.size = Pt(size_pt)
    if underline is not None:
        font.underline = underline
    if highlight is not None:
        font.highlight_color = highlight
    return style


def _define_report_styles(document):
    """Create or update every named style the report layout relies on."""
    black = (0, 0, 0)
    # Date range shown at the top of the report.
    _ensure_style(document, 'date_range', _FONT_REGULAR, 14, True)
    # Chapter titles.
    _ensure_style(document, 'title2', _FONT_BOLD, 16, True)
    _ensure_style(document, 'title', _FONT_BOLD, 16, True,
                  base='Heading 1', color=black)
    # Underlined sub-heading in the highlights chapter.
    _ensure_style(document, 'tech_progress', _FONT_REGULAR, 9, False,
                  underline=True)
    # Grey notes and "details" lines in the highlights chapter.
    _ensure_style(document, 'content', _FONT_REGULAR, 9, False,
                  color=(89, 89, 89))
    # Body text, paragraph level.
    _ensure_style(document, 'weekly_summary', _FONT_REGULAR, 12, False)
    # Body text, character level (applied to runs).
    _ensure_style(document, 'inside_para', _FONT_REGULAR, 12, False,
                  style_type=WD_STYLE_TYPE.CHARACTER)
    # Highlight / summary body text.
    _ensure_style(document, 'part3_style', _FONT_REGULAR, 11, False)
    # Section headings in the tracking chapter.
    _ensure_style(document, 'tech', _FONT_BOLD, 14, True,
                  base='Heading 1', color=black)
    _ensure_style(document, 'tech2', _FONT_BOLD, 14, True)
    # Sub-headings with a 25 % grey highlight.
    _ensure_style(document, 'tech1', _FONT_REGULAR, 12, True,
                  highlight=WD_COLOR_INDEX.GRAY_25)
    # "date | title" line of an entry.
    _ensure_style(document, 'title_date', _FONT_BOLD, 12, False)
    # Technology line of an entry.
    _ensure_style(document, 'tech_detail', _FONT_LIGHT, 10, False)
    # Expert review heading.
    _ensure_style(document, 'expert', _FONT_REGULAR, 12, True)


def _add_section_intro(document, heading):
    """Append the heading and the fixed placeholder paragraphs of a section."""
    document.add_paragraph(heading, style='tech')
    document.add_paragraph("", style='tech2')
    document.add_paragraph("进展聚焦", style='tech1')
    document.add_paragraph("", style='tech1')
    document.add_paragraph("【占位】", style='weekly_summary')
    document.add_paragraph("", style='tech1')
    document.add_paragraph("进展详情", style='tech1')


def _fill_entry_cell(cell, headline, tech_line, summary, url, link_text,
                     review_heading=None):
    """Write one report entry into a table cell.

    Each paragraph receives its style when it is created, so no paragraph is
    left unstyled because of a wrong positional index.
    """
    cell.text = ""
    cell.paragraphs[0].style = "title_date"
    cell.add_paragraph(headline, style="title_date")
    cell.add_paragraph(tech_line, style="tech_detail")
    summary_paragraph = cell.add_paragraph(summary, style="part3_style")
    add_hyperlink(summary_paragraph, url, link_text)
    cell.add_paragraph("", style="part3_style")
    if review_heading is not None:
        cell.add_paragraph(review_heading, style="expert")
        # The original styled index 5 twice, leaving this trailing blank
        # paragraph (index 6) unstyled.
        cell.add_paragraph("", style="expert")


def _format_entry_date(value):
    """Return the entry date as ``YYYY-MM-DD``.

    Accepts datetime objects (as produced for real Excel date cells) as well
    as strings in ``YYYY.MM.DD`` form.
    """
    if isinstance(value, datetime):
        moment = value
    else:
        moment = datetime.strptime(value, '%Y.%m.%d')
    return moment.strftime('%Y-%m-%d')


def 荣耀周报排版(xlsx, template):
    """Build the weekly report document from an Excel sheet.

    :param xlsx: Excel source accepted by ``pandas.read_excel``. The columns
        read are 领域, 时间, 标题, 涉及技术, 简述(摘要), 源链接 and 是否点评.
    :param template: Template accepted by ``docx.Document``; it must provide
        the 'Heading 1' style that two report styles are based on.
    :return: The populated python-docx document (not yet saved).
    """
    document = Document(template)
    df = pd.read_excel(xlsx)
    res = df.sort_values(by='领域', ascending=True)

    tech_sections = ["图像理解与生成", "计算光学", "图像处理", "机器学习前沿",
                     "自然语言交互", "计算机视觉前沿", "量子计算"]
    industry_topics = ["产品发布", "大厂动态", "项目开源"]

    _define_report_styles(document)

    # Front matter.
    titles = ['一、本期目录', '二、热点速览', '三、定向追踪']
    t1 = "2023 年 x 月 x 日 —— 2023 年 x 月 x 日"
    t2 = "【本期荣耀周报内容概览】"
    para1 = document.add_paragraph(t1)
    para1.style = document.styles["date_range"]
    run = para1.add_run(" ")
    run.style = document.styles["inside_para"]
    run = para1.add_run(t2)
    run.style = document.styles["inside_para"]
    run.font.bold = False
    document.add_paragraph("", style="weekly_summary")

    # Chapter 1: table of contents.
    document.add_paragraph(titles[0], style='title')
    document.add_paragraph("", style='title2')

    # Chapter 2: highlights.
    document.add_paragraph(titles[1], style='title')
    document.add_paragraph("", style='weekly_summary')
    document.add_paragraph("【本周期热点总结】", style='weekly_summary')
    document.add_paragraph("", style='weekly_summary')
    document.add_paragraph("以下为本周期热点速览,以事件发生时间排序。", style='weekly_summary')
    document.add_paragraph("", style='tech_progress')
    document.add_paragraph("技术进展 · 【领域】 · 【涉及技术】", style='tech_progress')
    document.add_paragraph("【技术进展正文】", style='part3_style')
    document.add_paragraph("热点注释:", style='content')
    document.add_paragraph("查看详情:", style='content')
    document.add_paragraph("", style='content')

    # Chapter 3: tracking.
    document.add_paragraph(titles[2], style='title')
    document.add_paragraph("", style='title2')

    # One section per technology field, filled from the Excel rows.
    for section in tech_sections:
        _add_section_intro(document, "技术进展 · " + section)

        # Literal, NaN-safe match; newest entries first.
        in_section = res["领域"].str.contains(section, regex=False, na=False)
        res1 = res[in_section].sort_values(by="时间", ascending=False)

        # A table without rows is not valid in a .docx, so a field with no
        # entries gets its intro paragraphs only.
        if len(res1) > 0:
            table = document.add_table(rows=len(res1), cols=1)
            for row, (_, record) in zip(table.rows, res1.iterrows()):
                headline = (_format_entry_date(record["时间"]) + " | "
                            + str(record["标题"]))
                # NOTE(review): the blank paragraph after the review heading
                # is assumed to belong to the reviewed branch — confirm.
                review = "专家点评" if record["是否点评"] == "是" else None
                for cell in row.cells:
                    _fill_entry_cell(
                        cell,
                        headline,
                        "· " + str(record["涉及技术"]),
                        str(record["简述(摘要)"]),
                        record["源链接"],
                        "原文链接",
                        review_heading=review,
                    )
        document.add_page_break()

    # Industry sections contain placeholder entries only.
    last_topic = len(industry_topics) - 1
    for position, topic in enumerate(industry_topics):
        _add_section_intro(document, "业内动态 · " + topic)
        table = document.add_table(rows=1, cols=1)
        for row in table.rows:
            for cell in row.cells:
                _fill_entry_cell(
                    cell,
                    "yyyy-mm-dd" + " | " + "【标题占位】",
                    "· " + "【涉及技术】",
                    "【简述(摘要)占位】",
                    "【】",
                    "【原文链接占位】",
                    review_heading="【专家点评占位】",
                )
        # No page break after the final section.
        if position != last_topic:
            document.add_page_break()

    return document


# Gradio section (disabled)
# def excel_to_docx(xlsx):
#     # Process the Excel file and generate the docx file
#     document,name = 荣耀周报排版(xlsx)
#     return document.getvalue()
# Define the Gradio input and output interface
# inputs = gr.inputs.File(label="Excel 文件", type=["file"])
# outputs = gr.outputs.File(label="docx 文件")

st.title('Translator App')
st.markdown("Translate from Docx file")
st.subheader("File Upload")
datas = st.file_uploader("Original File")
template = st.file_uploader("template File")
name = st.text_input('Enter New File Name: ')
stream = BytesIO()
if st.button(label='生成'):
    if datas is None:
        # Without an upload pd.read_excel would be handed None and raise.
        st.warning("Please upload the Excel file first.")
    else:
        # st.spinner only displays while used as a context manager.
        with st.spinner('Waiting...'):
            document = 荣耀周报排版(datas, template)
            document.save(stream)
        st.success("Translated")
        st.download_button(label='Download Translated File',
                           file_name=(f"{name}.docx"),
                           data=stream.getvalue())