Spaces:

CSB261
/

KEYY

Sleeping

CSB261 committed on Dec 26, 2024

Commit

184edb7

verified ·

1 Parent(s): cbc067e

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import os
+import re
+import tempfile
+from collections import Counter
+
+import gradio as gr
+import pandas as pd
+def process_excel(file):
+    """Count keyword frequencies in column D of an Excel sheet.
+
+    Each non-empty cell of the fourth column is stripped of punctuation and
+    split on whitespace. A keyword is counted at most once per cell, so the
+    frequency is the number of cells in which the keyword appears.
+
+    Args:
+        file: Anything ``pandas.read_excel`` accepts (a path or a file-like
+            object).
+
+    Returns:
+        Path of a newly written ``keyword_counts.xlsx`` workbook with the
+        columns ``Keyword`` and ``Frequency``, sorted by descending
+        frequency and then alphabetically by keyword.
+
+    Raises:
+        IndexError: If the sheet has fewer than four columns.
+    """
+    df = pd.read_excel(file)
+    # Fail with an explicit message instead of an opaque positional-indexer
+    # error when there is no column D.
+    if df.shape[1] < 4:
+        raise IndexError(
+            f"expected at least 4 columns (A-D), found {df.shape[1]}"
+        )
+    # Column D is positional index 3 (zero-based); empty cells are skipped.
+    product_names = df.iloc[:, 3].dropna()
+    # Compile once: the same pattern is applied to every cell.
+    punctuation = re.compile(r'[^\w\s]')
+    keyword_counts = Counter()
+    for name in product_names:
+        # str() keeps numeric cells (e.g. a product named 2024) from raising
+        # TypeError inside the regex substitution.
+        words = punctuation.sub('', str(name)).split()
+        # set() de-duplicates within one cell so repeats count only once.
+        keyword_counts.update(set(words))
+    result_df = pd.DataFrame(
+        list(keyword_counts.items()), columns=['Keyword', 'Frequency']
+    )
+    # The secondary sort key makes the order of ties deterministic; set
+    # iteration order would otherwise leak into the output.
+    result_df = result_df.sort_values(
+        by=['Frequency', 'Keyword'], ascending=[False, True]
+    ).reset_index(drop=True)
+    # Write into a fresh temporary directory: a fixed path may not exist on
+    # the host and would be overwritten by concurrent requests.
+    output_dir = tempfile.mkdtemp(prefix="keyword_counts_")
+    output_file = os.path.join(output_dir, "keyword_counts.xlsx")
+    result_df.to_excel(output_file, index=False)
+    return output_file
+# Gradio UI: one uploaded file in, one file returned by process_excel out.
+iface = gr.Interface(
+    title="Excel Keyword Extractor",
+    description="엑셀 파일의 D열에서 키워드를 추출하고 빈도를 계산하여 새로운 엑셀 파일로 출력합니다.",
+    fn=process_excel,
+    inputs="file",
+    outputs="file",
+)
+# Launch the app only when executed as a script, not when imported.
+if __name__ == "__main__":
+    iface.launch()