import os
import re
import tempfile
from collections import Counter

import gradio as gr
import pandas as pd


# Strips everything that is neither a word character nor whitespace.
_PUNCTUATION = re.compile(r'[^\w\s]')

# Zero-based position of the product-name column (column D in the sheet).
_PRODUCT_NAME_COLUMN = 3

# Preferred output location; only used when its directory exists and is writable.
_DEFAULT_OUTPUT_FILE = "/mnt/data/keyword_counts.xlsx"


def _count_keywords(product_names):
    """Return a Counter of how many product names contain each keyword."""
    keyword_counts = Counter()
    for name in product_names:
        # Cells can hold numbers or dates; the regex only accepts text,
        # so every value is stringified before punctuation is removed.
        words = _PUNCTUATION.sub('', str(name)).split()
        # A set, so a word repeated within one name is counted only once.
        keyword_counts.update(set(words))
    return keyword_counts


def _resolve_output_path():
    """Return a writable path for the result workbook."""
    preferred_dir = os.path.dirname(_DEFAULT_OUTPUT_FILE)
    if os.path.isdir(preferred_dir) and os.access(preferred_dir, os.W_OK):
        return _DEFAULT_OUTPUT_FILE
    # The preferred directory is missing or read-only on this machine:
    # write into a fresh temporary directory instead of failing.
    return os.path.join(
        tempfile.mkdtemp(), os.path.basename(_DEFAULT_OUTPUT_FILE)
    )


def process_excel(file):
    """Extract keyword frequencies from column D of an Excel workbook.

    Each non-empty cell of the fourth column is stripped of punctuation and
    split on whitespace. A keyword's frequency is the number of cells in
    which it appears at least once. The result is written to a new workbook
    with the columns ``Keyword`` and ``Frequency``, ordered by frequency
    (descending) and then by keyword (ascending) so ties are deterministic.

    Args:
        file: Anything ``pandas.read_excel`` accepts (a path or a binary
            file-like object).
            NOTE(review): confirm what the installed Gradio version passes
            for a "file" input.

    Returns:
        Path of the generated ``keyword_counts.xlsx`` file.

    Raises:
        IndexError: If the sheet has fewer than four columns.
    """
    df = pd.read_excel(file)

    if df.shape[1] <= _PRODUCT_NAME_COLUMN:
        raise IndexError(
            "expected product names in column D, but the sheet only has "
            f"{df.shape[1]} column(s)"
        )

    product_names = df.iloc[:, _PRODUCT_NAME_COLUMN].dropna()
    keyword_counts = _count_keywords(product_names)

    # Sort on (-count, keyword): set iteration order is not stable between
    # runs, so relying on insertion order would shuffle equal-count rows.
    ranked = sorted(
        keyword_counts.items(), key=lambda item: (-item[1], item[0])
    )
    result_df = pd.DataFrame(ranked, columns=['Keyword', 'Frequency'])

    output_file = _resolve_output_path()
    result_df.to_excel(output_file, index=False)

    return output_file
# Gradio UI: one file input handed to process_excel, one file output for the
# workbook path it returns.
iface = gr.Interface(
    fn=process_excel,
    inputs="file",
    outputs="file",
    title="Excel Keyword Extractor",
    # Korean, roughly: "Extracts keywords from column D of the Excel file,
    # counts their frequency, and outputs the result as a new Excel file."
    # The literal had been corrupted by a wrong-charset decode; restored here.
    description="엑셀 파일의 D열에서 키워드를 추출하고 빈도를 계산하여 새로운 엑셀 파일로 출력합니다.",
)


if __name__ == "__main__":
    # Launch the app only when run as a script, not when imported.
    iface.launch()