Spaces:
Sleeping
Sleeping
File size: 10,956 Bytes
bc31df8 a64fd96 df36982 efa9b10 e798441 ddce38d e798441 68bf5c0 56f5312 d7d88a6 738c47d ddce38d efa9b10 2696a61 e0f7a1f 2696a61 799f319 3252717 2696a61 cee1edf 2696a61 efa9b10 783a6b1 d7d88a6 5438143 d7d88a6 1188f75 d7d88a6 01936b7 d7d88a6 783a6b1 efa9b10 d9f31af efa9b10 3b26dac 8af7a53 3b26dac 7cb2e91 7fa6d71 7cb2e91 efa9b10 9e3ab50 a1f97d1 8e35445 a1f97d1 8e35445 a1f97d1 8e35445 7cb2e91 cc4d71b b901c76 6fdd23e efa9b10 0ec483d 16f3b53 def0304 7ca5761 def0304 efa9b10 b901c76 7d614a6 3b26dac 6cf0191 3b26dac 6cf0191 cee1edf 6cf0191 cee1edf 6cf0191 2b6d359 019928f f0ce94d f02448c 11711e5 435b599 2b6d359 56f5312 019928f f0ce94d 56f5312 3b26dac 56f5312 da4b039 56f5312 3b26dac 56f5312 738c47d b901c76 738c47d 5438143 738c47d ef82d84 4c4cf13 738c47d efa9b10 3b26dac efa9b10 2696a61 cee1edf 2696a61 783a6b1 d7d88a6 783a6b1 738c47d 7cb2e91 7854dd4 1b0a7da 738c47d 56f5312 019928f ef2a31f 008a26a 2a26969 7f68f7a 8e35445 008a26a cee1edf 07195ae cee1edf 0ce495e f350911 6cf0191 cee1edf 83736e3 738c47d cee1edf 2696a61 418d1f5 9007f52 4c4cf13 738c47d 56f5312 da4b039 56f5312 f02448c efa9b10 724556d efa9b10 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
import subprocess
import sys

# Remove any installed `pdfminer` distribution, then install the pinned
# `pdfminer.six` release, before the project modules below are imported.
# `-y` is required because there is no terminal to answer pip's confirmation
# prompt; `sys.executable -m pip` targets the interpreter running this script.
# NOTE(review): presumably both distributions provide the same `pdfminer`
# import package and would otherwise conflict — confirm.
subprocess.run([sys.executable, "-m", "pip", "uninstall", "-y", "pdfminer"])
subprocess.run([sys.executable, "-m", "pip", "install", "pdfminer.six==20231228"])
import gradio as gr
from scrape_3gpp import *
from excel_chat import *
from split_files_to_excel import *
from classification import *
from chart_generation import *
from charts_advanced import *
from users_management import *
from code_df_custom import *
# Module-level record of the temp-file entries already seen by
# list_attributes_and_values(); it starts out empty.
value = set()
def list_attributes_and_values():
    """Return an HTML page that redirects the browser to a new temp file.

    Compares the ``temp_files`` attribute of the module-level ``fi_config``
    object with the module-level ``value`` set remembered from the previous
    call, then stores the current contents back into ``value``.

    Returns:
        str: An HTML document whose meta-refresh tag and fallback link both
        point at the Space's ``/file=<path>`` URL for one newly seen path,
        or ``""`` when nothing new appeared since the previous call.
    """
    global value
    from urllib.parse import quote

    base_url = "https://organizedprogrammers-standard-intelligence-dev.hf.space/file="

    new_value = set(fi_config.temp_files)
    print(f"value: {value}\nnew value: {new_value}")

    fresh = new_value - value
    value = new_value
    if not fresh:
        # Nothing new to download: return empty markup instead of raising
        # IndexError on an empty difference.
        return ""

    # Pick deterministically when several paths are new, and percent-encode
    # the path so spaces or quotes cannot break the URL or the HTML attributes.
    tmp = quote(str(min(fresh, key=str)))
    target = f"{base_url}{tmp}"

    html_script = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta http-equiv="refresh" content="0; url={target}">
    <title>Redirecting to file download</title>
    </head>
    <body>
    <p>If you are not redirected automatically, please <a href="{target}">click here</a>.</p>
    </body>
    </html>
    """
    return html_script
# Root Blocks container of the app, bound to `demo`.
# NOTE(review): indentation is not visible in this view, so which components
# sit inside which `with` context is inferred from statement order — confirm.
with gr.Blocks() as demo:
with gr.Row():
with gr.Column():
gr.Markdown("## Extraction, Classification and AI tool")
with gr.Column():
# Greeting text (an output of the login/logout handlers) and the logout button.
md_username = gr.Markdown(value='## Hi Guest!')
btn_logout = gr.Button("Logout")
with gr.Accordion(label="**Login** to keep user preferences", open=False):
# State holding the current user record, initialised to a "Guest" profile.
# The default hashed_password is the SHA-256 hex digest of the empty string.
st_user = gr.State(value={"name":"Guest", "hashed_password":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", "history": { "keywords": [ "value1", "value3", "value4"], "prompts": [] }})
with gr.Column():
# Credentials passed to auth_user by the Login button and by pressing Enter in the password box.
tb_user = gr.Textbox(label='Username')
tb_pwd = gr.Textbox(label='Password', type='password')
with gr.Row():
btn_login = gr.Button('Login')
# "File extraction" tab: widgets for choosing a 3GPP URL, optional statuses, and starting the extraction.
with gr.Tab("File extraction"):
gr.Markdown("### This part aims to extract the most relevant content and information about every contribution from a 3gpp meeting")
gr.Markdown(" Put either just a link, or a link and an excel file with an 'Actions' column")
with gr.Row():
# URL dropdown that accepts free text; it is both the input and the output of browse_folder.
dd_url = gr.Dropdown(label="(e.g. https://www.3gpp.org/ftp/TSG_SA/WG1_Serv/TSGS1_105_Athens/Docs)", multiselect=False, value="https://www.3gpp.org/ftp/", allow_custom_value=True, scale=9)
btn_search = gr.Button("Search")
with gr.Accordion("Filter by file status", open=False):
with gr.Row():
# Multi-select status filter; it is the output of extract_statuses.
dd_status = gr.Dropdown(label="Status to look for (Optional)", allow_custom_value=False, multiselect=True, scale=7)
btn_search_status = gr.Button("Search for status", scale=2)
# Click runs extractionPrincipale with the URL, the shared Excel file and the selected statuses.
btn_extract = gr.Button("Extract excel from URL")
# "Split Files" tab: upload documents and choose how they are split.
with gr.Tab("Split Files"):
gr.Markdown("### Upload your standard documentation (pdf, doc, docx) to split it into paragraphs in an Excel file")
# Split mode selector; changing it calls change_textbox, whose outputs are the two widgets below.
radio = gr.Radio(
["Intelligent split", "Intelligent split by keywords", "Non intelligent split"], label="Choose your selection", value = "Intelligent split"
)
# Both option widgets start hidden (visible=False).
# NOTE(review): presumably change_textbox reveals the one matching the selected mode — confirm.
dropdown_split = gr.Dropdown(["introduction", "objective", "conclusion", "summary"], multiselect=True, visible=False, allow_custom_value=True, label="Select or add keywords")
nb_split = gr.Number(label="Chunk size", value=1000, interactive=True, visible=False)
# Multiple files may be uploaded; both the upload event and the Split button call function_split_call.
fi_input = gr.File(file_count='multiple')
btn_split = gr.Button("Split")
# "Ask LLM" tab: widgets whose values are passed to chat_with_mistral by the "Ask AI" button.
with gr.Tab("Ask LLM"):
gr.Markdown("### This section utilizes Large Language Models (LLMs) to query rows in an Excel file")
# Source column choices; this dropdown is one of the outputs of get_columns.
dd_source_ask = gr.Dropdown(label="Source Column(s)", multiselect=True)
tb_destcol = gr.Textbox(label="Destination column label (e.g. Summary, ELI5, PAB)")
# Multi-select dropdown capped at one choice, with free text allowed.
dd_prompt = gr.Dropdown(label="Prompt", allow_custom_value=True, multiselect=True, max_choices=1)
dd_llm = gr.Dropdown(["Mistral Tiny","Mistral Small","Mistral Medium", "Claude Sonnet", "Claude Opus", "Groq (mixtral)"],value="Groq (mixtral)", label="Choose your LLM")
with gr.Accordion("Filters", open=False):
with gr.Row():
# Optional filter: the column to search in (defaults to '[ALL]') and the words to look for.
dd_searchcol = gr.Dropdown(label="Column to look into (Optional)", value='[ALL]', multiselect=False, scale=4)
dd_keywords = gr.Dropdown(label="Words to look for (Optional)", multiselect=True, allow_custom_value=True, scale=5)
mist_button = gr.Button("Ask AI")
# "Classification by topic" tab: source column, similarity threshold and an editable category table.
# NOTE(review): df_cat_filter and df_cate are not defined in the visible source;
# presumably they come from one of the star imports — confirm.
with gr.Tab("Classification by topic"):
gr.Markdown("### This section will categories each contribution in your own personalized categories")
with gr.Row():
dd_source_class = gr.Dropdown(label="Source Column", multiselect=False, scale=7)
# Slider from 0 to 1 in steps of 0.05, default 0.45; passed to classification.
sl_treshold = gr.Slider(minimum=0, maximum=1, value=0.45, step=0.05, label='Similarity Treshold')
gr.Markdown("### The predefined categories can be modified at any time")
dd_filter = gr.Dropdown(choices=df_cat_filter, label = "Choose your filters here", multiselect=True, allow_custom_value=True)
btn_filter = gr.Button("Filter")
# Visible, editable category table plus a hidden table built from the same df_cate value.
# The hidden one is an input to filterByTopics and addCategories.
df_category = gr.DataFrame(label='categories', value=df_cate, interactive=True)
df_category_hidden = gr.DataFrame(value=df_cate, visible=False)
with gr.Row():
btn_reset_df = gr.Button("Reset categories")
btn_classif = gr.Button("Categorize")
btn_add_categories = gr.Button("Add categories")
# Charts tab: two column selectors and a plot that is the output of create_bar_plot.
with gr.Tab(" Personalised Charts Generation"):
gr.Markdown("### This section will create a chart using two columns of your choice")
with gr.Row():
# Both dropdowns are outputs of get_columns; the second starts with an empty-string value.
dd_label1 = gr.Dropdown(label="Label 1", multiselect=False)
dd_label2 = gr.Dropdown(label="Label 2", value="", multiselect=False)
btn_chart = gr.Button("Generate Bar Plot")
plt_figure = gr.Plot()
# "Meeting Report (charts)" tab with three sub-tabs (Overall, By Expert, By Company) and three plots.
with gr.Tab("Meeting Report (charts)"):
gr.Markdown("### This section will create a report using multiple charts with your columns")
gr.Markdown("Make sure you have an 'Expert', 'Source' and 'Status' column")
with gr.Tab("Overall"):
# This one button has three click handlers, one for each of the three plots below.
btn_overall = gr.Button("Overall Review")
with gr.Tab("By Expert"):
dd_exp=gr.Dropdown(label="Experts", multiselect=False, allow_custom_value=True,)
btn_expert = gr.Button("Top 10 by expert")
with gr.Tab("By Company"):
tb_com=gr.Textbox(label="Company Name",info="You can write 1, 2 or 3 company names at the same time")
btn_type = gr.Button("Company info")
with gr.Row():
# Output plots; the expert and company handlers both write to plt_chart only.
plt_chart = gr.Plot(label="Graphique")
plt_chart2 = gr.Plot(label="Graphique")
plt_chart3 = gr.Plot(label="Graphique")
# "Code on your file" tab: a code editor whose content is passed to run_code together with the shared Excel file.
with gr.Tab("Code on your file"):
gr.Markdown("### This section lets you add your own code to add functions and filters to edit the files")
with gr.Accordion("Input DataFrame Preview", open=False):
# Read-only preview table; it is one of the outputs of get_columns.
df_input = gr.DataFrame(interactive=False)
# The two Markdown snippets around the editor display the code shown before and after the user's code.
gr.Markdown("```python\ndf = pd.read_excel(YOUR_FILE)\n```")
cd_code = gr.Code(value="# Create a copy of the original DataFrame\nnew_df = df.copy()\n\n# Add a new column to the copy\nnew_df['NewColumn'] = 'New Value'", language='python')
gr.Markdown("```python\nnew_df.to_excel(YOUR_NEW_FILE)\nreturn YOUR_NEW_FILE\n```")
# Button created without an explicit label; clicking it calls run_code.
btn_run_code = gr.Button()
# run_code writes to the result table and to this Markdown area.
error_display = gr.Markdown()
df_output_code = gr.DataFrame(interactive=False)
btn_export_df = gr.Button('Export df as excel')
# State filled by get_columns and passed to export_df.
st_filename = gr.State()
# Components shared by the handlers: an Excel preview table, the Excel file, an HTML area and a hidden file.
with gr.Accordion("Excel Preview", open=False):
df_output = gr.DataFrame()
# Working Excel file: an input and/or output of most handlers wired in this script.
fi_excel = gr.File(label="Excel File")
# HTML area that receives the redirect page returned by list_attributes_and_values.
ht_dl = gr.HTML()
# `global` at module scope has no effect; list_attributes_and_values reads fi_config as a module-level name.
global fi_config
# Hidden file component: an output of chat_with_mistral, and its change event triggers the redirect.
fi_config = gr.File(type='binary', visible=False)
# authentication
# Login (button click or Enter in the password box) and logout update the same four components:
# the user state, the greeting, and the prompt and keyword dropdowns.
# NOTE(review): auth_user and logout are not defined in the visible source;
# presumably they come from users_management — confirm.
btn_login.click(auth_user, inputs=[tb_user, tb_pwd], outputs=[st_user, md_username, dd_prompt, dd_keywords])
tb_pwd.submit(auth_user, inputs=[tb_user, tb_pwd], outputs=[st_user, md_username, dd_prompt, dd_keywords])
btn_logout.click(logout, inputs=None, outputs=[st_user, md_username, dd_prompt, dd_keywords])
# 3GPP scraping
# Fill the status dropdown from the current URL.
btn_search_status.click(extract_statuses, inputs=dd_url, outputs=dd_status)
# browse_folder takes the URL dropdown as input and writes back to the same dropdown,
# both on the Search button and whenever the dropdown value changes.
btn_search.click(browse_folder, inputs=dd_url, outputs=dd_url)
dd_url.change(browse_folder, inputs=dd_url, outputs=dd_url)
#fi_excel.change(get_expert,inputs=fi_excel, outputs=dd_exp)
# Whenever the shared Excel file changes, get_columns updates the column dropdowns,
# the preview tables and the stored file name.
fi_excel.change(get_columns, inputs=[fi_excel], outputs=[dd_source_ask, dd_source_class, dd_label1, dd_label2, dd_searchcol, df_output,st_filename, df_input])
# Run the extraction; its result replaces the shared Excel file.
btn_extract.click(extractionPrincipale, inputs=[dd_url, fi_excel, dd_status], outputs=[fi_excel])
# Split files
#fi_input.upload(split_in_df, inputs=fi_input, outputs=fi_excel)
# The upload event and the Split button call the same function with the same inputs;
# the result replaces the shared Excel file.
fi_input.upload(function_split_call, inputs=[fi_input, dropdown_split, radio, nb_split], outputs=fi_excel)
btn_split.click(function_split_call, inputs=[fi_input, dropdown_split, radio, nb_split], outputs=fi_excel)
# Changing the split mode calls change_textbox, which updates the keyword dropdown and the chunk-size field.
radio.change(fn=change_textbox, inputs=[dropdown_split,radio], outputs=[dropdown_split, nb_split])
#llm
# "Ask AI": besides the Excel file and preview, the handler also outputs the prompt and keyword
# dropdowns, the user state, and the hidden fi_config file (whose change event drives the redirect).
mist_button.click(chat_with_mistral, inputs=[dd_source_ask, tb_destcol, dd_prompt, fi_excel, dd_url, dd_searchcol, dd_keywords, dd_llm, st_user], outputs=[fi_excel, df_output, dd_prompt, dd_keywords, st_user, fi_config])
#classification
btn_classif.click(classification, inputs=[dd_source_class, fi_excel, df_category, sl_treshold], outputs=[fi_excel, df_output])
btn_reset_df.click(reset_cate, inputs=df_category, outputs=df_category)
# Filtering reads the hidden table and writes the visible one.
btn_filter.click(filterByTopics, inputs=[dd_filter, df_category_hidden], outputs=df_category)
# Adding categories reads both tables and updates the filter dropdown and the hidden table.
btn_add_categories.click(addCategories, inputs=[df_category, df_category_hidden],outputs=[dd_filter,df_category_hidden])
#charts
btn_chart.click(create_bar_plot, inputs=[fi_excel, dd_label1, dd_label2], outputs=[plt_figure])
#json download
# When the hidden fi_config file changes, put the redirect page from list_attributes_and_values into ht_dl.
fi_config.change(list_attributes_and_values, inputs=None, outputs=ht_dl)
# Run the user's code against the shared Excel file; outputs are the result table and the error area.
btn_run_code.click(run_code, inputs=[fi_excel, cd_code], outputs=[df_output_code, error_display])
# Export the result table using the stored file name; the result replaces the shared Excel file.
btn_export_df.click(export_df, inputs=[df_output_code, st_filename], outputs=fi_excel)
# "Overall Review" registers three handlers, one per plot.
btn_overall.click(generate_company_chart,inputs=[fi_excel], outputs=[plt_chart])
btn_overall.click(status_chart,inputs=[fi_excel], outputs=[plt_chart2])
btn_overall.click(category_chart,inputs=[fi_excel], outputs=[plt_chart3])
# The expert and company handlers both write to the first plot only.
btn_expert.click(chart_by_expert,inputs=[fi_excel,dd_exp], outputs=[plt_chart])
btn_type.click(company_document_type,inputs=[fi_excel,tb_com], outputs=[plt_chart])
# dd_label1.change(update_label, inputs=[dd_label1], outputs=[dd_label2])
# Start the Gradio server for the app with debug mode enabled.
demo.launch(debug=True)