"""Gradio app that rebuilds a conversation table from a database transcript
and a tab-separated list of matched clips exported from Kibana, then offers
the result as TSV and Excel downloads."""

import csv

import gradio as gr  # type: ignore
import pandas as pd  # type: ignore

with gr.Blocks(title="Conversation rebuilder", theme="gradio/monochrome") as app:
    gr.Markdown(
        """# Conversation rebuilder
Please, fill the Database Transcript and the List of matched clips from Kibana, and click the Rebuild button"""
    )
    with gr.Row():
        txt_transcript = gr.Code(
            label="Database Transcript",
            interactive=True,
            lines=5,
        )
        txt_clips = gr.Code(
            label="Kibana clips",
            interactive=True,
            lines=5,
        )
    with gr.Row():
        gr.ClearButton(
            value="Clear",
            variant="secondary",
            size="sm",
            components=[txt_transcript, txt_clips],
        )
        btn_build = gr.Button(value="Rebuild", variant="primary", size="sm")
    with gr.Row():
        data = gr.Dataframe(
            label="CONVERSATION",
            headers=["index", "user", "agent", "gpt", "distance"],
            datatype=["str", "str", "str", "str", "number"],
            column_widths=["8%", "29%", "29%", "29%", "5%"],
            col_count=(5, "fixed"),
            interactive=False,
            wrap=True,
        )
    with gr.Row():
        file = gr.File(
            label="Export files",
            show_label=True,
            height=60,
            container=True,
            interactive=False,
            file_count="single",
        )

    # Column indices of one tab-separated Kibana clip line.
    COL_TIMESTAMP = 0
    COL_CONVERSATION_ID = 1
    COL_CLIP_COLLECTION_ID = 2
    # NOTE(review): same index as COL_CLIP_COLLECTION_ID — looks like a
    # copy-paste slip; confirm the intended column before relying on it.
    COL_REQUEST_ID = 2
    COL_SENTENCE_INDEX = 4
    COL_SENTENCE_ORIGINAL = 5
    COL_CLIP_TEXT = 6
    COL_CLIP_ID = 7
    COL_DISTANCE = 8

    def find_clips_matched(agent_text, clips):
        """Find the Kibana clip whose text equals *agent_text*.

        Args:
            agent_text: The agent sentence to look up.
            clips: Raw Kibana export, one tab-separated clip per line.

        Returns:
            A ``(gpt_sentence, sentence_index, distance)`` tuple, or
            ``("NOT FOUND", 0, 0.0)`` when no clip matches.
        """
        for clip in clips.splitlines():
            parts = clip.strip().split('\t')
            # Skip blank or malformed lines that cannot hold every column
            # (the original raised IndexError on them).
            if len(parts) <= COL_DISTANCE:
                continue
            if parts[COL_CLIP_TEXT] == agent_text:
                return (
                    parts[COL_SENTENCE_ORIGINAL],
                    int(parts[COL_SENTENCE_INDEX]),
                    round(float(parts[COL_DISTANCE]), 2),
                )
        # BUG FIX: the original fell through and returned None here, which
        # crashed the caller's tuple unpacking; return an explicit fallback.
        return "NOT FOUND", 0, 0.0

    @btn_build.click(inputs=[txt_transcript, txt_clips], outputs=[data, file])
    def rebuild_conversation(transcript, clips):
        """Rebuild the conversation table and export it as TSV and Excel.

        Args:
            transcript: Database transcript, one ``user:``/``agent:`` line
                per utterance.
            clips: Raw Kibana clip export (tab-separated lines).

        Returns:
            ``(dataframe, file_paths)`` where ``file_paths`` is a list of
            the generated Excel and TSV files, or ``None`` on empty input.
        """
        df = pd.DataFrame(
            {"index": [], "user": [], "agent": [], "gpt": [], "distance": []}
        )
        if not transcript.strip() or not clips.strip():
            # Plain string — the original used an f-string with no fields.
            msg = "EMPTY TRANSCRIPT OR LIST OF CLIPS!"
            df.loc[len(df.index)] = ["", msg, "", "", ""]
            return df, None

        user_text = ""
        conversation_line = 1
        for raw_line in transcript.splitlines():
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith("user:"):
                # Slice off the prefix only; the original's replace() also
                # removed any later "user:" occurrence inside the text.
                user_text = line[len("user:"):].strip()
                conversation_line = conversation_line + 1
            elif line.startswith("agent:"):
                agent_text = line[len("agent:"):].strip()
                gpt, order, distance = find_clips_matched(agent_text, clips)
                index = f"{str(conversation_line).zfill(3)}-{str(order).zfill(2)}"
                df.loc[len(df.index)] = [index, user_text, agent_text, gpt, distance]
                user_text = ""

        # The conversation ID from the first clip line names the exports.
        first_clip = clips.splitlines()[0].strip().split('\t')
        conversation_id = first_clip[COL_CONVERSATION_ID]
        tsv_file_name = f'conversation-{conversation_id}.tsv'
        excel_file_name = f'conversation-{conversation_id}.xlsx'

        # Build TSV file.
        df.to_csv(
            tsv_file_name,
            sep="\t",
            encoding="utf-8",
            index=False,
            header=True,
            quoting=csv.QUOTE_ALL,
        )

        # Deep copy so re-indexing does not mutate the displayed frame.
        df_excel = df.copy(deep=True)
        df_excel.set_index('index', inplace=True)

        # Build Excel file with XlsxWriter formatting.
        # Context manager guarantees the writer is closed even if a
        # formatting call raises (the original leaked the handle then).
        with pd.ExcelWriter(excel_file_name, engine='xlsxwriter') as writer:
            df_excel.to_excel(writer, sheet_name='Conversation')
            workbook = writer.book
            worksheet = writer.sheets["Conversation"]
            number_format = workbook.add_format({'num_format': '#,##0.00'})
            text_format = workbook.add_format({'text_wrap': True})
            # Wrap the three text columns; format the distance column.
            worksheet.set_column("B:D", 50, text_format)
            worksheet.set_column('E:E', 8, number_format)
            worksheet.autofit()

        return df, [excel_file_name, tsv_file_name]

app.launch()