Spaces:

Hexamind
/

GenProp

Runtime error

App Files Files Community

adrien.aribaut-gaudin committed on Jan 26, 2024

Commit

3ca15d8

1 Parent(s): f343031

feat: requirement part functional

Browse files

Files changed (4) hide show

src/control/controller.py +43 -8
src/llm/llm_tools.py +15 -1
src/tools/excel_tools.py +2 -2
src/view/view.py +2 -3

src/control/controller.py CHANGED Viewed

@@ -7,15 +7,18 @@ import random
 import datetime
 import string
 import docx
 from src.tools.doc_tools import get_title
 from src.domain.doc import Doc
 from src.domain.wikidoc import WikiPage
 from src.view.log_msg import create_msg_from
 import src.tools.semantic_db as semantic_db
 from src.tools.wiki import Wiki
 from src.llm.llm_tools import get_wikilist, get_public_paragraph, get_private_paragraph
 from src.tools.semantic_db import add_texts_to_collection, query_collection
-from src.tools.excel_tools import excel_to_json
 import gradio as gr
 from src.retriever.retriever import Retriever
@@ -290,20 +293,52 @@ class Controller:
         """
         Retriever(doc=doc, collection=collection)
     def generate_response_to_requirements(self):
-        excel_content = self.get_requirements_from_csv()
-        excel_content = json.loads(excel_content)
-        print(excel_content)
         excel_name = self.input_csv
         if '/' in excel_name:
             excel_name = excel_name.split('/')[-1]
         elif '\\' in excel_name:
             excel_name = excel_name.split('\\')[-1]
-        #copy the document and generate the new one
-        shutil.copy(self.input_csv, self.excel_doc_path)
     def get_requirements_from_csv(self):
-        excel_content = excel_to_json(self.input_csv)
         return excel_content

 import datetime
 import string
 import docx
+import pandas as pd
+from src.domain.block import Block
 from src.tools.doc_tools import get_title
 from src.domain.doc import Doc
 from src.domain.wikidoc import WikiPage
 from src.view.log_msg import create_msg_from
 import src.tools.semantic_db as semantic_db
 from src.tools.wiki import Wiki
+from src.llm.llm_tools import generate_response_to_exigence
 from src.llm.llm_tools import get_wikilist, get_public_paragraph, get_private_paragraph
 from src.tools.semantic_db import add_texts_to_collection, query_collection
+from src.tools.excel_tools import excel_to_dict
 import gradio as gr
 from src.retriever.retriever import Retriever
         """
         Retriever(doc=doc, collection=collection)
+    @staticmethod
+    def  _select_best_sources(sources: [Block], delta_1_2=0.15, delta_1_n=0.3, absolute=1.2, alpha=0.9) -> [Block]:
+        """
+        Select the best sources: not far from the very best, not far from the last selected, and not too bad per se
+        """
+        best_sources = []
+        for idx, s in enumerate(sources):
+            if idx == 0 \
+                    or (s.distance - sources[idx - 1].distance < delta_1_2
+                        and s.distance - sources[0].distance < delta_1_n) \
+                    or s.distance < absolute:
+                best_sources.append(s)
+                delta_1_2 *= alpha
+                delta_1_n *= alpha
+                absolute *= alpha
+            else:
+                break
+        return best_sources
     def generate_response_to_requirements(self):
+        dict_of_excel_content = self.get_requirements_from_csv()
+        for exigence in dict_of_excel_content:
+            blocks_sources = self.retriever.similarity_search(queries = exigence["Exigence"])
+            best_sources = self._select_best_sources(blocks_sources)
+            sources_contents = [f"Paragraph title : {s.title}\n-----\n{s.content}" if s.title else f"Paragraph {s.index}\n-----\n{s.content}" for s in best_sources]
+            context = '\n'.join(sources_contents)
+            i = 1
+            while (len(context) > 15000) and i < len(sources_contents):
+                context = "\n".join(sources_contents[:-i])
+                i += 1
+            reponse_exigence = generate_response_to_exigence(exigence = exigence["Exigence"], titre_exigence = exigence["Titre"], context = context)
+            dict_of_excel_content[dict_of_excel_content.index(exigence)]["Conformité"] = reponse_exigence
+            dict_of_excel_content[dict_of_excel_content.index(exigence)]["Document"] = best_sources[0].doc
+            dict_of_excel_content[dict_of_excel_content.index(exigence)]["Paragraphes"] = "; ".join([block.index for block in best_sources])
         excel_name = self.input_csv
         if '/' in excel_name:
             excel_name = excel_name.split('/')[-1]
         elif '\\' in excel_name:
             excel_name = excel_name.split('\\')[-1]
+        df = pd.DataFrame(data=dict_of_excel_content)
+        df.to_excel(f"{self.excel_doc_path}/{excel_name}", index=False)
+        return f"{self.excel_doc_path}/{excel_name}"
     def get_requirements_from_csv(self):
+        excel_content = excel_to_dict(self.input_csv)
         return excel_content

src/llm/llm_tools.py CHANGED Viewed

@@ -11,6 +11,7 @@ import wikipedia
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.prompts import PromptTemplate
 from langchain.chains import LLMChain
 from src.llm.llms import openai_llm
 from src.tools.wiki import Wiki
@@ -334,4 +335,17 @@ def summarize_paragraph_v2(prompt : str, title_doc : str = '', title_para : str
     print("****************")
     print(res)
     print("----")
-    return str(res).strip()

 from langchain.text_splitter import CharacterTextSplitter
 from langchain.prompts import PromptTemplate
 from langchain.chains import LLMChain
+from src.domain.block import Block
 from src.llm.llms import openai_llm
 from src.tools.wiki import Wiki
     print("****************")
     print(res)
     print("----")
+    return str(res).strip()
+def generate_response_to_exigence(exigence : str, titre_exigence : str, content : str):
+    """
+    Generates a response to an exigence depending on the context of the exigence and the blocks of the document.
+    """
+    task = (f"Your task consists in generating a response to a requirement in a tender for Orange, a telecommunication operator."
+            f"The requirement dealing with {titre_exigence} is expressed below between triple backquotes:"
+            f"```{exigence}```"
+            f"Your answer should be precise, consistent and as concise as possible with no politeness formulas and strictly be based on the following text delimited by triple backquotes : ```{content}```"
+            )
+    llm = openai_llm
+    generation = llm.invoke(task)
+    return generation

src/tools/excel_tools.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import pandas as pd
-def excel_to_json(file_path):
     df = pd.read_excel(file_path)
-    return df.to_json(orient='records', force_ascii=False)

 import pandas as pd
+def excel_to_dict(file_path):
     df = pd.read_excel(file_path)
+    return df.to_dict(orient='records')

src/view/view.py CHANGED Viewed

@@ -192,7 +192,7 @@ def run(config: Dict, controller: Controller):
         input_csv_comp.upload(input_csv_fn,
                                 inputs=[input_csv_comp],
                                 outputs=[verif_btn],
-                                )
         def input_csv_clear_fn():
             controller.clear_input_csv()
@@ -217,8 +217,7 @@ def run(config: Dict, controller: Controller):
         verif_btn.click(generate_requirements_excel,
                         inputs=[],
-                        outputs=[output_csv_comp],
-                        )
         def input_files_upload_fn(input_files_):
             for files in input_files_:

         input_csv_comp.upload(input_csv_fn,
                                 inputs=[input_csv_comp],
                                 outputs=[verif_btn],
+                                show_progress="full")
         def input_csv_clear_fn():
             controller.clear_input_csv()
         verif_btn.click(generate_requirements_excel,
                         inputs=[],
+                        outputs=[output_csv_comp],show_progress="full")
         def input_files_upload_fn(input_files_):
             for files in input_files_: