alpcansoydas committed on
Commit
9aa4e56
·
verified ·
1 Parent(s): 5fddeb7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -37
app.py CHANGED
@@ -1,11 +1,52 @@
1
  import gradio as gr
 
 
 
 
2
  from langchain.prompts import PromptTemplate
3
  from langchain_huggingface import HuggingFaceEndpoint
4
  from langchain_core.output_parsers import JsonOutputParser
5
  from langdetect import detect
6
- import time
7
 
8
- # Initialize the LLM and other components
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  llm = HuggingFaceEndpoint(
10
  repo_id="mistralai/Mistral-7B-Instruct-v0.3",
11
  task="text-generation",
@@ -14,65 +55,76 @@ llm = HuggingFaceEndpoint(
14
  do_sample=False,
15
  )
16
 
17
- template_classify = '''
18
- You are a topic detector bot. Your task is to determine the main topic of given text phrase.
19
- Answer general main topic not specific words.
20
- Your answer does not contain specific information from given text.
21
- Answer just one general main topic. Do not answer two or more topic.
22
- Answer shortly with two or three word phrase. Do not answer with long sentence.
23
- Answer topic with context. Example, if it says "My delivery is late", its topic is late delivery.
24
- If you do not know the topic just answer as General.
25
- What is the main topic of given text?:
26
- <text>
27
- {TEXT}
28
- </text>
29
- convert it to json format using 'Answer' as key and return it.
30
- Your final response MUST contain only the response, no other text.
31
  Example:
32
- {{"Answer":["General"]}}
 
 
 
 
 
 
33
  '''
34
 
35
  json_output_parser = JsonOutputParser()
36
 
37
  # Define the classify_text function
38
- def classify_text(text):
39
  global llm
40
-
41
  start = time.time()
42
- try:
43
- lang = detect(text)
44
 
 
 
45
  except:
46
  lang = "en"
47
 
48
- prompt_classify = PromptTemplate(
49
- template=template_classify,
50
- input_variables=["LANG", "TEXT"]
51
- )
52
- formatted_prompt = prompt_classify.format(TEXT=text, LANG=lang)
53
- classify = llm.invoke(formatted_prompt)
54
 
55
- parsed_output = json_output_parser.parse(classify)
 
 
 
 
 
 
 
 
 
56
  end = time.time()
57
- duration = end - start
58
- return lang, parsed_output["Answer"][0], duration #['Answer']
59
 
60
  # Create the Gradio interface
61
  def create_gradio_interface():
62
  with gr.Blocks() as iface:
63
- text_input = gr.Textbox(label="Text")
64
  lang_output = gr.Textbox(label="Detected Language")
65
- output_text = gr.Textbox(label="Detected Topics")
66
  time_taken = gr.Textbox(label="Time Taken (seconds)")
67
- submit_btn = gr.Button("Detect topic")
68
 
69
  def on_submit(text):
70
- lang, classification, duration = classify_text(text)
71
- return lang, classification, f"Time taken: {duration:.2f} seconds"
72
 
73
- submit_btn.click(fn=on_submit, inputs=text_input, outputs=[lang_output, output_text, time_taken])
74
 
75
  iface.launch()
76
 
77
  if __name__ == "__main__":
78
- create_gradio_interface()
 
1
  import gradio as gr
2
+ import requests
3
+ from duckduckgo_search import DDGS
4
+ import itertools
5
+ import time
6
  from langchain.prompts import PromptTemplate
7
  from langchain_huggingface import HuggingFaceEndpoint
8
  from langchain_core.output_parsers import JsonOutputParser
9
  from langdetect import detect
 
10
 
11
# Fetch proxy list from GitHub
def get_proxies():
    """Download the public SOCKS4 proxy list and return it as host:port strings.

    Returns:
        list[str]: one proxy per entry, blank lines removed.

    Raises:
        requests.HTTPError: if GitHub responds with a non-2xx status.
        requests.Timeout / requests.ConnectionError: if the fetch fails.
    """
    url = "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt"
    # Fail fast instead of hanging forever or silently splitlines()-ing an
    # HTML error page into the proxy rotation.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    # Drop empty lines so itertools.cycle never yields a blank proxy entry.
    return [line for line in response.text.splitlines() if line.strip()]
17
+
18
+ # Proxy cycle for rotation
19
+ proxy_list = get_proxies()
20
+ proxy_cycle = itertools.cycle(proxy_list)
21
+
22
# Proxy-enabled DDGS
class ProxiedDDGS(DDGS):
    """DDGS subclass that routes HTTP GETs through a single SOCKS4 proxy.

    NOTE(review): this relies on DDGS dispatching its fetches through a `_get`
    hook — verify against the installed duckduckgo_search version, whose
    internal HTTP client may differ.
    """

    def __init__(self, proxy):
        super().__init__()
        # host:port string applied to both http and https traffic.
        self.proxy = proxy

    def _get(self, url, headers=None):
        """Fetch *url* through the configured proxy; raise on HTTP errors."""
        response = requests.get(
            url,
            headers=headers,
            proxies={"http": self.proxy, "https": self.proxy},
            # Without a timeout a dead proxy hangs this call forever and the
            # retry rotation in search_with_retries never gets a chance.
            timeout=10,
        )
        response.raise_for_status()
        return response
34
+
35
# Search function with retries
def search_with_retries(query, max_results=3, max_retries=5, backoff_factor=1):
    """Run a DuckDuckGo text search through rotating proxies, retrying on failure.

    Args:
        query: Search phrase.
        max_results: Maximum number of result dicts to request.
        max_retries: How many proxies to try before giving up.
        backoff_factor: Multiplier for the linear back-off sleep (seconds).

    Returns:
        tuple: (results, proxy) — the search results and the proxy that worked.

    Raises:
        RuntimeError: when every attempt failed; chained from the last error.
    """
    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            proxy = next(proxy_cycle)
            searcher = ProxiedDDGS(proxy)
            results = searcher.text(query, max_results=max_results)
            return results, proxy
        except Exception as exc:  # public proxies fail often; rotate and retry
            last_error = exc
            # Linear back-off: 1*bf, 2*bf, ... seconds (same schedule as before).
            time.sleep(backoff_factor * attempt)
    # Chain the underlying cause instead of discarding it — the original code
    # raised a bare RuntimeError with no hint of what actually went wrong.
    raise RuntimeError(f"All retries failed for query: {query}") from last_error
48
+
49
+ # Initialize the LLM
50
  llm = HuggingFaceEndpoint(
51
  repo_id="mistralai/Mistral-7B-Instruct-v0.3",
52
  task="text-generation",
 
55
  do_sample=False,
56
  )
57
 
58
# Prompt template for feature extraction.
# {TEXT} / {SEARCH_RESULTS} are substituted by PromptTemplate.format() in
# extract_features; the doubled braces {{ }} escape literal JSON braces so
# str.format leaves them intact in the example block.
template_extract_features = '''
You are a product feature extractor bot. Your task is to determine features like Brand, Model, Type, RAM, Storage, etc., from the given product description and web search results.

Given product description: {TEXT}
Relevant web search results:
{SEARCH_RESULTS}

Return features in JSON format with keys like Brand, Model, Type, RAM, Storage, and others.
Your response MUST only include a valid JSON object and nothing else.

Example:
{{
"Brand": "Apple",
"Model": "iPhone 14",
"Type": "Smartphone",
"RAM": "4GB",
"Storage": "128GB"
}}
'''

# Parses the model's raw text response into a Python dict.
json_output_parser = JsonOutputParser()
80
 
81
  # Define the classify_text function
82
+ def extract_features(description):
83
  global llm
 
84
  start = time.time()
 
 
85
 
86
+ try:
87
+ lang = detect(description)
88
  except:
89
  lang = "en"
90
 
91
+ # Perform web search
92
+ try:
93
+ search_results, _ = search_with_retries(description, max_results=3)
94
+ search_text = "\n".join([res.get('snippet', '') for res in search_results])
95
+ except RuntimeError as e:
96
+ search_text = "No search results available."
97
 
98
+ # Format the prompt
99
+ prompt_extract = PromptTemplate(
100
+ template=template_extract_features,
101
+ input_variables=["TEXT", "SEARCH_RESULTS"]
102
+ )
103
+ formatted_prompt = prompt_extract.format(TEXT=description, SEARCH_RESULTS=search_text)
104
+
105
+ # LLM response
106
+ response = llm.invoke(formatted_prompt)
107
+ parsed_output = json_output_parser.parse(response)
108
  end = time.time()
109
+
110
+ return lang, parsed_output, end - start
111
 
112
# Create the Gradio interface
def create_gradio_interface():
    """Build the Gradio UI, wire the submit handler, and launch the app."""
    with gr.Blocks() as demo:
        # Input / output widgets (creation order defines the page layout).
        description_box = gr.Textbox(label="Item Description")
        language_box = gr.Textbox(label="Detected Language")
        features_box = gr.Textbox(label="Extracted Features (JSON)")
        duration_box = gr.Textbox(label="Time Taken (seconds)")
        extract_button = gr.Button("Extract Features")

        def handle_submit(text):
            # Delegate to the pipeline and format elapsed time for display.
            detected_lang, features, elapsed = extract_features(text)
            return detected_lang, features, f"{elapsed:.2f} seconds"

        extract_button.click(
            fn=handle_submit,
            inputs=description_box,
            outputs=[language_box, features_box, duration_box],
        )

    demo.launch()

if __name__ == "__main__":
    create_gradio_interface()