# product-demo / app.py
# Source: Hugging Face Space by alpcansoydas — "Update app.py",
# commit 9aa4e56 (verified), 4.01 kB.
# (Raw-file page chrome preserved as a comment so the module stays importable.)
import gradio as gr
import requests
from duckduckgo_search import DDGS
import itertools
import time
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.output_parsers import JsonOutputParser
from langdetect import detect
# Fetch proxy list from GitHub
def get_proxies():
url = "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt"
response = requests.get(url)
proxies = response.text.splitlines()
return proxies
# Proxy cycle for rotation
proxy_list = get_proxies()
proxy_cycle = itertools.cycle(proxy_list)
# Proxy-enabled DDGS
class ProxiedDDGS(DDGS):
def __init__(self, proxy):
super().__init__()
self.proxy = proxy
def _get(self, url, headers=None):
response = requests.get(
url, headers=headers, proxies={"http": self.proxy, "https": self.proxy}
)
response.raise_for_status()
return response
# Search function with retries
def search_with_retries(query, max_results=3, max_retries=5, backoff_factor=1):
retries = 0
while retries < max_retries:
try:
proxy = next(proxy_cycle)
searcher = ProxiedDDGS(proxy)
results = searcher.text(query, max_results=max_results)
return results, proxy
except Exception:
retries += 1
time.sleep(backoff_factor * retries)
raise RuntimeError(f"All retries failed for query: {query}")
# Initialize the LLM
llm = HuggingFaceEndpoint(
repo_id="mistralai/Mistral-7B-Instruct-v0.3",
task="text-generation",
max_new_tokens=128,
temperature=0.7,
do_sample=False,
)
# Prompt template for feature extraction
template_extract_features = '''
You are a product feature extractor bot. Your task is to determine features like Brand, Model, Type, RAM, Storage, etc., from the given product description and web search results.
Given product description: {TEXT}
Relevant web search results:
{SEARCH_RESULTS}
Return features in JSON format with keys like Brand, Model, Type, RAM, Storage, and others.
Your response MUST only include a valid JSON object and nothing else.
Example:
{{
"Brand": "Apple",
"Model": "iPhone 14",
"Type": "Smartphone",
"RAM": "4GB",
"Storage": "128GB"
}}
'''
json_output_parser = JsonOutputParser()
# Define the classify_text function
def extract_features(description):
global llm
start = time.time()
try:
lang = detect(description)
except:
lang = "en"
# Perform web search
try:
search_results, _ = search_with_retries(description, max_results=3)
search_text = "\n".join([res.get('snippet', '') for res in search_results])
except RuntimeError as e:
search_text = "No search results available."
# Format the prompt
prompt_extract = PromptTemplate(
template=template_extract_features,
input_variables=["TEXT", "SEARCH_RESULTS"]
)
formatted_prompt = prompt_extract.format(TEXT=description, SEARCH_RESULTS=search_text)
# LLM response
response = llm.invoke(formatted_prompt)
parsed_output = json_output_parser.parse(response)
end = time.time()
return lang, parsed_output, end - start
# Create the Gradio interface
def create_gradio_interface():
with gr.Blocks() as iface:
text_input = gr.Textbox(label="Item Description")
lang_output = gr.Textbox(label="Detected Language")
feature_output = gr.Textbox(label="Extracted Features (JSON)")
time_taken = gr.Textbox(label="Time Taken (seconds)")
submit_btn = gr.Button("Extract Features")
def on_submit(text):
lang, features, duration = extract_features(text)
return lang, features, f"{duration:.2f} seconds"
submit_btn.click(fn=on_submit, inputs=text_input, outputs=[lang_output, feature_output, time_taken])
iface.launch()
if __name__ == "__main__":
create_gradio_interface()