Spaces:
Sleeping
Sleeping
alpcansoydas
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,52 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
2 |
from langchain.prompts import PromptTemplate
|
3 |
from langchain_huggingface import HuggingFaceEndpoint
|
4 |
from langchain_core.output_parsers import JsonOutputParser
|
5 |
from langdetect import detect
|
6 |
-
import time
|
7 |
|
8 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
llm = HuggingFaceEndpoint(
|
10 |
repo_id="mistralai/Mistral-7B-Instruct-v0.3",
|
11 |
task="text-generation",
|
@@ -14,65 +55,76 @@ llm = HuggingFaceEndpoint(
|
|
14 |
do_sample=False,
|
15 |
)
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
</text>
|
29 |
-
convert it to json format using 'Answer' as key and return it.
|
30 |
-
Your final response MUST contain only the response, no other text.
|
31 |
Example:
|
32 |
-
{{
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
'''
|
34 |
|
35 |
json_output_parser = JsonOutputParser()
|
36 |
|
37 |
# Define the classify_text function
|
38 |
-
def
|
39 |
global llm
|
40 |
-
|
41 |
start = time.time()
|
42 |
-
try:
|
43 |
-
lang = detect(text)
|
44 |
|
|
|
|
|
45 |
except:
|
46 |
lang = "en"
|
47 |
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
end = time.time()
|
57 |
-
|
58 |
-
return lang, parsed_output
|
59 |
|
60 |
# Create the Gradio interface
|
61 |
def create_gradio_interface():
|
62 |
with gr.Blocks() as iface:
|
63 |
-
text_input = gr.Textbox(label="
|
64 |
lang_output = gr.Textbox(label="Detected Language")
|
65 |
-
|
66 |
time_taken = gr.Textbox(label="Time Taken (seconds)")
|
67 |
-
submit_btn = gr.Button("
|
68 |
|
69 |
def on_submit(text):
|
70 |
-
lang,
|
71 |
-
return lang,
|
72 |
|
73 |
-
submit_btn.click(fn=on_submit, inputs=text_input, outputs=[lang_output,
|
74 |
|
75 |
iface.launch()
|
76 |
|
77 |
if __name__ == "__main__":
|
78 |
-
create_gradio_interface()
|
|
|
1 |
import gradio as gr
|
2 |
+
import requests
|
3 |
+
from duckduckgo_search import DDGS
|
4 |
+
import itertools
|
5 |
+
import time
|
6 |
from langchain.prompts import PromptTemplate
|
7 |
from langchain_huggingface import HuggingFaceEndpoint
|
8 |
from langchain_core.output_parsers import JsonOutputParser
|
9 |
from langdetect import detect
|
|
|
10 |
|
11 |
+
# Fetch proxy list from GitHub
def get_proxies():
    """Download the SOCKS4 proxy list (one host:port per line) from GitHub.

    Returns:
        list[str]: proxy addresses; empty list if the download fails, so the
        app can still start when the proxy source is unreachable (callers
        degrade gracefully via search_with_retries' retry/raise path).
    """
    url = "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt"
    try:
        # timeout prevents the app from hanging forever at import time
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        return []
    # Drop blank lines so itertools.cycle never yields an empty proxy string.
    return [line.strip() for line in response.text.splitlines() if line.strip()]
|
17 |
+
|
18 |
+
# Proxy cycle for rotation: the list is fetched once at import time and
# iterated round-robin by search_with_retries.
# NOTE(review): the list is never refreshed while the app runs, so dead
# proxies persist for the process lifetime — confirm this is acceptable.
proxy_list = get_proxies()
proxy_cycle = itertools.cycle(proxy_list)
|
21 |
+
|
22 |
+
# Proxy-enabled DDGS
class ProxiedDDGS(DDGS):
    """DDGS subclass that routes its HTTP fetches through a single proxy."""

    def __init__(self, proxy):
        """Store the proxy address this instance will use.

        Args:
            proxy: "host:port" (from the SOCKS4 list) or a full proxy URL.
        """
        super().__init__()
        # The upstream list is socks4.txt; a bare "host:port" would be
        # treated by requests as an HTTP proxy, so normalize the scheme.
        self.proxy = proxy if "://" in proxy else f"socks4://{proxy}"

    def _get(self, url, headers=None):
        """Fetch *url* through self.proxy; raise on HTTP error status."""
        # timeout prevents a dead proxy from hanging the retry loop forever
        response = requests.get(
            url,
            headers=headers,
            proxies={"http": self.proxy, "https": self.proxy},
            timeout=10,
        )
        response.raise_for_status()
        return response
|
34 |
+
|
35 |
+
# Search function with retries
def search_with_retries(query, max_results=3, max_retries=5, backoff_factor=1):
    """Run a DuckDuckGo text search, rotating to a fresh proxy on failure.

    Args:
        query: search string.
        max_results: maximum number of results to request.
        max_retries: attempts before giving up.
        backoff_factor: linear backoff multiplier (seconds per attempt).

    Returns:
        tuple: (results list, proxy that succeeded).

    Raises:
        RuntimeError: when every attempt fails.
    """
    for attempt in range(1, max_retries + 1):
        try:
            proxy = next(proxy_cycle)
            searcher = ProxiedDDGS(proxy)
            results = searcher.text(query, max_results=max_results)
            return results, proxy
        except Exception:
            # Broad catch is deliberate: any proxy/network error just means
            # "try the next proxy". Don't sleep after the final attempt —
            # the original slept one extra backoff period for nothing.
            if attempt < max_retries:
                time.sleep(backoff_factor * attempt)
    raise RuntimeError(f"All retries failed for query: {query}")
|
48 |
+
|
49 |
+
# Initialize the LLM
|
50 |
llm = HuggingFaceEndpoint(
|
51 |
repo_id="mistralai/Mistral-7B-Instruct-v0.3",
|
52 |
task="text-generation",
|
|
|
55 |
do_sample=False,
|
56 |
)
|
57 |
|
58 |
+
# Prompt template for feature extraction.
# {TEXT} / {SEARCH_RESULTS} are filled by PromptTemplate in extract_features;
# the doubled braces {{ }} escape literal braces in the JSON example.
template_extract_features = '''
You are a product feature extractor bot. Your task is to determine features like Brand, Model, Type, RAM, Storage, etc., from the given product description and web search results.

Given product description: {TEXT}
Relevant web search results:
{SEARCH_RESULTS}

Return features in JSON format with keys like Brand, Model, Type, RAM, Storage, and others.
Your response MUST only include a valid JSON object and nothing else.

Example:
{{
    "Brand": "Apple",
    "Model": "iPhone 14",
    "Type": "Smartphone",
    "RAM": "4GB",
    "Storage": "128GB"
}}
'''
|
78 |
|
79 |
# Parses the JSON object out of the LLM's raw text response.
json_output_parser = JsonOutputParser()
|
80 |
|
81 |
# Extract product features from a description via web search + LLM
def extract_features(description):
    """Detect the description's language, gather web context, and ask the
    LLM for product features as JSON.

    Args:
        description: free-text product description.

    Returns:
        tuple: (language code str, parsed feature dict, elapsed seconds float).
    """
    start = time.time()

    # langdetect raises on empty/ambiguous input; default to English.
    # (Original used a bare `except:`, which also swallowed KeyboardInterrupt.)
    try:
        lang = detect(description)
    except Exception:
        lang = "en"

    # Perform web search; degrade gracefully when all proxies fail.
    try:
        search_results, _ = search_with_retries(description, max_results=3)
        # NOTE(review): DDGS.text() results typically key their text as
        # 'body', not 'snippet' — .get(..., '') keeps the original behavior
        # but may always yield empty strings; confirm against the library.
        search_text = "\n".join(res.get('snippet', '') for res in search_results)
    except RuntimeError:
        search_text = "No search results available."

    # Format the prompt
    prompt_extract = PromptTemplate(
        template=template_extract_features,
        input_variables=["TEXT", "SEARCH_RESULTS"]
    )
    formatted_prompt = prompt_extract.format(TEXT=description, SEARCH_RESULTS=search_text)

    # LLM response
    response = llm.invoke(formatted_prompt)
    parsed_output = json_output_parser.parse(response)
    end = time.time()

    return lang, parsed_output, end - start
|
111 |
|
112 |
# Create the Gradio interface
def create_gradio_interface():
    """Build the Gradio Blocks UI for the feature extractor and launch it."""
    with gr.Blocks() as iface:
        # Input on top, then the three read-only result fields.
        text_input = gr.Textbox(label="Item Description")
        lang_output = gr.Textbox(label="Detected Language")
        feature_output = gr.Textbox(label="Extracted Features (JSON)")
        time_taken = gr.Textbox(label="Time Taken (seconds)")
        submit_btn = gr.Button("Extract Features")

        def on_submit(text):
            # Run the pipeline, then render elapsed time for display.
            lang, features, duration = extract_features(text)
            return lang, features, f"{duration:.2f} seconds"

        submit_btn.click(
            fn=on_submit,
            inputs=text_input,
            outputs=[lang_output, feature_output, time_taken],
        )

    iface.launch()
|
128 |
|
129 |
# Launch the UI only when run as a script (not when imported).
if __name__ == "__main__":
    create_gradio_interface()
|