alpcansoydas committed on
Commit
9aa4e56
·
verified ·
1 Parent(s): 5fddeb7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -37
app.py CHANGED
@@ -1,11 +1,52 @@
1
  import gradio as gr
 
 
 
 
2
  from langchain.prompts import PromptTemplate
3
  from langchain_huggingface import HuggingFaceEndpoint
4
  from langchain_core.output_parsers import JsonOutputParser
5
  from langdetect import detect
6
- import time
7
 
8
- # Initialize the LLM and other components
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  llm = HuggingFaceEndpoint(
10
  repo_id="mistralai/Mistral-7B-Instruct-v0.3",
11
  task="text-generation",
@@ -14,65 +55,76 @@ llm = HuggingFaceEndpoint(
14
  do_sample=False,
15
  )
16
 
17
- template_classify = '''
18
- You are a topic detector bot. Your task is to determine the main topic of given text phrase.
19
- Answer general main topic not specific words.
20
- Your answer does not contain specific information from given text.
21
- Answer just one general main topic. Do not answer two or more topic.
22
- Answer shortly with two or three word phrase. Do not answer with long sentence.
23
- Answer topic with context. Example, if it says "My delivery is late", its topic is late delivery.
24
- If you do not know the topic just answer as General.
25
- What is the main topic of given text?:
26
- <text>
27
- {TEXT}
28
- </text>
29
- convert it to json format using 'Answer' as key and return it.
30
- Your final response MUST contain only the response, no other text.
31
  Example:
32
- {{"Answer":["General"]}}
 
 
 
 
 
 
33
  '''
34
 
35
  json_output_parser = JsonOutputParser()
36
 
37
  # Define the classify_text function
38
- def classify_text(text):
39
  global llm
40
-
41
  start = time.time()
42
- try:
43
- lang = detect(text)
44
 
 
 
45
  except:
46
  lang = "en"
47
 
48
- prompt_classify = PromptTemplate(
49
- template=template_classify,
50
- input_variables=["LANG", "TEXT"]
51
- )
52
- formatted_prompt = prompt_classify.format(TEXT=text, LANG=lang)
53
- classify = llm.invoke(formatted_prompt)
54
 
55
- parsed_output = json_output_parser.parse(classify)
 
 
 
 
 
 
 
 
 
56
  end = time.time()
57
- duration = end - start
58
- return lang, parsed_output["Answer"][0], duration #['Answer']
59
 
60
  # Create the Gradio interface
61
  def create_gradio_interface():
62
  with gr.Blocks() as iface:
63
- text_input = gr.Textbox(label="Text")
64
  lang_output = gr.Textbox(label="Detected Language")
65
- output_text = gr.Textbox(label="Detected Topics")
66
  time_taken = gr.Textbox(label="Time Taken (seconds)")
67
- submit_btn = gr.Button("Detect topic")
68
 
69
  def on_submit(text):
70
- lang, classification, duration = classify_text(text)
71
- return lang, classification, f"Time taken: {duration:.2f} seconds"
72
 
73
- submit_btn.click(fn=on_submit, inputs=text_input, outputs=[lang_output, output_text, time_taken])
74
 
75
  iface.launch()
76
 
77
  if __name__ == "__main__":
78
- create_gradio_interface()
 
1
  import gradio as gr
2
+ import requests
3
+ from duckduckgo_search import DDGS
4
+ import itertools
5
+ import time
6
  from langchain.prompts import PromptTemplate
7
  from langchain_huggingface import HuggingFaceEndpoint
8
  from langchain_core.output_parsers import JsonOutputParser
9
  from langdetect import detect
 
10
 
11
# Fetch proxy list from GitHub
def get_proxies():
    """Download the public SOCKS4 proxy list and return it as host:port strings.

    Returns:
        list[str]: one proxy per entry, blank lines removed.

    Raises:
        requests.HTTPError: if GitHub responds with a non-2xx status.
        requests.Timeout / requests.ConnectionError: if the fetch fails.
    """
    url = "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt"
    # Fail fast instead of hanging forever or silently splitlines()-ing an
    # HTML error page into the proxy rotation.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    # Drop empty lines so itertools.cycle never yields a blank proxy entry.
    return [line for line in response.text.splitlines() if line.strip()]
17
+
18
+ # Proxy cycle for rotation
19
+ proxy_list = get_proxies()
20
+ proxy_cycle = itertools.cycle(proxy_list)
21
+
22
# Proxy-enabled DDGS
class ProxiedDDGS(DDGS):
    """DDGS subclass that routes HTTP GETs through a single SOCKS4 proxy.

    NOTE(review): this relies on DDGS dispatching its fetches through a `_get`
    hook — verify against the installed duckduckgo_search version, whose
    internal HTTP client may differ.
    """

    def __init__(self, proxy):
        super().__init__()
        # host:port string applied to both http and https traffic.
        self.proxy = proxy

    def _get(self, url, headers=None):
        """Fetch *url* through the configured proxy; raise on HTTP errors."""
        response = requests.get(
            url,
            headers=headers,
            proxies={"http": self.proxy, "https": self.proxy},
            # Without a timeout a dead proxy hangs this call forever and the
            # retry rotation in search_with_retries never gets a chance.
            timeout=10,
        )
        response.raise_for_status()
        return response
34
+
35
# Search function with retries
def search_with_retries(query, max_results=3, max_retries=5, backoff_factor=1):
    """Run a DuckDuckGo text search through rotating proxies, retrying on failure.

    Args:
        query: Search phrase.
        max_results: Maximum number of result dicts to request.
        max_retries: How many proxies to try before giving up.
        backoff_factor: Multiplier for the linear back-off sleep (seconds).

    Returns:
        tuple: (results, proxy) — the search results and the proxy that worked.

    Raises:
        RuntimeError: when every attempt failed; chained from the last error.
    """
    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            proxy = next(proxy_cycle)
            searcher = ProxiedDDGS(proxy)
            results = searcher.text(query, max_results=max_results)
            return results, proxy
        except Exception as exc:  # public proxies fail often; rotate and retry
            last_error = exc
            # Linear back-off: 1*bf, 2*bf, ... seconds (same schedule as before).
            time.sleep(backoff_factor * attempt)
    # Chain the underlying cause instead of discarding it — the original code
    # raised a bare RuntimeError with no hint of what actually went wrong.
    raise RuntimeError(f"All retries failed for query: {query}") from last_error
48
+
49
+ # Initialize the LLM
50
  llm = HuggingFaceEndpoint(
51
  repo_id="mistralai/Mistral-7B-Instruct-v0.3",
52
  task="text-generation",
 
55
  do_sample=False,
56
  )
57
 
58
# Prompt template for feature extraction.
# {TEXT} / {SEARCH_RESULTS} are substituted by PromptTemplate.format() in
# extract_features; the doubled braces {{ }} escape literal JSON braces so
# str.format leaves them intact in the example block.
template_extract_features = '''
You are a product feature extractor bot. Your task is to determine features like Brand, Model, Type, RAM, Storage, etc., from the given product description and web search results.

Given product description: {TEXT}
Relevant web search results:
{SEARCH_RESULTS}

Return features in JSON format with keys like Brand, Model, Type, RAM, Storage, and others.
Your response MUST only include a valid JSON object and nothing else.

Example:
{{
"Brand": "Apple",
"Model": "iPhone 14",
"Type": "Smartphone",
"RAM": "4GB",
"Storage": "128GB"
}}
'''

# Parses the model's raw text response into a Python dict.
json_output_parser = JsonOutputParser()
80
 
81
  # Define the classify_text function
82
+ def extract_features(description):
83
  global llm
 
84
  start = time.time()
 
 
85
 
86
+ try:
87
+ lang = detect(description)
88
  except:
89
  lang = "en"
90
 
91
+ # Perform web search
92
+ try:
93
+ search_results, _ = search_with_retries(description, max_results=3)
94
+ search_text = "\n".join([res.get('snippet', '') for res in search_results])
95
+ except RuntimeError as e:
96
+ search_text = "No search results available."
97
 
98
+ # Format the prompt
99
+ prompt_extract = PromptTemplate(
100
+ template=template_extract_features,
101
+ input_variables=["TEXT", "SEARCH_RESULTS"]
102
+ )
103
+ formatted_prompt = prompt_extract.format(TEXT=description, SEARCH_RESULTS=search_text)
104
+
105
+ # LLM response
106
+ response = llm.invoke(formatted_prompt)
107
+ parsed_output = json_output_parser.parse(response)
108
  end = time.time()
109
+
110
+ return lang, parsed_output, end - start
111
 
112
# Create the Gradio interface
def create_gradio_interface():
    """Build the Gradio UI, wire the submit handler, and launch the app."""
    with gr.Blocks() as demo:
        # Input / output widgets (creation order defines the page layout).
        description_box = gr.Textbox(label="Item Description")
        language_box = gr.Textbox(label="Detected Language")
        features_box = gr.Textbox(label="Extracted Features (JSON)")
        duration_box = gr.Textbox(label="Time Taken (seconds)")
        extract_button = gr.Button("Extract Features")

        def handle_submit(text):
            # Delegate to the pipeline and format elapsed time for display.
            detected_lang, features, elapsed = extract_features(text)
            return detected_lang, features, f"{elapsed:.2f} seconds"

        extract_button.click(
            fn=handle_submit,
            inputs=description_box,
            outputs=[language_box, features_box, duration_box],
        )

    demo.launch()

if __name__ == "__main__":
    create_gradio_interface()