abhi1nandy2 committed on
Commit
4721d91
·
verified ·
1 Parent(s): a08a613

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -13
app.py CHANGED
@@ -1,9 +1,9 @@
1
  import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup, Comment
 
4
  from llama_cpp import Llama
5
 
6
- # Function to extract visible text from a webpage
7
  def tag_visible(element):
8
  if element.parent.name in ['style', 'script', 'head', 'title', 'meta', '[document]']:
9
  return False
@@ -14,7 +14,8 @@ def tag_visible(element):
14
  def get_text_from_url(url):
15
  response = requests.get(url, timeout=10)
16
  soup = BeautifulSoup(response.text, 'html.parser')
17
- texts = soup.find_all(text=True)
 
18
  visible_texts = filter(tag_visible, texts)
19
  return " ".join(t.strip() for t in visible_texts)
20
 
@@ -26,35 +27,35 @@ extensions = ["", "pmrf-profile-page"]
26
  for ext in extensions:
27
  try:
28
  full_text = get_text_from_url(homepage_url + ext)
29
- truncated_text = full_text[:2000] # Using first 2000 characters for more context
30
  text_list.append(truncated_text)
31
  except Exception as e:
32
  text_list.append(f"Error fetching {homepage_url+ext}: {str(e)}")
33
 
34
  CONTEXT = " ".join(text_list)
35
 
36
- # Load the Mistral model (low-latency, CPU optimized)
37
- llm = Llama(model_path="mistral-7b-instruct-v0.1.Q4_K_M.gguf", n_ctx=4096, n_threads=6, verbose=False)
 
 
 
 
38
 
39
- # Function to answer queries
40
  def answer_query(query):
41
  prompt = (
42
- "You are an AI chatbot answering queries based on the homepage of Abhilash Nandy. "
43
- "Your responses should be concise (under 30 words) and directly relevant to the provided context.\n\n"
44
  f"Context: {CONTEXT}\n\nUser: {query}\nAI:"
45
  )
46
-
47
  response = llm(prompt, max_tokens=50, stop=["\nUser:", "\nAI:"], echo=False)
48
-
49
  return response["choices"][0]["text"].strip()
50
 
51
- # Gradio Interface
52
  iface = gr.Interface(
53
  fn=answer_query,
54
- inputs=gr.Textbox(lines=2, placeholder="Ask a question about Abhilash Nandy..."),
55
  outputs="text",
56
  title="Homepage QA Chatbot",
57
- description="Ask me anything about Abhilash Nandy's homepage."
58
  )
59
 
60
  if __name__ == '__main__':
 
1
  import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup, Comment
4
+ import os
5
  from llama_cpp import Llama
6
 
 
7
  def tag_visible(element):
8
  if element.parent.name in ['style', 'script', 'head', 'title', 'meta', '[document]']:
9
  return False
 
14
  def get_text_from_url(url):
15
  response = requests.get(url, timeout=10)
16
  soup = BeautifulSoup(response.text, 'html.parser')
17
+ # Use 'string=True' instead of deprecated 'text=True'
18
+ texts = soup.find_all(string=True)
19
  visible_texts = filter(tag_visible, texts)
20
  return " ".join(t.strip() for t in visible_texts)
21
 
 
27
  for ext in extensions:
28
  try:
29
  full_text = get_text_from_url(homepage_url + ext)
30
+ truncated_text = full_text[:2000] # Adjust truncation length as needed
31
  text_list.append(truncated_text)
32
  except Exception as e:
33
  text_list.append(f"Error fetching {homepage_url+ext}: {str(e)}")
34
 
35
  CONTEXT = " ".join(text_list)
36
 
37
# Location of the GGUF model file; it must already be present in the
# 'models' directory before the app starts.
model_path = "models/mistral-7b-instruct-v0.1.Q4_K_M.gguf"

if os.path.exists(model_path):
    llm = Llama(model_path=model_path, n_ctx=4096, n_threads=6, verbose=False)
else:
    # Stop at startup with an actionable message when the file is missing.
    raise ValueError(f"Model file not found at {model_path}. Please download the model file and place it in the 'models' folder.")
43
 
 
44
  def answer_query(query):
45
  prompt = (
46
+ "You are an AI chatbot answering queries based on Abhilash Nandy's homepage. "
47
+ "Provide concise answers (under 30 words).\n\n"
48
  f"Context: {CONTEXT}\n\nUser: {query}\nAI:"
49
  )
 
50
  response = llm(prompt, max_tokens=50, stop=["\nUser:", "\nAI:"], echo=False)
 
51
  return response["choices"][0]["text"].strip()
52
 
 
53
  iface = gr.Interface(
54
  fn=answer_query,
55
+ inputs=gr.Textbox(lines=2, placeholder="Ask a question about Abhilash Nandy's homepage..."),
56
  outputs="text",
57
  title="Homepage QA Chatbot",
58
+ description="A chatbot answering queries based on homepage context."
59
  )
60
 
61
  if __name__ == '__main__':