ccm committed on
Commit
d0b143b
·
verified ·
1 Parent(s): 80d6350

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -42
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import gradio # Interface handling
2
- import spaces # For GPU
3
  import langchain_community.vectorstores # Vectorstore for publications
4
  import langchain_huggingface # Embeddings
5
- import transformers
6
 
7
  # The number of publications to retrieve for the prompt
8
  PUBLICATIONS_TO_RETRIEVE = 5
@@ -11,13 +11,10 @@ PUBLICATIONS_TO_RETRIEVE = 5
11
  RAG_TEMPLATE = """You are an AI assistant who enjoys helping users learn about research.
12
  Answer the USER_QUERY on additive manufacturing research using the RESEARCH_EXCERPTS.
13
  Provide a concise ANSWER based on these excerpts. Avoid listing references.
14
-
15
  ===== RESEARCH_EXCERPTS =====
16
  {research_excerpts}
17
-
18
  ===== USER_QUERY =====
19
  {query}
20
-
21
  ===== ANSWER =====
22
  """
23
 
@@ -31,22 +28,23 @@ publication_vectorstore = langchain_community.vectorstores.FAISS.load_local(
31
  ),
32
  allow_dangerous_deserialization=True,
33
  )
34
- #
35
- # # Create the callable LLM
36
- # llm = transformers.pipeline(
37
- # task="text-generation",
38
- # model="Qwen/Qwen2.5-7B-Instruct-AWQ",
39
- # device="cuda",
40
- # )
 
 
 
41
 
42
 
43
  def preprocess(query: str) -> str:
44
  """
45
  Generates a prompt based on the top k documents matching the query.
46
-
47
  Args:
48
  query (str): The user's query.
49
-
50
  Returns:
51
  str: The formatted prompt containing research excerpts and the user's query.
52
  """
@@ -67,47 +65,29 @@ def preprocess(query: str) -> str:
67
  return prompt
68
 
69
 
70
- import threading
71
-
72
-
73
  @spaces.GPU
74
  def reply(message: str, history: list[str]) -> str:
75
  """
76
  Generates a response to the user’s message.
77
-
78
  Args:
79
  message (str): The user's message or query.
80
  history (list[str]): The conversation history.
81
-
82
  Returns:
83
  str: The generated response from the language model.
84
  """
85
 
86
- tok = transformers.AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct-AWQ")
87
- model = transformers.AutoModelForCausalLM.from_pretrained(
88
- "Qwen/Qwen2.5-7B-Instruct-AWQ"
89
- )
90
- inputs = tok([preprocess(message)], return_tensors="pt")
91
- streamer = transformers.TextIteratorStreamer(tok)
92
 
93
- generation_kwargs = dict(
94
- inputs, streamer=streamer, max_new_tokens=512, return_full_text=False
 
 
 
95
  )
96
- thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
97
- thread.start()
98
- generated_text = ""
99
- for new_text in streamer:
100
- generated_text += new_text
101
- yield generated_text
102
-
103
- # yield llm(
104
- # preprocess(message),
105
- # max_new_tokens=512,
106
- # return_full_text=False,
107
- # streamer=transformers.TextIteratorStreamer(
108
- # transformers.AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct-AWQ")
109
- # ),
110
- # )[0]["generated_text"]
111
 
112
 
113
  # Example Queries for Interface
 
1
  import gradio # Interface handling
2
+ import spaces # GPU
3
  import langchain_community.vectorstores # Vectorstore for publications
4
  import langchain_huggingface # Embeddings
5
+ import transformers # LLM
6
 
7
  # The number of publications to retrieve for the prompt
8
  PUBLICATIONS_TO_RETRIEVE = 5
 
11
  RAG_TEMPLATE = """You are an AI assistant who enjoys helping users learn about research.
12
  Answer the USER_QUERY on additive manufacturing research using the RESEARCH_EXCERPTS.
13
  Provide a concise ANSWER based on these excerpts. Avoid listing references.
 
14
  ===== RESEARCH_EXCERPTS =====
15
  {research_excerpts}
 
16
  ===== USER_QUERY =====
17
  {query}
 
18
  ===== ANSWER =====
19
  """
20
 
 
28
  ),
29
  allow_dangerous_deserialization=True,
30
  )
31
# Create the callable LLM.
# Load the tokenizer once and share it between the pipeline and the
# streamer: the original constructed a second tokenizer inline via
# AutoTokenizer.from_pretrained, loading the same files twice at startup.
tokenizer = transformers.AutoTokenizer.from_pretrained(
    "Qwen/Qwen2.5-7B-Instruct-AWQ"
)
llm = transformers.pipeline(
    task="text-generation",
    model="Qwen/Qwen2.5-7B-Instruct-AWQ",
    tokenizer=tokenizer,
    device="cuda",
    # NOTE(review): a module-level TextStreamer prints tokens to stdout and
    # is shared by all calls — fine for single-user Spaces logging; confirm
    # if concurrent requests are expected.
    streamer=transformers.TextStreamer(tokenizer),
)
41
 
42
 
43
  def preprocess(query: str) -> str:
44
  """
45
  Generates a prompt based on the top k documents matching the query.
 
46
  Args:
47
  query (str): The user's query.
 
48
  Returns:
49
  str: The formatted prompt containing research excerpts and the user's query.
50
  """
 
65
  return prompt
66
 
67
 
 
 
 
68
@spaces.GPU
def reply(message: str, history: list[str]) -> str:
    """
    Generates a response to the user’s message.

    Args:
        message (str): The user's message or query.
        history (list[str]): The conversation history.

    Returns:
        str: The generated response from the language model.
    """
    # Build the retrieval-augmented prompt for the user's message
    rag_prompt = preprocess(message)

    # Ask the language model for a completion of the prompt
    completion = llm(rag_prompt, max_new_tokens=512, return_full_text=False)

    # Pull out the generated text and trim any leftover "=" delimiter
    # characters from the prompt's ===== ANSWER ===== framing
    return completion[0]["generated_text"].strip("= ")
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
 
93
  # Example Queries for Interface