Spaces:
Runtime error
Runtime error
Update app.py - CPU + GPU Inference
Browse files
app.py
CHANGED
@@ -1,59 +1,58 @@
|
|
|
|
1 |
from langchain.llms import CTransformers
|
2 |
from langchain.chains import LLMChain
|
3 |
-
from langchain import PromptTemplate
|
4 |
-
import os
|
5 |
-
import io
|
6 |
import gradio as gr
|
7 |
import time
|
8 |
|
9 |
-
|
10 |
custom_prompt_template = """
|
11 |
-
You are an AI Chatbot named Sunny,
|
12 |
Query: {query}
|
13 |
|
14 |
-
You just return the helpful message in English and always try to provide relevant answers to user's query.
|
15 |
"""
|
16 |
|
|
|
17 |
def set_custom_prompt():
|
18 |
-
prompt = PromptTemplate(
|
19 |
-
|
20 |
return prompt
|
21 |
|
22 |
|
23 |
-
#Loading the model
|
24 |
def load_model():
|
25 |
-
#
|
|
|
|
|
|
|
|
|
26 |
llm = CTransformers(
|
27 |
-
model
|
28 |
model_type="llama",
|
29 |
-
max_new_tokens
|
30 |
-
temperature
|
31 |
-
repetition_penalty
|
32 |
-
|
33 |
-
gpu_layers = 3,
|
34 |
)
|
35 |
|
36 |
return llm
|
37 |
|
38 |
-
print(load_model())
|
39 |
|
40 |
def chain_pipeline():
|
41 |
llm = load_model()
|
42 |
main_prompt = set_custom_prompt()
|
43 |
-
main_chain = LLMChain(
|
44 |
-
prompt=main_prompt,
|
45 |
-
llm=llm
|
46 |
-
)
|
47 |
return main_chain
|
48 |
|
|
|
49 |
llmchain = chain_pipeline()
|
50 |
|
|
|
51 |
def bot(query):
|
52 |
llm_response = llmchain.run({"query": query})
|
53 |
return llm_response
|
54 |
|
|
|
55 |
with gr.Blocks(title='Sunny') as main:
|
56 |
-
# gr.HTML("Code Llama main")
|
57 |
gr.Markdown("# Sunny Chatbot")
|
58 |
chatbot = gr.Chatbot([], elem_id="chatbot", height=700)
|
59 |
msg = gr.Textbox()
|
@@ -65,8 +64,6 @@ with gr.Blocks(title='Sunny') as main:
|
|
65 |
time.sleep(2)
|
66 |
return "", chat_history
|
67 |
|
68 |
-
|
69 |
msg.submit(respond, [msg, chatbot], [msg, chatbot])
|
70 |
|
71 |
-
main.launch(share=
|
72 |
-
|
|
|
1 |
+
import torch
|
2 |
from langchain.llms import CTransformers
|
3 |
from langchain.chains import LLMChain
|
4 |
+
from langchain import PromptTemplate
|
|
|
|
|
5 |
import gradio as gr
|
6 |
import time
|
7 |
|
|
|
8 |
custom_prompt_template = """
|
9 |
+
You are an AI Chatbot named Sunny, created by 'Sic Team', and your task is to provide information to users and chat with them based on given user's query. Below is the user's query.
|
10 |
Query: {query}
|
11 |
|
12 |
+
You just return the helpful message in English and always try to provide relevant answers to the user's query.
|
13 |
"""
|
14 |
|
15 |
+
|
16 |
def set_custom_prompt():
    """Wrap the module-level system prompt in a PromptTemplate.

    Returns:
        PromptTemplate: template exposing the single ``query`` variable.
    """
    return PromptTemplate(
        template=custom_prompt_template,
        input_variables=['query'],
    )
|
20 |
|
21 |
|
|
|
22 |
def load_model():
    """Load the Zephyr-7B GGUF model through the CTransformers wrapper.

    Offloads layers to the GPU when CUDA is available, otherwise runs
    entirely on CPU.

    Returns:
        CTransformers: a ready-to-use LangChain LLM instance.
    """
    # Bug fix: CTransformers does not accept a torch.device via a `device`
    # kwarg (the previous code passed device=torch.device(...) and crashed).
    # GPU usage for GGUF models is controlled by the `gpu_layers` entry of
    # the ctransformers `config` dict, which is also the documented home for
    # the generation parameters below.
    use_gpu = torch.cuda.is_available()
    print(f"Using device: {'cuda' if use_gpu else 'cpu'}")

    llm = CTransformers(
        model="TheBloke/zephyr-7B-beta-GGUF",
        model_type="llama",
        config={
            "max_new_tokens": 1096,
            "temperature": 0.2,
            "repetition_penalty": 1.13,
            # 0 layers = pure CPU; offload up to 50 layers when CUDA exists.
            "gpu_layers": 50 if use_gpu else 0,
        },
    )
    return llm
|
38 |
|
|
|
39 |
|
40 |
def chain_pipeline():
    """Assemble the LLM and prompt template into a runnable LLMChain.

    Returns:
        LLMChain: chain that answers a single ``query`` input.
    """
    model = load_model()
    prompt = set_custom_prompt()
    chain = LLMChain(
        prompt=prompt,
        llm=model,
    )
    return chain
|
45 |
|
46 |
+
|
47 |
# Build the chain once at import time so every request reuses the same model.
llmchain = chain_pipeline()
|
48 |
|
49 |
+
|
50 |
def bot(query):
    """Answer a single user query via the module-level LLM chain.

    Args:
        query: the raw user message.

    Returns:
        The model's reply text.
    """
    return llmchain.run({"query": query})
|
53 |
|
54 |
+
|
55 |
with gr.Blocks(title='Sunny') as main:
|
|
|
56 |
gr.Markdown("# Sunny Chatbot")
|
57 |
chatbot = gr.Chatbot([], elem_id="chatbot", height=700)
|
58 |
msg = gr.Textbox()
|
|
|
64 |
time.sleep(2)
|
65 |
return "", chat_history
|
66 |
|
|
|
67 |
msg.submit(respond, [msg, chatbot], [msg, chatbot])
|
68 |
|
69 |
+
main.launch(share=False)
|
|