kwabs22 commited on
Commit
9c1188f
·
1 Parent(s): 7a0f469

Testing Stable LM 2 1.6B Zephyr

Browse files
Files changed (3) hide show
  1. Dockerfile +23 -0
  2. app.py +30 -0
  3. requirements.txt +2 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Use an official Python runtime as a parent image
FROM python:3.8-slim

# Set the working directory in the container
WORKDIR /usr/src/app

# Install wget; --no-install-recommends and removing the apt lists keep the
# image small, and chaining the cleanup in the same RUN keeps it out of the layer
RUN apt-get update && \
    apt-get install -y --no-install-recommends wget && \
    rm -rf /var/lib/apt/lists/*

# Download the quantized model into the working directory (/usr/src/app).
# The URL is quoted so the shell does not interpret the '?download=true'
# query string as a glob pattern.
RUN wget -O stablelm-2-zephyr-1_6b-Q4_0.gguf "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b/resolve/main/stablelm-2-zephyr-1_6b-Q4_0.gguf?download=true"

# Copy the current directory contents into the container at /usr/src/app
COPY . .

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Make port 7860 available to the world outside this container
EXPOSE 7860

# Run app.py when the container launches
CMD ["python", "./app.py"]
app.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from llama_cpp import Llama
import random

# Initialize the model once at import time (CPU-only: n_gpu_layers=0).
# The GGUF file is downloaded by the Dockerfile into WORKDIR /usr/src/app,
# which is also the directory the app runs from, so a relative path is
# correct here.  The previous absolute path "/stablelm-..." pointed at the
# filesystem root, where the file does not exist.
llm = Llama(
    model_path="stablelm-2-zephyr-1_6b-Q4_0.gguf",
    n_gpu_layers=0,
    seed=random.randint(1, 2**31),  # fresh seed per process for varied sampling
)
def generate_response(user_message: str) -> str:
    """Generate a reply for ``user_message`` with the module-level ``llm``.

    The prompt follows the "### Human: ... ### Assistant:" template used by
    the original code.  Generation stops at the model's end-of-sequence
    token or after 500 generated tokens, whichever comes first.

    Returns the decoded assistant reply.
    """
    prompt = b"### Human: " + user_message.encode('utf-8') + b"\n### Assistant:"
    tokens = llm.tokenize(prompt)

    output = b""
    count = 0
    for token in llm.generate(tokens, top_k=40, top_p=0.95, temp=0.72, repeat_penalty=1.1):
        # Check EOS *before* appending, so the EOS token's detokenized bytes
        # never leak into the visible reply (the original appended first).
        if token == llm.token_eos():
            break
        output += llm.detokenize([token])
        count += 1
        if count >= 500:
            break
    # A hard cut at 500 tokens can land inside a multi-byte UTF-8 sequence;
    # errors="replace" avoids a UnicodeDecodeError in that case.
    return output.decode("utf-8", errors="replace")
# Build the web UI.  Inside the Docker container the server must bind to
# 0.0.0.0 on the EXPOSEd port 7860 — gradio's default 127.0.0.1 bind would
# be unreachable from outside the container (share=True tunnels are
# best-effort and unnecessary once the port is published).
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
    outputs="text",
    title="LLaMA Chat Interface",
    description="Enter your message and get a response from the LLaMA model.",
)

iface.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ llama-cpp-python
2
+ gradio