kwabs22 committed · 9c1188f
Parent(s): 7a0f469
Testing Stable LM 2 1.6B Zephyr

Files changed:
- Dockerfile +23 -0
- app.py +30 -0
- requirements.txt +2 -0
Dockerfile
ADDED
@@ -0,0 +1,23 @@
+# Use an official Python runtime as a parent image
+FROM python:3.8-slim
+
+# Set the working directory in the container
+WORKDIR /usr/src/app
+
+# Install wget
+RUN apt-get update && apt-get install -y wget
+
+# Download the model file (URL quoted so the shell does not treat "?" as a glob)
+RUN wget -O stablelm-2-zephyr-1_6b-Q4_0.gguf "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b/resolve/main/stablelm-2-zephyr-1_6b-Q4_0.gguf?download=true"
+
+# Copy the current directory contents into the container at /usr/src/app
+COPY . .
+
+# Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Make port 7860 available to the world outside this container
+EXPOSE 7860
+
+# Run app.py when the container launches
+CMD ["python", "./app.py"]
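
Not part of the commit itself, but a minimal sanity check for this setup: the sketch below, run inside the built image, confirms that the GGUF file fetched by the wget step actually loads. The prompt string and max_tokens value are illustrative assumptions; the path is relative to WORKDIR /usr/src/app, where the Dockerfile saves the file.

    # Hypothetical smoke test (not in this commit): verify the downloaded GGUF loads.
    from llama_cpp import Llama

    # The wget step runs after WORKDIR, so the file sits in /usr/src/app.
    llm = Llama(model_path="stablelm-2-zephyr-1_6b-Q4_0.gguf", n_gpu_layers=0)
    print(llm("### Human: Say hello.\n### Assistant:", max_tokens=16)["choices"][0]["text"])
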
app.py
ADDED
@@ -0,0 +1,30 @@
+import gradio as gr
+from llama_cpp import Llama
+import random
+
+# Initialize model; the path is relative to WORKDIR /usr/src/app, where the Dockerfile saves the GGUF
+llm = Llama(model_path="stablelm-2-zephyr-1_6b-Q4_0.gguf", n_gpu_layers=0, seed=random.randint(1, 2**31))
+
+def generate_response(user_message):
+    encodeduserm = b"### Human: " + user_message.encode('utf-8') + b"\n### Assistant:"
+    tokens = llm.tokenize(encodeduserm)
+    output = b""
+    count = 0
+
+    for token in llm.generate(tokens, top_k=40, top_p=0.95, temp=0.72, repeat_penalty=1.1):
+        text = llm.detokenize([token])
+        output += text
+        count += 1
+        if count >= 500 or (token == llm.token_eos()):
+            break
+    return output.decode()
+
+iface = gr.Interface(
+    fn=generate_response,
+    inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
+    outputs="text",
+    title="Stable LM 2 Zephyr Chat Interface",
+    description="Enter your message and get a response from the Stable LM 2 Zephyr model."
+)
+# Bind to 0.0.0.0 so the EXPOSEd port 7860 is reachable from outside the container
+iface.launch(server_name="0.0.0.0", server_port=7860)
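
The loop in generate_response stops only at EOS or a 500-token cap, so nothing prevents the model from continuing into a fabricated "### Human:" turn. A minimal alternative sketch, assuming the same model file and llama-cpp-python's high-level completion API rather than the manual loop this commit uses, which adds a stop sequence:

    # Alternative sketch (not in this commit): the high-level completion API
    # handles tokenization and detokenization internally and supports stop strings.
    from llama_cpp import Llama

    llm = Llama(model_path="stablelm-2-zephyr-1_6b-Q4_0.gguf", n_gpu_layers=0)

    def generate_response(user_message):
        result = llm(
            f"### Human: {user_message}\n### Assistant:",
            max_tokens=500,            # same cap as the manual count >= 500 check
            temperature=0.72, top_k=40, top_p=0.95, repeat_penalty=1.1,
            stop=["### Human:"],       # cut off before a fabricated next turn
        )
        return result["choices"][0]["text"]
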
requirements.txt
ADDED
@@ -0,0 +1,2 @@
+llama-cpp-python
+gradio