kwabs22
committed on
Commit
•
e3894fb
1
Parent(s):
5dd2646
Working but added inference times
Browse files
README.md
CHANGED
@@ -5,6 +5,10 @@ colorFrom: blue
|
|
5 |
colorTo: purple
|
6 |
sdk: docker
|
7 |
pinned: false
|
|
|
|
|
8 |
---
|
9 |
|
|
|
|
|
10 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
5 |
colorTo: purple
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
+
models:
|
9 |
+
- stabilityai/stablelm-2-zephyr-1_6b
|
10 |
---
|
11 |
|
12 |
+
Example of running llama.cpp (and by extension simple cpp) from python without pip package dependency issues
|
13 |
+
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
|
|
2 |
#from llama_cpp import Llama
|
3 |
import random
|
4 |
import subprocess
|
|
|
|
|
5 |
|
6 |
# Initialize model
|
7 |
#llm = Llama(model_path="/stablelm-2-zephyr-1_6b-Q4_0.gguf", n_gpu_layers=0, seed=random.randint(1, 2**31))
|
@@ -46,21 +48,21 @@ def generate_response(user_message):
|
|
46 |
"-e"
|
47 |
]
|
48 |
|
49 |
-
print("Before request")
|
50 |
# Start the subprocess
|
51 |
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
52 |
|
|
|
53 |
alllines = ""
|
54 |
|
55 |
# Yield each line of output as it becomes available
|
56 |
for line in process.stdout:
|
57 |
alllines += " " + line
|
58 |
-
|
|
|
59 |
|
60 |
# Wait for the subprocess to finish if it hasn't already
|
61 |
process.wait()
|
62 |
|
63 |
-
print("After response")
|
64 |
# Check for any errors
|
65 |
if process.returncode != 0:
|
66 |
error_message = process.stderr.read()
|
|
|
2 |
#from llama_cpp import Llama
|
3 |
import random
|
4 |
import subprocess
|
5 |
+
import time
|
6 |
+
|
7 |
|
8 |
# Initialize model
|
9 |
#llm = Llama(model_path="/stablelm-2-zephyr-1_6b-Q4_0.gguf", n_gpu_layers=0, seed=random.randint(1, 2**31))
|
|
|
48 |
"-e"
|
49 |
]
|
50 |
|
|
|
51 |
# Start the subprocess
|
52 |
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
53 |
|
54 |
+
start_time = time.time()
|
55 |
alllines = ""
|
56 |
|
57 |
# Yield each line of output as it becomes available
|
58 |
for line in process.stdout:
|
59 |
alllines += " " + line
|
60 |
+
elapsed_time = time.time() - start_time # Calculate elapsed time
|
61 |
+
yield f"{alllines} [Inference time: {elapsed_time:.2f} seconds]"
|
62 |
|
63 |
# Wait for the subprocess to finish if it hasn't already
|
64 |
process.wait()
|
65 |
|
|
|
66 |
# Check for any errors
|
67 |
if process.returncode != 0:
|
68 |
error_message = process.stderr.read()
|