kwabs22 commited on
Commit
e3894fb
1 Parent(s): 5dd2646

Working, but added inference times

Browse files
Files changed (2) hide show
  1. README.md +4 -0
  2. app.py +5 -3
README.md CHANGED
@@ -5,6 +5,10 @@ colorFrom: blue
5
  colorTo: purple
6
  sdk: docker
7
  pinned: false
 
 
8
  ---
9
 
 
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
5
  colorTo: purple
6
  sdk: docker
7
  pinned: false
8
+ models:
9
+ - stabilityai/stablelm-2-zephyr-1_6b
10
  ---
11
 
12
+ Example of running llama.cpp (and, by extension, simple C++ programs) from Python without pip package dependency issues
13
+
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
2
  #from llama_cpp import Llama
3
  import random
4
  import subprocess
 
 
5
 
6
  # Initialize model
7
  #llm = Llama(model_path="/stablelm-2-zephyr-1_6b-Q4_0.gguf", n_gpu_layers=0, seed=random.randint(1, 2**31))
@@ -46,21 +48,21 @@ def generate_response(user_message):
46
  "-e"
47
  ]
48
 
49
- print("Before request")
50
  # Start the subprocess
51
  process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
52
 
 
53
  alllines = ""
54
 
55
  # Yield each line of output as it becomes available
56
  for line in process.stdout:
57
  alllines += " " + line
58
- yield alllines
 
59
 
60
  # Wait for the subprocess to finish if it hasn't already
61
  process.wait()
62
 
63
- print("After response")
64
  # Check for any errors
65
  if process.returncode != 0:
66
  error_message = process.stderr.read()
 
2
  #from llama_cpp import Llama
3
  import random
4
  import subprocess
5
+ import time
6
+
7
 
8
  # Initialize model
9
  #llm = Llama(model_path="/stablelm-2-zephyr-1_6b-Q4_0.gguf", n_gpu_layers=0, seed=random.randint(1, 2**31))
 
48
  "-e"
49
  ]
50
 
 
51
  # Start the subprocess
52
  process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
53
 
54
+ start_time = time.time()
55
  alllines = ""
56
 
57
  # Yield each line of output as it becomes available
58
  for line in process.stdout:
59
  alllines += " " + line
60
+ elapsed_time = time.time() - start_time # Calculate elapsed time
61
+ yield f"{alllines} [Inference time: {elapsed_time:.2f} seconds]"
62
 
63
  # Wait for the subprocess to finish if it hasn't already
64
  process.wait()
65
 
 
66
  # Check for any errors
67
  if process.returncode != 0:
68
  error_message = process.stderr.read()