File size: 6,683 Bytes
9c1188f 6af00ba 4213f50 e3894fb 7d94414 7e3fb58 d9e0520 6af00ba d9e0520 0ebff8f ee032a8 d9e0520 0ebff8f 6af00ba 0ebff8f 6af00ba 0ebff8f 6af00ba 0ebff8f d9e0520 6af00ba b46f5ab 0ebff8f 6af00ba 43b1821 e8f7917 078637c 43b1821 35ed179 078637c 7d94414 35ed179 078637c 35ed179 74d7e67 078637c 43b1821 7d94414 9c1188f bbb68cf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import gradio as gr
import psutil
import subprocess
import time
def generate_response_by_api():
    """Placeholder for an API-backed generation path.

    Nothing is implemented yet: no request is made and the function
    returns None.
    """
    # TODO: build the final output string and return it once an API
    # backend exists.
    return None
def generate_response(user_message):  # generate_response_token_by_token
    """Stream a llama.cpp completion for ``user_message`` as it is produced.

    Launches the llama.cpp ``main`` executable as a subprocess and reads its
    stdout one character at a time. Every time a space or newline arrives,
    the text accumulated so far is yielded together with the elapsed time
    and a running count of whitespace-delimited tokens. Any trailing text
    that is not followed by a delimiter is yielded once at the end, with an
    average tokens-per-second figure.

    Parameters:
    - user_message: The prompt text passed to the executable via ``-p``.

    Yields:
    - str: the full output so far followed by a bracketed statistics line.

    Side effects:
    - Prints the subprocess CPU and memory usage roughly once per minute.
    - Prints the captured stderr if the subprocess exits with a non-zero
      return code.
    """
    # Local import: only this function needs it, and it keeps the fix
    # self-contained without touching the file-level import block.
    import tempfile

    cmd = [
        "/app/llama.cpp/main",  # Path to the executable
        "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
        "-p", user_message,
        # NOTE(review): presumably the generation length limit and escape
        # processing flags - confirm against the llama.cpp CLI help.
        "-n", "400",
        "-e"
    ]

    # stderr goes to a temporary file rather than a pipe. Nothing reads
    # stderr while stdout is being streamed, so a pipe could fill up and
    # block the child; a file never blocks and can still be read afterwards.
    with tempfile.TemporaryFile(mode="w+", errors="replace") as stderr_file:
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=stderr_file,
            text=True,
            bufsize=1,
        )

        start_time = time.time()
        monitor_start_time = time.time()
        alltokens = ""
        token_buffer = ''
        tokencount = 0

        try:
            # Created inside the try so the cleanup below always runs, even
            # if psutil cannot attach to the child.
            process_monitor = psutil.Process(process.pid)

            while True:
                # Read one character at a time
                char = process.stdout.read(1)
                if char == '':
                    # EOF: the child closed stdout, so there is nothing more
                    # to stream. Leaving here (instead of re-reading until
                    # poll() reports an exit) avoids a busy loop; the
                    # finally block waits for the process itself.
                    break

                token_buffer += char
                if char == ' ' or char == '\n':  # Token delimiters
                    elapsed_time = time.time() - start_time  # Calculate elapsed time
                    alltokens += token_buffer
                    tokencount += 1
                    yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Tokens: { tokencount }]"
                    token_buffer = ''  # Reset token buffer

                # Log resource usage every minute
                if time.time() - monitor_start_time > 60:
                    try:
                        cpu_usage = process_monitor.cpu_percent()
                        memory_usage = process_monitor.memory_info().rss  # in bytes
                        print(f"Subprocess CPU Usage: {cpu_usage}%, Memory Usage: {memory_usage / 1024 ** 2} MB")
                    except psutil.Error as monitor_error:
                        # Monitoring is best-effort; a failure to sample the
                        # child must not abort the response stream.
                        print(f"Could not read subprocess resource usage: {monitor_error}")
                    monitor_start_time = time.time()  # Reset the timer

            # Yield the last token if there is any
            if token_buffer:
                elapsed_time = time.time() - start_time  # Calculate elapsed time
                alltokens += token_buffer
                tokencount += 1  # The trailing token counts towards the average too
                tokens_per_second = round(tokencount / elapsed_time, 2) if elapsed_time > 0 else 0.0
                yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Average Tokens per second: { tokens_per_second }]"

        finally:
            try:
                # Wait for the process to complete, with a timeout
                process.wait(timeout=60)  # Timeout in seconds
            except subprocess.TimeoutExpired:
                print("Process didn't complete within the timeout. Killing it.")
                process.kill()
                process.wait()  # Ensure proper cleanup

            process.stdout.close()

            # Check for any errors. The captured stderr is read before the
            # temporary file is closed by the enclosing ``with``.
            if process.returncode != 0:
                stderr_file.seek(0)
                error_message = stderr_file.read()
                print(f"Error: {error_message}")
def custom_generate_response(cust_user_message, prompt_index):
    """
    Wrap the user's text in one of the predefined prompts and stream the result.

    The entry at ``prompt_index`` in ``CustomPrompts`` supplies a lead-in and
    an ending; the user message is placed between them, separated by blank
    lines, and the combined prompt is handed to ``generate_response``.

    Parameters:
    - cust_user_message: The message input from the user.
    - prompt_index: The index of the custom prompt to use.

    Yields:
    - Whatever ``generate_response`` yields for the combined prompt.
    """
    lead_in, closing = CustomPrompts[prompt_index]
    full_prompt = f"{lead_in}\n\n{cust_user_message}\n\n{closing}"
    yield from generate_response(full_prompt)
# Each entry is (lead-in shown before the user's text, ending appended after it).
# The lead-in also serves as the label of the button created for the entry.
CustomPrompts = [
    (
        "Write a Class Diagram based on the following text:",
        "Class Diagram:",
    ),
    (
        "Write a Pydot code based on the following text:",
        "Pydot Code:",
    ),
    (
        "Describe what a standard happy scene in any movie would be planned in great detail, based on the following text:",
        "Scene Details",
    ),
    (
        "Explain a teardown of the product mentioned in the following text:",
        "Teardown Details:",
    ),
    (
        "Explain the manufacturing of the product mentioned in the following text:",
        "Manufacturing Details:",
    ),
    (
        "Explain the marketing considerations of the product mentioned in the following text:",
        "Considerations:",
    ),
    (
        "Explain the target users considerations of the product mentioned in the following text:",
        "Target Users Considerations:",
    ),
    (
        "My problem to solve is",
        "- please make 10 sub problems have to solve from this:",
    ),
]
# Business-plan prompts as (lead-in, ending) pairs, in the same shape as
# CustomPrompts. The entries must be comma-separated: without the commas
# Python parses adjacent tuples as a call on the first tuple and raises
# "TypeError: 'tuple' object is not callable" when the module is loaded.
BusinessPrompts = [
    ("Write an outline for a business plan for " , ""),
    ("Write an outline for a Executive Summary for " , "Executive Summary:"),
    ("Write an outline for a Company Description for " , "Company Description:"),
    ("Write an outline for a Market Analysis for " , "Market Analysis:"),
    ("Write an outline for a Marketing and Sales Strategy for " , "Marketing and Sales Strategy:"),
    ("Write an outline for a Product Development for " , "Product Development:"),
    ("Write an outline for a Operations and Management for " , "Operations and Management:"),
    ("Write an outline for a Financial Projections for " , "Financial Projections:"),
]
# Build the page: a plain completion interface on top, followed by
# accordions of one-click prompt buttons that stream into a shared output box.
with gr.Blocks() as iface:
    gr.HTML("Stabilityai's demo - https://huggingface.co/spaces/stabilityai/stablelm-2-1_6b-zephyr")

    # Free-form prompt box wired straight to the streaming generator.
    gr.Interface(
        fn=generate_response,
        inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
        outputs="text",
        title="Stable LM 2 Zephyr (1.6b) LLama.cpp Interface Test (Inconsistent Performance - 100 tokens in 50 secs (when this HF space is updated) or 800+ secs(HF space open for long))",
        description="No Prompt template used yet (Essentially autocomplete). No Message History for now - Enter your message and get a response.",
        flagging_dir="/usr/src/app/flagged",
    )

    gr.HTML("Any standard way of thinking / Repetitive idea / rule of thumb / advice can be turned into a button (In a timeline?)")

    with gr.Accordion("Random Ideas"):
        with gr.Group():
            gr.HTML("Test for wrapping generator (Instead of buttons tabs and dropdowns?)")
            MainOutput = gr.TextArea(placeholder='Output will show here')
            CustomButtonInput = gr.TextArea(lines=1, placeholder='Prompt goes here')

            # One button per custom prompt; the prompt's position is passed
            # to the handler as a fixed State value.
            for prompt_idx, (label, _) in enumerate(CustomPrompts):
                prompt_button = gr.Button(label)
                prompt_button.click(
                    custom_generate_response,
                    inputs=[CustomButtonInput, gr.State(prompt_idx)],
                    outputs=MainOutput,
                )

    with gr.Accordion("General Product based"):
        with gr.Group():
            # Same set of buttons again, sharing the input and output boxes
            # created above.
            for prompt_idx, (label, _) in enumerate(CustomPrompts):
                prompt_button = gr.Button(label)
                prompt_button.click(
                    custom_generate_response,
                    inputs=[CustomButtonInput, gr.State(prompt_idx)],
                    outputs=MainOutput,
                )

# Enable the request queue, then start the server on all interfaces with a
# public share link.
iface.queue().launch(server_name="0.0.0.0", share=True)