import gradio as gr
import psutil
import subprocess
import time


def generate_response_by_api(user_message):
    """Placeholder for a hosted-API backend; not implemented yet."""
    pass


def custom_generate_response_by_api(cust_user_message, prompt_index, prompts_list):
    """Placeholder: wraps the user message in the selected (prompt, ending)
    pair before handing it to the (unimplemented) API backend."""
    prompt, ending = prompts_list[prompt_index]
    cust_user_message = f"{prompt}\n\n{cust_user_message}\n\n{ending}"
    pass
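

# A hedged sketch of how the *_by_api placeholders above might be filled in,
# here using the Hugging Face Inference API via huggingface_hub. The function
# name, model id, and generation parameters below are illustrative assumptions,
# not part of the original app, and nothing else in this file calls it.
def generate_response_by_api_sketch(user_message):
    from huggingface_hub import InferenceClient  # optional dependency

    client = InferenceClient("stabilityai/stablelm-2-zephyr-1_6b")
    final_output = ""
    # Stream tokens so a Gradio output could update incrementally, mirroring
    # the llama.cpp generator below.
    for token in client.text_generation(user_message, max_new_tokens=400, stream=True):
        final_output += token
        yield final_output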


def generate_response(user_message):
    # llama.cpp "main" example CLI: -m model path, -p prompt,
    # -n number of tokens to predict, -e process prompt escapes.
    cmd = [
        "/app/llama.cpp/main",
        "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
        "-p", user_message,
        "-n", "400",
        "-e"
    ]

    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1)
    process_monitor = psutil.Process(process.pid)

    start_time = time.time()
    monitor_start_time = time.time()
    alltokens = ""
    token_buffer = ''
    tokencount = 0
    error_output = ""
    try:
        while True:
            char = process.stdout.read(1)
            if char == '' and process.poll() is not None:
                break
            if char != '':
                token_buffer += char
                # Yield on whitespace so the UI updates word by word.
                if char == ' ' or char == '\n':
                    elapsed_time = time.time() - start_time
                    alltokens += token_buffer
                    tokencount += 1
                    yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Tokens: {tokencount}]"
                    token_buffer = ''

            # Log the subprocess's resource usage roughly once a minute.
            if time.time() - monitor_start_time > 60:
                cpu_usage = process_monitor.cpu_percent()
                memory_usage = process_monitor.memory_info().rss
                print(f"Subprocess CPU Usage: {cpu_usage}%, Memory Usage: {memory_usage / 1024 ** 2:.2f} MB")
                monitor_start_time = time.time()

        # Flush whatever is left in the buffer once the stream ends.
        if token_buffer:
            elapsed_time = time.time() - start_time
            alltokens += token_buffer
            yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Average Tokens per second: {round(tokencount / elapsed_time, 2)}]"

    finally:
        try:
            process.wait(timeout=60)
        except subprocess.TimeoutExpired:
            print("Process didn't complete within the timeout. Killing it.")
            process.kill()
            process.wait()

        # Read stderr before closing the pipes; reading from a closed
        # pipe raises ValueError.
        error_output = process.stderr.read()
        process.stdout.close()
        process.stderr.close()

    if process.returncode != 0:
        print(f"Error: {error_output}")


def custom_generate_response(cust_user_message, prompt_index, prompts_list):
    """
    Generates a custom response based on the user message, the selected prompt,
    and the provided list of prompts, including a custom ending specific to the prompt.

    Parameters:
    - cust_user_message: The message input from the user.
    - prompt_index: The index of the custom prompt to use.
    - prompts_list: The list of (prompt, ending) pairs to use for generating the response.
    """
    prompt, ending = prompts_list[prompt_index]
    cust_user_message = f"{prompt}\n\n{cust_user_message}\n\n{ending}"
    yield from generate_response(cust_user_message)


Allprompts = {
    "Custom Prompts": [
        ("Write a Class Diagram based on the following text:", "Class Diagram:"),
        ("Write Pydot code based on the following text:", "Pydot Code:"),
        ("Describe in great detail how a standard happy scene in any movie would be planned, based on the following text:", "Scene Details:"),
        ("Explain a teardown of the product mentioned in the following text:", "Teardown Details:"),
        ("Explain the manufacturing of the product mentioned in the following text:", "Manufacturing Details:"),
        ("Explain the marketing considerations of the product mentioned in the following text:", "Considerations:"),
        ("Explain the target user considerations of the product mentioned in the following text:", "Target User Considerations:"),
        ("My problem to solve is", "- please list 10 sub-problems that have to be solved, based on this:"),
    ],
    "Business Prompts": [
        ("Suggest product ideas based on the following text:", "Products:"),
        ("Write an outline for a business plan for:", ""),
        ("Write an example of a detailed report for an Executive Summary for", "Executive Summary:"),
        ("Write an example of a detailed report for a Company Description for", "Company Description:"),
        ("Write an example of a detailed report for a Market Analysis for", "Market Analysis:"),
        ("Write an example of a detailed report for a Marketing and Sales Strategy for", "Marketing and Sales Strategy:"),
        ("Write an example of a detailed report for Product Development for", "Product Development:"),
        ("Write an example of a detailed report for Operations and Management for", "Operations and Management:"),
        ("Write an example of a detailed report for Financial Projections for", "Financial Projections:"),
        ("Explain how to make this product unique from competitors:", "Considerations:"),
    ],
    "Programming Pattern Prompts": [
        ("Demonstrate the builder pattern in Go:", ""),
        ("Demonstrate zero-cost abstractions in Go:", ""),
        ("Demonstrate the builder pattern in Rust:", ""),
        ("Demonstrate polymorphism in Rust:", ""),
        ("Explain how the RAII pattern affects Rust:", ""),
        ("Demonstrate the builder pattern in C++:", ""),
        ("Explain when to consider using the builder pattern in Go:", ""),
    ],
    "Creativity Prompts": [
        ("Make the following text more vague:", "Vague version:"),
        ("Turn the following text into a bunch of rules:", "Rules:"),
        ("What syllogisms can be made from this text:", "Syllogisms:"),
        ("Reimagine the following text:", ""),
    ],
    "Game Based": [
        ("What obstacles to growth exist in the following text:", "Obstacles:"),
        ("Write a story as the basis for a random game", ""),
        ("What are common themes in games?", ""),
        ("Write three factions and why they are in conflict, based on the following text:", "Faction 1:"),
    ]
}
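

# Hedged sketch: a small helper (not used by the UI below, and not part of the
# original app) showing how a (prompt, ending) pair from Allprompts wraps a
# user message, mirroring custom_generate_response without launching llama.cpp.
def preview_wrapped_prompt(user_message, prompt_index, prompts_list):
    prompt, ending = prompts_list[prompt_index]
    return f"{prompt}\n\n{user_message}\n\n{ending}"

# Example: preview_wrapped_prompt("a ride-sharing app", 0, Allprompts["Custom Prompts"])
# returns the exact string that custom_generate_response would send to the model.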


with gr.Blocks() as iface:
    with gr.Tab("Single prompt"):
        gr.HTML("<a href='https://huggingface.co/spaces/stabilityai/stablelm-2-1_6b-zephyr'> -- Original StabilityAI demo -- </a> | ")
        gr.Interface(
            fn=generate_response,
            inputs=gr.Textbox(lines=2, placeholder="Type your message here..."),
            outputs="text",
            title="Stable LM 2 Zephyr (1.6B) llama.cpp Interface Test (inconsistent performance: ~100 tokens in 50 s when this HF Space has just been updated, 800+ s when the Space has been open for a long time)",
            description="No prompt template used yet (essentially autocomplete). No message history for now - enter your message and get a response.",
            flagging_dir="/usr/src/app/flagged",
        )

        gr.HTML("Any standard way of thinking / repetitive idea / rule of thumb / piece of advice can be turned into a button (in a timeline?)")
        gr.HTML("LLM-powered buttons as the new note-taking? (YouTube video to prompt pipeline?)<br><br>List to buttons (instead of buttons, tabs and dropdowns maybe?)")

        MainOutput = gr.TextArea(placeholder='Output will show here')
        CustomButtonInput = gr.TextArea(lines=1, placeholder='Prompt goes here')

        for category_name, category_prompts in Allprompts.items():
            with gr.Accordion(category_name, open=False):
                with gr.Group():
                    for index, (prompt, _) in enumerate(category_prompts):
                        button = gr.Button(prompt)
                        # Pass the prompt list itself (not the category name) so
                        # custom_generate_response can index into it.
                        button.click(custom_generate_response, inputs=[CustomButtonInput, gr.State(index), gr.State(category_prompts)], outputs=MainOutput)

    with gr.Tab("Workflow Brainstorm"):
        gr.HTML("Workflow = premeditated events --- need a timeline before prompts")

iface.queue().launch(server_name="0.0.0.0", share=True)