Mososopo committed on
Commit
154522f
1 Parent(s): b79b62d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -28
app.py CHANGED
@@ -1,29 +1,87 @@
1
  import gradio as gr
2
- from pydantic import BaseModel
3
- from ctransformers import AutoModelForCausalLM
4
-
5
- # Load your model (adjust the path to where your model is located)
6
- llm = AutoModelForCausalLM.from_pretrained("TrillaTag-0.0.3_V2.gguf",
7
- model_type='mistral',
8
- max_new_tokens=1096,
9
- threads=3)
10
-
11
- # Define a function that will use your model to generate a response
12
- def generate_completion(prompt):
13
- try:
14
- # Generate a response from your model based on the user's prompt
15
- response = llm.generate(prompt)
16
- return response
17
- except Exception as e:
18
- # If something goes wrong, you could log the exception or handle it as needed
19
- return str(e) # For simplicity, we just return the error as a string
20
-
21
- # Update the Interface instantiation to use the current Gradio components
22
- iface = gr.Interface(fn=generate_completion,
23
- inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."), # Updated
24
- outputs="text",
25
- title="TrillaTag Model Generator",
26
- description="Enter a prompt to generate text from the TrillaTag Model.")
27
-
28
- # Launch the Gradio app
29
- iface.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from llama_cpp import Llama
import datetime

# Rolling conversation history. NOTE(review): nothing in this file ever
# appends to it — it is only printed inside combine(); presumably a
# placeholder for future chat memory.
convHistory = ''
# Local GGUF weights, loaded from the working directory.
modelfile = "TrillaTag-0.0.3_V2.gguf"

# Context window (tokens) passed to llama.cpp. NOTE(review): 128 is very
# small — prompt plus generated output must both fit inside it; confirm
# this is intentional.
contextlength=128

print("loading model...")
stt = datetime.datetime.now()

# Load the model once at import time so every Gradio request reuses it.
llm = Llama(
    model_path=modelfile,
    n_ctx=contextlength,
)
dt = datetime.datetime.now() - stt
print(f"Model loaded in {dt}")
20
+
def combine(prompt, temperature, max_new_tokens, top_p, repeat_penalty):
    """Stream a completion from the module-level llama.cpp model.

    Args:
        prompt: Raw user text; wrapped in Mistral ``[INST]...[/INST]`` tags.
        temperature: Sampling temperature forwarded to llama_cpp.
        max_new_tokens: Cap on generated tokens (``max_tokens``).
        top_p: Nucleus-sampling cutoff.
        repeat_penalty: Repetition penalty forwarded to llama_cpp.

    Yields:
        ``(generation, elapsed, prompt_tokens, answer_tokens, total_tokens)``
        tuples. One tuple is yielded per streamed token (token-count fields
        left blank while streaming), plus a final tuple with the full stats.
        Fix: the original yielded only once, after the loop finished, which
        defeated ``stream=True`` — the UI saw nothing until the very end.
    """
    global convHistory

    prompt = f"[INST]{prompt}[/INST]"
    start = datetime.datetime.now()
    generation = ""

    # Tokenize the prompt once and reuse the count (the original tokenized
    # it twice: once for the label, once again for the total).
    n_prompt = len(llm.tokenize(bytes(prompt, encoding='utf-8')))
    prompt_tokens = f"Prompt Tokens: {n_prompt}"

    for chunk in llm(prompt,
                     max_tokens=max_new_tokens,
                     stop=["</s>"],
                     temperature=temperature,
                     repeat_penalty=repeat_penalty,
                     top_p=top_p,
                     echo=False,
                     stream=True):
        generation += chunk["choices"][0]["text"]
        # Yield after every token so the Gradio UI updates incrementally.
        # Token counts are deferred to the final yield — re-tokenizing the
        # whole transcript per token would be quadratic.
        yield generation, datetime.datetime.now() - start, prompt_tokens, "", ""

    n_answer = len(llm.tokenize(bytes(generation, encoding='utf-8')))
    answer_tokens = f"Out Tkns: {n_answer}"
    total_tokens = f"Total Tkns: {n_prompt + n_answer}"
    delta = datetime.datetime.now() - start
    yield generation, delta, prompt_tokens, answer_tokens, total_tokens

    print(convHistory)
# MAIN GRADIO INTERFACE
with gr.Blocks(theme='Medguy/base2') as demo: #theme=gr.themes.Glass() #theme='remilia/Ghostly'
    # TITLE SECTION: a compact row of read-only stat boxes that combine()
    # fills in (generation time and token counts).
    with gr.Row(variant='compact'):
        with gr.Column(scale=10):
            with gr.Row():
                with gr.Column(min_width=80):
                    gentime = gr.Textbox(value="", placeholder="Generation Time:", min_width=50, show_label=False)
                with gr.Column(min_width=80):
                    prompttokens = gr.Textbox(value="", placeholder="Prompt Tkn:", min_width=50, show_label=False)
                with gr.Column(min_width=80):
                    outputokens = gr.Textbox(value="", placeholder="Output Tkn:", min_width=50, show_label=False)
                with gr.Column(min_width=80):
                    totaltokens = gr.Textbox(value="", placeholder="Total Tokens:", min_width=50, show_label=False)
    # INTERACTIVE INFOGRAPHIC SECTION

    # PLAYGROUND INTERFACE SECTION
    with gr.Row():
        with gr.Column(scale=1):
            # Fixed user-facing typo: "Tunning" -> "Tuning".
            gr.Markdown(
                """
                ### Tuning Parameters""")
            temp = gr.Slider(label="Temperature",minimum=0.0, maximum=1.0, step=0.01, value=0.15)
            top_p = gr.Slider(label="Top_P",minimum=0.0, maximum=1.0, step=0.01, value=0.15)
            # NOTE(review): a repeat_penalty of 0.0 is a degenerate value for
            # llama.cpp; a minimum of 1.0 (= no penalty) is likely intended.
            repPen = gr.Slider(label="Repetition Penalty",minimum=0.0, maximum=4.0, step=0.01, value=1)
            # Output length is capped at the model's context window.
            max_len = gr.Slider(label="Maximum output length", minimum=10,maximum=contextlength,step=2, value=20)

            btn = gr.Button(value="Generate", variant='primary')

        with gr.Column(scale=4):
            prompt = gr.Textbox(label="User Prompt", lines=6, show_copy_button=True)
            output = gr.Textbox(value="", label="Output", lines = 12, show_copy_button=True)

    # combine() is a generator, so Gradio streams each yielded tuple into
    # the five output components in order.
    btn.click(combine, inputs=[prompt,temp,max_len,top_p,repPen], outputs=[output,gentime,prompttokens,outputokens,totaltokens])


if __name__ == "__main__":
    demo.launch(inbrowser=True)