Mososopo committed on
Commit
154522f
1 Parent(s): b79b62d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -28
app.py CHANGED
@@ -1,29 +1,87 @@
1
  import gradio as gr
2
- from pydantic import BaseModel
3
- from ctransformers import AutoModelForCausalLM
4
-
5
- # Load your model (adjust the path to where your model is located)
6
- llm = AutoModelForCausalLM.from_pretrained("TrillaTag-0.0.3_V2.gguf",
7
- model_type='mistral',
8
- max_new_tokens=1096,
9
- threads=3)
10
-
11
- # Define a function that will use your model to generate a response
12
- def generate_completion(prompt):
13
- try:
14
- # Generate a response from your model based on the user's prompt
15
- response = llm.generate(prompt)
16
- return response
17
- except Exception as e:
18
- # If something goes wrong, you could log the exception or handle it as needed
19
- return str(e) # For simplicity, we just return the error as a string
20
-
21
- # Update the Interface instantiation to use the current Gradio components
22
- iface = gr.Interface(fn=generate_completion,
23
- inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."), # Updated
24
- outputs="text",
25
- title="TrillaTag Model Generator",
26
- description="Enter a prompt to generate text from the TrillaTag Model.")
27
-
28
- # Launch the Gradio app
29
- iface.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from llama_cpp import Llama
import datetime

# Rolling conversation history. NOTE(review): nothing in this file ever
# appends to it — it is only printed inside combine(); presumably a
# placeholder for future chat memory.
convHistory = ''
# Local GGUF weights, loaded from the working directory.
modelfile = "TrillaTag-0.0.3_V2.gguf"

# Context window (tokens) passed to llama.cpp. NOTE(review): 128 is very
# small — prompt plus generated output must both fit inside it; confirm
# this is intentional.
contextlength=128

print("loading model...")
stt = datetime.datetime.now()

# Load the model once at import time so every Gradio request reuses it.
llm = Llama(
    model_path=modelfile,
    n_ctx=contextlength,
)
dt = datetime.datetime.now() - stt
print(f"Model loaded in {dt}")
20
+
def combine(prompt, temperature, max_new_tokens, top_p, repeat_penalty):
    """Stream a completion from the module-level llama.cpp model.

    Args:
        prompt: Raw user text; wrapped in Mistral ``[INST]...[/INST]`` tags.
        temperature: Sampling temperature forwarded to llama_cpp.
        max_new_tokens: Cap on generated tokens (``max_tokens``).
        top_p: Nucleus-sampling cutoff.
        repeat_penalty: Repetition penalty forwarded to llama_cpp.

    Yields:
        ``(generation, elapsed, prompt_tokens, answer_tokens, total_tokens)``
        tuples. One tuple is yielded per streamed token (token-count fields
        left blank while streaming), plus a final tuple with the full stats.
        Fix: the original yielded only once, after the loop finished, which
        defeated ``stream=True`` — the UI saw nothing until the very end.
    """
    global convHistory

    prompt = f"[INST]{prompt}[/INST]"
    start = datetime.datetime.now()
    generation = ""

    # Tokenize the prompt once and reuse the count (the original tokenized
    # it twice: once for the label, once again for the total).
    n_prompt = len(llm.tokenize(bytes(prompt, encoding='utf-8')))
    prompt_tokens = f"Prompt Tokens: {n_prompt}"

    for chunk in llm(prompt,
                     max_tokens=max_new_tokens,
                     stop=["</s>"],
                     temperature=temperature,
                     repeat_penalty=repeat_penalty,
                     top_p=top_p,
                     echo=False,
                     stream=True):
        generation += chunk["choices"][0]["text"]
        # Yield after every token so the Gradio UI updates incrementally.
        # Token counts are deferred to the final yield — re-tokenizing the
        # whole transcript per token would be quadratic.
        yield generation, datetime.datetime.now() - start, prompt_tokens, "", ""

    n_answer = len(llm.tokenize(bytes(generation, encoding='utf-8')))
    answer_tokens = f"Out Tkns: {n_answer}"
    total_tokens = f"Total Tkns: {n_prompt + n_answer}"
    delta = datetime.datetime.now() - start
    yield generation, delta, prompt_tokens, answer_tokens, total_tokens

    print(convHistory)
# MAIN GRADIO INTERFACE
with gr.Blocks(theme='Medguy/base2') as demo: #theme=gr.themes.Glass() #theme='remilia/Ghostly'
    # TITLE SECTION: a compact row of read-only stat boxes that combine()
    # fills in (generation time and token counts).
    with gr.Row(variant='compact'):
        with gr.Column(scale=10):
            with gr.Row():
                with gr.Column(min_width=80):
                    gentime = gr.Textbox(value="", placeholder="Generation Time:", min_width=50, show_label=False)
                with gr.Column(min_width=80):
                    prompttokens = gr.Textbox(value="", placeholder="Prompt Tkn:", min_width=50, show_label=False)
                with gr.Column(min_width=80):
                    outputokens = gr.Textbox(value="", placeholder="Output Tkn:", min_width=50, show_label=False)
                with gr.Column(min_width=80):
                    totaltokens = gr.Textbox(value="", placeholder="Total Tokens:", min_width=50, show_label=False)
    # INTERACTIVE INFOGRAPHIC SECTION

    # PLAYGROUND INTERFACE SECTION
    with gr.Row():
        with gr.Column(scale=1):
            # Fixed user-facing typo: "Tunning" -> "Tuning".
            gr.Markdown(
                """
                ### Tuning Parameters""")
            temp = gr.Slider(label="Temperature",minimum=0.0, maximum=1.0, step=0.01, value=0.15)
            top_p = gr.Slider(label="Top_P",minimum=0.0, maximum=1.0, step=0.01, value=0.15)
            # NOTE(review): a repeat_penalty of 0.0 is a degenerate value for
            # llama.cpp; a minimum of 1.0 (= no penalty) is likely intended.
            repPen = gr.Slider(label="Repetition Penalty",minimum=0.0, maximum=4.0, step=0.01, value=1)
            # Output length is capped at the model's context window.
            max_len = gr.Slider(label="Maximum output length", minimum=10,maximum=contextlength,step=2, value=20)

            btn = gr.Button(value="Generate", variant='primary')

        with gr.Column(scale=4):
            prompt = gr.Textbox(label="User Prompt", lines=6, show_copy_button=True)
            output = gr.Textbox(value="", label="Output", lines = 12, show_copy_button=True)

    # combine() is a generator, so Gradio streams each yielded tuple into
    # the five output components in order.
    btn.click(combine, inputs=[prompt,temp,max_len,top_p,repPen], outputs=[output,gentime,prompttokens,outputokens,totaltokens])


if __name__ == "__main__":
    demo.launch(inbrowser=True)