MuntasirHossain committed
Commit fb1e4d5 (verified) · 1 parent: b3c7558

Create app.py

Files changed (1)
app.py +122 -0
app.py ADDED
@@ -0,0 +1,122 @@
+ import os
+ import requests
+ import gradio as gr
+ from llama_cpp import Llama
+
+ llm_name = "MuntasirHossain/Meta-Llama-3-8B-OpenOrca-GGUF"
+
+ # download the GGUF model file into ./model if it is not already there
+ def download_llms(llm_name):
+     """Download GGUF model"""
+     print("Downloading " + llm_name)
+     download_url = "https://huggingface.co/MuntasirHossain/Meta-Llama-3-8B-OpenOrca-GGUF/resolve/main/Q4_K_M.gguf"
+
+     if not os.path.exists("model"):
+         os.makedirs("model")
+
+     llm_filename = os.path.basename(download_url)
+     llm_temp_file_path = os.path.join("model", llm_filename)
+
+     if os.path.exists(llm_temp_file_path):
+         print("Model already available")
+     else:
+         response = requests.get(download_url, stream=True)
+         if response.status_code == 200:
+             with open(llm_temp_file_path, 'wb') as f:
+                 for chunk in response.iter_content(chunk_size=1024):
+                     if chunk:
+                         f.write(chunk)
+             print("Download completed")
+         else:
+             print(f"Model download failed with status code {response.status_code}")
+
+ # define the model pipeline with llama-cpp-python
+ def initialize_llm(llm_model):
+     model_path = ""
+     if llm_model == llm_name:
+         model_path = "model/Q4_K_M.gguf"
+         download_llms(llm_model)
+     # Llama is the llama-cpp-python class imported above
+     llm = Llama(
+         model_path=model_path,
+         n_ctx=1024,
+         verbose=False
+     )
+     return llm
+
+ llm = initialize_llm(llm_name)
+
+ # format the prompt with the ChatML chat template used by the OpenOrca fine-tune
+ def format_prompt(input_text, history):
+     system_prompt = "You are a helpful AI assistant. You are truthful in your response."
+     prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
+     if history:
+         for previous_prompt, response in history:
+             prompt += f"<|im_start|>user\n{previous_prompt}<|im_end|>\n<|im_start|>assistant\n{response}<|im_end|>\n"
+     prompt += f"<|im_start|>user\n{input_text}<|im_end|>\n<|im_start|>assistant\n"
+     return prompt
+
+ def generate(prompt, history, max_new_tokens=256):
+     if not history:
+         history = []
+
+     kwargs = dict(
+         max_tokens=max_new_tokens,
+         stop=["<|im_end|>"]
+     )
+
+     formatted_prompt = format_prompt(prompt, history)
+
+     response = llm(formatted_prompt, **kwargs)
+     return response['choices'][0]['text']
+
+ chatbot = gr.Chatbot(height=500)
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.HTML("<center><h1>Meta-Llama-3-8B-OpenOrca</h1></center>")
+     gr.ChatInterface(
+         generate,
+         chatbot=chatbot,
+         retry_btn=None,
+         undo_btn=None,
+         clear_btn="Clear",
+         description="This chatbot uses the MuntasirHossain/Meta-Llama-3-8B-OpenOrca-GGUF model for text generation.",
+         examples=[["Explain artificial intelligence in a few lines."]]
+     )
+ demo.queue().launch()
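
A note on the download step: the manual requests loop in download_llms could be replaced with huggingface_hub, which handles caching and resuming. A minimal sketch, assuming the Q4_K_M.gguf filename hard-coded in the function above:

from huggingface_hub import hf_hub_download

# fetches Q4_K_M.gguf into ./model, replacing the manual requests logic
model_file = hf_hub_download(
    repo_id="MuntasirHossain/Meta-Llama-3-8B-OpenOrca-GGUF",
    filename="Q4_K_M.gguf",
    local_dir="model",
)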
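
For reference, the ChatML string that format_prompt builds for a first turn looks roughly like this (illustrative output, matching the stop token "<|im_end|>" used in generate):

print(format_prompt("Explain artificial intelligence in a few lines.", []))
# <|im_start|>system
# You are a helpful AI assistant. You are truthful in your response.<|im_end|>
# <|im_start|>user
# Explain artificial intelligence in a few lines.<|im_end|>
# <|im_start|>assistant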
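
If token-by-token output is wanted in the Gradio UI, generate could yield partial strings instead of returning once. A minimal sketch, assuming llama-cpp-python's stream=True interface (generate_stream is a hypothetical name; gr.ChatInterface also accepts generator functions):

def generate_stream(prompt, history, max_new_tokens=256):
    if not history:
        history = []
    formatted_prompt = format_prompt(prompt, history)
    output = ""
    # with stream=True, llama-cpp-python yields chunks shaped like
    # {"choices": [{"text": "..."}]}
    for chunk in llm(formatted_prompt, max_tokens=max_new_tokens,
                     stop=["<|im_end|>"], stream=True):
        output += chunk["choices"][0]["text"]
        yield output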