cali72mero committed on
Commit
3b872a0
·
verified ·
1 Parent(s): 623c2e2

Upload 8 files

Files changed (8)
  1. OpenAI_logo.png +0 -0
  2. README.md +32 -9
  3. app.py +59 -0
  4. chatbot.py +464 -0
  5. gitattributes +35 -0
  6. live_chat.py +31 -0
  7. requirements.txt +18 -0
  8. voice_chat.py +64 -0
OpenAI_logo.png ADDED
README.md CHANGED
@@ -1,13 +1,36 @@
  ---
- title: Ki
- emoji: 😻
- colorFrom: gray
- colorTo: gray
- sdk: streamlit
- sdk_version: 1.36.0
+ title: OpenGPT 4o
+ emoji: 🔥
+ colorFrom: blue
+ colorTo: indigo
+ sdk: gradio
+ sdk_version: 4.37.2
  app_file: app.py
- pinned: false
- license: apache-2.0
+ pinned: true
+ short_description: GPT-4o-like bot.
+ header: mini
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # OpenGPT-4o
+ OpenGPT 4o is a free alternative to OpenAI's GPT-4o.
+
+ Try it here: https://huggingface.co/spaces/KingNish/GPT-4o
+
+ ## GPT 4o vs OpenGPT 4o
+
+ | Feature             | GPT 4o                 | OpenGPT 4o                         |
+ |---------------------|------------------------|------------------------------------|
+ | Pricing             | Free and paid tiers    | Free                               |
+ | Image generation    | Paid only              | Yes                                |
+ | Video generation    | No                     | Yes                                |
+ | Image QnA           | Yes                    | Yes                                |
+ | Video QnA           | Yes (but very limited) | Yes                                |
+ | Voice chat          | Yes, but very limited  | Yes (unlimited)                    |
+ | Video chat          | Paid only              | Yes                                |
+ | Multilingual        | Yes                    | Chat only                          |
+ | Team members        | 450+                   | 1 [LOL]                            |
+ | Human-like speech   | Paid only              | No                                 |
+ | Speed               | 345 ms                 | 2 seconds (also depends on queue)  |
+ | Customization       | Limited                | High (coming soon)                 |
+ | Learning capability | Continuous             | Static                             |
+ | Privacy             | Questionable           | 100%                               |
app.py ADDED
@@ -0,0 +1,59 @@
+ import gradio as gr
+
+ # Import modules from the other files
+ from chatbot import model_inference, EXAMPLES, chatbot
+ from voice_chat import respond
+
+ # Define the Gradio theme
+ theme = gr.themes.Soft(
+     primary_hue="sky",
+     secondary_hue="violet",
+     neutral_hue="gray",
+     font=[gr.themes.GoogleFont('orbitron')]
+ )
+
+ # Create Gradio blocks for the different functionalities
+
+ # Chat interface block
+ with gr.Blocks(
+     css=""".gradio-container .avatar-container {height: 40px; width: 40px !important;} #duplicate-button {margin: auto; color: white; background: #f1a139; border-radius: 100vh; margin-top: 2px; margin-bottom: 2px;}""",
+ ) as chat:
+     gr.Markdown("### Image Chat, Image Generation, Image Classification and Normal Chat")
+     gr.ChatInterface(
+         fn=model_inference,
+         chatbot=chatbot,
+         examples=EXAMPLES,
+         multimodal=True,
+         cache_examples=False,
+         autofocus=False,
+         concurrency_limit=10,
+     )
+
+ # Voice chat block
+ with gr.Blocks() as voice:
+     gr.Markdown("Sometimes responses take longer because of a long queue.")
+     with gr.Row():
+         audio_input = gr.Audio(label="Voice Chat (BETA)", sources="microphone", type="filepath", waveform_options=False)
+         output = gr.Audio(label="OUTPUT", type="filepath", interactive=False, autoplay=True, elem_classes="audio")
+     audio_input.change(fn=respond, inputs=[audio_input], outputs=[output], queue=False)
+
+ # Image generation block (embeds an external Space)
+ with gr.Blocks() as image:
+     gr.HTML("<iframe src='https://kingnish-image-gen-pro.hf.space' width='100%' height='2000px' style='border-radius: 8px;'></iframe>")
+
+ # Video generation block (embeds an external Space)
+ with gr.Blocks() as instant2:
+     gr.HTML("<iframe src='https://kingnish-instant-video.hf.space' width='100%' height='3000px' style='border-radius: 8px;'></iframe>")
+
+ with gr.Blocks() as video:
+     gr.Markdown("""More models are coming""")
+     gr.TabbedInterface([instant2], ['Instant🎥'])
+
+ # Main application block
+ with gr.Blocks(theme=theme, title="OpenGPT 4o DEMO") as demo:
+     gr.Markdown("# OpenGPT 4o")
+     gr.TabbedInterface([chat, voice, image, video], ['💬 SuperChat', '🗣️ Voice Chat', '🖼️ Image Engine', '🎥 Video Engine'])
+
+ demo.queue(max_size=300)
+ demo.launch()
+
+ # pip uninstall gradio
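Not part of the commit, but a hedged sketch of the payload shape the SuperChat tab hands to `model_inference`: `gr.ChatInterface(multimodal=True)` passes a dict with `text` and `files` plus the earlier turns. The file name and the direct call are illustrative only and assume the GPU-backed models defined in chatbot.py can be loaded locally.

```python
# Illustrative only: "example.jpg" is a placeholder path, and calling
# model_inference directly assumes chatbot.py's models fit on the local GPU.
from chatbot import model_inference

message = {"text": "Describe this image.", "files": ["example.jpg"]}
history = []  # earlier (user_text, assistant_text) pairs

for partial_reply in model_inference(message, history):  # generator: streamed partial answers
    print(partial_reply)
```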
chatbot.py ADDED
@@ -0,0 +1,464 @@
+ import os
+ import time
+ import requests
+ import random
+ from threading import Thread
+ from typing import List, Dict, Union
+ import subprocess
+ subprocess.run(
+     "pip install flash-attn --no-build-isolation",
+     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+     shell=True,
+ )
+ import torch
+ import gradio as gr
+ from bs4 import BeautifulSoup
+ from transformers import LlavaProcessor, LlavaForConditionalGeneration, TextIteratorStreamer
+ from huggingface_hub import InferenceClient
+ from PIL import Image
+ import spaces
+ from functools import lru_cache
+ import cv2
+ import re
+ import io
+ import json
+ from gradio_client import Client, file
+ from groq import Groq
+
+ # You can also use the models that are commented out below
+ # model_id = "llava-hf/llava-interleave-qwen-0.5b-hf"
+ model_id = "llava-hf/llava-interleave-qwen-7b-hf"
+ # model_id = "llava-hf/llava-interleave-qwen-7b-dpo-hf"
+ processor = LlavaProcessor.from_pretrained(model_id)
+ model = LlavaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.float16, use_flash_attention_2=True, low_cpu_mem_usage=True)
+ model.to("cuda")
+ # Credit to merve for the llava-interleave-qwen code
+
+ GROQ_API_KEY = os.environ.get("GROQ_API_KEY", None)
+
+ client_groq = Groq(api_key=GROQ_API_KEY)
+
+ def sample_frames(video_file):
+     # Sample up to 12 evenly spaced frames from a video file as PIL images
+     try:
+         video = cv2.VideoCapture(video_file)
+         total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+         num_frames = 12
+         interval = max(total_frames // num_frames, 1)
+         frames = []
+         for i in range(total_frames):
+             ret, frame = video.read()
+             if not ret:
+                 continue
+             if i % interval == 0:
+                 pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+                 frames.append(pil_img)
+         video.release()
+         return frames
+     except Exception:
+         frames = []
+         return frames
+
+
+ # Path to example images
+ examples_path = os.path.dirname(__file__)
+ EXAMPLES = [
+     [
+         {
+             "text": "What is Friction? Explain in Detail.",
+         }
+     ],
+     [
+         {
+             "text": "Write me a Python function to generate unique passwords.",
+         }
+     ],
+     [
+         {
+             "text": "What's the latest price of Bitcoin?",
+         }
+     ],
+     [
+         {
+             "text": "Search and give me a list of Spaces trending on Hugging Face.",
+         }
+     ],
+     [
+         {
+             "text": "Create a beautiful picture of the Eiffel Tower at night.",
+         }
+     ],
+     [
+         {
+             "text": "Create an image of a cute cat.",
+         }
+     ],
+     [
+         {
+             "text": "What unusual thing happens in this video?",
+             "files": [f"{examples_path}/example_video/accident.gif"],
+         }
+     ],
+     [
+         {
+             "text": "What's the name of the superhero in this clip?",
+             "files": [f"{examples_path}/example_video/spiderman.gif"],
+         }
+     ],
+     [
+         {
+             "text": "What's written on this paper?",
+             "files": [f"{examples_path}/example_images/paper_with_text.png"],
+         }
+     ],
+     [
+         {
+             "text": "Who are they? Tell me about both of them.",
+             "files": [f"{examples_path}/example_images/elon_smoking.jpg",
+                       f"{examples_path}/example_images/steve_jobs.jpg", ]
+         }
+     ]
+ ]
+
+ # Set the bot avatar image
+ BOT_AVATAR = "OpenAI_logo.png"
+
+ # Extract the visible text from a webpage
+ @lru_cache(maxsize=128)
+ def extract_text_from_webpage(html_content):
+     """Extracts visible text from HTML content using BeautifulSoup."""
+     soup = BeautifulSoup(html_content, "html.parser")
+     for tag in soup(["script", "style", "header", "footer", "nav", "form", "svg"]):
+         tag.extract()
+     visible_text = soup.get_text(strip=True)
+     return visible_text
+
+ # Perform a Google search and return the results
+ def search(query):
+     term = query
+     start = 0
+     all_results = []
+     max_chars_per_page = 6000
+     with requests.Session() as session:
+         resp = session.get(
+             url="https://www.google.com/search",
+             headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"},
+             params={"q": term, "num": 4, "udm": 14},
+             timeout=5,
+             verify=None,
+         )
+         resp.raise_for_status()
+         soup = BeautifulSoup(resp.text, "html.parser")
+         result_block = soup.find_all("div", attrs={"class": "g"})
+         for result in result_block:
+             link = result.find("a", href=True)
+             link = link["href"]
+             try:
+                 webpage = session.get(link, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"}, timeout=5, verify=False)
+                 webpage.raise_for_status()
+                 visible_text = extract_text_from_webpage(webpage.text)
+                 if len(visible_text) > max_chars_per_page:
+                     visible_text = visible_text[:max_chars_per_page]
+                 all_results.append({"link": link, "text": visible_text})
+             except requests.exceptions.RequestException:
+                 all_results.append({"link": link, "text": None})
+     return all_results
+
+
+ def image_gen(prompt):
+     client = Client("KingNish/Image-Gen-Pro")
+     return client.predict("Image Generation", None, prompt, api_name="/image_gen_pro")
+
+ def video_gen(prompt):
+     client = Client("KingNish/Instant-Video")
+     return client.predict(prompt, api_name="/instant_video")
+
+ def llava(user_prompt, chat_history):
+     # Use the file attached to the current message, or fall back to the most recent one in the history
+     if user_prompt["files"]:
+         image = user_prompt["files"][0]
+     else:
+         for hist in chat_history:
+             if type(hist[0]) == tuple:
+                 image = hist[0][0]
+
+     txt = user_prompt["text"]
+     img = user_prompt["files"]
+
+     video_extensions = ("avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg", "wav", "gif", "webm", "m4v", "3gp")
+     image_extensions = Image.registered_extensions()
+     image_extensions = tuple([ex for ex, f in image_extensions.items()])
+
+     if image.endswith(video_extensions):
+         image = sample_frames(image)
+         gr.Info("Analyzing Video")
+         image_tokens = "<image>" * int(len(image))
+         prompt = f"<|im_start|>user {image_tokens}\n{txt}<|im_end|><|im_start|>assistant"
+
+     elif image.endswith(image_extensions):
+         image = Image.open(image).convert("RGB")
+         gr.Info("Analyzing image")
+         prompt = f"<|im_start|>user <image>\n{txt}<|im_end|><|im_start|>assistant"
+
+     system_llava = "<|im_start|>system\nYou are OpenGPT 4o, an exceptionally capable and versatile AI assistant made by KingNish. Your task is to fulfill the user's query in the best possible way. You are given images, videos and 3D structures as input along with a question, and your task is to give the best possible detailed answer to the user's query.<|im_end|>"
+
+     final_prompt = f"{system_llava}\n{prompt}"
+
+     inputs = processor(final_prompt, image, return_tensors="pt").to("cuda", torch.float16)
+
+     return inputs
+
+ # Initialize inference clients for different models
+ client_gemma = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
+ client_mixtral = InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO")
+ client_llama = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
+
+ @spaces.GPU(duration=60, queue=False)
+ def model_inference(user_prompt, chat_history):
+     # Multimodal path: messages with attached files go to the local LLaVA model
+     if user_prompt["files"]:
+         inputs = llava(user_prompt, chat_history)
+         streamer = TextIteratorStreamer(processor, skip_prompt=True, **{"skip_special_tokens": True})
+         generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
+
+         thread = Thread(target=model.generate, kwargs=generation_kwargs)
+         thread.start()
+
+         buffer = ""
+         for new_text in streamer:
+             buffer += new_text
+             yield buffer
+
+     else:
+         # Text-only path: a router model decides which "function" to call
+         func_caller = []
+         message = user_prompt
+
+         functions_metadata = [
+             {"type": "function", "function": {"name": "web_search", "description": "Search a query on Google and find the latest information about any person, object, place or thing - everything that is available on Google.", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "web search query"}}, "required": ["query"]}}},
+             {"type": "function", "function": {"name": "general_query", "description": "Reply to a general query of the USER with an LLM like you. It does not answer tough questions or questions that need the latest information.", "parameters": {"type": "object", "properties": {"prompt": {"type": "string", "description": "A detailed prompt"}}, "required": ["prompt"]}}},
+             {"type": "function", "function": {"name": "hard_query", "description": "Reply to a tough query of the USER using a powerful LLM. It does not answer questions that need the latest information.", "parameters": {"type": "object", "properties": {"prompt": {"type": "string", "description": "A detailed prompt"}}, "required": ["prompt"]}}},
+             {"type": "function", "function": {"name": "image_generation", "description": "Generate an image for the user", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "image generation prompt"}}, "required": ["query"]}}},
+             {"type": "function", "function": {"name": "video_generation", "description": "Generate a video for the user", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "video generation prompt"}}, "required": ["query"]}}},
+             {"type": "function", "function": {"name": "image_qna", "description": "Answer a question asked by the user about an image", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Question asked by the user"}}, "required": ["query"]}}},
+         ]
+
+         for msg in chat_history:
+             func_caller.append({"role": "user", "content": f"{str(msg[0])}"})
+             func_caller.append({"role": "assistant", "content": f"{str(msg[1])}"})
+
+         message_text = message["text"]
+         func_caller.append({"role": "user", "content": f'[SYSTEM]You are a helpful assistant. You have access to the following functions: \n {str(functions_metadata)}\n\nTo use these functions respond with:\n<functioncall> {{ "name": "function_name", "arguments": {{ "arg_1": "value_1", "arg_2": "value_2", ... }} }} </functioncall> [USER] {message_text}'})
+
+         response = client_gemma.chat_completion(func_caller, max_tokens=200)
+         response = str(response)
+         try:
+             response = response[response.find("{"):response.index("</")]
+         except:
+             response = response[response.find("{"):(response.rfind("}") + 1)]
+         response = response.replace("\\n", "")
+         response = response.replace("\\'", "'")
+         response = response.replace('\\"', '"')
+         response = response.replace('\\', '')
+         print(f"\n{response}")
+
+         try:
+             json_data = json.loads(str(response))
+             if json_data["name"] == "web_search":
+                 query = json_data["arguments"]["query"]
+
+                 gr.Info("Searching Web")
+                 yield "Searching Web"
+                 web_results = search(query)
+
+                 gr.Info("Extracting relevant Info")
+                 yield "Extracting Relevant Info"
+                 web2 = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in web_results])
+
+                 try:
+                     message_groq = []
+                     message_groq.append({"role": "system", "content": "You are OpenGPT 4o, a helpful and very powerful chatbot web assistant made by KingNish. You are provided with WEB results from which you can find information to answer the user's query in a structured, detailed and better way, in a human style. You are an expert in every field and also learn from and try to answer using the context of previous questions. Try your best to give the best possible response to the user. You also show emotions using emojis and reply in detail like a human, using short forms, a structured format, a friendly tone and emotions."})
+                     for msg in chat_history:
+                         message_groq.append({"role": "user", "content": f"{str(msg[0])}"})
+                         message_groq.append({"role": "assistant", "content": f"{str(msg[1])}"})
+                     message_groq.append({"role": "user", "content": f"[USER] {str(message_text)} , [WEB RESULTS] {str(web2)}"})
+                     # i.e. meta-llama/Meta-Llama-3.1-8B-Instruct
+                     stream = client_groq.chat.completions.create(model="llama-3.1-8b-instant", messages=message_groq, max_tokens=4096, stream=True)
+                     output = ""
+                     for chunk in stream:
+                         content = chunk.choices[0].delta.content
+                         if content:
+                             output += content
+                             yield output
+                 except Exception as e:
+                     messages = f"<|im_start|>system\nYou are OpenGPT 4o, a helpful and very powerful chatbot web assistant made by KingNish. You are provided with WEB results from which you can find information to answer the user's query in a structured, better and human way. You do not say unnecessary things. You are an expert in every field and also learn from and try to answer using the context of previous questions. Try your best to give the best possible response to the user. You also show emotions using emojis and reply in detail like a human, using short forms, a friendly tone and emotions.<|im_end|>"
+                     for msg in chat_history:
+                         messages += f"\n<|im_start|>user\n{str(msg[0])}<|im_end|>"
+                         messages += f"\n<|im_start|>assistant\n{str(msg[1])}<|im_end|>"
+                     messages += f"\n<|im_start|>user\n{message_text}<|im_end|>\n<|im_start|>web_result\n{web2}<|im_end|>\n<|im_start|>assistant\n"
+
+                     stream = client_mixtral.text_generation(messages, max_new_tokens=4000, do_sample=True, stream=True, details=True, return_full_text=False)
+                     output = ""
+                     for response in stream:
+                         if not response.token.text == "<|im_end|>":
+                             output += response.token.text
+                             yield output
+
+             elif json_data["name"] == "image_generation":
+                 query = json_data["arguments"]["query"]
+                 gr.Info("Generating Image, Please wait 10 sec...")
+                 yield "Generating Image, Please wait 10 sec..."
+                 image = image_gen(f"{str(query)}")
+                 yield gr.Image(image[1])
+
+             elif json_data["name"] == "video_generation":
+                 query = json_data["arguments"]["query"]
+                 gr.Info("Generating Video, Please wait 15 sec...")
+                 yield "Generating Video, Please wait 15 sec..."
+                 video = video_gen(f"{str(query)}")
+                 yield gr.Video(video)
+
+             elif json_data["name"] == "image_qna":
+                 inputs = llava(user_prompt, chat_history)
+                 streamer = TextIteratorStreamer(processor, skip_prompt=True, **{"skip_special_tokens": True})
+                 generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
+
+                 thread = Thread(target=model.generate, kwargs=generation_kwargs)
+                 thread.start()
+
+                 buffer = ""
+                 for new_text in streamer:
+                     buffer += new_text
+                     yield buffer
+
+             elif json_data["name"] == "hard_query":
+                 try:
+                     message_groq = []
+                     message_groq.append({"role": "system", "content": "You are OpenGPT 4o, a helpful and powerful assistant made by KingNish. You answer the user's query in a detailed and structured format and style, like a human. You are an expert in every field and also learn from and try to answer using the context of previous questions. You also show emotions using emojis and reply like a human, using short forms, a structured manner, detailed explanations, a friendly tone and emotions."})
+                     for msg in chat_history:
+                         message_groq.append({"role": "user", "content": f"{str(msg[0])}"})
+                         message_groq.append({"role": "assistant", "content": f"{str(msg[1])}"})
+                     message_groq.append({"role": "user", "content": f"{str(message_text)}"})
+                     # i.e. meta-llama/Meta-Llama-3.1-70B-Instruct
+                     stream = client_groq.chat.completions.create(model="llama-3.1-70b-versatile", messages=message_groq, max_tokens=4096, stream=True)
+                     output = ""
+                     for chunk in stream:
+                         content = chunk.choices[0].delta.content
+                         if content:
+                             output += content
+                             yield output
+                 except Exception as e:
+                     print(e)
+                     try:
+                         message_groq = []
+                         message_groq.append({"role": "system", "content": "You are OpenGPT 4o, a helpful and powerful assistant made by KingNish. You answer the user's query in a detailed and structured format and style, like a human. You are an expert in every field and also learn from and try to answer using the context of previous questions. You also show emotions using emojis and reply like a human, using short forms, a structured manner, detailed explanations, a friendly tone and emotions."})
+                         for msg in chat_history:
+                             message_groq.append({"role": "user", "content": f"{str(msg[0])}"})
+                             message_groq.append({"role": "assistant", "content": f"{str(msg[1])}"})
+                         message_groq.append({"role": "user", "content": f"{str(message_text)}"})
+                         # i.e. meta-llama/Meta-Llama-3-70B-Instruct
+                         stream = client_groq.chat.completions.create(model="llama3-70b-8192", messages=message_groq, max_tokens=4096, stream=True)
+                         output = ""
+                         for chunk in stream:
+                             content = chunk.choices[0].delta.content
+                             if content:
+                                 output += content
+                                 yield output
+                     except Exception as e:
+                         print(e)
+                         message_groq = []
+                         message_groq.append({"role": "system", "content": "You are OpenGPT 4o, a helpful and powerful assistant made by KingNish. You answer the user's query in a detailed and structured format and style, like a human. You are an expert in every field and also learn from and try to answer using the context of previous questions. You also show emotions using emojis and reply like a human, using short forms, a structured manner, detailed explanations, a friendly tone and emotions."})
+                         for msg in chat_history:
+                             message_groq.append({"role": "user", "content": f"{str(msg[0])}"})
+                             message_groq.append({"role": "assistant", "content": f"{str(msg[1])}"})
+                         message_groq.append({"role": "user", "content": f"{str(message_text)}"})
+                         stream = client_groq.chat.completions.create(model="llama3-groq-70b-8192-tool-use-preview", messages=message_groq, max_tokens=4096, stream=True)
+                         output = ""
+                         for chunk in stream:
+                             content = chunk.choices[0].delta.content
+                             if content:
+                                 output += content
+                                 yield output
+             else:
+                 try:
+                     message_groq = []
+                     message_groq.append({"role": "system", "content": "You are OpenGPT 4o, a helpful and powerful assistant made by KingNish. You answer the user's query in a detailed and structured format and style, like a human. You are an expert in every field and also learn from and try to answer using the context of previous questions. You also show emotions using emojis and reply like a human, using short forms, a structured manner, detailed explanations, a friendly tone and emotions."})
+                     for msg in chat_history:
+                         message_groq.append({"role": "user", "content": f"{str(msg[0])}"})
+                         message_groq.append({"role": "assistant", "content": f"{str(msg[1])}"})
+                     message_groq.append({"role": "user", "content": f"{str(message_text)}"})
+                     # i.e. meta-llama/Meta-Llama-3-70B-Instruct
+                     stream = client_groq.chat.completions.create(model="llama3-70b-8192", messages=message_groq, max_tokens=4096, stream=True)
+                     output = ""
+                     for chunk in stream:
+                         content = chunk.choices[0].delta.content
+                         if content:
+                             output += content
+                             yield output
+                 except Exception as e:
+                     print(e)
+                     try:
+                         message_groq = []
+                         message_groq.append({"role": "system", "content": "You are OpenGPT 4o, a helpful and powerful assistant made by KingNish. You answer the user's query in a detailed and structured format and style, like a human. You are an expert in every field and also learn from and try to answer using the context of previous questions. You also show emotions using emojis and reply like a human, using short forms, a structured manner, detailed explanations, a friendly tone and emotions."})
+                         for msg in chat_history:
+                             message_groq.append({"role": "user", "content": f"{str(msg[0])}"})
+                             message_groq.append({"role": "assistant", "content": f"{str(msg[1])}"})
+                         message_groq.append({"role": "user", "content": f"{str(message_text)}"})
+                         # i.e. meta-llama/Meta-Llama-3-8B-Instruct
+                         stream = client_groq.chat.completions.create(model="llama3-8b-8192", messages=message_groq, max_tokens=4096, stream=True)
+                         output = ""
+                         for chunk in stream:
+                             content = chunk.choices[0].delta.content
+                             if content:
+                                 output += content
+                                 yield output
+                     except Exception as e:
+                         print(e)
+                         messages = f"<|start_header_id|>system\nYou are OpenGPT 4o, a helpful and powerful assistant made by KingNish. You answer the user's query in a detailed and structured format and style, like a human. You are an expert in every field and also learn from and try to answer using the context of previous questions. You also show emotions using emojis and reply like a human, using short forms, a structured manner, detailed explanations, a friendly tone and emotions.<|end_header_id|>"
+                         for msg in chat_history:
+                             messages += f"\n<|start_header_id|>user\n{str(msg[0])}<|end_header_id|>"
+                             messages += f"\n<|start_header_id|>assistant\n{str(msg[1])}<|end_header_id|>"
+                         messages += f"\n<|start_header_id|>user\n{message_text}<|end_header_id|>\n<|start_header_id|>assistant\n"
+                         stream = client_llama.text_generation(messages, max_new_tokens=2000, do_sample=True, stream=True, details=True, return_full_text=False)
+                         output = ""
+                         for response in stream:
+                             if not response.token.text == "<|eot_id|>":
+                                 output += response.token.text
+                                 yield output
+         except Exception as e:
+             print(e)
+             try:
+                 message_groq = []
+                 message_groq.append({"role": "system", "content": "You are OpenGPT 4o, a helpful and powerful assistant made by KingNish. You answer the user's query in a detailed and structured format and style, like a human. You are an expert in every field and also learn from and try to answer using the context of previous questions. You also show emotions using emojis and reply like a human, using short forms, a structured manner, detailed explanations, a friendly tone and emotions."})
+                 for msg in chat_history:
+                     message_groq.append({"role": "user", "content": f"{str(msg[0])}"})
+                     message_groq.append({"role": "assistant", "content": f"{str(msg[1])}"})
+                 message_groq.append({"role": "user", "content": f"{str(message_text)}"})
+                 # i.e. meta-llama/Meta-Llama-3-8B-Instruct
+                 stream = client_groq.chat.completions.create(model="llama3-8b-8192", messages=message_groq, max_tokens=4096, stream=True)
+                 output = ""
+                 for chunk in stream:
+                     content = chunk.choices[0].delta.content
+                     if content:
+                         output += content
+                         yield output
+             except Exception as e:
+                 print(e)
+                 messages = f"<|im_start|>system\nYou are OpenGPT 4o, a helpful and powerful assistant made by KingNish. You answer the user's query in a detailed and structured format and style, like a human. You are an expert in every field and also learn from and try to answer using the context of previous questions. You also show emotions using emojis and reply like a human, using short forms, a structured manner, detailed explanations, a friendly tone and emotions.<|im_end|>"
+                 for msg in chat_history:
+                     messages += f"\n<|im_start|>user\n{str(msg[0])}<|im_end|>"
+                     messages += f"\n<|im_start|>assistant\n{str(msg[1])}<|im_end|>"
+                 messages += f"\n<|im_start|>user\n{message_text}<|im_end|>\n<|im_start|>assistant\n"
+                 stream = client_mixtral.text_generation(messages, max_new_tokens=4000, do_sample=True, stream=True, details=True, return_full_text=False)
+                 output = ""
+                 for response in stream:
+                     if not response.token.text == "<|im_end|>":
+                         output += response.token.text
+                         yield output
+
+ # Create the chatbot interface
+ chatbot = gr.Chatbot(
+     label="OpenGPT-4o",
+     avatar_images=[None, BOT_AVATAR],
+     show_copy_button=True,
+     likeable=True,
+     layout="panel",
+     height=400,
+ )
+ output = gr.Textbox(label="Prompt")
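For readers skimming the router logic above, here is a self-contained sketch (not part of the commit) of the `<functioncall>` protocol that `model_inference` expects from the routing model. The example string is made up, but the slicing and `json.loads` mirror the parsing done in the code above.

```python
import json

# Hypothetical router reply in the format requested by the [SYSTEM] prompt above
raw = '<functioncall> {"name": "image_generation", "arguments": {"query": "a cute cat"}} </functioncall>'

# Same idea as in model_inference: cut out the JSON object and parse it
payload = raw[raw.find("{"):raw.index("</")]
call = json.loads(payload)

if call["name"] == "image_generation":
    print("Would call image_gen() with prompt:", call["arguments"]["query"])
```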
gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
live_chat.py ADDED
@@ -0,0 +1,31 @@
+ import torch
+ import gradio as gr
+ from transformers import AutoModel
+ from transformers import AutoProcessor
+ import spaces
+
+ # Load the pre-trained multimodal captioning/chat model
+ model3 = AutoModel.from_pretrained("unum-cloud/uform-gen2-dpo", trust_remote_code=True)
+ processor = AutoProcessor.from_pretrained("unum-cloud/uform-gen2-dpo", trust_remote_code=True)
+
+ # Answer a prompt about a single image (used by the live video chat)
+ @spaces.GPU(queue=False)
+ def videochat(image3, prompt3):
+     # Process the input image and prompt
+     inputs = processor(text=[prompt3], images=[image3], return_tensors="pt")
+     # Generate the answer
+     with torch.inference_mode():
+         output = model3.generate(
+             **inputs,
+             do_sample=False,
+             use_cache=True,
+             max_new_tokens=256,
+             eos_token_id=151645,
+             pad_token_id=processor.tokenizer.pad_token_id
+         )
+     prompt_len = inputs["input_ids"].shape[1]
+     # Decode and return the generated text
+     decoded_text = processor.batch_decode(output[:, prompt_len:])[0]
+     if decoded_text.endswith("<|im_end|>"):
+         decoded_text = decoded_text[:-10]
+     yield decoded_text
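A small usage sketch for `videochat` (not part of the commit): "frame.png" is a placeholder path, and the call assumes the uform-gen2-dpo model fits in local memory.

```python
from PIL import Image
from live_chat import videochat

frame = Image.open("frame.png").convert("RGB")  # placeholder image path
for answer in videochat(frame, "What is happening in this frame?"):  # generator yielding the decoded answer
    print(answer)
```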
requirements.txt ADDED
@@ -0,0 +1,18 @@
+ transformers
+ datasets
+ pillow
+ numpy
+ torch
+ streaming-stt-nemo==0.2.0
+ edge-tts
+ asyncio
+ torchvision
+ accelerate
+ beautifulsoup4>=4.9
+ requests>=2.20
+ onnxruntime
+ sentencepiece
+ soxr
+ pydub
+ groq
+ opencv-python
voice_chat.py ADDED
@@ -0,0 +1,64 @@
+ import gradio as gr
+ import edge_tts
+ import asyncio
+ import tempfile
+ import numpy as np
+ import soxr
+ from pydub import AudioSegment
+ import torch
+ import sentencepiece as spm
+ import onnxruntime as ort
+ from huggingface_hub import hf_hub_download, InferenceClient
+
+ # Speech recognition model configuration
+ model_name = "neongeckocom/stt_en_citrinet_512_gamma_0_25"
+ sample_rate = 16000
+
+ # Download the preprocessor, encoder and tokenizer
+ preprocessor = torch.jit.load(hf_hub_download(model_name, "preprocessor.ts", subfolder="onnx"))
+ encoder = ort.InferenceSession(hf_hub_download(model_name, "model.onnx", subfolder="onnx"))
+ tokenizer = spm.SentencePieceProcessor(hf_hub_download(model_name, "tokenizer.spm", subfolder="onnx"))
+
+ # Mistral model configuration
+ client1 = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
+ system_instructions1 = "[SYSTEM] Answer as Real OpenGPT 4o, Made by 'KingNish'. Keep the conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. You will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
+
+ def resample(audio_fp32, sr):
+     return soxr.resample(audio_fp32, sr, sample_rate)
+
+ def to_float32(audio_buffer):
+     return np.divide(audio_buffer, np.iinfo(audio_buffer.dtype).max, dtype=np.float32)
+
+ def transcribe(audio_path):
+     audio_file = AudioSegment.from_file(audio_path)
+     sr = audio_file.frame_rate
+     audio_buffer = np.array(audio_file.get_array_of_samples())
+
+     audio_fp32 = to_float32(audio_buffer)
+     audio_16k = resample(audio_fp32, sr)
+
+     input_signal = torch.tensor(audio_16k).unsqueeze(0)
+     length = torch.tensor(len(audio_16k)).unsqueeze(0)
+     processed_signal, _ = preprocessor.forward(input_signal=input_signal, length=length)
+
+     logits = encoder.run(None, {'audio_signal': processed_signal.numpy(), 'length': length.numpy()})[0][0]
+
+     blank_id = tokenizer.vocab_size()
+     decoded_prediction = [p for p in logits.argmax(axis=1).tolist() if p != blank_id]
+     text = tokenizer.decode_ids(decoded_prediction)
+
+     return text
+
+ def model(text):
+     formatted_prompt = system_instructions1 + text + "[OpenGPT 4o]"
+     stream = client1.text_generation(formatted_prompt, max_new_tokens=300)
+     return stream[:-4]
+
+ async def respond(audio):
+     user = transcribe(audio)
+     reply = model(user)
+     communicate = edge_tts.Communicate(reply)
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
+         tmp_path = tmp_file.name
+     await communicate.save(tmp_path)
+     return tmp_path
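Finally, a minimal sketch (not part of the commit) of the voice round trip that app.py wires to the microphone input: speech is transcribed by the Citrinet ONNX model, answered by Mistral via the Inference API, and spoken back with edge-tts. "question.wav" is a placeholder recording, and network access to the Inference API is assumed.

```python
import asyncio
from voice_chat import respond

reply_audio_path = asyncio.run(respond("question.wav"))  # placeholder recording
print("Spoken reply written to:", reply_audio_path)
```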