Spaces:

MuntasirHossain
/

Fine-tuned-Llama-3-8B-Chatbot

Sleeping

App Files Files Community

MuntasirHossain committed on May 17, 2024

Commit

fb1e4d5

verified ·

1 Parent(s): b3c7558

Create app.py

Browse files

Files changed (1) hide show

app.py +122 -0

app.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import gradio as gr
+import os
+import requests
+from llama_cpp import Llama
+from transformers import AutoTokenizer
+import transformers
+import torch
+# Hugging Face repository id of the GGUF model this app serves.
+llm_name = "MuntasirHossain/Meta-Llama-3-8B-OpenOrca-GGUF"
+# Last path component of the repository id ("Meta-Llama-3-8B-OpenOrca-GGUF").
+# NOTE(review): not referenced anywhere in the visible part of this file.
+llm_path = os.path.basename(llm_name)
+# download gguf model
+def download_llms(llm_name):
+    """Download GGUF model"""
+    download_url = ""
+    print("Downloading " + llm_name)
+    download_url = "https://huggingface.co/MuntasirHossain/Meta-Llama-3-8B-OpenOrca-GGUF/resolve/main/Q4_K_M.gguf"
+    # elif selected_llm == 'microsoft/Phi-3-mini-4k-instruct':
+    #     download_url = "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf"
+    # elif selected_llm == 'mistralai/Mistral-7B-Instruct-v0.2':
+    #     download_url = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q2_K.gguf"
+    if not os.path.exists("model"):
+        os.makedirs("model")
+    llm_filename = os.path.basename(download_url)
+    llm_temp_file_path = os.path.join("model", llm_filename)
+    if os.path.exists(llm_temp_file_path):
+        print("Model already available")
+    else:
+        response = requests.get(download_url, stream=True)
+        if response.status_code == 200:
+            with open(llm_temp_file_path, 'wb') as f:
+                for chunk in response.iter_content(chunk_size=1024):
+                    if chunk:
+                        f.write(chunk)
+            print("Download completed")
+        else:
+            print(f"Model download completed {response.status_code}")
+# define model pipeline with llama-cpp
+def initialize_llm(llm_model):
+    model_path = ""
+    if llm_model == llm_name:
+        model_path = "model/Q4_K_M.gguf"
+        download_llms(llm_model)
+    llm = LlamaCpp(
+        model_path=model_path,
+        # temperature=temperature,
+        # max_tokens=256,
+        # top_p=1,
+        # top_k= top_k,
+        n_ctx=1024,
+        verbose=False
+        )
+    return llm
+# Load the model once at import time; generate() reads this module-level instance.
+llm = initialize_llm(llm_name)
+# format prompt as per the chat template on the official model page: https://huggingface.co/google/gemma-7b-it
+def format_prompt(input_text, history):
+    system_prompt = "You are a helpful AI assistant. You are truthful in your response."
+    prompt = ""
+    if history:
+        for previous_prompt, response in history:
+            prompt += f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{previous_prompt}<|im_end|>\n<|im_start|>assistant\n{response}<|im_end|>"
+            # <start_of_turn>user
+            # {previous_prompt}<end_of_turn>
+            # <start_of_turn>model
+            # {response}<end_of_turn>
+    prompt += f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{input_text}<|im_end|>\n<|im_start|>assistant"
+    # <start_of_turn>user
+    # {input_text}<end_of_turn>
+    # <start_of_turn>model"""
+    return prompt
+def generate(prompt, history, max_new_tokens=256): # temperature=0.95, top_p=0.9, repetition_penalty=1.0
+    if not history:
+        history = []
+    # temperature = float(temperature)
+    # top_p = float(top_p)
+    kwargs = dict(
+        # temperature=temperature,
+        max_tokens=max_new_tokens,
+        # top_p=top_p,
+        # repetition_penalty=repetition_penalty,
+        # do_sample=True,
+        stop=["<|im_end|>"]
+    )
+    formatted_prompt = format_prompt(prompt, history)
+    # response = llm(formatted_prompt, **kwargs, stream=True)
+    # output = ""
+    # for chunk in response:
+    #     output += chunk.token.text
+    #     yield output
+    # return output
+    response = llm(formatted_prompt, **kwargs)
+    return response['choices'][0]['text']
+chatbot = gr.Chatbot(height=500)
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.HTML("<center><h1>Google Gemma 7B IT</h1><center>")
+    gr.ChatInterface(
+        generate,
+        chatbot=chatbot,
+        retry_btn=None,
+        undo_btn=None,
+        clear_btn="Clear",
+        description="This AI agent is using the MuntasirHossain/Meta-Llama-3-8B-OpenOrca-GGUF model for text-generation",
+        # additional_inputs=additional_inputs,
+        examples=[["Explain artificial intelligence in a few lines."]]
+    )
+demo.queue().launch()