Richard Neuschulz committed
Commit 0939023
1 Parent(s): 3bb4043

check 70B model

Files changed (2)
  1. app.py +31 -23
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,35 +1,43 @@
+import os
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-import spaces  # Import the spaces module for ZeroGPU compatibility
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
 
-# Define the text generation function
-
-model_id = "doubledsbv/KafkaLM-8x7B-German-V0.1-AWQ"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to('cuda')
+model_id = "TheBloke/KafkaLM-70B-German-V0.1-GGUF"
+model_filename = "kafkalm-70b-german-v0.1.Q5_K_M.gguf"
+model_path = hf_hub_download(repo_id=model_id, filename=model_filename, cache_dir="./")
 
+# Initialize the Llama model
+llm = Llama(
+    model_path=model_path,  # Use the downloaded model file
+    n_ctx=4096,             # Adjust based on the model's max sequence length
+    n_threads=8,            # Tailor to your system
+    n_gpu_layers=35         # Set based on your GPU's capability
+)
 
 def generate_text(user_input, system_prompt):
-
-    # Combine the system prompt and the user input to form the full prompt
-    full_prompt = f"{system_prompt.strip()}\n\n{user_input.strip()}"
-
-    # Initialize the pipeline for text generation with the model and tokenizer
-    text_generator = pipeline('text-generation', model=model, tokenizer=tokenizer,
-                              return_full_text=True, temperature=0.5,
-                              max_new_tokens=512, top_p=0.95, top_k=50, do_sample=True, device=0)  # Ensure device is set to use GPU
-
-    # Generate text based on the full prompt
-    results = text_generator(full_prompt)
-    generated_text = results[0]['generated_text']
-
+    # Combine the system and user prompts using the KafkaLM prompt template
+    prompt = f"<|system|>\n{system_prompt.strip()}</s>\n<|user|>\n{user_input.strip()}</s>\n<|assistant|>"
+
+    # Generate text using the Llama model
+    output = llm(prompt, max_tokens=512, stop=["</s>"], echo=True)
+
+    # Extract the generated text from the completion result
+    generated_text = output['choices'][0]['text']
+
     return generated_text
 
 # Setup the Gradio interface
-iface = gr.Interface(fn=generate_text,
-                     inputs=[gr.Textbox(lines=2, label="User Prompt", value="Wer ist Kafka?"), gr.Textbox(lines=5, label="System Prompt", value="Du bist ein freundlicher und hilfsbereiter KI-Assistent. Du beantwortest Fragen faktenorientiert und präzise, ohne dabei relevante Fakten auszulassen.")],
-                     outputs=gr.Textbox(label="Generated Text"),
-                     title="Text Generation with KafkaLM",
-                     description="Enter a user prompt and a system prompt to generate text using the KafkaLM model.")
+iface = gr.Interface(
+    fn=generate_text,
+    inputs=[
+        gr.Textbox(lines=2, label="User Prompt", value="Wer ist Kafka?"),
+        gr.Textbox(lines=5, label="System Prompt", value="Du bist ein freundlicher und hilfsbereiter KI-Assistent. Du beantwortest Fragen faktenorientiert und präzise, ohne dabei relevante Fakten auszulassen.")
+    ],
+    outputs=gr.Textbox(label="Generated Text"),
+    title="Text Generation with KafkaLM",
+    description="Enter a user prompt and a system prompt to generate text using the KafkaLM model."
+)
 
 # Launch the Gradio app
 if __name__ == "__main__":
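
Two notes on the new inference path, for anyone reviewing this diff: the `<|system|>`/`<|user|>`/`<|assistant|>` tags with `</s>` terminators follow the prompt template documented on TheBloke's KafkaLM GGUF model card, and calling a `llama_cpp.Llama` instance returns an OpenAI-style completion dict, which is why the handler reads `output['choices'][0]['text']` (with `echo=True`, that text also contains the prompt itself). A minimal standalone sketch of the round trip, reusing the repo and filename from the diff; the small `n_ctx`, `n_gpu_layers=0`, and `max_tokens` values are illustrative choices to keep a smoke test cheap:

# Smoke-test sketch for the GGUF path -- not part of the commit.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

path = hf_hub_download(
    repo_id="TheBloke/KafkaLM-70B-German-V0.1-GGUF",
    filename="kafkalm-70b-german-v0.1.Q5_K_M.gguf",
    cache_dir="./",
)
llm = Llama(model_path=path, n_ctx=512, n_gpu_layers=0)  # CPU-only for the test

prompt = (
    "<|system|>\nDu bist ein hilfsbereiter Assistent.</s>\n"  # "You are a helpful assistant."
    "<|user|>\nWer ist Kafka?</s>\n"                          # "Who is Kafka?"
    "<|assistant|>"
)
out = llm(prompt, max_tokens=32, stop=["</s>"], echo=False)

# The result is an OpenAI-style completion dict:
#   {"id": ..., "object": "text_completion",
#    "choices": [{"text": ..., "index": 0, "finish_reason": ...}], "usage": ...}
print(out["choices"][0]["text"].strip())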
requirements.txt CHANGED
@@ -3,4 +3,6 @@ gradio
 torch
 bitsandbytes
 accelerate
-autoawq
+autoawq
+huggingface_hub
+llama-cpp-python
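
One caveat on the new requirements: the stock PyPI wheel of llama-cpp-python is built CPU-only, so the `n_gpu_layers=35` passed to `Llama(...)` in app.py has no effect unless the package is compiled with GPU support. A quick environment check, as a sketch; the CUDA install command in the comment follows the llama-cpp-python README of this period:

# Dependency sanity check -- a sketch, not part of the commit.
import huggingface_hub
import llama_cpp

print("huggingface_hub", huggingface_hub.__version__)
print("llama-cpp-python", llama_cpp.__version__)

# n_gpu_layers has no effect with a CPU-only build; a CUDA build is
# installed with, e.g.:
#   CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install --force-reinstall --no-cache-dir llama-cpp-python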