Richard Neuschulz committed
Commit 0939023
1 Parent(s): 3bb4043

check 70B model

Files changed (2)
  1. app.py +31 -23
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,35 +1,43 @@
+import os
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-import spaces  # Import the spaces module for ZeroGPU compatibility
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
 
-# Define the text generation function
-
-model_id = "doubledsbv/KafkaLM-8x7B-German-V0.1-AWQ"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to('cuda')
+model_id = "TheBloke/KafkaLM-70B-German-V0.1-GGUF"
+model_filename = "kafkalm-70b-german-v0.1.Q5_K_M.gguf"
+model_path = hf_hub_download(repo_id=model_id, filename=model_filename, cache_dir="./")
 
+# Initialize the Llama model
+llm = Llama(
+    model_path=model_path,  # Use the downloaded model file
+    n_ctx=4096,             # Adjust based on the model's max sequence length
+    n_threads=8,            # Tailor to your system
+    n_gpu_layers=35         # Set based on your GPU's capability
+)
 
 def generate_text(user_input, system_prompt):
-
-    # Combine the system prompt and the user input to form the full prompt
-    full_prompt = f"{system_prompt.strip()}\n\n{user_input.strip()}"
-
-    # Initialize the pipeline for text generation with the model and tokenizer
-    text_generator = pipeline('text-generation', model=model, tokenizer=tokenizer,
-                              return_full_text=True, temperature=0.5,
-                              max_new_tokens=512, top_p=0.95, top_k=50, do_sample=True, device=0)  # Ensure device is set to use GPU
-
-    # Generate text based on the full prompt
-    results = text_generator(full_prompt)
-    generated_text = results[0]['generated_text']
-
+    # Combine the system and user prompts using the KafkaLM prompt template
+    prompt = f"<|system|>\n{system_prompt.strip()}</s>\n<|user|>\n{user_input.strip()}</s>\n<|assistant|>"
+
+    # Generate text using the Llama model
+    output = llm(prompt, max_tokens=512, stop=["</s>"], echo=True)
+
+    # Extract the generated text from the completion result
+    generated_text = output['choices'][0]['text']
+
     return generated_text
 
 # Setup the Gradio interface
-iface = gr.Interface(fn=generate_text,
-                     inputs=[gr.Textbox(lines=2, label="User Prompt", value="Wer ist Kafka?"), gr.Textbox(lines=5, label="System Prompt", value="Du bist ein freundlicher und hilfsbereiter KI-Assistent. Du beantwortest Fragen faktenorientiert und präzise, ohne dabei relevante Fakten auszulassen.")],
-                     outputs=gr.Textbox(label="Generated Text"),
-                     title="Text Generation with KafkaLM",
-                     description="Enter a user prompt and a system prompt to generate text using the KafkaLM model.")
+iface = gr.Interface(
+    fn=generate_text,
+    inputs=[
+        gr.Textbox(lines=2, label="User Prompt", value="Wer ist Kafka?"),
+        gr.Textbox(lines=5, label="System Prompt", value="Du bist ein freundlicher und hilfsbereiter KI-Assistent. Du beantwortest Fragen faktenorientiert und präzise, ohne dabei relevante Fakten auszulassen.")
+    ],
+    outputs=gr.Textbox(label="Generated Text"),
+    title="Text Generation with KafkaLM",
+    description="Enter a user prompt and a system prompt to generate text using the KafkaLM model."
+)
 
 # Launch the Gradio app
 if __name__ == "__main__":
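
Two notes on the new inference path, for anyone reviewing this diff: the `<|system|>`/`<|user|>`/`<|assistant|>` tags with `</s>` terminators follow the prompt template documented on TheBloke's KafkaLM GGUF model card, and calling a `llama_cpp.Llama` instance returns an OpenAI-style completion dict, which is why the handler reads `output['choices'][0]['text']` (with `echo=True`, that text also contains the prompt itself). A minimal standalone sketch of the round trip, reusing the repo and filename from the diff; the small `n_ctx`, `n_gpu_layers=0`, and `max_tokens` values are illustrative choices to keep a smoke test cheap:

# Smoke-test sketch for the GGUF path -- not part of the commit.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

path = hf_hub_download(
    repo_id="TheBloke/KafkaLM-70B-German-V0.1-GGUF",
    filename="kafkalm-70b-german-v0.1.Q5_K_M.gguf",
    cache_dir="./",
)
llm = Llama(model_path=path, n_ctx=512, n_gpu_layers=0)  # CPU-only for the test

prompt = (
    "<|system|>\nDu bist ein hilfsbereiter Assistent.</s>\n"  # "You are a helpful assistant."
    "<|user|>\nWer ist Kafka?</s>\n"                          # "Who is Kafka?"
    "<|assistant|>"
)
out = llm(prompt, max_tokens=32, stop=["</s>"], echo=False)

# The result is an OpenAI-style completion dict:
#   {"id": ..., "object": "text_completion",
#    "choices": [{"text": ..., "index": 0, "finish_reason": ...}], "usage": ...}
print(out["choices"][0]["text"].strip())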
requirements.txt CHANGED
@@ -3,4 +3,6 @@ gradio
 torch
 bitsandbytes
 accelerate
-autoawq
+autoawq
+huggingface_hub
+llama-cpp-python
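
One caveat on the new requirements: the stock PyPI wheel of llama-cpp-python is built CPU-only, so the `n_gpu_layers=35` passed to `Llama(...)` in app.py has no effect unless the package is compiled with GPU support. A quick environment check, as a sketch; the CUDA install command in the comment follows the llama-cpp-python README of this period:

# Dependency sanity check -- a sketch, not part of the commit.
import huggingface_hub
import llama_cpp

print("huggingface_hub", huggingface_hub.__version__)
print("llama-cpp-python", llama_cpp.__version__)

# n_gpu_layers has no effect with a CPU-only build; a CUDA build is
# installed with, e.g.:
#   CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install --force-reinstall --no-cache-dir llama-cpp-python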