Robzy committed

Commit 9e81c6a · 1 Parent(s): 248f6a0

readme details

Files changed (3):
1. README.md +15 -1
2. app-old.py +0 -78
3. finetuning.ipynb +0 -0
README.md CHANGED
@@ -9,4 +9,18 @@ app_file: app.py
 pinned: false
 ---
 
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
+An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
+
+### [HuggingFace Space with Quantized LLMs](https://huggingface.co/spaces/Robzy/llm)
+
+**Baseline model**: Llama-3.2-1B-Instruct with 4-bit quantization
+
+**Training infrastructure**:
+* Google Colab with an NVIDIA Tesla T4 GPU
+* Finetuning with parameter-efficient finetuning (PEFT) via low-rank adaptation (LoRA), using Unsloth and Hugging Face's supervised finetuning libraries (see the sketches after this diff).
+* Weights & Biases for training monitoring and model checkpointing, with a checkpoint saved every 10 steps.
+
+**Finetuning details**
+
+**Datasets**:
+* [Code instructions Alpaca 120k](https://huggingface.co/datasets/iamtarun/code_instructions_120k_alpaca)
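
The training setup above can be made concrete. Below is a minimal sketch of that pipeline, assuming Unsloth's `FastLanguageModel` API and the TRL `SFTTrainer` signature used in Unsloth's example notebooks; the base-model repo name, LoRA settings, and all hyperparameters are illustrative assumptions rather than the values used in `finetuning.ipynb`. Only the dataset name, the W&B reporting, and the 10-step checkpoint interval come from the description above.

```python
# Sketch of the described finetuning pipeline (hyperparameters illustrative).
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments

# Load the 1B Instruct base model in 4-bit so it fits a Colab T4.
# The repo name is an assumption, not taken from finetuning.ipynb.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-1B-Instruct",
    max_seq_length=2048,
    load_in_4bit=True,
)

# Attach LoRA adapters (PEFT): only the low-rank matrices are trained.
# Rank/alpha/target modules are illustrative defaults.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
)

# Dataset named in the README; assuming its "prompt" column holds the
# fully formatted instruction/response text.
dataset = load_dataset("iamtarun/code_instructions_120k_alpaca", split="train")

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="prompt",
    max_seq_length=2048,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        max_steps=100,
        learning_rate=2e-4,
        output_dir="outputs",
        report_to="wandb",      # Weights & Biases monitoring
        save_strategy="steps",
        save_steps=10,          # checkpoint every 10 steps, per the README
        logging_steps=1,
    ),
)
trainer.train()
```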
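On the serving side, the deleted `app-old.py` below loads the finetuned checkpoints with `llama-cpp-python`. As a usage sketch, the 4-bit GGUF variant can be queried on its own; the repo id and filename are taken from that file, while the prompt is an illustrative example:

```python
from llama_cpp import Llama

# 4-bit (Q4_K_M) GGUF export of the finetuned model, as loaded in app-old.py.
llm = Llama.from_pretrained(
    repo_id="Robzy/Llama-3.2-1B-Instruct-Finetuned-q4_k_m",
    filename="unsloth.Q4_K_M.gguf",
)

# One non-streaming chat completion; the prompt is an illustrative example.
out = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a Python function that reverses a string."},
    ]
)
print(out["choices"][0]["message"]["content"])
```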
app-old.py DELETED
@@ -1,78 +0,0 @@
-from llama_cpp import Llama
-import gradio as gr
-
-llm = Llama.from_pretrained(
-    repo_id="Robzy/Llama-3.2-1B-Instruct-Finetuned-q4_k_m",
-    filename="unsloth.Q4_K_M.gguf",
-)
-
-llm2 = Llama.from_pretrained(
-    repo_id="Robzy/Llama-3.2-1B-Instruct-Finetuned-16bit",
-    filename="unsloth.F16.gguf",
-)
-
-def predict(message, history):
-    messages = [{"role": "system", "content": "You are a helpful assistant."}]
-    for user_message, bot_message in history:
-        if user_message:
-            messages.append({"role": "user", "content": user_message})
-        if bot_message:
-            messages.append({"role": "assistant", "content": bot_message})
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-    for chunk in llm.create_chat_completion(
-        stream=True,
-        messages=messages,
-    ):
-        part = chunk["choices"][0]["delta"].get("content", None)
-        if part:
-            response += part
-        yield response
-
-
-def predict2(message, history):
-    messages = [{"role": "system", "content": "You are a helpful assistant."}]
-    for user_message, bot_message in history:
-        if user_message:
-            messages.append({"role": "user", "content": user_message})
-        if bot_message:
-            messages.append({"role": "assistant", "content": bot_message})
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-    for chunk in llm2.create_chat_completion(
-        stream=True,
-        messages=messages,
-    ):
-        part = chunk["choices"][0]["delta"].get("content", None)
-        if part:
-            response += part
-        yield response
-
-
-chat1 = gr.ChatInterface(predict, title="4-bit")
-chat2 = gr.ChatInterface(predict2, title="16-bit")
-chat3 = gr.ChatInterface(predict2, title="16-bit")
-
-def update_chat(value):
-    if value == "4-bit":
-        chat1.render(visible=True)
-        chat2.render(visible=False)
-        chat3.render(visible=False)
-    elif value == "16-bit":
-        chat1.render(visible=False)
-        chat2.render(visible=True)
-        chat3.render(visible=False)
-    else:
-        chat1.render(visible=False)
-        chat2.render(visible=False)
-        chat3.render(visible=True)
-
-with gr.Blocks() as demo:
-
-    gr.Markdown("# Quantized Llama Comparison for Code Generation")
-    dropdown = gr.Dropdown(["4-bit", "16-bit", "32-bit"], label="Choose model version", value="4-bit")
-    dropdown.change(fn=update_chat, inputs=dropdown, outputs=[chat1, chat2, chat3])
-
-demo.launch()
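
One note on the deleted file: `update_chat` calls `render(visible=...)`, but Gradio's `render()` takes no `visible` argument, so the dropdown could not actually swap chats (and `chat3` reuses `predict2` under a "16-bit" title even though the dropdown offers "32-bit"). Below is a minimal sketch of the intended switcher, assuming the Gradio 4 Blocks API, with hypothetical stub predictors standing in for the two model-backed generators:

```python
import gradio as gr

# Hypothetical stand-ins for the two llama.cpp-backed generators in app-old.py.
def predict_4bit(message, history):
    return f"[4-bit] {message}"

def predict_16bit(message, history):
    return f"[16-bit] {message}"

with gr.Blocks() as demo:
    gr.Markdown("# Quantized Llama Comparison for Code Generation")
    dropdown = gr.Dropdown(["4-bit", "16-bit"], label="Choose model version", value="4-bit")

    # Wrap each chat in a Column so visibility can be toggled after rendering.
    with gr.Column(visible=True) as col_4bit:
        gr.ChatInterface(predict_4bit, title="4-bit")
    with gr.Column(visible=False) as col_16bit:
        gr.ChatInterface(predict_16bit, title="16-bit")

    def switch(choice):
        # Return one update per output component, showing only the chosen chat.
        return (
            gr.update(visible=choice == "4-bit"),
            gr.update(visible=choice == "16-bit"),
        )

    dropdown.change(fn=switch, inputs=dropdown, outputs=[col_4bit, col_16bit])

demo.launch()
```

Wrapping each `ChatInterface` in a `gr.Column` lets the change handler return `gr.update(visible=...)` objects, which is the supported way to toggle already-rendered components.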
finetuning.ipynb ADDED
The diff for this file is too large to render. See raw diff