.pre-commit-config.yaml ADDED
@@ -0,0 +1,51 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: check-executables-have-shebangs
+      - id: check-json
+      - id: check-merge-conflict
+      - id: check-shebang-scripts-are-executable
+      - id: check-toml
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: mixed-line-ending
+        args: ["--fix=lf"]
+      - id: requirements-txt-fixer
+      - id: trailing-whitespace
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.8.4
+    hooks:
+      - id: ruff
+        args: ["--fix"]
+      - id: ruff-format
+        args: ["--line-length", "119"]
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.14.0
+    hooks:
+      - id: mypy
+        args: ["--ignore-missing-imports"]
+        additional_dependencies:
+          [
+            "types-python-slugify",
+            "types-requests",
+            "types-PyYAML",
+            "types-pytz",
+          ]
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.8.1
+    hooks:
+      - id: nbstripout
+        args:
+          [
+            "--extra-keys",
+            "metadata.interpreter metadata.kernelspec cell.metadata.pycharm",
+          ]
+  - repo: https://github.com/nbQA-dev/nbQA
+    rev: 1.9.1
+    hooks:
+      - id: nbqa-black
+      - id: nbqa-pyupgrade
+        args: ["--py37-plus"]
+      - id: nbqa-isort
+        args: ["--float-to-top"]
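A note on the mypy hook above: `additional_dependencies` installs type-stub packages into the hook's isolated environment, so third-party imports resolve to typed signatures instead of `Any`. A minimal sketch of the kind of mistake this catches (the function is hypothetical, for illustration only):

```python
import requests  # typed via the types-requests stubs listed above


def fetch_status(url: str) -> int:
    response = requests.get(url, timeout=10)
    # With stubs installed, mypy knows response.status_code is an int;
    # returning response.text here would be flagged as returning str
    # from a function annotated to return int.
    return response.status_code
```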
.python-version ADDED
@@ -0,0 +1 @@
+3.10
.vscode/extensions.json ADDED
@@ -0,0 +1,8 @@
+{
+  "recommendations": [
+    "ms-python.python",
+    "charliermarsh.ruff",
+    "streetsidesoftware.code-spell-checker",
+    "tamasfe.even-better-toml"
+  ]
+}
.vscode/settings.json ADDED
@@ -0,0 +1,21 @@
+{
+  "editor.formatOnSave": true,
+  "files.insertFinalNewline": false,
+  "[python]": {
+    "editor.defaultFormatter": "charliermarsh.ruff",
+    "editor.formatOnType": true,
+    "editor.codeActionsOnSave": {
+      "source.fixAll.ruff": "explicit",
+      "source.organizeImports": "explicit"
+    }
+  },
+  "[jupyter]": {
+    "files.insertFinalNewline": false
+  },
+  "notebook.output.scrolling": true,
+  "notebook.formatOnCellExecution": true,
+  "notebook.formatOnSave.enabled": true,
+  "notebook.codeActionsOnSave": {
+    "source.organizeImports": "explicit"
+  }
+}
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 😻
 colorFrom: indigo
 colorTo: pink
 sdk: gradio
-sdk_version: 4.39.0
+sdk_version: 5.9.1
 app_file: app.py
 pinned: false
 short_description: Chatbot
app.py CHANGED
@@ -1,6 +1,6 @@
 import os
+from collections.abc import Iterator
 from threading import Thread
-from typing import Iterator
 
 import gradio as gr
 import spaces
@@ -21,7 +21,7 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-model_id = "nltpt/Llama-3.2-3B-Instruct"
+model_id = "meta-llama/Llama-3.2-3B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
@@ -34,22 +34,14 @@ model.eval()
 @spaces.GPU(duration=90)
 def generate(
     message: str,
-    chat_history: list[tuple[str, str]],
+    chat_history: list[dict],
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
-    conversation = []
-    for user, assistant in chat_history:
-        conversation.extend(
-            [
-                {"role": "user", "content": user},
-                {"role": "assistant", "content": assistant},
-            ]
-        )
-    conversation.append({"role": "user", "content": message})
+    conversation = [*chat_history, {"role": "user", "content": message}]
 
     input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
@@ -78,7 +70,7 @@ def generate(
     yield "".join(outputs)
 
 
-chat_interface = gr.ChatInterface(
+demo = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
         gr.Slider(
@@ -126,12 +118,12 @@ chat_interface = gr.ChatInterface(
         ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
     ],
     cache_examples=False,
+    type="messages",
+    description=DESCRIPTION,
+    css_paths="style.css",
+    fill_height=True,
 )
 
-with gr.Blocks(css="style.css", fill_height=True) as demo:
-    gr.Markdown(DESCRIPTION)
-    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
-    chat_interface.render()
 
 if __name__ == "__main__":
     demo.queue(max_size=20).launch()
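The functional change above is `type="messages"`: `gr.ChatInterface` now passes chat history to `fn` as a list of role/content dicts rather than `(user, assistant)` tuples, which is why the manual conversion loop in `generate()` could be collapsed into a one-liner. A minimal sketch of the new flow (sample messages are illustrative):

```python
# History as gr.ChatInterface(type="messages") delivers it to fn.
chat_history = [
    {"role": "user", "content": "What is Llama 3.2?"},
    {"role": "assistant", "content": "A family of instruction-tuned models."},
]
message = "How large is the 3B variant?"

# Mirrors the one-liner in the new generate(): the history is already in
# chat-template format, so only the latest user turn needs appending before
# tokenizer.apply_chat_template(conversation, ...).
conversation = [*chat_history, {"role": "user", "content": message}]
```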
pyproject.toml ADDED
@@ -0,0 +1,50 @@
+[project]
+name = "llama-3-2-3b-instruct"
+version = "0.1.0"
+description = ""
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "accelerate>=1.2.1",
+    "gradio>=5.9.1",
+    "hf-transfer>=0.1.8",
+    "spaces>=0.31.1",
+    "torch==2.4.0",
+    "transformers>=4.47.1",
+]
+
+[tool.ruff]
+line-length = 119
+
+[tool.ruff.lint]
+select = ["ALL"]
+ignore = [
+    "COM812", # missing-trailing-comma
+    "D203",   # one-blank-line-before-class
+    "D213",   # multi-line-summary-second-line
+    "E501",   # line-too-long
+    "SIM117", # multiple-with-statements
+]
+extend-ignore = [
+    "D100",    # undocumented-public-module
+    "D101",    # undocumented-public-class
+    "D102",    # undocumented-public-method
+    "D103",    # undocumented-public-function
+    "D104",    # undocumented-public-package
+    "D105",    # undocumented-magic-method
+    "D107",    # undocumented-public-init
+    "EM101",   # raw-string-in-exception
+    "FBT001",  # boolean-type-hint-positional-argument
+    "FBT002",  # boolean-default-value-positional-argument
+    "PD901",   # pandas-df-variable-name
+    "PGH003",  # blanket-type-ignore
+    "PLR0913", # too-many-arguments
+    "PLR0915", # too-many-statements
+    "TRY003",  # raise-vanilla-args
+]
+unfixable = [
+    "F401", # unused-import
+]
+
+[tool.ruff.format]
+docstring-code-format = true
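With `select = ["ALL"]`, ruff enables every lint rule and the ignore lists carve out exceptions. This is likely what drove the `Iterator` import change in app.py: ruff's deprecated-import rule (UP035) flags typing aliases that moved to `collections.abc`, and the `--fix` hook rewrites them. A small sketch of the fixed form:

```python
# `from typing import Iterator` would be flagged by UP035 under
# select = ["ALL"] and auto-rewritten by the ruff --fix hook to the
# collections.abc location:
from collections.abc import Iterator


def stream_words() -> Iterator[str]:
    yield from ("streamed", "token", "by", "token")  # illustrative generator
```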
requirements.txt CHANGED
@@ -1,6 +1,240 @@
-accelerate==0.33.0
-bitsandbytes==0.43.2
-gradio==4.39.0
-spaces==0.29.2
-torch==2.2.0
-transformers==4.43.3
+# This file was autogenerated by uv via the following command:
+#    uv pip compile pyproject.toml -o requirements.txt
+accelerate==1.2.1
+    # via llama-3-2-3b-instruct (pyproject.toml)
+aiofiles==23.2.1
+    # via gradio
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.7.0
+    # via
+    #   gradio
+    #   httpx
+    #   starlette
+certifi==2024.12.14
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+charset-normalizer==3.4.1
+    # via requests
+click==8.1.8
+    # via
+    #   typer
+    #   uvicorn
+exceptiongroup==1.2.2
+    # via anyio
+fastapi==0.115.6
+    # via gradio
+ffmpy==0.5.0
+    # via gradio
+filelock==3.16.1
+    # via
+    #   huggingface-hub
+    #   torch
+    #   transformers
+    #   triton
+fsspec==2024.12.0
+    # via
+    #   gradio-client
+    #   huggingface-hub
+    #   torch
+gradio==5.9.1
+    # via
+    #   llama-3-2-3b-instruct (pyproject.toml)
+    #   spaces
+gradio-client==1.5.2
+    # via gradio
+h11==0.14.0
+    # via
+    #   httpcore
+    #   uvicorn
+hf-transfer==0.1.8
+    # via llama-3-2-3b-instruct (pyproject.toml)
+httpcore==1.0.7
+    # via httpx
+httpx==0.28.1
+    # via
+    #   gradio
+    #   gradio-client
+    #   safehttpx
+    #   spaces
+huggingface-hub==0.27.0
+    # via
+    #   accelerate
+    #   gradio
+    #   gradio-client
+    #   tokenizers
+    #   transformers
+idna==3.10
+    # via
+    #   anyio
+    #   httpx
+    #   requests
+jinja2==3.1.5
+    # via
+    #   gradio
+    #   torch
+markdown-it-py==3.0.0
+    # via rich
+markupsafe==2.1.5
+    # via
+    #   gradio
+    #   jinja2
+mdurl==0.1.2
+    # via markdown-it-py
+mpmath==1.3.0
+    # via sympy
+networkx==3.4.2
+    # via torch
+numpy==2.2.1
+    # via
+    #   accelerate
+    #   gradio
+    #   pandas
+    #   transformers
+nvidia-cublas-cu12==12.1.3.1
+    # via
+    #   nvidia-cudnn-cu12
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cuda-cupti-cu12==12.1.105
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.1.105
+    # via torch
+nvidia-cuda-runtime-cu12==12.1.105
+    # via torch
+nvidia-cudnn-cu12==9.1.0.70
+    # via torch
+nvidia-cufft-cu12==11.0.2.54
+    # via torch
+nvidia-curand-cu12==10.3.2.106
+    # via torch
+nvidia-cusolver-cu12==11.4.5.107
+    # via torch
+nvidia-cusparse-cu12==12.1.0.106
+    # via
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-nccl-cu12==2.20.5
+    # via torch
+nvidia-nvjitlink-cu12==12.6.85
+    # via
+    #   nvidia-cusolver-cu12
+    #   nvidia-cusparse-cu12
+nvidia-nvtx-cu12==12.1.105
+    # via torch
+orjson==3.10.13
+    # via gradio
+packaging==24.2
+    # via
+    #   accelerate
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   spaces
+    #   transformers
+pandas==2.2.3
+    # via gradio
+pillow==11.1.0
+    # via gradio
+psutil==5.9.8
+    # via
+    #   accelerate
+    #   spaces
+pydantic==2.10.4
+    # via
+    #   fastapi
+    #   gradio
+    #   spaces
+pydantic-core==2.27.2
+    # via pydantic
+pydub==0.25.1
+    # via gradio
+pygments==2.18.0
+    # via rich
+python-dateutil==2.9.0.post0
+    # via pandas
+python-multipart==0.0.20
+    # via gradio
+pytz==2024.2
+    # via pandas
+pyyaml==6.0.2
+    # via
+    #   accelerate
+    #   gradio
+    #   huggingface-hub
+    #   transformers
+regex==2024.11.6
+    # via transformers
+requests==2.32.3
+    # via
+    #   huggingface-hub
+    #   spaces
+    #   transformers
+rich==13.9.4
+    # via typer
+ruff==0.8.5
+    # via gradio
+safehttpx==0.1.6
+    # via gradio
+safetensors==0.5.0
+    # via
+    #   accelerate
+    #   transformers
+semantic-version==2.10.0
+    # via gradio
+shellingham==1.5.4
+    # via typer
+six==1.17.0
+    # via python-dateutil
+sniffio==1.3.1
+    # via anyio
+spaces==0.31.1
+    # via llama-3-2-3b-instruct (pyproject.toml)
+starlette==0.41.3
+    # via
+    #   fastapi
+    #   gradio
+sympy==1.13.3
+    # via torch
+tokenizers==0.21.0
+    # via transformers
+tomlkit==0.13.2
+    # via gradio
+torch==2.4.0
+    # via
+    #   llama-3-2-3b-instruct (pyproject.toml)
+    #   accelerate
+tqdm==4.67.1
+    # via
+    #   huggingface-hub
+    #   transformers
+transformers==4.47.1
+    # via llama-3-2-3b-instruct (pyproject.toml)
+triton==3.0.0
+    # via torch
+typer==0.15.1
+    # via gradio
+typing-extensions==4.12.2
+    # via
+    #   anyio
+    #   fastapi
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   pydantic
+    #   pydantic-core
+    #   rich
+    #   spaces
+    #   torch
+    #   typer
+    #   uvicorn
+tzdata==2024.2
+    # via pandas
+urllib3==2.3.0
+    # via requests
+websockets==14.1
+    # via gradio-client
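The header comment records how this lock-style file is produced: `uv pip compile pyproject.toml -o requirements.txt` resolves the loose `pyproject.toml` constraints into exact pins with provenance comments. A quick, illustrative way to confirm a running environment matches the pins (package names and versions taken from the lines above):

```python
# Hypothetical sanity check that the environment matches the compiled pins.
from importlib.metadata import version

assert version("torch") == "2.4.0"
assert version("transformers") == "4.47.1"
assert version("gradio") == "5.9.1"
```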
uv.lock ADDED
The diff for this file is too large to render.