Files changed (1)
app.py +27 -5
app.py CHANGED
@@ -23,14 +23,30 @@ def script_to_use(model_id, api):
     arch = arch[0]
     return "convert.py" if arch in LLAMA_LIKE_ARCHS else "convert-hf-to-gguf.py"
 
-def process_model(model_id, q_method, hf_token):
+def process_model(model_id, q_method, hf_token, private_repo):
     model_name = model_id.split('/')[-1]
     fp16 = f"{model_name}/{model_name.lower()}.fp16.bin"
 
     try:
         api = HfApi(token=hf_token)
 
-        snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, token=hf_token)
+        dl_pattern = ["*.md", "*.json", "*.model"]
+
+        pattern = (
+            "*.safetensors"
+            if any(
+                file.path.endswith(".safetensors")
+                for file in api.list_repo_tree(
+                    repo_id=model_id,
+                    recursive=True,
+                )
+            )
+            else "*.bin"
+        )
+
+        dl_pattern += [pattern]
+
+        snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, token=hf_token, allow_patterns=dl_pattern)
         print("Model downloaded successfully!")
 
         conversion_script = script_to_use(model_id, api)
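
The download change above is the core of the PR: instead of pulling the whole snapshot, it lists the repo tree, prefers `*.safetensors` weights when any are present (falling back to `*.bin`), and passes the resulting patterns to `snapshot_download` via `allow_patterns`. A minimal standalone sketch of the same logic, assuming a public repo ("org/model" is a placeholder id):

```python
from huggingface_hub import HfApi, snapshot_download

api = HfApi()  # pass token="hf_..." for private or gated repos
model_id = "org/model"  # placeholder repo id

# Prefer safetensors weights when the repo ships any; otherwise fall back
# to legacy PyTorch *.bin shards.
has_safetensors = any(
    f.path.endswith(".safetensors")
    for f in api.list_repo_tree(repo_id=model_id, recursive=True)
)
weight_pattern = "*.safetensors" if has_safetensors else "*.bin"

# Download only the weights plus the small metadata files the converter needs.
dl_pattern = ["*.md", "*.json", "*.model", weight_pattern]
snapshot_download(repo_id=model_id, local_dir="model", allow_patterns=dl_pattern)
```

Note the list wrapping when appending: `dl_pattern += [pattern]`. Appending the bare string would extend the list character by character, and the stray `"*"` entry would quietly allow every file again.
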
@@ -49,7 +65,7 @@ def process_model(model_id, q_method, hf_token):
         print("Quantised successfully!")
 
         # Create empty repo
-        new_repo_url = api.create_repo(repo_id=f"{model_name}-{q_method}-GGUF", exist_ok=True)
+        new_repo_url = api.create_repo(repo_id=f"{model_name}-{q_method}-GGUF", exist_ok=True, private=private_repo)
         new_repo_id = new_repo_url.repo_id
         print("Repo created successfully!", new_repo_url)
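
`create_repo` simply threads the new checkbox value through. With `exist_ok=True` the call is idempotent, and it returns a `RepoUrl` (a `str` subclass) whose `repo_id` carries the namespace the token resolves to, which the card text below relies on. A hedged sketch:

```python
from huggingface_hub import HfApi

api = HfApi(token="hf_...")  # placeholder write token

# exist_ok=True makes reruns safe; private=True hides the repo from others.
url = api.create_repo(repo_id="model-Q4_K_M-GGUF", exist_ok=True, private=True)
print(url.repo_id)  # e.g. "your-username/model-Q4_K_M-GGUF"
```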
 
@@ -58,6 +74,7 @@ def process_model(model_id, q_method, hf_token):
         except:
             card = ModelCard("")
         card.data.tags = ["llama-cpp"] if card.data.tags is None else card.data.tags + ["llama-cpp"]
+        card.data.tags += ["gguf-my-repo"]
         card.text = dedent(
             f"""
             # {new_repo_id}
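
The card handling falls back to an empty `ModelCard` when the source repo has none, and `card.data.tags` can be `None` on cards without a metadata block, hence the guard before appending. A compact sketch of the same tag logic, with a placeholder repo id:

```python
from huggingface_hub import ModelCard

try:
    card = ModelCard.load("org/model")  # placeholder repo id
except Exception:
    card = ModelCard("")  # no upstream card: start from scratch

# Treat a missing tag list as empty, then append the Space's tags.
card.data.tags = (card.data.tags or []) + ["llama-cpp", "gguf-my-repo"]
```
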
@@ -84,7 +101,7 @@ def process_model(model_id, q_method, hf_token):
             llama-server --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -c 2048
             ```
 
-            Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the llama.cpp repo as well.
+            Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
 
             ```
             git clone https://github.com/ggerganov/llama.cpp && cd llama.cpp && make && ./main -m {qtype.split("/")[-1]} -n 128
@@ -138,6 +155,11 @@ iface = gr.Interface(
             label="HF Write Token",
             info="https://hf.co/settings/token",
             type="password",
+        ),
+        gr.Checkbox(
+            value=False,
+            label="Private Repo",
+            info="Create a private repo under your username."
         )
     ],
     outputs=[
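
On the UI side, `gr.Interface` maps inputs to the function's parameters positionally, so the new `gr.Checkbox` must sit in the same slot as the new `private_repo` argument. A minimal runnable sketch (labels and dropdown choices are illustrative, not the Space's exact ones):

```python
import gradio as gr

def process_model(model_id, q_method, hf_token, private_repo):
    # The checkbox value arrives as a plain bool, fourth positional argument.
    return f"{model_id} -> {q_method}-GGUF (private={private_repo})"

demo = gr.Interface(
    fn=process_model,
    inputs=[
        gr.Textbox(label="Hub Model ID"),
        gr.Dropdown(["Q4_K_M", "Q8_0"], label="Quantization Method"),
        gr.Textbox(label="HF Write Token", type="password"),
        gr.Checkbox(value=False, label="Private Repo"),
    ],
    outputs=gr.Textbox(show_label=False),
)

if __name__ == "__main__":
    demo.launch()
```
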
@@ -145,7 +167,7 @@ iface = gr.Interface(
         gr.Image(show_label=False),
     ],
     title="Create your own GGUF Quants, blazingly fast ⚡!",
-    description="The space takes a HF repo as an input, quantises it and creates anoter repo containing the selected quant under your HF user namespace. You need to specify a write token obtained in https://hf.co/settings/tokens.",
+    description="The space takes an HF repo as an input, quantises it and creates a Public repo containing the selected quant under your HF user namespace. You need to specify a write token obtained in https://hf.co/settings/tokens.",
     article="<p>Find your write token at <a href='https://huggingface.co/settings/tokens' target='_blank'>token settings</a></p>",
 
 )
 