reach-vb HF staff committed on
Commit
eefa44d
·
verified ·
1 Parent(s): 489798a
Files changed (1) hide show
  1. app.py +9 -92
app.py CHANGED
@@ -16,95 +16,19 @@ from apscheduler.schedulers.background import BackgroundScheduler
16
 
17
  from textwrap import dedent
18
 
 
 
19
  HF_TOKEN = os.environ.get("HF_TOKEN")
20
 
21
- def process_model(model_id, q_method, private_repo, oauth_token: gr.OAuthToken | None):
22
  if oauth_token.token is None:
23
- raise ValueError("You must be logged in to use mlx-my-repo")
24
  model_name = model_id.split('/')[-1]
25
-
 
26
  try:
27
- api = HfApi(token=oauth_token.token)
28
-
29
- dl_pattern = ["*.md", "*.json", "*.model"]
30
-
31
- pattern = (
32
- "*.safetensors"
33
- if any(
34
- file.path.endswith(".safetensors")
35
- for file in api.list_repo_tree(
36
- repo_id=model_id,
37
- recursive=True,
38
- )
39
- )
40
- else "*.bin"
41
- )
42
-
43
- dl_pattern += pattern
44
-
45
- api.snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
46
- print("Model downloaded successfully!")
47
- print(f"Current working directory: {os.getcwd()}")
48
- print(f"Model directory contents: {os.listdir(model_name)}")
49
-
50
- conversion_script = "convert_hf_to_gguf.py"
51
- fp16_conversion = f"python llama.cpp/{conversion_script} {model_name} --outtype f16 --outfile {fp16}"
52
- result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
53
- print(result)
54
- if result.returncode != 0:
55
- raise Exception(f"Error converting to fp16: {result.stderr}")
56
- print("Model converted to fp16 successfully!")
57
- print(f"Converted model path: {fp16}")
58
-
59
- username = whoami(oauth_token.token)["name"]
60
- quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
61
- quantized_gguf_path = quantized_gguf_name
62
-
63
- quantise_ggml = f"./llama.cpp/llama-quantize {fp16} {quantized_gguf_path} {q_method}"
64
- result = subprocess.run(quantise_ggml, shell=True, capture_output=True)
65
- if result.returncode != 0:
66
- raise Exception(f"Error quantizing: {result.stderr}")
67
- print(f"Quantized successfully with {imatrix_q_method if use_imatrix else q_method} option!")
68
- print(f"Quantized model path: {quantized_gguf_path}")
69
-
70
- # Create empty repo
71
- new_repo_url = api.create_repo(repo_id=f"{username}/{model_name}-{imatrix_q_method if use_imatrix else q_method}-GGUF", exist_ok=True, private=private_repo)
72
- new_repo_id = new_repo_url.repo_id
73
- print("Repo created successfully!", new_repo_url)
74
-
75
- try:
76
- card = ModelCard.load(model_id, token=oauth_token.token)
77
- except:
78
- card = ModelCard("")
79
- if card.data.tags is None:
80
- card.data.tags = []
81
- card.data.tags.append("llama-cpp")
82
- card.data.tags.append("gguf-my-repo")
83
- card.data.base_model = model_id
84
- card.text = dedent(
85
- f"""
86
- # {new_repo_id}
87
- """
88
- )
89
- card.save(f"README.md")
90
-
91
- try:
92
- print(f"Uploading quantized model: {quantized_gguf_path}")
93
- api.upload_file(
94
- path_or_fileobj=quantized_gguf_path,
95
- path_in_repo=quantized_gguf_name,
96
- repo_id=new_repo_id,
97
- )
98
- except Exception as e:
99
- raise Exception(f"Error uploading quantized model: {e}")
100
-
101
- api.upload_file(
102
- path_or_fileobj=f"README.md",
103
- path_in_repo=f"README.md",
104
- repo_id=new_repo_id,
105
- )
106
- print(f"Uploaded successfully with {imatrix_q_method if use_imatrix else q_method} option!")
107
-
108
  return (
109
  f'Find your repo <a href=\'{new_repo_url}\' target="_blank" style="text-decoration:underline">here</a>',
110
  "llama.png",
@@ -112,7 +36,7 @@ def process_model(model_id, q_method, private_repo, oauth_token: gr.OAuthToken |
112
  except Exception as e:
113
  return (f"Error: {e}", "error.png")
114
  finally:
115
- shutil.rmtree(model_name, ignore_errors=True)
116
  print("Folder cleaned up successfully!")
117
 
118
  css="""/* Custom CSS to allow scrolling */
@@ -139,18 +63,11 @@ with gr.Blocks(css=css) as demo:
139
  )
140
 
141
 
142
- private_repo = gr.Checkbox(
143
- value=False,
144
- label="Private Repo",
145
- info="Create a private repo under your username."
146
- )
147
-
148
  iface = gr.Interface(
149
  fn=process_model,
150
  inputs=[
151
  model_id,
152
  q_method,
153
- private_repo,
154
  ],
155
  outputs=[
156
  gr.Markdown(label="output"),
 
16
 
17
  from textwrap import dedent
18
 
19
+ import mlx_lm import convert
20
+
21
  HF_TOKEN = os.environ.get("HF_TOKEN")
22
 
23
+ def process_model(model_id, q_method,):
24
  if oauth_token.token is None:
25
+ raise ValueError("You must be logged in to use GGUF-my-repo")
26
  model_name = model_id.split('/')[-1]
27
+ username = whoami(oauth_token.token)["name"]
28
+
29
  try:
30
+ upload_repo = username + "/" + model_name + "-mlx"
31
+ convert(model_id, quantize=True, upload_repo=upload_repo)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  return (
33
  f'Find your repo <a href=\'{new_repo_url}\' target="_blank" style="text-decoration:underline">here</a>',
34
  "llama.png",
 
36
  except Exception as e:
37
  return (f"Error: {e}", "error.png")
38
  finally:
39
+ shutil.rmtree("mlx_model", ignore_errors=True)
40
  print("Folder cleaned up successfully!")
41
 
42
  css="""/* Custom CSS to allow scrolling */
 
63
  )
64
 
65
 
 
 
 
 
 
 
66
  iface = gr.Interface(
67
  fn=process_model,
68
  inputs=[
69
  model_id,
70
  q_method,
 
71
  ],
72
  outputs=[
73
  gr.Markdown(label="output"),