davanstrien's picture
davanstrien HF staff
demo
182c1d0
raw
history blame
2.46 kB
from huggingface_hub import list_models
from cachetools import cached, TTLCache
from toolz import groupby, valmap
import gradio as gr
from tqdm.auto import tqdm
import pandas as pd
@cached(TTLCache(maxsize=10, ttl=60 * 60 * 3))
def get_all_models():
    """Return every model yielded by ``list_models(cardData=True)``.

    ``None`` entries are dropped. The result is memoised by a ``TTLCache``
    configured with ``ttl=60 * 60 * 3`` (three hours, assuming the cache's
    default timer counts seconds), so repeated calls within that window reuse
    the first listing instead of querying again.
    """
    kept = []
    # tqdm wraps a bare iterator, so it shows a running count as items arrive.
    for entry in tqdm(iter(list_models(cardData=True))):
        if entry is not None:
            kept.append(entry)
    return kept
def has_base_model_info(model):
    """Tell whether ``model`` declares a single base model in its card data.

    Returns ``True`` only when ``model.cardData`` is truthy and its
    ``"base_model"`` entry is a non-empty string. Anything else -- no card
    data, no such entry, a non-string entry (for example a list), or an
    ``AttributeError`` while reading these values -- yields ``False``.
    """
    try:
        card = model.cardData
        if not card:
            return False
        parent = card.get("base_model")
        # Truthiness is checked first so empty strings are rejected too.
        return bool(parent) and isinstance(parent, str)
    except AttributeError:
        # Raised when ``model`` has no ``cardData`` or the card has no ``get``.
        return False
# Partition every listed model by the result of has_base_model_info, so the
# mapping is keyed by True / False.
all_models = get_all_models()
grouped_by_has_base_model_info = groupby(has_base_model_info, all_models)
print(valmap(len, grouped_by_has_base_model_info))

# groupby only creates keys that actually occur, so a bucket may be missing
# entirely; fall back to an empty list instead of calling len() on None.
models_with_base_model_info = grouped_by_has_base_model_info.get(True, [])
models_without_base_model_info = grouped_by_has_base_model_info.get(False, [])

# Guard the percentage against an empty model listing (division by zero).
percent_with_base_model_info = (
    round(len(models_with_base_model_info) / len(all_models) * 100, 2)
    if all_models
    else 0.0
)

# Markdown summary rendered at the top of the app.
summary = f"""{len(models_with_base_model_info)} models have base model info.
{len(models_without_base_model_info)} models don't have base model info.
Currently {percent_with_base_model_info}% of models have base model info."""

base_models = [
    model.cardData.get("base_model") for model in models_with_base_model_info
]

# One row per distinct base model together with how often it occurs.
df = pd.DataFrame(
    pd.DataFrame({"base_model": base_models}).value_counts()
).reset_index()

# Lookup table: base model name -> list of models that declare it.
grouped_by_base_model = groupby(
    lambda x: x.cardData.get("base_model"), models_with_base_model_info
)

# Choices offered in the dropdown, in the order produced by value_counts().
all_base_models = df["base_model"].to_list()
def return_models_for_base_model(base_model):
    """Render a Markdown listing of the models that declare ``base_model``.

    Args:
        base_model: Key to look up in the module-level
            ``grouped_by_base_model`` mapping. A falsy value (``None`` or
            ``""``) is treated as "nothing selected".

    Returns:
        A Markdown string made of a heading, the number of children, and one
        line per child model (link plus download count), ordered by downloads
        from highest to lowest. An empty string when nothing is selected; a
        listing with zero children when the key is not in the mapping.
    """
    if not base_model:
        # Nothing selected: there is nothing to look up or render.
        return ""

    # An unknown key yields an empty listing instead of crashing in sorted().
    models = grouped_by_base_model.get(base_model) or []
    # Sort by downloads, highest first. A missing (None) count is ranked as 0
    # so that None is never compared with an int.
    models = sorted(models, key=lambda m: m.downloads or 0, reverse=True)

    parts = [
        f"## {base_model} children\n\n",
        f"{base_model} has {len(models)} children\n\n",
    ]
    for model in models:
        url = f"https://huggingface.co/{model.modelId}"
        parts.append(
            f"[{model.modelId}]({url}) | number of downloads {model.downloads}" + "\n\n"
        )
    return "".join(parts)
# Page layout: summary text on top, then a dropdown whose selection drives a
# Markdown panel listing the models for the chosen base model.
with gr.Blocks() as demo:
    gr.Markdown(value="### Models with base model info")
    gr.Markdown(value=summary)
    gr.Markdown(value="### Find all models trained from a base model")
    base_model = gr.Dropdown(choices=all_base_models, label="Base Model")
    results = gr.Markdown()
    # Re-render the listing every time the dropdown value changes.
    base_model.change(
        fn=return_models_for_base_model,
        inputs=base_model,
        outputs=results,
    )
    # gr.DataFrame(df)

demo.launch()