Update app.py
app.py CHANGED
@@ -7,6 +7,8 @@ import re
 import config
 import plotly.graph_objects as go
 from typing import Dict
+import json
+import os
 from leaderboard import (
     get_current_leaderboard,
     update_leaderboard,
@@ -419,6 +421,90 @@ def continue_conversation(prompt, left_chat, right_chat, left_model, right_model
         tie_count
     )
 
+def normalize_parameter_size(param_size: str) -> str:
+    """Convert parameter size to billions (B) format."""
+    try:
+        # Remove any spaces and convert to uppercase for consistency
+        param_size = param_size.replace(" ", "").upper()
+
+        # Extract the number and unit
+        if 'M' in param_size:
+            # Convert millions to billions
+            number = float(param_size.replace('M', '').replace(',', ''))
+            return f"{number/1000:.2f}B"
+        elif 'B' in param_size:
+            # Already in billions, just format consistently
+            number = float(param_size.replace('B', '').replace(',', ''))
+            return f"{number:.2f}B"
+        else:
+            # If no unit or unrecognized format, try to convert the raw number
+            number = float(param_size.replace(',', ''))
+            if number >= 1000000000:
+                return f"{number/1000000000:.2f}B"
+            elif number >= 1000000:
+                return f"{number/1000000000:.2f}B"
+            else:
+                return f"{number/1000000000:.2f}B"
+    except:
+        return param_size  # Return original if conversion fails
+
+def load_latest_model_stats():
+    """Load model stats from the model_stats.json file."""
+    try:
+        # Read directly from model_stats.json in root directory
+        with open('model_stats.json', 'r') as f:
+            stats = json.load(f)
+
+        # Convert stats to table format
+        table_data = []
+        headers = ["Model", "VRAM (GB)", "Size", "Parameters", "Quantization", "Tokens/sec", "Gen Tokens/sec", "Total Tokens", "Response Time (s)"]
+
+        for model in stats:
+            if not model.get("success", False):  # Skip failed tests
+                continue
+
+            perf = model.get("performance", {})
+            info = model.get("model_info", {})
+
+            try:
+                # Format numeric values with 2 decimal places
+                model_size = float(info.get("size", 0))  # Get raw size
+                vram_gb = round(model_size/1024/1024/1024, 2)  # Convert to GB
+                tokens_per_sec = round(float(perf.get("tokens_per_second", 0)), 2)
+                gen_tokens_per_sec = round(float(perf.get("generation_tokens_per_second", 0)), 2)
+                total_tokens = perf.get("total_tokens", 0)
+                response_time = round(float(perf.get("response_time", 0)), 2)
+
+                # Normalize parameter size to billions format
+                param_size = normalize_parameter_size(info.get("parameter_size", "Unknown"))
+
+                row = [
+                    model.get("model_name", "Unknown"),  # String
+                    vram_gb,  # Number (2 decimals)
+                    model_size,  # Number (bytes)
+                    param_size,  # String (normalized to B)
+                    info.get("quantization_level", "Unknown"),  # String
+                    tokens_per_sec,  # Number (2 decimals)
+                    gen_tokens_per_sec,  # Number (2 decimals)
+                    total_tokens,  # Number (integer)
+                    response_time  # Number (2 decimals)
+                ]
+                table_data.append(row)
+            except Exception as row_error:
+                logger.warning(f"Skipping model {model.get('model_name', 'Unknown')}: {str(row_error)}")
+                continue
+
+        if not table_data:
+            return None, "No valid model stats found"
+
+        # Sort by tokens per second (numerically)
+        table_data.sort(key=lambda x: float(x[5]) if isinstance(x[5], (int, float)) else 0, reverse=True)
+
+        return headers, table_data
+    except Exception as e:
+        logger.error(f"Error in load_latest_model_stats: {str(e)}")
+        return None, f"Error loading model stats: {str(e)}"
+
 # Initialize Gradio Blocks
 with gr.Blocks(css="""
     #dice-button {
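A quick sanity check of the new normalize_parameter_size helper, as a minimal sketch: it assumes app.py imports cleanly as a module (importing it also builds the Gradio Blocks layout as a side effect), and the input values are illustrative, not taken from the test data.

from app import normalize_parameter_size

# Unit suffixes are normalized to a two-decimal "B" figure.
assert normalize_parameter_size("3.2B") == "3.20B"        # already billions, reformatted
assert normalize_parameter_size("700M") == "0.70B"        # millions converted to billions
assert normalize_parameter_size("1,100M") == "1.10B"      # thousands separators are stripped
assert normalize_parameter_size("8000000000") == "8.00B"  # bare parameter count, no unit
assert normalize_parameter_size("UNKNOWN") == "UNKNOWN"   # unparseable input falls through the except branch unchanged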
@@ -561,6 +647,32 @@ with gr.Blocks(css="""
         outputs=[release_notes]
     )
 
+    # Model Stats Tab
+    with gr.Tab("Model Stats"):
+        gr.Markdown("""
+        ### Model Performance Statistics
+
+        This tab shows detailed performance metrics for each model, tested using a creative writing prompt.
+        The tests were performed on an **AMD Radeon RX 7600 XT 16GB GPU**.
+
+        For detailed information about the testing methodology, parameters, and hardware setup, please refer to the
+        [README_model_stats.md](https://huggingface.co/spaces/k-mktr/gpu-poor-llm-arena/blob/main/README_model_stats.md).
+
+        """)
+
+        headers, table_data = load_latest_model_stats()
+        if headers:
+            model_stats_table = gr.Dataframe(
+                headers=headers,
+                value=table_data,
+                row_count=len(table_data),
+                col_count=len(headers),
+                interactive=False,
+                label="Model Performance Statistics"
+            )
+        else:
+            gr.Markdown(f"⚠️ {table_data}")  # Show error message if loading failed
+
     # Define interactions
     submit_btn.click(
         battle_arena,