Create app.py
app.py
ADDED
@@ -0,0 +1,70 @@
+import gradio as gr
+from transformers import pipeline
+import numpy as np
+import soundfile as sf
+import pandas as pd
+import matplotlib.pyplot as plt
+import random
+
+# Load the zero-shot audio classification model
+audio_classifier = pipeline(task="zero-shot-audio-classification", model="laion/clap-htsat-unfused")
+
+# Function to generate a random color
+def random_color():
+    return [random.uniform(0, 1) for _ in range(3)]
+
+# Define the classification function
+def classify_audio(audio_filepath, labels):
+    # Parse the comma-separated labels, dropping whitespace and empty entries
+    labels = [label.strip() for label in labels.split(',') if label.strip()]
+    if not labels:
+        raise gr.Error("Please enter at least one candidate label.")
+    audio_data, sample_rate = sf.read(audio_filepath)  # Read the audio file
+
+    # Convert to mono if audio is multi-channel
+    if audio_data.ndim > 1:
+        audio_data = np.mean(audio_data, axis=1)
+
+    # The pipeline assumes a raw array is already at the model's sampling
+    # rate (48 kHz for CLAP); resample with linear interpolation if not
+    target_rate = audio_classifier.feature_extractor.sampling_rate
+    if sample_rate != target_rate:
+        duration = len(audio_data) / sample_rate
+        new_length = int(duration * target_rate)
+        audio_data = np.interp(
+            np.linspace(0.0, duration, new_length, endpoint=False),
+            np.linspace(0.0, duration, len(audio_data), endpoint=False),
+            audio_data,
+        )
+
+    # Get classification results
+    results = audio_classifier(audio_data, candidate_labels=labels)
+
+    # Convert scores to percentages and create a DataFrame
+    data = [(result['label'], round(result['score'] * 100, 2)) for result in results]
+    df = pd.DataFrame(data, columns=["Label", "Score (%)"])
+
+    # Create a horizontal bar chart with random colors
+    fig, ax = plt.subplots(figsize=(10, len(labels)))
+    for i in range(len(df)):
+        ax.barh(df['Label'][i], df['Score (%)'][i], color=random_color())
+    ax.set_xlabel('Score (%)')
+    ax.set_title('Audio Classification Scores')
+    ax.grid(axis='x')
+
+    return df, fig
+
+# Create the Gradio interface
+iface = gr.Interface(
+    classify_audio,
+    inputs=[
+        gr.Audio(label="Upload your audio file", type="filepath"),
+        gr.Textbox(label="Enter candidate labels separated by commas")
+    ],
+    outputs=[gr.Dataframe(), gr.Plot()],
+    title="Zero-Shot Audio Classifier",
+    description="Upload an audio file and enter candidate labels to classify the audio."
+)
+
+# Launch the interface
+iface.launch()
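
For reference, a minimal sketch of calling the classifier directly, outside the Gradio UI. The audio path and labels below are hypothetical placeholders, and passing a path string assumes ffmpeg is available for decoding; the pipeline returns a list of {'score', 'label'} dicts sorted by descending score.

    # Hypothetical smoke test; "dog_bark.wav" is a placeholder path
    results = audio_classifier(
        "dog_bark.wav",
        candidate_labels=["dog barking", "car horn", "rain"],
    )
    print(results)  # [{'score': ..., 'label': ...}, ...], highest score first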
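
Deploying this file as a Gradio Space would also require the imported libraries in the environment; a plausible, unpinned requirements.txt (an assumption, not part of this commit) would look like:

    gradio
    transformers
    torch
    numpy
    soundfile
    pandas
    matplotlib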