Add the Gradio script and dependencies
- app.py +45 -0
- database.py +118 -0
- s3_utils.py +66 -0
app.py
ADDED
@@ -0,0 +1,45 @@
+import gradio as gr
+import laion_clap
+from qdrant_client import QdrantClient
+import os
+
+# Use environment variables for configuration
+QDRANT_HOST = os.getenv('QDRANT_HOST', 'localhost')
+QDRANT_PORT = int(os.getenv('QDRANT_PORT', 6333))
+
+# Connect to Qdrant
+client = QdrantClient(QDRANT_HOST, port=QDRANT_PORT)
+print("[INFO] Client created...")
+
+# Load the model
+print("[INFO] Loading the model...")
+model_name = "laion/larger_clap_music"
+model = laion_clap.CLAP_Module(enable_fusion=False)
+model.load_ckpt()  # download the default pretrained checkpoint
+
+# Gradio interface
+max_results = 10
+
+def sound_search(query):
+    text_embed = model.get_text_embedding([query, ''])[0]  # trick because the model can't accept a singleton
+    hits = client.search(
+        collection_name="demo_db7",
+        query_vector=text_embed,
+        limit=max_results,
+    )
+    return [
+        gr.Audio(
+            hit.payload['audio_path'],
+            label=f"style: {hit.payload['style']} -- score: {hit.score}")
+        for hit in hits
+    ]
+
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """# Sound search database """
+    )
+    inp = gr.Textbox(placeholder="What sound are you looking for?")
+    out = [gr.Audio(label=f"{x}") for x in range(max_results)]  # needed to create distinct output objects
+    inp.change(sound_search, inp, out)
+
+demo.launch()
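Note (not part of the commit): `inp.change` is wired to exactly `max_results` output components, and Gradio expects the callback to return one value per output. If the collection ever returns fewer hits than `max_results`, a padded variant avoids a count mismatch; a minimal sketch under that assumption:

    def sound_search_padded(query):
        # Same search as above, but pad with None so the number of returned values
        # always matches the number of gr.Audio output components.
        text_embed = model.get_text_embedding([query, ''])[0]
        hits = client.search(collection_name="demo_db7", query_vector=text_embed, limit=max_results)
        audios = [
            gr.Audio(hit.payload['audio_path'],
                     label=f"style: {hit.payload['style']} -- score: {hit.score}")
            for hit in hits
        ]
        return audios + [None] * (max_results - len(audios))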
database.py
ADDED
@@ -0,0 +1,118 @@
+import gc
+import hashlib
+import os
+from glob import glob
+from pathlib import Path
+
+import librosa
+import torch
+from diskcache import Cache
+from qdrant_client import QdrantClient
+from qdrant_client.http import models
+from tqdm import tqdm
+from transformers import ClapModel, ClapProcessor
+
+from s3_utils import s3_auth, upload_file_to_bucket
+from dotenv import load_dotenv
+load_dotenv()
+
+# PARAMETERS #######################################################################################
+CACHE_FOLDER = '/home/arthur/data/music/demo_audio_search/audio_embeddings_cache_individual/'
+KAGGLE_DB_PATH = '/home/arthur/data/kaggle/park-spring-2023-music-genre-recognition/train/train'
+AWS_ACCESS_KEY_ID = os.environ['AWS_ACCESS_KEY_ID']
+AWS_SECRET_ACCESS_KEY = os.environ['AWS_SECRET_ACCESS_KEY']
+S3_BUCKET = "synthia-research"
+S3_FOLDER = "huggingface_spaces_demo"
+AWS_REGION = "eu-west-3"
+
+s3 = s3_auth(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION)
+
+
+# Utility functions ################################################################################
+def get_md5(fpath):
+    with open(fpath, "rb") as f:
+        file_hash = hashlib.md5()
+        while chunk := f.read(8192):
+            file_hash.update(chunk)
+    return file_hash.hexdigest()
+
+
+def get_audio_embedding(model, audio_file, cache):
+    # Compute a unique hash for the audio file
+    file_key = f"{model.config._name_or_path}" + get_md5(audio_file)
+    if file_key in cache:
+        # If the embedding for this file is cached, retrieve it
+        embedding = cache[file_key]
+    else:
+        # Otherwise, compute the embedding and cache it
+        y, sr = librosa.load(audio_file, sr=48000)
+        inputs = processor(audios=y, sampling_rate=sr, return_tensors="pt")
+        embedding = model.get_audio_features(**inputs)[0]
+        gc.collect()
+        torch.cuda.empty_cache()
+        cache[file_key] = embedding
+    return embedding
+
+
+
+# ################## Loading the CLAP model ###################
+# loading the model
+print("[INFO] Loading the model...")
+model_name = "laion/larger_clap_general"
+model = ClapModel.from_pretrained(model_name)
+processor = ClapProcessor.from_pretrained(model_name)
+
+# Initialize the cache
+os.makedirs(CACHE_FOLDER, exist_ok=True)
+cache = Cache(CACHE_FOLDER)
+
+# Creating a qdrant collection #####################################################################
+client = QdrantClient(os.environ['QDRANT_URL'], api_key=os.environ['QDRANT_KEY'])
+print("[INFO] Client created...")
+
+print("[INFO] Creating qdrant data collection...")
+if not client.collection_exists("demo_spaces_db"):
+    client.create_collection(
+        collection_name="demo_spaces_db",
+        vectors_config=models.VectorParams(
+            size=model.config.projection_dim,
+            distance=models.Distance.COSINE
+        ),
+    )
+
+# Embed the audio files!
+audio_files = [p for p in glob(os.path.join(KAGGLE_DB_PATH, '*/*.wav'))]
+chunk_size, idx = 1, 0
+total_chunks = int(len(audio_files) / chunk_size)
+
+# Use tqdm for a progress bar
+print("Uploading to DB + S3")
+for i in tqdm(range(0, len(audio_files), chunk_size),
+              desc="[INFO] Uploading data records to data collection..."):
+    chunk = audio_files[i:i + chunk_size]  # Get a chunk of audio files
+    records = []
+    for audio_file in chunk:
+        embedding = get_audio_embedding(model, audio_file, cache)
+        s3key = f'{S3_FOLDER}/{Path(audio_file).name}'
+        with open(audio_file, 'rb') as file_obj:
+            upload_file_to_bucket(s3, file_obj, S3_BUCKET, s3key)
+        records.append(
+            models.PointStruct(
+                id=idx, vector=embedding,
+                payload={
+                    "audio_path": audio_file,
+                    "audio_s3url": f"https://{S3_BUCKET}.s3.amazonaws.com/{s3key}",
+                    "style": audio_file.split('/')[-1]}
+            )
+        )
+        print(f"Uploaded s3 file: {idx}")
+        idx += 1
+    client.upload_points(
+        collection_name="demo_spaces_db",
+        points=records
+    )
+print("[INFO] Successfully uploaded data records to data collection!")
+
+
+# It's a good practice to close the cache when done
+cache.close()
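Note (not part of the commit): a quick sanity check after running database.py, assuming the same QDRANT_URL / QDRANT_KEY environment variables are set, to confirm the points landed in the demo_spaces_db collection:

    import os
    from qdrant_client import QdrantClient

    client = QdrantClient(os.environ['QDRANT_URL'], api_key=os.environ['QDRANT_KEY'])
    print(client.count(collection_name="demo_spaces_db"))            # total number of indexed points
    print(client.scroll(collection_name="demo_spaces_db", limit=3))  # peek at a few stored payloads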
s3_utils.py
ADDED
@@ -0,0 +1,66 @@
+import hashlib
+from enum import Enum
+
+import boto3
+from botocore.client import BaseClient
+
+
+# S3 HANDLING ######################################################################################
+def get_md5(fpath):
+    with open(fpath, "rb") as f:
+        file_hash = hashlib.md5()
+        while chunk := f.read(8192):
+            file_hash.update(chunk)
+    return file_hash.hexdigest()
+
+
+def upload_file_to_bucket(s3_client, file_obj, bucket, s3key):
+    """Upload a file to an S3 bucket
+    :param s3_client: boto3 S3 client used for the upload
+    :param file_obj: File object to upload
+    :param bucket: Bucket to upload to
+    :param s3key: Destination object key in the bucket
+    :return: None on success; raises a botocore exception on failure
+    """
+    # Upload the file
+    return s3_client.upload_fileobj(
+        file_obj, bucket, s3key,
+        ExtraArgs={"ACL": "public-read", "ContentType": "audio/mpeg"}
+    )
+
+
+def s3_auth(aws_access_key_id, aws_secret_access_key, region_name) -> BaseClient:
+    s3 = boto3.client(
+        service_name='s3',
+        aws_access_key_id=aws_access_key_id,
+        aws_secret_access_key=aws_secret_access_key,
+        region_name=region_name
+    )
+    return s3
+
+
+def get_list_of_buckets(s3: BaseClient):
+    response = s3.list_buckets()
+    buckets = {}
+
+    for bucket in response['Buckets']:
+        buckets[bucket['Name']] = bucket['Name']
+
+    BucketName = Enum('BucketName', buckets)
+    return BucketName
+
+
+if __name__ == '__main__':
+    import os
+
+    AWS_ACCESS_KEY_ID = os.environ['AWS_ACCESS_KEY_ID']
+    AWS_SECRET_ACCESS_KEY = os.environ['AWS_SECRET_ACCESS_KEY']
+    S3_BUCKET = "synthia-research"
+    S3_FOLDER = "huggingface_spaces_demo"
+    AWS_REGION = "eu-west-3"
+
+    s3 = s3_auth(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION)
+    print(s3.list_buckets())
+
+    s3key = f'{S3_FOLDER}/015.WAV'
+    #print(upload_file_to_bucket(s3, file_obj, S3_BUCKET, s3key))
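Note (not part of the commit): hypothetical usage of upload_file_to_bucket, mirroring how database.py builds the public S3 URL; 'example.wav' is a placeholder local file:

    import os
    from s3_utils import s3_auth, upload_file_to_bucket

    s3 = s3_auth(os.environ['AWS_ACCESS_KEY_ID'], os.environ['AWS_SECRET_ACCESS_KEY'], 'eu-west-3')
    s3key = 'huggingface_spaces_demo/example.wav'   # hypothetical destination key
    with open('example.wav', 'rb') as file_obj:     # hypothetical local file
        upload_file_to_bucket(s3, file_obj, 'synthia-research', s3key)
    print(f"https://synthia-research.s3.amazonaws.com/{s3key}")  # public URL, as built in database.py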