vmoras's picture
Fix folder paths
4df6e8a
raw
history blame
2.61 kB
import os
from pathlib import Path
from datetime import timedelta
from google.cloud import storage
from huggingface_hub import hf_hub_download
from google.cloud.storage import transfer_manager
def download_credentials() -> None:
    """
    Fetch the GCP credentials file from the Hugging Face Hub.

    The dataset repository id is read from the ``DATA`` environment variable and the access token
    from ``HUB_TOKEN``. ``credentials.json`` is requested with ``assets`` as the local directory,
    which is created beforehand when it does not exist yet.

    :return: None
    """
    target_dir = 'assets'
    os.makedirs(target_dir, exist_ok=True)

    dataset_repo = os.environ.get('DATA')
    hub_token = os.environ.get('HUB_TOKEN')
    hf_hub_download(
        repo_id=dataset_repo,
        repo_type='dataset',
        filename="credentials.json",
        token=hub_token,
        local_dir=target_dir,
    )
def upload_folder(client_name: str, source_directory: str) -> None:
    """
    Uploads the given source directory to the GCP bucket.

    The destination prefix inside the bucket is the part of ``source_directory`` that starts at the
    first occurrence of ``client_name``, so the client folder becomes the main folder in the bucket.
    Individual upload failures are logged as warnings; they do not raise.

    :param client_name: name of the client, used as name of the main folder
    :param source_directory: directory with all the sub-folders and files to upload
    :return: None
    :raises ValueError: if ``client_name`` does not appear in ``source_directory``
    """
    import logging

    logger = logging.getLogger(__name__)

    # Path where the files in the source directory will be saved. Separators are normalised and
    # trailing slashes dropped so the prefix never contains "\\" or ends in "//".
    normalized_directory = source_directory.replace("\\", "/").rstrip("/")
    delimiter = normalized_directory.find(client_name)
    if delimiter == -1:
        # str.find returns -1 on a miss; slicing with it would silently keep only the last character.
        raise ValueError(
            f"client_name {client_name!r} was not found in source_directory {source_directory!r}"
        )
    destination_directory = normalized_directory[delimiter:] + "/"

    # Filter so the list only includes files, not directories themselves.
    string_paths = [
        str(path.relative_to(source_directory)).replace("\\", "/")
        for path in Path(source_directory).rglob("*")
        if path.is_file()
    ]

    # Start the upload. Threads are used instead of processes because Gradio was rebooted with the
    # second one.
    bucket = STORAGE_CLIENT.bucket('clients-bella')
    results = transfer_manager.upload_many_from_filenames(
        bucket, filenames=string_paths, source_directory=source_directory, max_workers=1,
        blob_name_prefix=destination_directory, worker_type=transfer_manager.THREAD
    )

    # The transfer manager reports a failed upload by placing the exception in the result list
    # (one entry per input file, in order) instead of raising it, so surface those explicitly.
    for file_name, result in zip(string_paths, results):
        if isinstance(result, Exception):
            logger.warning(
                "Failed to upload %s to %s%s: %s", file_name, destination_directory, file_name, result
            )
    return
def get_link_file(client_name: str, type_media: str, media_name: str) -> str:
    """
    Gets a public link during 15 minutes to a given file in GCP

    ``waiting.wav`` is always served from the ``audios_bella`` bucket. Any other media is looked up in
    the ``clients-bella`` bucket as the first blob whose name starts with
    ``{client_name}/media/{type_media}/{media_name}``.

    :param client_name: name of the client (name of the main folder)
    :param type_media: if it is audio or video
    :param media_name: name of the desired media file
    :return: public link to the file
    :raises FileNotFoundError: if no blob in the bucket matches the requested media
    """
    if media_name == 'waiting.wav':
        blob = STORAGE_CLIENT.bucket('audios_bella').blob('waiting.wav')
    else:
        prefix = f'{client_name}/media/{type_media}/{media_name}'
        bucket = STORAGE_CLIENT.bucket('clients-bella')
        # Only the first match is used, so ask the API for a single result. A default of None
        # avoids leaking a bare StopIteration to the caller when nothing matches.
        blob = next(iter(bucket.list_blobs(prefix=prefix, max_results=1)), None)
        if blob is None:
            raise FileNotFoundError(f"No file found in bucket 'clients-bella' with prefix '{prefix}'")

    signed_url = blob.generate_signed_url(expiration=timedelta(minutes=15))
    return signed_url
# Module initialisation: fetch the credentials file first, then build the shared storage client
# from the JSON key whose path is given by the GOOGLE_APPLICATION_CREDENTIALS environment variable.
download_credentials()
STORAGE_CLIENT = storage.Client.from_service_account_json(
    os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
)