import modal
from typing import List, Dict, Tuple, Union, Callable

# from preprocessing import FileIO

# assets = modal.Mount.from_local_dir(
#     "./data",
#     # condition=lambda pth: not ".venv" in pth,
#     remote_path="./data",
# )

stub = modal.Stub("vector-search-project")

vector_search = modal.Image.debian_slim().pip_install(
    "sentence_transformers==2.2.2",
    "llama_index==0.9.6.post1",
    "angle_emb==0.1.5",
)

stub.volume = modal.Volume.new()


@stub.function(image=vector_search,
               gpu="A100",
               timeout=600,
               volumes={"/root/models": stub.volume},
               # secrets are available in the environment with os.environ["SECRET_NAME"]
               # secret=modal.Secret.from_name("my-huggingface-secret"),
               )
def encode_content_splits(content_splits,
                          model=None,  # path or name of the model
                          **kwargs):
    """kwargs are passed through in case the encode method takes extra arguments."""
    from sentence_transformers import SentenceTransformer
    import os, time

    models_list = os.listdir('/root/models')
    print("Models:", models_list)

    # Normalize a model path/name down to its basename, e.g. 'org/name/' -> 'name'
    if isinstance(model, str):
        model = model.rstrip('/').split('/')[-1]

    if isinstance(model, str):
        if model in models_list:
            if "UAE-Large-V1-300" in model:
                print("Loading finetuned UAE-Large-V1-300 model from Modal volume")
                from angle_emb import AnglE
                model = AnglE.from_pretrained('WhereIsAI/UAE-Large-V1',
                                              pretrained_model_path=os.path.join('/root/models', model),
                                              pooling_strategy='cls').cuda()
                kwargs['to_numpy'] = True
                # This model doesn't accept a list of lists, so flatten all
                # episodes into one batch of chunks (kept as a single "episode"
                # so the zip below pairs chunks with embeddings, not characters)
                if isinstance(content_splits[0], list):
                    content_splits = [[chunk for episode in content_splits for chunk in episode]]
            else:
                print(f"Loading model {model} from Modal volume")
                model = SentenceTransformer(os.path.join('/root/models', model))
        else:
            print(f"Model {model} not found in Modal volume, loading from HuggingFace")
            model = SentenceTransformer(model)
    else:
        print("Using the model object provided as argument")

    if 'save' in kwargs:
        if isinstance(kwargs['save'], str):
            fname = os.path.join('/root/models', kwargs['save'].rstrip('/').split('/')[-1])
            print(f"Saving model in {fname}")
            # model.save(fname)
            print(f"Model saved in {fname}")
        kwargs.pop('save')

    print("Starting encoding")
    start = time.perf_counter()
    # For each episode, pair every chunk with its embedding
    emb = [list(zip(episode, model.encode(episode, **kwargs))) for episode in content_splits]
    end = time.perf_counter() - start
    print(f"GPU processing lasted {end:.2f} seconds")
    print("Encoding finished")
    return emb


@stub.function(image=vector_search,
               gpu="A100",
               timeout=240,
               mounts=[modal.Mount.from_local_dir("./data",
                                                  remote_path="/root/data",
                                                  condition=lambda pth: ".json" in pth)],
               volumes={"/root/models": stub.volume},
               )
def finetune(training_path='./data/training_data_300.json',
             valid_path='./data/validation_data_100.json',
             model_id=None,
             ignore_existing=False):
    import os
    print("Data:", os.listdir('/root/data'))
    print("Models:", os.listdir('/root/models'))

    if model_id is None:
        print("No model ID provided")
        return None
    elif isinstance(model_id, str):
        model_id = model_id.rstrip('/')

    from llama_index.finetuning import EmbeddingQAFinetuneDataset
    training_set = EmbeddingQAFinetuneDataset.from_json(training_path)
    valid_set = EmbeddingQAFinetuneDataset.from_json(valid_path)
    print("Datasets loaded")

    num_training_examples = len(training_set.queries)
    print(f"Training examples: {num_training_examples}")

    from llama_index.finetuning import SentenceTransformersFinetuneEngine
    print(f"Model name is {model_id}")
    model_ext = model_id.split('/')[-1]  # e.g. 'WhereIsAI/UAE-Large-V1' -> 'UAE-Large-V1'
    ft_model_name = f'finetuned-{model_ext}-{num_training_examples}'
    model_outpath = os.path.join("/root/models", ft_model_name)
    print(f'Model ID: {model_id}')
    print(f'Model outpath: {model_outpath}')

    finetune_engine = SentenceTransformersFinetuneEngine(
        training_set,
        batch_size=32,
        model_id=model_id,
        model_output_path=model_outpath,
        val_dataset=valid_set,
        epochs=10,
    )

    import io, zipfile, glob, time
    try:
        start = time.perf_counter()
        finetune_engine.finetune()
        end = time.perf_counter() - start
        print(f"GPU processing lasted {end:.2f} seconds")
        print(os.listdir('/root/models'))
        stub.volume.commit()  # persist changes, i.e. the finetuned model

        # TODO share the model on HuggingFace
        # https://huggingface.co/docs/transformers/v4.15.0/model_sharing

        # Zip the contents of the folder at model_outpath into a BytesIO object
        folder_to_zip = model_outpath
        bytes_buffer = io.BytesIO()
        with zipfile.ZipFile(bytes_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            for file_path in glob.glob(folder_to_zip + "/**", recursive=True):
                print(f"Processed file {file_path}")
                zip_file.write(file_path, os.path.relpath(file_path, start=folder_to_zip))

        # Move the pointer to the start of the BytesIO buffer before returning
        bytes_buffer.seek(0)

        # The returned buffer can be written to a file, sent over a network, etc.
        return bytes_buffer
    except Exception as e:
        print(f"Finetuning failed: {e}")
        return "Finetuning failed"


@stub.local_entrypoint()
def test_method(content_splits=[["a"]]):
    output = encode_content_splits.remote(content_splits)
    return output

# Deploy it with:
#   modal token set --token-id ak-xxxxxx --token-secret as-xxxxx   (values given when we create a new token)
#   modal deploy podcast/1/backend.py
# View deployment: https://modal.com/apps/jpbianchi/falcon_hackaton-project  <<< use this project name
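
# ---------------------------------------------------------------------------
# Usage sketch (not part of the original app): a second local entrypoint
# showing how the zip buffer returned by `finetune` could be written to disk
# on the caller's machine. The entrypoint name, default model ID, and output
# filename are illustrative assumptions; run it with something like
# `modal run backend.py::run_finetune`.
# ---------------------------------------------------------------------------
@stub.local_entrypoint()
def run_finetune(model_id: str = "WhereIsAI/UAE-Large-V1",
                 out_path: str = "finetuned_model.zip"):
    result = finetune.remote(model_id=model_id)
    if isinstance(result, str):
        # `finetune` returns an error string when the engine raises
        print(result)
        return
    # `result` is the BytesIO buffer returned by `finetune`; if Modal's
    # serialization rejects BytesIO, have `finetune` return
    # `bytes_buffer.getvalue()` (raw bytes) instead.
    data = result.getvalue() if hasattr(result, "getvalue") else result
    with open(out_path, "wb") as f:
        f.write(data)
    print(f"Saved finetuned model archive to {out_path}")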