import os from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity import gradio as gr def find_closest(query): files_contents = [] files_names = [] for file in os.listdir(): if file.endswith(".txt"): with open(file, 'r') as f: content = f.read() files_contents.append(content) files_names.append(file) # Append query to the end files_contents.append(query) # Initialize the TfidfVectorizer tfidf_vectorizer = TfidfVectorizer() # Fit and transform the texts tfidf_matrix = tfidf_vectorizer.fit_transform(files_contents) # Compute the cosine similarity between the query and all files similarity_scores = cosine_similarity(tfidf_matrix[-1:], tfidf_matrix[:-1]) # Get the index of the file with the highest similarity score max_similarity_idx = similarity_scores.argmax() # Return the name of the file with the highest similarity score return files_names[max_similarity_idx] def find_closest_mp3(query): closest_txt_file = find_closest(query) file_name_without_extension, _ = os.path.splitext(closest_txt_file) return file_name_without_extension + '.mp3' my_theme = gr.Theme.from_hub("ysharma/llamas") with gr.Blocks(theme=my_theme) as demo: gr.Markdown("""