import os
from io import BytesIO

import gradio as gr
import wandb
from elevenlabs import VoiceSettings
from elevenlabs.client import ElevenLabs
from langchain.prompts import ChatPromptTemplate
from langchain_together import ChatTogether
from PyPDF2 import PdfReader
from rouge import Rouge

# Environment variables setup
os.environ['TOGETHER_API_KEY'] = 'your_together_api_key'
os.environ['PINECONE_API_KEY'] = 'your_pinecone_api_key'
ELEVENLABS_API_KEY = 'your_elevenlabs_api_key'

# Summarization prompt
summary_prompt = """
You are an expert AI summarization model tasked with creating a comprehensive
summary of the provided context for 10-year-old children. The summary should be
approximately one page long and well-structured.

This is the context:
```{context}```

Please follow these specific guidelines for the summary:

### Detailed Summary
- **Section 1: Key Concepts**
  - Introduce the first major topic or theme.
  - Use bullet points to list important details and insights.
- **Section 2: Supporting Details**
  - Discuss secondary topics or supporting arguments.
  - Use bullet points to outline critical information and findings.

### Conclusion
- Suggest any potential actions, solutions, or recommendations.

This is the summary:
"""
summary_prompt_template = ChatPromptTemplate.from_template(summary_prompt)


# Extract the raw text from every page of the uploaded PDF
def extract_text_from_pdf(file):
    reader = PdfReader(file)
    text = ""
    for page in reader.pages:
        text += page.extract_text() or ""  # extract_text() can return None for image-only pages
    return text


# Convert the summary to speech with ElevenLabs and save it as an MP3
def text_to_speech_stream(text):
    client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
    response = client.text_to_speech.convert(
        voice_id="jBpfuIE2acCO8z3wKNLl",
        optimize_streaming_latency="0",
        output_format="mp3_44100_64",
        text=text,
        model_id="eleven_multilingual_v2",
        voice_settings=VoiceSettings(
            stability=0.5,
            similarity_boost=0.75,
            style=0,
            use_speaker_boost=True,
        ),
    )
    # Buffer the streamed chunks, then write them out as a single file
    audio_data = BytesIO()
    for chunk in response:
        if chunk:
            audio_data.write(chunk)
    audio_data.seek(0)

    os.makedirs('samples', exist_ok=True)
    with open('samples/output.mp3', 'wb') as f:
        f.write(audio_data.read())
    return 'samples/output.mp3'


# Score the generated summary against reference summaries with ROUGE
# and log the averaged metrics to Weights & Biases
def evaluate_summary(generated_summary):
    wandb.init(project="learnverse")
    reference_summaries = ["Reference summary 1...", "Reference summary 2...", "Reference summary 3..."]
    rouge = Rouge()

    # Accumulate recall, precision, and f-score for each ROUGE variant
    totals = {metric: {'r': 0.0, 'p': 0.0, 'f': 0.0} for metric in ('rouge-1', 'rouge-2', 'rouge-l')}
    for reference in reference_summaries:
        scores = rouge.get_scores(generated_summary, reference)[0]
        for metric, stats in totals.items():
            for key in stats:
                stats[key] += scores[metric][key]

    # Average over the references and log each variant
    num_references = len(reference_summaries)
    averages = {
        metric: {key: value / num_references for key, value in stats.items()}
        for metric, stats in totals.items()
    }
    for metric_scores in averages.values():
        wandb.log(metric_scores)
    wandb.finish()
    return {'ROUGE-1': averages['rouge-1'], 'ROUGE-2': averages['rouge-2'], 'ROUGE-L': averages['rouge-l']}


# Summarize the PDF, evaluate the summary, and synthesize it as audio.
# NOTE: this assumes Together AI as the LLM backend (suggested by the
# TOGETHER_API_KEY above); the ChatTogether model name is a placeholder
# assumption -- swap in whichever model and backend you actually use.
def process_question(file):
    context = extract_text_from_pdf(file)
    llm = ChatTogether(model="meta-llama/Llama-3-70b-chat-hf")  # assumed model choice
    summary = (summary_prompt_template | llm).invoke({"context": context}).content
    evaluation = evaluate_summary(summary)
    audio_file = text_to_speech_stream(summary)
    return summary, evaluation, audio_file
# Gradio wrapper around the processing pipeline
def gradio_interface(file):
    summary, evaluation, audio_file = process_question(file)
    return summary, evaluation, audio_file


# Launch the Gradio app; type="filepath" hands the uploaded file's path
# to the function ("file" is not a valid type in current Gradio)
gr.Interface(
    fn=gradio_interface,
    inputs=gr.File(type="filepath", label="Upload PDF"),
    outputs=[
        gr.Textbox(label="Summary"),
        gr.Textbox(label="Evaluation"),
        gr.Audio(label="Generated Audio"),
    ],
).launch()
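# Usage: running this script starts a local Gradio server; uploading a PDF
# returns the summary text, the averaged ROUGE scores, and the generated MP3.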