Ramakrushna
/

summryai

Model card Files Files and versions Community

Ramakrushna

Amitjadhav01 committed 11 days ago

Commit

e79a00d

•

1 Parent(s): efdc76b

AMIT's python files (#1)

Browse files

- AMIT's python files (b574243264ffd3f6d6db3118d41c43994633e2bd)

Co-authored-by: Amit Jadhav <Amitjadhav01@users.noreply.huggingface.co>

Files changed (3) hide show

data_loader.py +54 -0
summarize_transcription.py +54 -0
transcribe_audio.py +44 -0

data_loader.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import os
+import logging
+import subprocess
+import time
+def download_audio(youtube_url):
+    """Downloads audio from the given YouTube URL and saves it to audio_files directory."""
+    # Create audio_files directory if it doesn't exist
+    if not os.path.exists('audio_files'):
+        os.makedirs('audio_files')
+    # Use yt-dlp to download audio
+    command = [
+        'yt-dlp',
+        '-x',  # Extract audio
+        '--audio-format', 'wav',  # Convert to WAV format
+        '-o', 'audio_files/%(title)s.%(ext)s',  # Output format
+        youtube_url
+    ]
+    result = subprocess.run(command, capture_output=True, text=True)
+    if result.returncode != 0:
+        logging.error(f'Error downloading audio: {result.stderr}')
+        raise Exception('Failed to download audio')
+    # Wait a moment for the file to be created
+    time.sleep(1)
+def get_audio_filename():
+    """Returns the latest downloaded audio filename from audio_files directory."""
+    audio_files = os.listdir('audio_files')
+    if audio_files:
+        # Sort by modification time, return the latest .wav file
+        audio_files.sort(key=lambda x: os.path.getmtime(os.path.join('audio_files', x)))
+        for file in audio_files:
+            if file.endswith('.wav'):
+                return os.path.join('audio_files', file)
+    return None
+if __name__ == "__main__":
+    youtube_url = input("Enter the YouTube URL: ")
+    try:
+        download_audio(youtube_url)
+        # Get the latest audio file (optional step, can be used later for transcription)
+        audio_file = get_audio_filename()
+        if audio_file:
+            print(f"Audio file downloaded: {audio_file}")
+        else:
+            logging.error('No audio file found after download.')
+            raise Exception('No audio file found.')
+    except Exception as e:
+        logging.error(f'An error occurred: {e}')

summarize_transcription.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import os
+import logging
+from langchain_openai import OpenAI
+from langchain_core.prompts import PromptTemplate
+from langchain_core.runnables import RunnableSequence
+import time
+def summarize_text(text):
+    """Summarizes the given text using LangChain with OpenAI."""
+    prompt_template = PromptTemplate(
+        input_variables=["text"],
+        template="Please summarize the following text:\n\n{text}"
+    )
+    llm = OpenAI(temperature=0.7)  # Adjust the temperature for creativity
+    summarization_chain = RunnableSequence(prompt_template | llm)
+    max_retries = 3
+    for attempt in range(max_retries):
+        try:
+            summary = summarization_chain.invoke({"text": text})
+            return summary
+        except Exception as e:
+            if 'insufficient_quota' in str(e) and attempt < max_retries - 1:
+                print(f'Quota exceeded. Retrying in {2 ** attempt} seconds...')
+                time.sleep(2 ** attempt)  # Exponential backoff
+            else:
+                logging.error(f'An error occurred: {e}')
+                raise e
+if __name__ == "__main__":
+    # Ensure the blogs folder exists
+    if not os.path.exists('blogs'):
+        os.makedirs('blogs')
+    # Get the transcription file path from the user
+    transcription_file_path = input("Enter the path to the transcription file: ")
+    # Read the transcription text
+    try:
+        with open(transcription_file_path, 'r') as file:
+            transcription_text = file.read()
+        # Summarize the transcription text
+        summary = summarize_text(transcription_text)
+        # Save the summary to a text file in the blogs folder
+        summary_file_path = os.path.join('blogs', os.path.basename(transcription_file_path).replace('.txt', '_summary.txt'))
+        with open(summary_file_path, 'w') as summary_file:
+            summary_file.write(summary)
+        print(f"Summary saved to: {summary_file_path}")
+    except Exception as e:
+        logging.error(f'An error occurred while processing the transcription file: {e}')

transcribe_audio.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import os
+import logging
+from whisper import load_model
+def transcribe_audio(audio_file):
+    """Transcribes audio to text using Whisper."""
+    # Load Whisper model
+    model = load_model("base")  # Change to desired model size
+    # Perform transcription
+    try:
+        result = model.transcribe(audio_file)
+        return result['text']
+    except Exception as e:
+        logging.error(f'Error transcribing audio: {e}')
+        raise Exception('Failed to transcribe audio')
+def save_transcription(transcription, title):
+    """Saves the transcription to a text file."""
+    # Create transcription directory if it doesn't exist
+    if not os.path.exists('transcriptions'):
+        os.makedirs('transcriptions')
+    # Save the transcription to a text file
+    transcription_file = os.path.join('transcriptions', f'{title}.txt')
+    with open(transcription_file, 'w', encoding='utf-8') as f:
+        f.write(transcription)
+    print(f'Transcription saved to: {transcription_file}')
+if __name__ == "__main__":
+    # Specify the path to the audio file
+    audio_file = input("Enter the path to the audio file: ")
+    # Extract title from the audio file name
+    title = os.path.splitext(os.path.basename(audio_file))[0]
+    try:
+        transcription = transcribe_audio(audio_file)
+        print("Transcription:", transcription)
+        # Save the transcription to a file
+        save_transcription(transcription, title)
+    except Exception as e:
+        logging.error(f'An error occurred: {e}')