Spaces:

jacktol
/

ATC-Transcription-Assistant

Paused

File size: 3,947 Bytes

fb3c53c
 
 
d9346bd
 
 
fb3c53c
 
 
857507d
fb3c53c
 
 
 
 
 
2c8e4b5
9d6e60d
fb3c53c
 
 
 
2c8e4b5
fb3c53c
 
2c8e4b5
fb3c53c
 
 
2c8e4b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97bfe69
 
 
 
 
 
1b10bcc
97bfe69
 
 
 
 
 
 
 
 
 
 
2c8e4b5
 
 
 
 
f37ce91
2c8e4b5
 
 
 
 
 
 
 
 
 
 
 
 
bc091a0
2c8e4b5
 
 
 
 
 
 
 
07ced72
2c8e4b5
 
 
 
 
 
 
07ced72
0264b46
2c8e4b5
8f8089e
 
 
 
f37ce91

import chainlit as cl
from faster_whisper import WhisperModel
from openai import AsyncOpenAI
import os

# Point the Hugging Face cache root at a directory inside the app.
# NOTE(review): this assignment runs after `faster_whisper` has already been
# imported; if the hub library reads HF_HOME at import time the setting may
# come too late to affect where the model is cached — confirm.
os.environ["HF_HOME"] = "/app/.cache"

# Model identifier passed to WhisperModel below.
model_path = "jacktol/whisper-medium.en-fine-tuned-for-ATC-faster-whisper"

# Loaded once at import time and shared by every call to transcribe_audio();
# configured for CPU execution with float32 compute.
whisper_model = WhisperModel(model_path, device="cpu", compute_type="float32")
# Constructed with no explicit arguments; presumably credentials come from the
# environment (e.g. OPENAI_API_KEY) — confirm against the deployment config.
client = AsyncOpenAI()

# System message sent with every transcript: asks the chat model to rewrite the
# raw transcript in pilot-ATC phraseology, drop timestamps, label each
# transmission as 'ATC' or 'Pilot', and start with a "---" rule and an H2.
system_prompt = """Convert the provided transcript into standard pilot-ATC syntax without altering the content.
Ensure that all runway and heading numbers are formatted correctly (e.g., '11L' for 'one one left'). Use standard
aviation phraseology wherever applicable. Maintain the segmentation of the transcript as provided, but exclude the timestamps.
Based on the context and segmentation of each transmission, label it as either 'ATC' or 'Pilot'. At the very beginning of your
response place a horizontal div with "---" and then line-break, and then add a H2 which says "Transcription", and then
proceed with the transcription."""

def transcribe_audio(file_path):
    """Transcribe an audio file into timestamped lines of text.

    Args:
        file_path: Path of the audio file handed to the module-level
            ``whisper_model``.

    Returns:
        A single string with one line per segment, each formatted as
        ``[<start>s -> <end>s] <text>`` (times with two decimals), joined by
        newlines and stripped of leading/trailing whitespace.
    """
    # Only the segments are needed; the second element of the result is unused.
    segments, _ = whisper_model.transcribe(file_path, beam_size=5)

    lines = [
        f"[{seg.start:.2f}s -> {seg.end:.2f}s] {seg.text}"
        for seg in segments
    ]
    return "\n".join(lines).strip()

@cl.on_chat_start
async def on_chat_start():
    """Run one transcription session when a chat starts.

    Sends the welcome message, asks the user for a single WAV/MP3 upload
    (up to 50 MB, waiting at most 3600 seconds), transcribes it with
    ``transcribe_audio`` and streams the chat model's reformatted transcript
    into a message. If no file is returned, nothing further happens.

    Any exception raised along the way is caught and printed; it is not
    re-raised.
    """
    try:
        # Initialise the per-session counter only if it is not already set.
        if cl.user_session.get("transcription_counter") is None:
            cl.user_session.set("transcription_counter", 0)

        welcome_message = """
## Welcome to the **ATC Transcription Assistant**

---

### What is this tool for?

This tool transcribes **Air Traffic Control (ATC)** audio using OpenAI’s **Whisper medium.en** model, fine-tuned for ATC communications. Developed as part of a research project, the fine-tuned **Whisper medium.en** model offers significant improvements in transcription accuracy for ATC audio.

---

### Performance

- **Fine-tuned Whisper medium.en WER**: 15.08%
- **Non fine-tuned Whisper medium.en WER**: 94.59%
- **Relative improvement**: 84.06%

While the fine-tuned model performs better, **we cannot guarantee the accuracy of the transcriptions**. For more details, see the [blog post](https://jacktol.net/posts/fine-tuning_whisper_on_atc_data), or check out the [project repository](https://github.com/jack-tol/fine-tuning-whisper-on-atc-data). Feel free to contact me at [contact@jacktol.net](mailto:contact@jacktol.net).

---

### How to Use

1. **Upload an ATC audio file**: Upload an audio file in **MP3** or **WAV** format containing ATC communications.
2. **View the transcription**: The tool will transcribe the audio and display the text on the screen.
3. **Transcribe another audio**: Click **New Chat** in the top-right to start a new transcription.

---

To get started, upload the audio below.
"""
        await cl.Message(content=welcome_message).send()

        files = await cl.AskFileMessage(
            content="",
            accept={
                "audio/wav": [".wav"],
                "audio/mpeg": [".mp3"]
            },
            max_size_mb=50,
            timeout=3600
        ).send()

        # No upload came back: there is nothing to transcribe.
        if not files:
            return

        audio_file = files[0]

        # transcribe_audio is synchronous and CPU-bound; calling it directly
        # here would block the event loop (and every other session) for the
        # whole transcription. Run it in a worker thread instead.
        transcription = await cl.make_async(transcribe_audio)(audio_file.path)

        msg = cl.Message(content="")
        await msg.send()

        stream = await client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": transcription},
            ],
            stream=True,
            model="gpt-4o",
            temperature=0,
        )

        async for part in stream:
            # Skip chunks that carry no choices so indexing [0] cannot raise
            # IndexError and abort the stream midway.
            if not part.choices:
                continue
            token = part.choices[0].delta.content or ""
            await msg.stream_token(token)

        await msg.send()

    except Exception as e:
        print(f"Error during on_chat_start: {str(e)}")

@cl.on_stop
async def on_chat_stop():
    """Log to stdout that the session has been stopped."""
    notice = "Session ended, resources cleaned up."
    print(notice)