Spaces:
Sleeping
Sleeping
# Function to convert segments to dictionaries | |
from faster_whisper.transcribe import Segment, Word | |
# Function to dump a Word instance to a dictionary | |
def word_to_dict(word: Word) -> dict: | |
return { | |
"start": word.start, | |
"end": word.end, | |
"word": word.word, | |
"probability": word.probability | |
} | |
# Function to load a Word instance from a dictionary | |
def dict_to_word(data: dict) -> Word: | |
return Word( | |
start=data["start"], | |
end=data["end"], | |
word=data["word"], | |
probability=data["probability"] | |
) | |
# Function to dump a Segment instance to a dictionary | |
def segment_to_dict(segment: Segment) -> dict: | |
return { | |
"id": segment.id, | |
"seek": segment.seek, | |
"start": segment.start, | |
"end": segment.end, | |
"text": segment.text, | |
"tokens": segment.tokens, | |
"temperature": segment.temperature, | |
"avg_logprob": segment.avg_logprob, | |
"compression_ratio": segment.compression_ratio, | |
"no_speech_prob": segment.no_speech_prob, | |
"words": [word_to_dict(word) for word in segment.words] if segment.words else None | |
} | |
# Function to load a Segment instance from a dictionary | |
def dict_to_segment(data: dict) -> Segment: | |
return Segment( | |
id=data["id"], | |
seek=data["seek"], | |
start=data["start"], | |
end=data["end"], | |
text=data["text"], | |
tokens=data["tokens"], | |
temperature=data["temperature"], | |
avg_logprob=data["avg_logprob"], | |
compression_ratio=data["compression_ratio"], | |
no_speech_prob=data["no_speech_prob"], | |
words=[dict_to_word(word) for word in data["words"]] if data["words"] else None | |
) | |
def get_raw_words_from_segments(segments: list[Segment]) -> str: | |
return " ".join( | |
word.word | |
for segment in segments if segment.words | |
for word in segment.words | |
) |