ivrit-ai-streaming / model.py
aviadr1's picture
sometimes the client works
d8dadfc
# Helpers for converting faster-whisper Segment and Word objects to and from plain dictionaries
from faster_whisper.transcribe import Segment, Word
# Function to dump a Word instance to a dictionary
def word_to_dict(word: Word) -> dict:
    """Return a plain dict holding the fields of *word*.

    The result has the keys ``start``, ``end``, ``word`` and
    ``probability`` (in that order), each copied from the attribute of the
    same name on *word*.
    """
    field_names = ("start", "end", "word", "probability")
    return {name: getattr(word, name) for name in field_names}
# Function to load a Word instance from a dictionary
def dict_to_word(data: dict) -> Word:
    """Build a ``Word`` from a dict with the keys written by ``word_to_dict``.

    *data* must contain ``start``, ``end``, ``word`` and ``probability``;
    a missing key raises ``KeyError``.
    """
    required_keys = ("start", "end", "word", "probability")
    kwargs = {key: data[key] for key in required_keys}
    return Word(**kwargs)
# Function to dump a Segment instance to a dictionary
def segment_to_dict(segment: Segment) -> dict:
    """Return a plain dict holding the fields of *segment*.

    Every scalar field is copied from the attribute of the same name. The
    ``words`` entry is a list of dicts produced by ``word_to_dict``, or
    ``None`` when ``segment.words`` is falsy (``None`` or empty).
    """
    scalar_fields = (
        "id",
        "seek",
        "start",
        "end",
        "text",
        "tokens",
        "temperature",
        "avg_logprob",
        "compression_ratio",
        "no_speech_prob",
    )
    payload = {name: getattr(segment, name) for name in scalar_fields}

    # An empty word list is reported as None, the same as a missing one.
    if segment.words:
        payload["words"] = [word_to_dict(entry) for entry in segment.words]
    else:
        payload["words"] = None
    return payload
# Function to load a Segment instance from a dictionary
def dict_to_segment(data: dict) -> Segment:
    """Build a ``Segment`` from a dict with the keys written by ``segment_to_dict``.

    All keys, including ``words``, must be present; a missing key raises
    ``KeyError``. A falsy ``words`` value (``None`` or empty) becomes
    ``None`` on the resulting segment; otherwise each entry is converted
    with ``dict_to_word``.
    """
    scalar_keys = (
        "id",
        "seek",
        "start",
        "end",
        "text",
        "tokens",
        "temperature",
        "avg_logprob",
        "compression_ratio",
        "no_speech_prob",
    )
    kwargs = {key: data[key] for key in scalar_keys}

    if data["words"]:
        kwargs["words"] = [dict_to_word(entry) for entry in data["words"]]
    else:
        kwargs["words"] = None
    return Segment(**kwargs)
def get_raw_words_from_segments(segments: list[Segment]) -> str:
    """Join the text of every word in *segments* with single spaces.

    Segments whose ``words`` attribute is falsy (``None`` or empty) are
    skipped. Returns an empty string when no words are found.
    """
    collected = []
    for segment in segments:
        if not segment.words:
            continue
        collected.extend(entry.word for entry in segment.words)
    return " ".join(collected)