Spaces:
Sleeping
Sleeping
ChiBenevisamPas
committed on
Commit
•
e5af41f
1
Parent(s):
0dbfe10
Right to Left Direction in Word to Persian
Browse files
app.py
CHANGED
@@ -7,6 +7,8 @@ from fpdf import FPDF # For PDF output
|
|
7 |
from pptx import Presentation # For PowerPoint output
|
8 |
import subprocess # To use ffmpeg for embedding subtitles
|
9 |
import shlex # For better command-line argument handling
|
|
|
|
|
10 |
|
11 |
# Load the Whisper model
|
12 |
model = whisper.load_model("tiny") # Smaller model for faster transcription
|
@@ -81,16 +83,35 @@ def embed_hardsub_in_video(video_file, srt_file, output_video):
|
|
81 |
except Exception as e:
|
82 |
raise RuntimeError(f"Error running ffmpeg: {e}")
|
83 |
|
84 |
-
|
85 |
-
|
|
|
|
|
|
|
86 |
doc = Document()
|
|
|
|
|
|
|
|
|
87 |
for i, segment in enumerate(transcription['segments']):
|
88 |
text = segment['text']
|
89 |
|
90 |
if translation_model:
|
91 |
text = translate_text(text, tokenizer, translation_model)
|
|
|
|
|
|
|
92 |
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
doc.save(output_file)
|
95 |
|
96 |
def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
|
|
|
7 |
from pptx import Presentation # For PowerPoint output
|
8 |
import subprocess # To use ffmpeg for embedding subtitles
|
9 |
import shlex # For better command-line argument handling
|
10 |
+
from docx.oxml.ns import qn
|
11 |
+
from docx.oxml import OxmlElement
|
12 |
|
13 |
# Load the Whisper model
|
14 |
model = whisper.load_model("tiny") # Smaller model for faster transcription
|
|
|
83 |
except Exception as e:
|
84 |
raise RuntimeError(f"Error running ffmpeg: {e}")
|
85 |
|
86 |
+
from docx.oxml.ns import qn
|
87 |
+
from docx.oxml import OxmlElement
|
88 |
+
|
89 |
+
def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
    """Create a Word document from a transcription, one numbered paragraph per segment.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment is a
            mapping that provides the segment text under ``'text'``.
        output_file: Path (or file-like object) passed to ``Document.save``.
        tokenizer: Tokenizer forwarded to ``translate_text``; only used when
            ``translation_model`` is truthy.
        translation_model: When truthy, every segment is translated with
            ``translate_text`` before being written.
        target_language: Language code of the output. ``"fa"`` (Persian)
            switches every paragraph and run to right-to-left layout.
    """
    doc = Document()

    # Persian is the only right-to-left target handled here.
    rtl = target_language == "fa"

    for number, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']

        if translation_model:
            text = translate_text(text, tokenizer, translation_model)

        para = doc.add_paragraph(f"{number}. {text.strip()}")

        if rtl:
            _mark_paragraph_rtl(para)

    doc.save(output_file)


def _mark_paragraph_rtl(para):
    """Flag a python-docx paragraph and all of its runs as right-to-left."""
    # Paragraph direction is stored as <w:bidi/> inside <w:pPr>. python-docx
    # exposes no high-level property for it, so the element is added by hand
    # (and only once, in case the paragraph is already marked).
    p_pr = para._p.get_or_add_pPr()
    if p_pr.find(qn('w:bidi')) is None:
        bidi = OxmlElement('w:bidi')
        bidi.set(qn('w:val'), '1')
        p_pr.append(bidi)

    # Run direction is <w:rtl/> inside <w:rPr>, exposed as Font.rtl. The
    # setter creates <w:rPr> on demand, so a freshly added run (which has no
    # run properties yet) is handled without touching a missing element.
    for run in para.runs:
        run.font.rtl = True
|
116 |
|
117 |
def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
|