Spaces:

visionaries666
/

DAI_Project

Sleeping

App Files Files Community

ChiBenevisamPas committed on Oct 15

Commit

e5af41f

•

1 Parent(s): 0dbfe10

Right to Left Direction in Word to Persian

Browse files

Files changed (1) hide show

app.py +24 -3

app.py CHANGED Viewed

@@ -7,6 +7,8 @@ from fpdf import FPDF  # For PDF output
 from pptx import Presentation  # For PowerPoint output
 import subprocess  # To use ffmpeg for embedding subtitles
 import shlex  # For better command-line argument handling
 # Load the Whisper model
 model = whisper.load_model("tiny")  # Smaller model for faster transcription
@@ -81,16 +83,35 @@ def embed_hardsub_in_video(video_file, srt_file, output_video):
     except Exception as e:
         raise RuntimeError(f"Error running ffmpeg: {e}")
-def write_word(transcription, output_file, tokenizer=None, translation_model=None):
-    """Creates a Word document from the transcription without timestamps."""
     doc = Document()
     for i, segment in enumerate(transcription['segments']):
         text = segment['text']
         if translation_model:
             text = translate_text(text, tokenizer, translation_model)
-        doc.add_paragraph(f"{i + 1}. {text.strip()}")  # No timestamps
     doc.save(output_file)
 def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):

 from pptx import Presentation  # For PowerPoint output
 import subprocess  # To use ffmpeg for embedding subtitles
 import shlex  # For better command-line argument handling
+from docx.oxml.ns import qn
+from docx.oxml import OxmlElement
 # Load the Whisper model
 model = whisper.load_model("tiny")  # Smaller model for faster transcription
     except Exception as e:
         raise RuntimeError(f"Error running ffmpeg: {e}")
+from docx.oxml.ns import qn
+from docx.oxml import OxmlElement
def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
    """Create a Word document from the transcription, without timestamps.

    Each segment becomes one numbered paragraph. When ``target_language`` is
    ``"fa"`` (Persian), every paragraph and each of its runs is marked
    right-to-left so Word lays the text out in RTL order.

    Args:
        transcription: Mapping with a ``'segments'`` sequence; each segment
            provides its text under the ``'text'`` key.
        output_file: Destination handed to ``Document.save``.
        tokenizer: Forwarded to ``translate_text`` when translating.
        translation_model: When truthy, each segment's text is passed through
            ``translate_text`` before being written.
        target_language: Language code of the output; only ``"fa"`` enables
            right-to-left formatting.
    """
    doc = Document()

    # Persian is the only language that switches the document to RTL.
    rtl = target_language == "fa"

    for i, segment in enumerate(transcription['segments']):
        text = segment['text']
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)

        para = doc.add_paragraph(f"{i + 1}. {text.strip()}")

        if rtl:
            # <w:bidi/> is a *paragraph* property, so it belongs in <w:pPr>.
            # get_or_add_pPr() creates the properties element on demand; a
            # freshly added paragraph usually has none.
            p_pr = para._p.get_or_add_pPr()
            if p_pr.find(qn('w:bidi')) is None:
                p_pr.append(OxmlElement('w:bidi'))

            # Run-level direction is <w:rtl/>, exposed by python-docx as
            # Font.rtl. Going through the font proxy also creates <w:rPr>
            # when it is missing, instead of dereferencing a None rPr.
            for run in para.runs:
                run.font.rtl = True

    doc.save(output_file)
 def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):