ChiBenevisamPas committed on
Commit
e5af41f
1 Parent(s): 0dbfe10

Right to Left Direction in Word to Persian

Browse files
Files changed (1) hide show
  1. app.py +24 -3
app.py CHANGED
@@ -7,6 +7,8 @@ from fpdf import FPDF # For PDF output
7
  from pptx import Presentation # For PowerPoint output
8
  import subprocess # To use ffmpeg for embedding subtitles
9
  import shlex # For better command-line argument handling
 
 
10
 
11
  # Load the Whisper model
12
  model = whisper.load_model("tiny") # Smaller model for faster transcription
@@ -81,16 +83,35 @@ def embed_hardsub_in_video(video_file, srt_file, output_video):
81
  except Exception as e:
82
  raise RuntimeError(f"Error running ffmpeg: {e}")
83
 
84
- def write_word(transcription, output_file, tokenizer=None, translation_model=None):
85
- """Creates a Word document from the transcription without timestamps."""
 
 
 
86
  doc = Document()
 
 
 
 
87
  for i, segment in enumerate(transcription['segments']):
88
  text = segment['text']
89
 
90
  if translation_model:
91
  text = translate_text(text, tokenizer, translation_model)
 
 
 
92
 
93
- doc.add_paragraph(f"{i + 1}. {text.strip()}") # No timestamps
 
 
 
 
 
 
 
 
 
94
  doc.save(output_file)
95
 
96
  def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
 
7
  from pptx import Presentation # For PowerPoint output
8
  import subprocess # To use ffmpeg for embedding subtitles
9
  import shlex # For better command-line argument handling
10
+ from docx.oxml.ns import qn
11
+ from docx.oxml import OxmlElement
12
 
13
  # Load the Whisper model
14
  model = whisper.load_model("tiny") # Smaller model for faster transcription
 
83
  except Exception as e:
84
  raise RuntimeError(f"Error running ffmpeg: {e}")
85
 
86
+ from docx.oxml.ns import qn
87
+ from docx.oxml import OxmlElement
88
+
89
def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
    """Create a Word document from the transcription, without timestamps.

    Each segment becomes one numbered paragraph. When the target language is
    Persian ("fa"), every paragraph and its runs are marked right-to-left so
    Word lays the text out in the correct direction.

    Args:
        transcription: Mapping with a 'segments' list; each segment has a
            'text' entry.
        output_file: Path the .docx file is saved to.
        tokenizer: Tokenizer passed through to translate_text (only used when
            translation_model is given).
        translation_model: If truthy, each segment is translated with
            translate_text before being written.
        target_language: Language code of the output text; "fa" enables
            right-to-left formatting.
    """
    doc = Document()

    # Persian is the only language that currently triggers RTL layout.
    rtl = target_language == "fa"

    for i, segment in enumerate(transcription['segments']):
        text = segment['text']

        if translation_model:
            text = translate_text(text, tokenizer, translation_model)

        para = doc.add_paragraph(f"{i + 1}. {text.strip()}")

        if rtl:
            # Paragraph direction lives in the paragraph properties as
            # <w:bidi/>. The paragraph was just created without a style, so
            # its pPr is new/empty and appending keeps child order valid.
            para._element.get_or_add_pPr().append(OxmlElement('w:bidi'))

            # Run-level direction is <w:rtl/>; Font.rtl creates the run
            # properties element on demand, so this is safe for fresh runs
            # that have no rPr yet.
            for run in para.runs:
                run.font.rtl = True

    doc.save(output_file)
116
 
117
  def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):