pvanand committed on
Commit
f2415f1
·
verified ·
1 Parent(s): 1c3dc0f

Create file_conversion.py

Browse files
Files changed (1) hide show
  1. file_conversion.py +46 -0
file_conversion.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, UploadFile, File, HTTPException
2
+ from fastapi.responses import FileResponse
3
+ from pdf2docx import Converter
4
+ import os
5
+ import tempfile
6
+
7
+ router = APIRouter()
8
+
9
+ # Absolute directory where the conversion endpoint stages the uploaded PDF and the generated DOCX; the endpoint creates it on demand via os.makedirs.
10
+ TEMP_DIR = "/.tempfiles"
11
+
12
@router.post("/convert/pdf_to_docx")
async def convert_pdf_to_docx(file: UploadFile = File(...)):
    """Convert an uploaded PDF to DOCX and return it as a file download.

    Args:
        file: The uploaded file. Its name must end in ``.pdf``
            (case-insensitive).

    Returns:
        A ``FileResponse`` streaming the converted DOCX. The download name is
        the upload's base name with the ``.pdf`` suffix replaced by ``.docx``.

    Raises:
        HTTPException: 400 if the upload is not named like a PDF; 500 if
            saving or converting the file fails.
    """
    # Function-scope imports: these names are not part of the module's
    # import block, and keeping them here leaves this block self-contained.
    import shutil

    from fastapi import BackgroundTasks

    # The client controls the filename, so never use it to build a server
    # path. Keep only its last path component, and only to name the download.
    upload_name = os.path.basename((file.filename or "").replace("\\", "/"))
    if not upload_name.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="File must be a PDF")
    # Swap only the trailing extension; str.replace would rewrite every
    # ".pdf" occurrence inside the name.
    download_name = upload_name[: -len(".pdf")] + ".docx"

    # Ensure the temp directory exists, then give each request its own
    # private sub-directory so concurrent uploads with the same name cannot
    # overwrite or delete each other's files.
    os.makedirs(TEMP_DIR, exist_ok=True)
    work_dir = tempfile.mkdtemp(prefix="pdf2docx_", dir=TEMP_DIR)
    pdf_temp_path = os.path.join(work_dir, "input.pdf")
    docx_temp_path = os.path.join(work_dir, "output.docx")

    converted = False
    try:
        # Save the uploaded file
        with open(pdf_temp_path, "wb") as pdf_file:
            pdf_file.write(await file.read())

        # Convert PDF to DOCX; always release the converter, even on failure
        cv = Converter(pdf_temp_path)
        try:
            cv.convert(docx_temp_path)
        finally:
            cv.close()
        converted = True
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Conversion failed: {str(e)}"
        ) from e
    finally:
        # On any failure nothing will be streamed, so clean up right away.
        if not converted:
            shutil.rmtree(work_dir, ignore_errors=True)

    # FileResponse reads the file only while the response is being sent,
    # i.e. after this function has returned. Deleting the DOCX here (as a
    # ``finally`` block would) removes it before it can be served, so the
    # cleanup is deferred to a background task that runs after the send.
    cleanup = BackgroundTasks()
    cleanup.add_task(shutil.rmtree, work_dir, ignore_errors=True)

    # Return the DOCX file
    return FileResponse(
        docx_temp_path,
        media_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        filename=download_name,
        background=cleanup,
    )