File size: 2,786 Bytes
f158972 70d82fc f158972 08c6313 f158972 06aa83b cafd34d c379e84 f158972 551b199 c379e84 f158972 903d7f7 91a3d66 903d7f7 f158972 47b56d3 f158972 c379e84 f486b06 8f445e9 551b199 f486b06 c2656c0 f486b06 c2656c0 f486b06 cafd34d f486b06 cafd34d f486b06 cafd34d f486b06 cafd34d f486b06 cafd34d f486b06 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import os
import secrets
from functools import lru_cache
from io import BytesIO
from typing import Optional, Annotated

import nltk
import pytesseract
from fastapi import FastAPI, File, Request, UploadFile, Body, Depends, HTTPException
from fastapi.encoders import jsonable_encoder
from fastapi.middleware.cors import CORSMiddleware
from fastapi.security.api_key import APIKeyHeader
from nltk.tokenize import sent_tokenize
from PIL import Image
from transformers import MarianMTModel, MarianTokenizer
# Shared secret clients must present; None when the API_KEY environment
# variable is not set.
API_KEY = os.getenv("API_KEY")

# Image file extensions (with leading dot) treated as valid uploads.
VALID_IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg"}

app = FastAPI()

# --- CORS configuration ---------------------------------------------------
# Origins allowed to call this API from a browser.
origins = [
    "http://localhost:3000",  # Update this with the actual origin of your frontend
]

app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=origins,
)

# --- API-key authentication -----------------------------------------------
# Reads the key from the "api_key" request header. auto_error is disabled, and
# the dependency that consumes this value handles a missing key (None) itself.
api_key_header = APIKeyHeader(auto_error=False, name="api_key")
def get_api_key(api_key: Optional[str] = Depends(api_key_header)):
    """Validate the API key supplied in the request header.

    Args:
        api_key: Value produced by the ``api_key_header`` dependency, or
            ``None`` when no key was supplied.

    Returns:
        The supplied key, unchanged, when it matches ``API_KEY``.

    Raises:
        HTTPException: 401 when the key is missing, when ``API_KEY`` is not
            configured, or when the two values differ.
    """
    # With no configured secret nothing can match; reject every request.
    if api_key is None or API_KEY is None:
        raise HTTPException(status_code=401, detail="Unauthorized access")

    # Compare in constant time so response timing does not reveal how much of
    # the key was correct. Both sides are encoded to bytes because
    # compare_digest only accepts ASCII-only str arguments.
    if not secrets.compare_digest(api_key.encode("utf-8"), API_KEY.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Unauthorized access")

    return api_key
@app.post("/api/ocr", response_model=dict)
async def ocr(
    api_key: str = Depends(get_api_key),
    image: UploadFile = File(...),
):
    """Run Tesseract OCR (English) on an uploaded image.

    Args:
        api_key: Resolved by the ``get_api_key`` dependency; not used in the
            body, it only enforces authentication.
        image: The uploaded image file.

    Returns:
        A dict of the form ``{"ImageText": <recognised text>}``.

    Raises:
        HTTPException: 500, with the underlying error message as detail, when
            reading, decoding, or recognising the image fails.
    """
    try:
        content = await image.read()
        # Use a separate name for the decoded picture instead of rebinding the
        # upload parameter, and close it as soon as OCR is done.
        with Image.open(BytesIO(content)) as picture:
            text = pytesseract.image_to_string(picture, lang="eng")
    except Exception as e:
        # FastAPI does not interpret a ``(body, status)`` tuple as a status
        # code, so failures are reported by raising HTTPException.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"ImageText": text}
@app.post("/api/translate", response_model=dict)
async def translate(
    api_key: str = Depends(get_api_key),
    text: str = Body(...),
    src: str = "en",
    trg: str = "zh",
):
    """Translate ``text`` sentence by sentence with a MarianMT model.

    Args:
        api_key: Resolved by the ``get_api_key`` dependency; not used in the
            body, it only enforces authentication.
        text: Text to translate, taken from the request body.
        src: Source language code passed to ``get_model`` (default ``"en"``).
        trg: Target language code passed to ``get_model`` (default ``"zh"``).

    Returns:
        ``{"translated_text": ...}`` where each translated sentence is
        followed by a newline (so non-empty output ends with ``"\\n"``).

    Raises:
        HTTPException: 400 when the model for the requested language pair
            cannot be loaded.
    """
    try:
        tokenizer, model = get_model(src, trg)
    except OSError as e:
        # NOTE(review): relies on ``from_pretrained`` raising OSError when
        # the model cannot be found or loaded (e.g. unknown language pair);
        # confirm against the installed transformers version.
        raise HTTPException(
            status_code=400,
            detail=f"Could not load a translation model for '{src}' -> '{trg}'",
        ) from e

    translated_sentences = []
    for sentence in sent_tokenize(text):
        encoded = tokenizer(sentence, return_tensors="pt")
        # generate() returns a batch; a single sentence was passed, so take
        # the first (only) output sequence.
        generated = model.generate(**encoded)[0]
        translated_sentences.append(
            tokenizer.decode(generated, skip_special_tokens=True)
        )

    # Each sentence is terminated by a newline, including the last one.
    translated_text = "".join(line + "\n" for line in translated_sentences)
    return jsonable_encoder({"translated_text": translated_text})
@lru_cache(maxsize=8)
def get_model(src: str, trg: str):
    """Load the MarianMT tokenizer and model for a language pair.

    Results are memoized per ``(src, trg)`` so the model is not reloaded on
    every call. The cache is bounded because the language codes come from
    callers and the set of possible pairs is open-ended.

    Args:
        src: Source language code, interpolated into the model name.
        trg: Target language code, interpolated into the model name.

    Returns:
        A ``(tokenizer, model)`` tuple for ``Helsinki-NLP/opus-mt-{src}-{trg}``.
        Repeated calls with the same arguments return the same objects.
    """
    model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model
|