Spaces:
Sleeping
Sleeping
File size: 2,740 Bytes
f158972 70d82fc f158972 08c6313 f158972 06aa83b cafd34d c379e84 f158972 5bb11f5 c379e84 f158972 903d7f7 9e4ee07 903d7f7 f158972 47b56d3 f158972 c379e84 f486b06 ab81f2e 8f445e9 5bb11f5 551b199 f486b06 c2656c0 f486b06 c2656c0 f486b06 cafd34d f486b06 cafd34d f486b06 cafd34d f486b06 cafd34d f486b06 cafd34d f486b06 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import os
import secrets
from functools import lru_cache
from io import BytesIO
from typing import Optional, Annotated

import nltk
import pytesseract
from fastapi import FastAPI, File, Request, UploadFile, Body, Depends, HTTPException
from fastapi.encoders import jsonable_encoder
from fastapi.middleware.cors import CORSMiddleware
from fastapi.security.api_key import APIKeyHeader
from nltk.tokenize import sent_tokenize
from PIL import Image
from transformers import MarianMTModel, MarianTokenizer
# Shared secret that clients must present in the ``api_key`` request header.
# Read once at import time; it is None when the API_KEY environment variable
# is not set.
API_KEY = os.environ.get("API_KEY")
# Lower-case filename extensions accepted by the OCR endpoint.
VALID_IMAGE_EXTENSIONS = {"jpg", "jpeg", "png"}
app = FastAPI()
# If a browser frontend hits CORS errors, uncomment the block below and list
# the frontend's origin(s) in ``origins``.
# origins = [
#     "http://localhost:3000",  # Update this with the actual origin of your frontend
# ]
# app.add_middleware(
#     CORSMiddleware,
#     allow_origins=origins,
#     allow_credentials=True,
#     allow_methods=["*"],
#     allow_headers=["*"],
# )
# ==========================
# Security scheme that reads the key from the ``api_key`` header.
# auto_error=False is passed so a missing header reaches get_api_key as None
# and that function decides the response.
api_key_header = APIKeyHeader(name="api_key", auto_error=False)
def get_api_key(api_key: Optional[str] = Depends(api_key_header)):
    """Validate the ``api_key`` request header against the configured key.

    Used as a FastAPI dependency by the endpoints in this module.

    Args:
        api_key: Value of the ``api_key`` header, or None when the header
            is absent.

    Returns:
        The supplied key, when it matches ``API_KEY``.

    Raises:
        HTTPException: 401 when the header is missing, when no ``API_KEY``
            is configured, or when the two values differ.
    """
    # Fail closed: a missing header or an unconfigured server key never matches.
    if api_key is None or API_KEY is None:
        raise HTTPException(status_code=401, detail="Unauthorized access")

    # Constant-time comparison avoids leaking how many leading characters of
    # the key were correct. Compare bytes, because compare_digest rejects
    # non-ASCII str arguments with a TypeError.
    keys_match = secrets.compare_digest(
        api_key.encode("utf-8"), API_KEY.encode("utf-8")
    )
    if not keys_match:
        raise HTTPException(status_code=401, detail="Unauthorized access")
    return api_key
@app.post("/api/ocr", response_model=dict)
async def ocr(
    api_key: str = Depends(get_api_key),
    image: UploadFile = File(...),
):
    """Extract text from an uploaded image with Tesseract OCR.

    The recognition language is fixed to English (``eng``).

    Args:
        api_key: Result of the ``get_api_key`` dependency; present only so
            the request is authenticated.
        image: Uploaded file. The part of its filename after the last dot
            must be one of ``VALID_IMAGE_EXTENSIONS`` (case-insensitive).

    Returns:
        ``{"ImageText": <recognized text>}``.

    Raises:
        HTTPException: 400 when the filename is missing or its extension is
            not allowed; 500 when reading, decoding or OCR fails.
    """
    # The filename can be absent on an upload; treat that as "no extension"
    # so the request is rejected with a 400 instead of an AttributeError.
    filename = image.filename or ""
    file_extension = filename.split(".")[-1].lower()
    if file_extension not in VALID_IMAGE_EXTENSIONS:
        # Raised outside the try block so the 400 is not caught and rewritten
        # by the generic handler below.
        raise HTTPException(
            status_code=400,
            detail="Invalid file format. Only .jpg, .jpeg, and .png are allowed.",
        )

    try:
        content = await image.read()
        # Use a separate name rather than rebinding the ``image`` parameter,
        # and let the context manager release the decoded image.
        with Image.open(BytesIO(content)) as picture:
            text = pytesseract.image_to_string(picture, lang="eng")
    except Exception as e:
        # Returning ``(body, 500)`` does not set a status code in FastAPI and
        # fails ``response_model=dict`` validation; raise a real HTTP error.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"ImageText": text}
@app.post("/api/translate", response_model=dict)
async def translate(
    api_key: str = Depends(get_api_key),
    text: str = Body(...),
    src: str = "en",
    trg: str = "zh",
):
    """Translate ``text`` sentence by sentence with a MarianMT model.

    Args:
        api_key: Result of the ``get_api_key`` dependency; present only so
            the request is authenticated.
        text: Text to translate, taken from the request body.
        src: Source language code used to pick the model (default ``"en"``).
        trg: Target language code used to pick the model (default ``"zh"``).

    Returns:
        ``{"translated_text": ...}`` where each translated sentence is
        followed by a newline. Empty input yields an empty string.
    """
    tokenizer, model = get_model(src, trg)

    # NOTE(review): sent_tokenize needs NLTK's punkt data at runtime and
    # nothing visible in this file downloads it -- confirm the deployment
    # provides it.
    translated_sentences = []
    for sentence in sent_tokenize(text):
        # truncation=True caps the input at the tokenizer's configured
        # maximum length, so an overlong sentence is shortened instead of
        # being passed to the model at full length.
        encoded = tokenizer(sentence, return_tensors="pt", truncation=True)
        generated = model.generate(**encoded)[0]
        translated_sentences.append(
            tokenizer.decode(generated, skip_special_tokens=True)
        )

    # Build the result in one pass; every sentence keeps its trailing newline.
    translated_text = "".join(f"{line}\n" for line in translated_sentences)
    return jsonable_encoder({"translated_text": translated_text})
# Keep only a few language pairs resident: each entry holds a full model, and
# the (src, trg) key comes from request parameters, so the cache is bounded.
@lru_cache(maxsize=4)
def get_model(src: str, trg: str):
    """Load the MarianMT tokenizer and model for a language pair.

    Results are memoized per ``(src, trg)`` so repeated requests reuse the
    already-loaded objects instead of reloading them on every call. A call
    that raises is not cached, so a failed load is retried next time.

    Args:
        src: Source language code.
        trg: Target language code.

    Returns:
        A ``(tokenizer, model)`` tuple loaded from
        ``Helsinki-NLP/opus-mt-{src}-{trg}``.
    """
    model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model
|