Spaces:
Runtime error
Runtime error
from tools.pdf_converter import PDFConverter | |
from tools.ocr_extractor import OCRExtractor | |
import os | |
import shutil | |
def main():
    """Run the invoice pre-processing pipeline.

    Steps, in order:
      1. Ask ``PDFConverter`` to convert the input invoice dataset to JPG,
         passing the processed-images directory as the second argument
         (presumably the output location -- confirm against PDFConverter).
      2. Copy every regular file from the processed-images directory into
         the sparrow-ui images directory.
      3. Run ``OCRExtractor`` over the processed invoices directory.

    All paths are relative to the current working directory.
    """
    # Single source of truth for the images directory: it is both the
    # converter's second argument and the source of the copy step.
    images_dir = 'docs/input/invoices/processed/images'
    ui_images_dir = '../sparrow-ui/docs/images'

    # Convert pdf to jpg
    pdf_converter = PDFConverter()
    pdf_converter.convert_to_jpg('docs/input/invoices/Dataset with valid information',
                                 images_dir)

    # shutil.copy does not create missing directories, so make sure the
    # destination exists before copying anything into it.
    os.makedirs(ui_images_dir, exist_ok=True)

    # Copy each image to the UI project; sorted for a deterministic order.
    for name in sorted(os.listdir(images_dir)):
        src_file = os.path.join(images_dir, name)
        # os.listdir also yields sub-directories, which shutil.copy cannot
        # handle -- only regular files are copied.
        if not os.path.isfile(src_file):
            continue
        shutil.copy(src_file, os.path.join(ui_images_dir, name))

    # OCR
    ocr_extractor = OCRExtractor('db_resnet50', 'crnn_vgg16_bn', pretrained=True)
    ocr_extractor.extract('docs/input/invoices/processed', show_prediction=False)
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()