vishanth10 committed
Commit 0181645
Parent(s): 2ef1eee
init commit from repo
Files changed:
- __pycache__/app.cpython-311.pyc +0 -0
- app.py +110 -0
- utils/__pycache__/embedding_utils.cpython-311.pyc +0 -0
- utils/__pycache__/qdrant_utils.cpython-311.pyc +0 -0
- utils/__pycache__/s3_utils.cpython-311.pyc +0 -0
- utils/embedding_utils.py +139 -0
- utils/qdrant_utils.py +72 -0
- utils/s3_utils.py +74 -0
__pycache__/app.cpython-311.pyc
ADDED
Binary file (6.11 kB).
app.py
ADDED
@@ -0,0 +1,110 @@
+## API Call
+# app.py
+## FastAPI main file that receives and validates the POST body
+
+from fastapi import FastAPI, Header, HTTPException
+from pydantic import BaseModel
+from typing import List, Optional
+from utils.s3_utils import read_s3_file
+from utils.embedding_utils import read_document, cumulative_semantic_chunking, embed_chunks
+from utils.qdrant_utils import store_embeddings
+import logging
+import time
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Retrieve the API key and bucket name from the environment
+API_KEY = os.getenv('X_API_KEY')
+bucket_name = os.getenv('bucket_name')
+
+# Initialize logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+app = FastAPI()
+
+class Metadata(BaseModel):
+    mime_type: str
+    file_size_bytes: str
+    file_format: str
+
+class DocumentIndexRequest(BaseModel):
+    metadata: Metadata
+    bucket_key: str
+    user_id: str
+    org_id: str
+    file_id: int
+    data_source_id: int
+
+@app.get("/")
+async def root():
+    return {"message": "Welcome to the Document Indexing API!"}
+
+@app.post("/api/document-index")
+async def document_index(request: DocumentIndexRequest, x_api_key: str = Header(...)):
+    logging.info(f"Received request: {request}")
+    start_time = time.time()
+    # Check that the API key provided in the header matches the one in the environment
+    if x_api_key != API_KEY:
+        logging.warning("Unauthorized access attempt with invalid API key.")
+        raise HTTPException(status_code=401, detail="Unauthorized")
+
+    try:
+        #bucket_name = "document-ingestion-drive-dev"
+
+        # Read the file from S3
+        content, metadata, file_format = read_s3_file(bucket_name, request.bucket_key)
+        logging.info(f"File {request.bucket_key} retrieved from S3 with format {file_format}.")
+
+        # Extract text content from the file
+        text_content = read_document(content, file_id=request.file_id, file_format=file_format)
+        logging.info(f"Text content extracted from file {request.bucket_key}.")
+        #print('text_content', text_content)
+
+        # Chunk the text using cumulative semantic chunking
+        chunks = cumulative_semantic_chunking(text_content, max_chunk_size=512, similarity_threshold=0.8)
+        logging.info(f"Text content chunked into {len(chunks)} chunks.")
+
+        # Embed the chunks
+        embeddings, total_tokens = embed_chunks(chunks)
+        logging.info(f"Text content embedded into vectors. Total tokens used: {total_tokens}.")
+
+        # Store embeddings in Qdrant
+        store_embeddings(
+            chunks=chunks,
+            embeddings=embeddings,
+            user_id=request.user_id,
+            data_source_id=request.data_source_id,
+            file_id=request.file_id,
+            organization_id=request.org_id,
+            s3_bucket_key=request.bucket_key,
+            total_tokens=total_tokens
+        )
+
+        logging.info(f"Embeddings for {request.bucket_key} stored successfully with metadata: {metadata}")
+        logging.info(f"Embeddings stored successfully using {total_tokens} tokens")
+        time_taken = time.time() - start_time
+        logging.info(f"Time taken to process and embed the document: {time_taken} seconds")
+
+        return {"message": "Embeddings stored successfully"}
+
+    except FileNotFoundError as e:
+        logging.error(f"File not found: {str(e)}")
+        raise HTTPException(status_code=404, detail=str(e))
+    except PermissionError as e:
+        logging.error(f"Permission error: {str(e)}")
+        raise HTTPException(status_code=403, detail=str(e))
+    except HTTPException as e:
+        logging.error(f"HTTP error: {str(e.detail)}")
+        raise
+    except Exception as e:
+        logging.error(f"Error processing file {request.bucket_key}: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+# Run the FastAPI app
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
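For reference, a call to the /api/document-index endpoint above might look like the sketch below. The field values, bucket key, and API key are placeholders, and the host/port assume the uvicorn settings from the __main__ block; adjust to your deployment.

import requests

payload = {
    "metadata": {"mime_type": "application/pdf", "file_size_bytes": "102400", "file_format": "pdf"},
    "bucket_key": "uploads/example.pdf",  # placeholder S3 key
    "user_id": "user-123",
    "org_id": "org-456",
    "file_id": 1,
    "data_source_id": 1
}

response = requests.post(
    "http://localhost:8000/api/document-index",
    json=payload,
    headers={"x-api-key": "<X_API_KEY value>"}  # must match the key loaded from .env
)
print(response.status_code, response.json())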
utils/__pycache__/embedding_utils.cpython-311.pyc
ADDED
Binary file (8.2 kB).
utils/__pycache__/qdrant_utils.cpython-311.pyc
ADDED
Binary file (3.67 kB).
utils/__pycache__/s3_utils.cpython-311.pyc
ADDED
Binary file (3.74 kB).
utils/embedding_utils.py
ADDED
@@ -0,0 +1,139 @@
+# embedding_utils.py
+
+from sentence_transformers import SentenceTransformer
+from llama_index.core import SimpleDirectoryReader
+from huggingface_hub import login
+from typing import List, Tuple
+from dotenv import load_dotenv
+from docx import Document
+import numpy as np
+import os
+import tempfile
+import logging
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Load the Hugging Face token from the environment
+huggingface_token = os.getenv('HUGGINGFACE_HUB_TOKEN')
+
+if huggingface_token:
+    login(token=huggingface_token, add_to_git_credential=True)
+else:
+    raise ValueError("Hugging Face token is not set. Please set the HUGGINGFACE_HUB_TOKEN environment variable.")
+
+# Load the embedding model and tokenizer with authentication
+#model_name = 'nvidia/NV-Embed-v1'
+model_name = 'nomic-ai/nomic-embed-text-v1.5'
+model = SentenceTransformer(model_name, trust_remote_code=True)
+model.max_seq_length = 4096
+model.tokenizer.padding_side = "right"
+
+def read_document(file_content: bytes, file_id: int, file_format: str) -> str:
+    """Extract text content from a document file depending on its format."""
+    try:
+        # Create a temporary directory to save the file
+        with tempfile.TemporaryDirectory() as temp_dir:
+            file_path = os.path.join(temp_dir, f"document_{file_id}.{file_format}")
+
+            # Save the content to the file
+            with open(file_path, 'wb') as temp_file:
+                temp_file.write(file_content)
+
+            # Handle different file formats
+            if file_format.lower() == 'docx':
+                text_content = extract_text_from_docx(file_path)
+            elif file_format.lower() == 'pdf':
+                text_content = extract_text_from_pdf(file_path)
+            elif file_format.lower() in ['txt', 'md', 'csv']:
+                reader = SimpleDirectoryReader(input_files=[file_path])
+                documents = reader.load_data()
+                text_content = documents[0].text if documents else ''
+            else:
+                raise ValueError(f"Unsupported file format: {file_format}")
+
+            if text_content:
+                return text_content
+            else:
+                raise ValueError("No content extracted from the document.")
+
+    except Exception as e:
+        logging.error(f"Error reading document: {e}")
+        raise
+
+def extract_text_from_docx(file_path: str) -> str:
+    """Extract text from a DOCX file."""
+    try:
+        doc = Document(file_path)
+        full_text = [para.text for para in doc.paragraphs]
+        return '\n'.join(full_text)
+    except Exception as e:
+        logging.error(f"Error extracting text from DOCX file: {e}")
+        raise
+
+def extract_text_from_pdf(file_path: str) -> str:
+    """Extract text from a PDF file."""
+    import pdfplumber
+    try:
+        with pdfplumber.open(file_path) as pdf:
+            # extract_text() may return None for image-only pages, so substitute an empty string
+            full_text = [page.extract_text() or '' for page in pdf.pages]
+            return '\n'.join(full_text).strip()
+    except Exception as e:
+        logging.error(f"Error extracting text from PDF file: {e}")
+        raise
+
+def cumulative_semantic_chunking(text: str, max_chunk_size: int, similarity_threshold: float, embedding_model: SentenceTransformer = model) -> List[str]:
+    """Cumulative semantic chunking using sentence embeddings."""
+    # Split on periods and drop empty sentences
+    sentences = [s for s in text.split('.') if s.strip()]
+    if not sentences:
+        return []
+
+    # Encode sentences
+    sentence_embeddings = embedding_model.encode(sentences)
+
+    chunks = []
+    current_chunk = sentences[0]
+    #print('current chunk', current_chunk)
+    current_embedding = sentence_embeddings[0]
+
+    for sentence, embedding in zip(sentences[1:], sentence_embeddings[1:]):
+        combined_chunk = current_chunk + '. ' + sentence
+        # Length-weighted average of the current chunk embedding and the new sentence embedding
+        combined_embedding = (current_embedding * len(current_chunk.split()) + embedding * len(sentence.split())) / (len(current_chunk.split()) + len(sentence.split()))
+
+        # Cosine similarity between the current chunk and the combined chunk
+        similarity = np.dot(current_embedding, combined_embedding) / (np.linalg.norm(current_embedding) * np.linalg.norm(combined_embedding))
+
+        if similarity >= similarity_threshold and len(combined_chunk) <= max_chunk_size:
+            current_chunk = combined_chunk
+            current_embedding = combined_embedding
+        else:
+            chunks.append(current_chunk.strip())
+            current_chunk = sentence
+            current_embedding = embedding
+
+    if current_chunk:
+        chunks.append(current_chunk.strip())
+
+    #print('chunks', chunks)
+    return chunks
+
+# def embed_chunks(chunks: List[str]) -> List[np.ndarray]:
+#     """Embed the chunks using the SentenceTransformer model."""
+#     return model.encode(chunks)
+
+def embed_chunks(chunks: List[str]) -> Tuple[List[np.ndarray], int]:
+    """Embed the chunks using the SentenceTransformer model and return embeddings along with the total token count."""
+    total_tokens = 0
+    embeddings = []
+
+    for chunk in chunks:
+        tokens = model.tokenizer.encode(chunk, add_special_tokens=False)
+        total_tokens += len(tokens)
+        embedding = model.encode(chunk)
+        embeddings.append(embedding)
+
+    return embeddings, total_tokens
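As a quick sanity check, the chunking and embedding helpers above can be exercised directly; the sample text below is illustrative only, and the first run will download the nomic-ai/nomic-embed-text-v1.5 model.

from utils.embedding_utils import cumulative_semantic_chunking, embed_chunks

sample = "Qdrant stores vectors. It also supports payload filters. Bananas are yellow."
chunks = cumulative_semantic_chunking(sample, max_chunk_size=512, similarity_threshold=0.8)
embeddings, total_tokens = embed_chunks(chunks)
print(len(chunks), "chunks,", total_tokens, "tokens,", len(embeddings[0]), "dimensions")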
utils/qdrant_utils.py
ADDED
@@ -0,0 +1,72 @@
+# qdrant_utils.py contains utility functions to interact with Qdrant, a vector similarity search engine.
+
+from qdrant_client import QdrantClient
+from qdrant_client.http import models
+import os
+import logging
+import uuid
+
+# Initialize logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+qdrant_client = QdrantClient(
+    url=os.getenv('QDRANT_URL'),
+    api_key=os.getenv('QDRANT_API_KEY')
+)
+
+def create_collection_if_not_exists(collection_name, vector_size):
+    try:
+        # Check if the collection exists
+        collections = qdrant_client.get_collections().collections
+        if not any(collection.name == collection_name for collection in collections):
+            # Create the collection if it doesn't exist
+            qdrant_client.create_collection(
+                collection_name=collection_name,
+                vectors_config=models.VectorParams(size=vector_size, distance=models.Distance.COSINE)
+            )
+            logging.info(f"Created new collection: {collection_name}")
+        else:
+            logging.info(f"Collection {collection_name} already exists")
+    except Exception as e:
+        logging.error(f"Error creating collection: {str(e)}")
+        raise
+
+def store_embeddings(chunks, embeddings, user_id, data_source_id, file_id, organization_id, s3_bucket_key, total_tokens):
+    try:
+        collection_name = "embed"  # Name of the collection in Qdrant
+        vector_size = len(embeddings[0])
+
+        # Ensure the collection exists
+        create_collection_if_not_exists(collection_name, vector_size)
+
+        # Prepare points for Qdrant
+        points = []
+        for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
+            chunk_id = str(uuid.uuid4())  # Generate a unique ID for each chunk
+            points.append(
+                models.PointStruct(
+                    id=chunk_id,
+                    vector=embedding.tolist(),  # Convert numpy array to list
+                    payload={
+                        "user_id": user_id,
+                        "data_source_id": data_source_id,
+                        "file_id": file_id,
+                        "organization_id": organization_id,
+                        "chunk_index": i,
+                        "chunk_text": chunk,
+                        "s3_bucket_key": s3_bucket_key,
+                        "total_tokens": total_tokens
+                    }
+                )
+            )
+
+        # Store embeddings in Qdrant
+        qdrant_client.upsert(
+            collection_name=collection_name,
+            points=points
+        )
+        logging.info(f"Successfully stored {len(points)} embeddings")
+    except Exception as e:
+        logging.error(f"Error storing embeddings in Qdrant: {str(e)}")
+        raise
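store_embeddings only writes points; retrieval is not part of this commit. A minimal sketch of reading vectors back with the same client, assuming the search() API of the installed qdrant-client and reusing the embedding model from embedding_utils (the query text and user_id are placeholders):

from qdrant_client.http import models
from utils.embedding_utils import model
from utils.qdrant_utils import qdrant_client

query_vector = model.encode("example search query").tolist()
hits = qdrant_client.search(
    collection_name="embed",
    query_vector=query_vector,
    query_filter=models.Filter(
        must=[models.FieldCondition(key="user_id", match=models.MatchValue(value="user-123"))]
    ),
    limit=5
)
for hit in hits:
    print(hit.score, hit.payload["chunk_text"][:80])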
utils/s3_utils.py
ADDED
@@ -0,0 +1,74 @@
+import boto3
+import os
+import urllib.parse
+import logging
+from botocore.exceptions import NoCredentialsError, ClientError
+
+#bucket_name = "document-ingestion-drive-dev"
+aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
+aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
+
+def get_s3_client():
+    try:
+        s3_client = boto3.client(
+            's3',
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            region_name='us-west-2'
+        )
+        logging.info("S3 client initialized successfully.")
+        return s3_client
+    except NoCredentialsError as e:
+        logging.error(f"Failed to initialize S3 client: {str(e)}")
+        raise
+    except Exception as e:
+        logging.error(f"Failed to initialize S3 client: {str(e)}")
+        raise
+
+def read_s3_file(bucket_name, key):
+    try:
+        logging.info(f"Reading file from S3: bucket={bucket_name}, key={key}")
+        s3 = get_s3_client()
+        response = s3.get_object(Bucket=bucket_name, Key=key)
+
+        content = response['Body'].read()
+        metadata = response.get('Metadata', {})
+
+        # Attempt to get the file format from metadata
+        file_format = metadata.get('file_format')  # Assuming 'file_format' is set as custom metadata
+        if not file_format:
+            # Fall back to the Content-Type header if 'file_format' is not set in metadata
+            content_type = response.get('ContentType')
+            if content_type:
+                if 'word' in content_type:
+                    file_format = 'docx'
+                elif 'pdf' in content_type:
+                    file_format = 'pdf'
+                elif 'text' in content_type:
+                    file_format = 'txt'
+                else:
+                    file_format = 'unknown'
+            else:
+                raise ValueError("File format could not be determined from metadata or Content-Type.")
+
+        logging.info(f"File read successfully from S3: bucket={bucket_name}, key={key}, format={file_format}")
+        return content, metadata, file_format
+    except NoCredentialsError:
+        logging.error("AWS credentials not found.")
+        raise PermissionError("AWS credentials not found.")
+    except ClientError as e:
+        # get_object reports a missing key as a ClientError with error code 'NoSuchKey'
+        if e.response['Error']['Code'] == 'NoSuchKey':
+            logging.error(f"File not found in S3: bucket={bucket_name}, key={key}")
+            raise FileNotFoundError(f"File not found: bucket={bucket_name}, key={key}")
+        logging.error(f"Error reading file from S3: {str(e)}")
+        raise
+    except Exception as e:
+        logging.error(f"Error reading file from S3: {str(e)}")
+        raise
+
+# Initialize logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
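read_s3_file prefers a custom 'file_format' metadata entry and only falls back to Content-Type. A hedged sketch of an upload that sets that metadata (the bucket name and key are placeholders):

from utils.s3_utils import get_s3_client

s3 = get_s3_client()
with open("example.pdf", "rb") as f:
    s3.put_object(
        Bucket="my-ingestion-bucket",    # placeholder bucket
        Key="uploads/example.pdf",       # placeholder key
        Body=f,
        ContentType="application/pdf",
        Metadata={"file_format": "pdf"}  # read back via response['Metadata'] in read_s3_file
    )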