Spaces:
Sleeping
Sleeping
File size: 2,128 Bytes
2927735 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
# ADDING GOOGLE DRIVE SUPPORT
import io
import os
import csv
import PyPDF2
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseUpload, MediaIoBaseDownload
from driveapi.service import get_credentials
# Service-account key material supplied by the project's driveapi.service helper.
# NOTE(review): presumably a dict parsed from the service-account JSON key, since it
# is handed to from_service_account_info — confirm against get_credentials().
credentials_info = get_credentials()
credentials = service_account.Credentials.from_service_account_info(credentials_info)
# Module-level Drive v3 client, built when this module is imported.
service = build('drive', 'v3', credentials=credentials)
# Drive folder ID used as the parent of uploaded chat logs (see upload_chat_to_drive);
# None when the LOGS_ID environment variable is not set.
logs_id = os.environ.get('LOGS_ID')
# Save Logs
def upload_chat_to_drive(chat_history, file_name):
    """Upload a chat history to Google Drive as a spreadsheet.

    The rows are serialised to CSV and uploaded with the Google Sheets MIME
    type as the target type of the new file. The file is created inside the
    folder identified by the module-level ``logs_id``.

    Args:
        chat_history: Iterable of rows (each an iterable of cell values), as
            accepted by ``csv.writer.writerows``.
        file_name: Name given to the created Drive file.

    Returns:
        None. The API response is not exposed to the caller.
    """
    # Serialise the rows to CSV text first.
    csv_text = io.StringIO()
    csv.writer(csv_text).writerows(chat_history)

    # MediaIoBaseUpload expects a source of bytes and derives the upload size
    # from seek()/tell() on the stream. A text stream reports character
    # offsets, which differ from the encoded length as soon as the chat
    # contains non-ASCII text, so upload explicit UTF-8 bytes instead.
    csv_bytes = io.BytesIO(csv_text.getvalue().encode('utf-8'))

    # File metadata: the Sheets MIME type is the target type, the CSV is the source.
    file_metadata = {
        'name': file_name,
        'mimeType': 'application/vnd.google-apps.spreadsheet',
        'parents': [logs_id]
    }

    # Upload file
    media = MediaIoBaseUpload(csv_bytes, mimetype='text/csv')
    service.files().create(body=file_metadata, media_body=media, fields='id').execute()
## Read PDF files
def download_file(file_id):
    """Download a Drive file's raw content into memory.

    A Drive v3 client is built for the call from the module-level
    ``credentials``; the file's media is then pulled chunk by chunk.

    Args:
        file_id: ID of the Drive file to fetch.

    Returns:
        An ``io.BytesIO`` holding the downloaded bytes, rewound to position 0.
    """
    drive = build('drive', 'v3', credentials=credentials)
    buffer = io.BytesIO()
    chunked = MediaIoBaseDownload(buffer, drive.files().get_media(fileId=file_id))

    # next_chunk() returns (progress, done); keep pulling until done is set.
    finished = False
    while not finished:
        _, finished = chunked.next_chunk()

    # Rewind so the caller reads from the start of the downloaded data.
    buffer.seek(0)
    return buffer
# Function to process a PDF file
def process_pdf(file_stream):
    """Extract the text of every page of a PDF.

    Args:
        file_stream: PDF source handed to ``PyPDF2.PdfReader``
            (``drive_content`` passes the in-memory stream returned by
            ``download_file``).

    Returns:
        The extracted text of all pages, concatenated in page order with no
        separator; an empty string when the document has no pages.
    """
    pdf_reader = PyPDF2.PdfReader(file_stream)
    # Iterate the pages directly and join once instead of indexing by number
    # and growing a string with repeated +=.
    return "".join(page.extract_text() for page in pdf_reader.pages)
def drive_content(shared_folder_id):
    """Return the concatenated text of the PDF files in a Drive folder.

    Lists the direct children of the folder through the module-level
    ``service``, downloads each PDF with ``download_file`` and extracts its
    text with ``process_pdf``.

    Args:
        shared_folder_id: ID of the Drive folder whose files are read.

    Returns:
        The text of all listed PDFs concatenated in listing order with no
        separator; an empty string when the folder contains no PDFs.
    """
    # Restrict the listing to PDFs: everything listed is passed to get_media
    # and then to PyPDF2, which is only meaningful for PDF content (sub-folders
    # and Google-native documents have no downloadable media of that kind).
    query = f"'{shared_folder_id}' in parents and mimeType = 'application/pdf'"

    texts = []
    page_token = None
    while True:
        # files().list returns one page of results at a time; follow
        # nextPageToken so large folders are not silently truncated.
        results = service.files().list(
            q=query,
            fields="nextPageToken, files(id, name, mimeType)",
            pageToken=page_token,
        ).execute()

        for item in results.get('files', []):
            print(f"Processing file: {item['name']}")
            file_stream = download_file(item['id'])
            texts.append(str(process_pdf(file_stream)))

        page_token = results.get('nextPageToken')
        if not page_token:
            break

    # Join once at the end rather than growing a string inside the loop.
    return ''.join(texts)