# NOTE: "Spaces: Sleeping / Sleeping" appears to be page-banner text captured
# along with this file; it is not Python code.
import os
import time

import requests
import streamlit as st
# Streamlit app: upload a PDF to the Llama Cloud API, infer an extraction
# schema from it, start an extraction job, and display the job's result.

API_BASE_URL = 'https://api.cloud.llamaindex.ai/api/v1'
# Per-request timeout; without one, `requests` waits indefinitely on a
# stalled connection.
REQUEST_TIMEOUT_SECONDS = 60
# Bounds for waiting on the extraction job after it has been created.
POLL_INTERVAL_SECONDS = 2
MAX_WAIT_SECONDS = 60


def extract_id(payload, what):
    """Return the ``id`` field of a decoded API response body.

    Args:
        payload: Decoded JSON body of an API response.
        what: Short label for the resource, used only in the error message.

    Returns:
        The value stored under ``'id'``.

    Raises:
        RuntimeError: If *payload* is not a dict or carries no usable ``id``.
    """
    resource_id = payload.get('id') if isinstance(payload, dict) else None
    if not resource_id:
        # Fail here instead of passing None on to the next API call.
        raise RuntimeError(f'Llama Cloud returned no id for the {what}.')
    return resource_id


def call_api(method, path, headers, **kwargs):
    """Send one request to the API and return its decoded JSON body.

    Args:
        method: HTTP method name, e.g. ``'GET'`` or ``'POST'``.
        path: Endpoint path appended to ``API_BASE_URL``.
        headers: Request headers (including the Authorization header).
        **kwargs: Extra keyword arguments forwarded to ``requests.request``
            (``files=...`` or ``json=...``).

    Raises:
        requests.RequestException: On connection errors, timeouts, or an
            HTTP 4xx/5xx response.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.request(
        method,
        f'{API_BASE_URL}{path}',
        headers=headers,
        timeout=REQUEST_TIMEOUT_SECONDS,
        **kwargs,
    )
    response.raise_for_status()
    return response.json()


def wait_for_job(job_id, headers):
    """Poll the extraction job and return the last status seen.

    Polling stops as soon as the status equals ``'completed'`` or once
    ``MAX_WAIT_SECONDS`` have elapsed, whichever comes first.
    """
    # NOTE(review): 'completed' is the only status string this script knows;
    # the API's other (e.g. failure) status names are not visible here, so a
    # failed job is only detected by the wait budget running out. Confirm the
    # exact status values against the API reference.
    deadline = time.monotonic() + MAX_WAIT_SECONDS
    while True:
        body = call_api('GET', f'/extraction/jobs/{job_id}', headers)
        status = body.get('status') if isinstance(body, dict) else None
        if status == 'completed' or time.monotonic() >= deadline:
            return status
        time.sleep(POLL_INTERVAL_SECONDS)


def run_extraction(uploaded_file, headers):
    """Upload the file, infer a schema, run an extraction job, show results.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``.
        headers: Request headers used for every API call.

    Raises:
        requests.RequestException, ValueError, RuntimeError: Propagated from
            ``call_api`` / ``extract_id`` when any step fails.
    """
    # Upload file to Llama Cloud
    files = {'upload_file': (uploaded_file.name, uploaded_file, 'application/pdf')}
    file_id = extract_id(
        call_api('POST', '/files', headers, files=files), 'uploaded file'
    )
    st.write(f'File uploaded with ID: {file_id}')

    # Infer Schema
    schema_data = {
        'name': 'Inferred Schema',
        'file_ids': [file_id],
    }
    schema_id = extract_id(
        call_api('POST', '/extraction/schemas/infer', headers, json=schema_data),
        'inferred schema',
    )
    st.write(f'Schema inferred with ID: {schema_id}')

    # Start Extraction Job
    job_data = {
        'schema_id': schema_id,
        'file_id': file_id,
    }
    job_id = extract_id(
        call_api('POST', '/extraction/jobs', headers, json=job_data),
        'extraction job',
    )
    st.write(f'Extraction job started with ID: {job_id}')

    # Check Job Status (a job that was just created needs time to run, so
    # poll for a bounded period instead of reading the status only once).
    with st.spinner('Waiting for the extraction job to finish...'):
        status = wait_for_job(job_id, headers)
    st.write(f'Job Status: {status}')

    # Display Results
    if status == 'completed':
        results = call_api('GET', f'/extraction/jobs/{job_id}/result', headers)
        st.write('### Extraction Results:')
        st.json(results)
    else:
        st.write('Extraction job is still in progress or has failed.')


def main():
    """Render the page and run the extraction flow for an uploaded PDF."""
    # Streamlit app setup
    st.title('Llama Cloud API Document Extraction')

    # File Upload
    uploaded_file = st.file_uploader('Choose a PDF file', type='pdf')
    if uploaded_file is None:
        return

    api_key = os.getenv('API_KEY')
    if not api_key:
        # Without this check the request would carry "Bearer None".
        st.error('The API_KEY environment variable is not set.')
        return

    headers = {
        'accept': 'application/json',
        'Authorization': f'Bearer {api_key}',
    }
    try:
        run_extraction(uploaded_file, headers)
    except (requests.RequestException, ValueError, RuntimeError) as err:
        # Show the failure in the page instead of an unhandled traceback.
        st.error(f'Llama Cloud request failed: {err}')


if __name__ == '__main__':
    main()