File size: 1,976 Bytes
be00d5a
03250e1
 
b1f7167
 
 
 
 
 
03250e1
be00d5a
b1f7167
 
 
 
 
 
 
 
03250e1
b1f7167
 
 
 
 
 
 
 
03250e1
b1f7167
 
 
 
 
 
 
 
03250e1
b1f7167
 
 
 
03250e1
b1f7167
 
 
 
 
 
c8abcdb
03250e1
b1f7167
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import os
import time

import requests
import streamlit as st

# Streamlit app: upload a PDF to the Llama Cloud API, infer an extraction
# schema from it, start an extraction job with that schema, wait for the job
# to finish and display the extracted result.

API_BASE_URL = 'https://api.cloud.llamaindex.ai/api/v1'
# Upper bound (seconds) for each HTTP call so a stalled connection cannot
# hang the Streamlit script run forever.
REQUEST_TIMEOUT_SECONDS = 30
# Job-status polling: wait POLL_INTERVAL_SECONDS between checks, and give up
# after MAX_POLL_ATTEMPTS checks.
POLL_INTERVAL_SECONDS = 2
MAX_POLL_ATTEMPTS = 30


def _call_api(method, path, headers, **kwargs):
    """Send one request to the API and return the decoded JSON body.

    Args:
        method: HTTP verb, e.g. ``'GET'`` or ``'POST'``.
        path: Path appended to ``API_BASE_URL`` (starts with ``/``).
        headers: Request headers, including the authorization header.
        **kwargs: Extra arguments forwarded to ``requests.request``
            (``json=...``, ``files=...``).

    Returns:
        The response body parsed as JSON.

    On a transport error, a non-2xx status or a body that is not valid JSON,
    the error is shown on the page and the script run is ended via
    ``st.stop()``, so callers never see a failed response.
    """
    try:
        response = requests.request(
            method,
            f'{API_BASE_URL}{path}',
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
            **kwargs,
        )
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as exc:
        st.error(f'Request to {path} failed: {exc}')
        st.stop()


def _require_id(payload, step):
    """Return ``payload['id']``, or report the failed *step* and stop the run.

    Without this check a missing id would be passed on as ``None`` to the
    next request instead of surfacing the problem where it happened.
    """
    identifier = payload.get('id') if isinstance(payload, dict) else None
    if not identifier:
        st.error(f'{step} did not return an ID.')
        st.stop()
    return identifier


# Streamlit app setup
st.title('Llama Cloud API Document Extraction')

# File Upload
uploaded_file = st.file_uploader('Choose a PDF file', type='pdf')
api_key = os.getenv('API_KEY')

if uploaded_file is not None:
    # Fail early instead of sending "Bearer None" to the API.
    if not api_key:
        st.error('API_KEY environment variable is not set.')
        st.stop()

    # Upload file to Llama Cloud
    headers = {
        'accept': 'application/json',
        'Authorization': f'Bearer {api_key}',
    }
    # getvalue() sends the complete file contents regardless of the current
    # read position of the uploaded stream.
    files = {
        'upload_file': (
            uploaded_file.name,
            uploaded_file.getvalue(),
            'application/pdf',
        ),
    }
    file_id = _require_id(
        _call_api('POST', '/files', headers, files=files),
        'File upload',
    )
    st.write(f'File uploaded with ID: {file_id}')

    # Infer Schema
    schema_data = {
        'name': 'Inferred Schema',
        'file_ids': [file_id],
    }
    schema_id = _require_id(
        _call_api('POST', '/extraction/schemas/infer', headers, json=schema_data),
        'Schema inference',
    )
    st.write(f'Schema inferred with ID: {schema_id}')

    # Start Extraction Job
    job_data = {
        'schema_id': schema_id,
        'file_id': file_id,
    }
    job_id = _require_id(
        _call_api('POST', '/extraction/jobs', headers, json=job_data),
        'Extraction job creation',
    )
    st.write(f'Extraction job started with ID: {job_id}')

    # Check Job Status
    # The job was only just created, so a single immediate check would almost
    # never see it finished; poll a bounded number of times instead.
    # NOTE(review): 'completed' is the success value the original script
    # compared against; confirm it (and the names of any terminal failure
    # states, which could end polling early) against the API reference.
    status = None
    with st.spinner('Waiting for extraction job to finish...'):
        for attempt in range(MAX_POLL_ATTEMPTS):
            if attempt:
                time.sleep(POLL_INTERVAL_SECONDS)
            job = _call_api('GET', f'/extraction/jobs/{job_id}', headers)
            status = job.get('status') if isinstance(job, dict) else None
            if status == 'completed':
                break
    st.write(f'Job Status: {status}')

    # Display Results
    if status == 'completed':
        results = _call_api('GET', f'/extraction/jobs/{job_id}/result', headers)
        st.write('### Extraction Results:')
        st.json(results)
    else:
        st.write('Extraction job is still in progress or has failed.')