"""Streamlit app: upload a PDF, extract its text, and convert it to JSON.

The extracted text is passed to a Hugging Face ``text2text-generation``
pipeline; the generated text is expected to be a JSON document, which is
displayed in the page and offered as a download.
"""

import json
from pathlib import Path

import streamlit as st
from PyPDF2 import PdfReader
from transformers import pipeline

# Specify the model name
model_name = "Canstralian/RabbitRedux"

# Initialize the pipeline
# NOTE(review): this runs at module level, i.e. every time the script is
# executed. Consider wrapping the load in Streamlit's resource caching if the
# deployed Streamlit version supports it -- confirm before changing.
nlp_pipeline = pipeline("text2text-generation", model=model_name)

# Example usage
# NOTE(review): this also runs (and prints to stdout) on every execution of
# the script; kept for backward compatibility with the original behavior.
input_text = "Provide an example of secure Python coding practices."
output = nlp_pipeline(input_text)
print(output)


def process_pdf(file) -> str:
    """Return the concatenated text of every page in a PDF.

    Args:
        file: A path or binary file-like object accepted by ``PdfReader``
            (here, the object returned by ``st.file_uploader``).

    Returns:
        The text of all pages joined in page order, with no separator.
        Pages that yield no text contribute an empty string.
    """
    reader = PdfReader(file)
    # ``or ""`` guards against a page yielding None instead of a string,
    # which would otherwise raise TypeError during concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages)


def convert_to_json(text: str) -> str:
    """Run the Hugging Face pipeline on ``text`` and return its output.

    Args:
        text: The input text to feed to ``nlp_pipeline``.

    Returns:
        The ``generated_text`` field of the first pipeline result. The
        string is returned as-is; it is not validated as JSON here.
    """
    # Use the Hugging Face model to process the text
    result = nlp_pipeline(text)
    return result[0]["generated_text"]


st.title("PDF to JSON Converter")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    st.write("Processing your file...")

    # Extract text from the PDF
    pdf_text = process_pdf(uploaded_file)

    if not pdf_text.strip():
        # Nothing to convert (e.g. a scanned/image-only PDF); skip the model.
        st.warning("No extractable text was found in the uploaded PDF.")
    else:
        # Convert the extracted text to JSON using the Hugging Face model
        json_output = convert_to_json(pdf_text)

        try:
            parsed_output = json.loads(json_output)
        except json.JSONDecodeError as err:
            # The model output is free-form text, so it may not be valid
            # JSON. Report that instead of crashing, and show the raw output.
            st.error(f"The model output is not valid JSON: {err}")
            st.text(json_output)
        else:
            # Display the JSON output
            st.write("Converted JSON:")
            st.json(parsed_output)

            # Provide a download link for the JSON file.
            # Swap only the final extension, regardless of its letter case
            # (a plain ``.replace(".pdf", ".json")`` misses ``.PDF`` and
            # rewrites every ".pdf" occurrence inside the name).
            json_filename = Path(uploaded_file.name).stem + ".json"
            st.download_button(
                label="Download JSON",
                data=json_output,
                file_name=json_filename,
                mime="application/json",
            )