import gradio as gr
import subprocess
import json
import re
import pandas as pd
import openai
from bs4 import BeautifulSoup

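# Padlet summarizer: fetch the posts from one Padlet board, summarize them with an
# OpenAI completion model, and publish the summary as a new post on a second board.
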
# Simple helper to strip HTML tags from a string

def strip_html_tags(html_text):
    # Use BeautifulSoup to parse and clean HTML content
    soup = BeautifulSoup(html_text, 'html.parser')
    return soup.get_text()

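# Parse a plain-text dump of posts ("Subject: ..." / "Body Text: ..." lines) into a
# two-column DataFrame. Not called by the Gradio flow below; kept as a helper.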
def html_posts_to_table(html_posts):

    subject_pattern = r"Subject: (.*?)\n"
    body_text_pattern = r"Body Text: (.*?)\n"

    subjects = re.findall(subject_pattern, html_posts)
    body_texts = re.findall(body_text_pattern, html_posts)

    data = {
        'Subject': subjects,
        'Body Text': body_texts
    }

    df = pd.DataFrame(data)

    return df

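# Fetch a board (including its posts and sections) from the Padlet API via curl and
# return each post's subject and HTML-stripped body as a DataFrame.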
def api_call(board_id, api_key):
    curl_command = [
        'curl', '-s', '--request', 'GET',
        '--url', f"https://api.padlet.dev/v1/boards/{board_id}?include=posts%2Csections",
        '--header', f"X-Api-Key: {api_key}",
        '--header', 'accept: application/vnd.api+json'
    ]
    
    try:
        response = subprocess.check_output(curl_command, universal_newlines=True)
        response_data = json.loads(response)
        
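        # Expected (simplified) response shape, inferred from the fields read below:
        # {
        #   "data": {...},
        #   "included": [
        #     {"type": "post",
        #      "attributes": {"content": {"subject": "...", "bodyHtml": "<p>...</p>"}}},
        #     ...
        #   ]
        # }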
        # Extract the contents of all posts, stripping HTML tags from bodyHtml
        posts_data = response_data.get("included", [])
        post_contents = []

        for post in posts_data:
            if post.get("type") == "post":
                attributes = post.get("attributes", {}).get("content", {})
                subject = attributes.get("subject", "")
                body_html = attributes.get("bodyHtml", "")
                
                if subject:
                    cleaned_body = strip_html_tags(body_html)
                    post_contents.append({"subject": subject, "content": cleaned_body})

        if post_contents:
            df = pd.DataFrame(post_contents)
            return df
        else:
            return pd.DataFrame({"subject": ["No post contents found."], "content": [""]})
    except subprocess.CalledProcessError:
        return pd.DataFrame({"subject": ["Error: Unable to fetch data using cURL."], "content": [""]})

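# Create a new post with the given subject and body on a Padlet board via the API.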
def create_post(subject, post_content, board_id, api_key):

    curl_command = [
        'curl', '-s', '--request', 'POST',
        '--url', f"https://api.padlet.dev/v1/boards/{board_id}/posts",
        '--header', f"X-Api-Key: {api_key}",
        '--header', 'accept: application/vnd.api+json',
        '--header', 'content-type: application/vnd.api+json',
        '--data',
        json.dumps({
            "data": {
                "type": "post",
                "attributes": {
                    "content": {
                        "subject": subject,
                        "body": post_content
                    }
                }
            }
        })
    ]
    
    try:
        response = subprocess.check_output(curl_command, universal_newlines=True)
        response_data = json.loads(response)
        return "Post created successfully."
    except subprocess.CalledProcessError as e:
        return f"Error: Unable to create post - {str(e)}"

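# Flatten the posts DataFrame into a single comma-separated "subject content" string
# that can be appended to the summarization prompt.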
def posts_to_prompt(padlet_posts):
    post_prompt = padlet_posts.apply(lambda row: f"{row['subject']} {row['content']}", axis=1).str.cat(sep=', ')
    return post_prompt

def remove_html_tags(text):
    # Use a regular expression to remove HTML tags
    clean = re.compile('<.*?>')
    return re.sub(clean, '', text)

def summarize_padlet_posts(padlet_posts, openai_api_key, system_prompt):
    # Concatenate padlet post df
    post_prompt = posts_to_prompt(padlet_posts)

    # Build the prompt for the completion model: instructions first, then the post text
    prompt = system_prompt + "\n" + post_prompt

    try:
        # Call the legacy OpenAI Completions API (requires the pre-1.0 openai SDK)
        response = openai.Completion.create(
            engine="text-davinci-003",  # GPT-3.5 text completion model
            prompt=prompt,
            max_tokens=1000,  # Limit response length for concise summaries
            api_key=openai_api_key,
            temperature=0.5  # Adjust temperature as needed
        )

        # Extract and return the summary, removing leading newlines and HTML tags
        summary = response.choices[0].text.lstrip('\n')
        summary = remove_html_tags(summary)
        return summary
    except Exception as e:
        return f"Error: {str(e)}"

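# End-to-end pipeline wired to the Gradio UI: fetch posts from the input board,
# summarize them, and post the summary to the output board.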
def summarize_padlets(input_board_id, output_board_id, padlet_api, openai_api, system_prompt):

    posts_to_summarize = api_call(input_board_id, padlet_api)

    summary = summarize_padlet_posts(posts_to_summarize, openai_api, system_prompt)

    create_post("Summary",summary, output_board_id, padlet_api)

    return(summary)

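# Build the Gradio UI (uses the legacy gr.inputs / gr.outputs API from Gradio 3.x).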
iface = gr.Interface(
    fn=summarize_padlets,
    inputs=[
        gr.inputs.Textbox(label="Input Board ID"),
        gr.inputs.Textbox(label="Output Board ID"),
        gr.inputs.Textbox(label="Padlet API Key", type="password"),
        gr.inputs.Textbox(label="OpenAI API Key", type="password", placeholder="sk.."),
        gr.inputs.Textbox(label="System Prompt", default = "You are an AI assistant tasked with summarizing the main points of the following Padlet posts. Please provide a concise summary of the posts based on their content.")
    ],
    outputs=gr.outputs.Textbox(label="Summary"),
    live=False,  # Set to True to show the result without clicking a button
    title="Padlet Summarization",
    description="Summarize Padlet posts and create a summary post on another board using OpenAI GPT3.5.",
)

# Run the Gradio interface
iface.launch()
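
# Example of invoking the pipeline directly, bypassing the UI
# (the board IDs, keys, and prompt below are placeholders):
#   summary = summarize_padlets(
#       "INPUT_BOARD_ID", "OUTPUT_BOARD_ID",
#       "PADLET_API_KEY", "OPENAI_API_KEY",
#       "Summarize the main points of the following Padlet posts.",
#   )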