Spaces:
Runtime error
Runtime error
import gradio as gr | |
import os | |
import openai | |
from newspaper import Article | |
import json | |
import re | |
from transformers import GPT2Tokenizer | |
import nltk | |
from nltk.tokenize import sent_tokenize | |
import requests | |
# Fetch the Punkt sentence-tokenizer data that sent_tokenize needs at runtime.
# NOTE(review): recent NLTK releases appear to look for the "punkt_tab"
# resource instead of the pickled "punkt" one, so both are requested here;
# confirm against the NLTK version pinned for this app.
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource)
# Maximum number of GPT-2 tokens of article text forwarded to the model.
_MAX_PAGE_TOKENS = 1800
# Pages with this many tokens or fewer are not sent to the model.
_MIN_PAGE_TOKENS = 10
# Seconds to wait for the remote page before giving up.
_REQUEST_TIMEOUT = 30

# GPT-2 tokenizer, loaded on first use and then shared between calls.
_tokenizer = None


def _get_tokenizer():
    """Return the shared GPT-2 tokenizer, loading it on first use."""
    global _tokenizer
    if _tokenizer is None:
        _tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    return _tokenizer


def _trim_to_token_budget(text, max_tokens=_MAX_PAGE_TOKENS):
    """Keep leading whole sentences of *text* that fit in *max_tokens*.

    Returns a ``(trimmed_text, token_count)`` pair where ``token_count`` is
    the number of GPT-2 tokens in the sentences that were actually kept.
    """
    tokenizer = _get_tokenizer()
    kept_sentences = []
    used_tokens = 0
    for sentence in sent_tokenize(text):
        sentence_tokens = len(tokenizer.tokenize(sentence))
        # Stop before the sentence that would push the text over budget so
        # the reported count matches the text that is really sent.
        if used_tokens + sentence_tokens > max_tokens:
            break
        kept_sentences.append(sentence)
        used_tokens += sentence_tokens
    return " ".join(kept_sentences).strip(), used_tokens


# define the text summarizer function
def text_prompt(request, page_url, contraseña, temp):
    """Download an article and run an OpenAI completion over its text.

    Args:
        request: Instruction placed in front of the article text in the prompt.
        page_url: URL of the page whose main text is extracted.
        contraseña: OpenAI API key used for the completion request.
        temp: Sampling temperature passed to the completion request.

    Returns:
        A 3-tuple ``(page_text, output, tokens)``:
        * on a download/parse failure: ``("", <error message>, "")``;
        * on success: the full extracted text, the whitespace-normalised
          completion, and the total tokens reported by the API;
        * on an API failure or a too-short page: the full extracted text, a
          message, and the number of article tokens that were kept.
    """
    try:
        headers = {'User-Agent': 'Chrome/83.0.4103.106'}
        # A timeout keeps an unresponsive host from hanging the request forever.
        http_response = requests.get(
            page_url.strip(), headers=headers, timeout=_REQUEST_TIMEOUT
        )
        # Fail on 4xx/5xx instead of summarising the server's error page.
        http_response.raise_for_status()
        page = Article('')
        page.set_html(http_response.text)
        page.parse()
    except Exception as e:
        # UI boundary: any failure here is reported to the user as text.
        return "", f"--- Ha ocurrido un error al procesar la URL: {e} ---", ""

    # Trim the text to at most _MAX_PAGE_TOKENS tokens, on sentence boundaries.
    page_text, num_tokens = _trim_to_token_budget(page.text)

    if num_tokens <= _MIN_PAGE_TOKENS:
        return page.text, "--- Min number of tokens ---", num_tokens

    openai.api_key = contraseña
    # NOTE(review): openai.Completion / "text-davinci-003" look like the
    # pre-1.0 SDK interface and a retired model; confirm the pinned SDK
    # version and that the engine is still served.
    try:
        completion = openai.Completion.create(
            engine="text-davinci-003",
            prompt=request + "\n\n" + page_text,
            max_tokens=2048,
            temperature=temp,
            top_p=0.9,
        )
        response_text = completion.choices[0].text
        total_tokens = completion["usage"]["total_tokens"]
        # Collapse every run of whitespace (including newlines) to one space.
        response_text = re.sub(r'\s+', ' ', response_text)
        return page.text, response_text, total_tokens
    except Exception as e:
        return page.text, f"--- Ha ocurrido un error al procesar la solicitud: {e} ---", num_tokens
# Gradio interface: three text inputs plus a temperature slider go into
# text_prompt, whose three return values fill the three output boxes.
prompt_box = gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:", type="text")
url_box = gr.Textbox(lines=1, placeholder="Enter the URL here...", label="URL to parse:", type="text")
api_key_box = gr.Textbox(lines=1, placeholder="Enter your API-key here...", label="API-Key:", type="password")
temperature_slider = gr.Slider(0.0, 1.0, value=0.3, label="Temperature:")

result_boxes = [
    gr.Textbox(label="Input:"),
    gr.Textbox(label="Output:"),
    gr.Textbox(label="Total Tokens:"),
]

# Each example row is [prompt, url, api key (left blank), temperature].
example_rows = [
    [
        "Summarize the following text as a list:",
        "https://blog.google/outreach-initiatives/google-org/our-commitment-on-using-ai-to-accelerate-progress-on-global-development-goals/",
        "",
        0.3,
    ],
    [
        "Generate a summary of the following text. Give me an overview of main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:",
        "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html",
        "",
        0.7,
    ],
    [
        "Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):",
        "https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/",
        "",
        0.3,
    ],
]

iface = gr.Interface(
    fn=text_prompt,
    inputs=[prompt_box, url_box, api_key_box, temperature_slider],
    outputs=result_boxes,
    examples=example_rows,
    title="ChatGPT info extraction with newspaper3k",
    description="This tool allows querying the text retrieved from the URL using OpenAI's [text-davinci-003] engine.\nThe URL text can be referenced in the prompt as \"following text\".\nA GPT2 tokenizer is included to ensure that the 2000 token limit for OpenAI queries is not exceeded. Provide a prompt with your request, the url for text retrieval, your api-key and temperature to process the text.",
)
# Start the app and capture start-up errors when it is embedded as a component.
error_message = ""
try:
    iface.launch()
except Exception as e:
    # Top-level boundary: the failure is recorded and logged rather than
    # re-raised, keeping the original best-effort behaviour. Writing the
    # message into an output component cannot help here, because a UI that
    # failed to launch has nothing to display it in.
    error_message = "An error occurred: " + str(e)
    print(error_message)