Spaces:

dromerosm
/

gpt-info-extraction

Runtime error

App Files Files Community

gpt-info-extraction / app.py

dromerosm

Update app.py

608e720 almost 2 years ago

raw

history blame

4.17 kB

	import gradio as gr
	import os
	import openai
	from newspaper import Article
	import json
	import re
	from transformers import GPT2Tokenizer
	import nltk
	from nltk.tokenize import sent_tokenize
	import requests

	nltk.download('punkt')


	# define the text summarizer function
	_MAX_PROMPT_TOKENS = 1800  # GPT-2 token budget for the article text sent to the model
	_MIN_PROMPT_TOKENS = 10    # at or below this, the article is treated as empty
	_REQUEST_TIMEOUT_S = 30    # seconds to wait for the page before giving up
	_TOKENIZER = None          # lazily created, shared across calls


	def _get_tokenizer():
	    """Return the shared GPT-2 tokenizer, loading it only on first use."""
	    global _TOKENIZER
	    if _TOKENIZER is None:
	        _TOKENIZER = GPT2Tokenizer.from_pretrained("gpt2")
	    return _TOKENIZER


	def text_prompt(request, page_url, contraseña, temp):
	    """Fetch an article from a URL and run an OpenAI completion over it.

	    The page is downloaded, parsed as an article, and its text is cut at
	    sentence boundaries so that it fits in ``_MAX_PROMPT_TOKENS`` GPT-2
	    tokens. The truncated text is appended to ``request`` and sent to the
	    ``text-davinci-003`` completion engine.

	    Args:
	        request: Instruction text placed before the article in the prompt.
	        page_url: URL of the page to download and parse.
	        contraseña: OpenAI API key used for the completion call.
	        temp: Sampling temperature forwarded to the completion call.

	    Returns:
	        A 3-tuple ``(article_text, output, tokens)``:
	        * on a download/parse failure: ``("", <error message>, "")``;
	        * when the usable text has ``_MIN_PROMPT_TOKENS`` tokens or fewer:
	          ``(full article text, "--- Min number of tokens ---", token count)``;
	        * on an API failure: ``(full article text, <error message>,
	          token count of the text that was sent)``;
	        * on success: ``(full article text, whitespace-normalised completion,
	          total tokens reported by the API)``.
	    """
	    try:
	        headers = {'User-Agent': 'Chrome/83.0.4103.106'}
	        # A timeout keeps a stalled server from hanging the request forever.
	        response = requests.get(page_url, headers=headers, timeout=_REQUEST_TIMEOUT_S)
	        # Surface HTTP errors instead of parsing an error page as the article.
	        response.raise_for_status()

	        page = Article('')
	        page.set_html(response.text)
	        page.parse()
	    except Exception as e:
	        return "", f"--- Ha ocurrido un error al procesar la URL: {e} ---", ""

	    tokenizer = _get_tokenizer()

	    # Keep whole sentences while they fit in the budget. The count only
	    # includes sentences that are actually kept, so it matches the text sent.
	    kept_sentences = []
	    num_tokens = 0
	    for sentence in sent_tokenize(page.text):
	        sentence_tokens = len(tokenizer.tokenize(sentence))
	        if num_tokens + sentence_tokens > _MAX_PROMPT_TOKENS:
	            break
	        kept_sentences.append(sentence)
	        num_tokens += sentence_tokens

	    page_text = " ".join(kept_sentences).strip()

	    if num_tokens <= _MIN_PROMPT_TOKENS:
	        return page.text, "--- Min number of tokens ---", num_tokens

	    # NOTE(review): this sets the key globally on the openai module, so it is
	    # shared by every caller in the process.
	    openai.api_key = contraseña
	    try:
	        response = openai.Completion.create(
	            engine="text-davinci-003",
	            prompt=request + "\n\n" + page_text,
	            max_tokens=2048,
	            temperature=temp,
	            top_p=0.9,
	        )
	        response_text = response.choices[0].text
	        total_tokens = response["usage"]["total_tokens"]

	        # Collapse every run of whitespace into a single space.
	        response_text = re.sub(r'\s+', ' ', response_text)
	        return page.text, response_text, total_tokens
	    except Exception as e:
	        return page.text, f"--- Ha ocurrido un error al procesar la solicitud: {e} ---", num_tokens

	# Build the Gradio interface: three text inputs plus a temperature slider
	# feed text_prompt, whose three return values fill the three output boxes.
	input_components = [
	    gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:", type="text"),
	    gr.Textbox(lines=1, placeholder="Enter the URL here...", label="URL to parse:", type="text"),
	    gr.Textbox(lines=1, placeholder="Enter your API-key here...", label="API-Key:", type="password"),
	    gr.Slider(0.0, 1.0, value=0.3, label="Temperature:"),
	]

	output_components = [
	    gr.Textbox(label="Input:"),
	    gr.Textbox(label="Output:"),
	    gr.Textbox(label="Total Tokens:"),
	]

	# Each example row is [prompt, url, api key (left blank), temperature].
	example_rows = [
	    [
	        "Summarize the following text as a list:",
	        "https://blog.google/outreach-initiatives/google-org/our-commitment-on-using-ai-to-accelerate-progress-on-global-development-goals/",
	        "",
	        0.3,
	    ],
	    [
	        "Generate a summary of the following text. Give me an overview of main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:",
	        "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html",
	        "",
	        0.7,
	    ],
	    [
	        "Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):",
	        "https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/",
	        "",
	        0.3,
	    ],
	]

	iface = gr.Interface(
	    fn=text_prompt,
	    inputs=input_components,
	    outputs=output_components,
	    examples=example_rows,
	    title="ChatGPT info extraction with newspaper3k",
	    description="This tool allows querying the text retrieved from the URL using OpenAI's [text-davinci-003] engine.\nThe URL text can be referenced in the prompt as \"following text\".\nA GPT2 tokenizer is included to ensure that the 2000 token limit for OpenAI queries is not exceeded. Provide a prompt with your request, the url for text retrieval, your api-key and temperature to process the text.",
	)

	# Error capture for when the app is integrated as a component.

	error_message = ""

	try:
	    iface.launch()
	except Exception as launch_error:
	    error_message = f"An error occurred: {launch_error}"
	    # Store the failure text on the second output component ("Output:").
	    # NOTE(review): this relies on the Interface exposing an `outputs` list;
	    # confirm that attribute exists in the installed gradio version.
	    iface.outputs[1].value = error_message