Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,32 +1,49 @@
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
import openai
|
4 |
-
import
|
5 |
import json
|
6 |
import re
|
7 |
from transformers import GPT2Tokenizer
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
|
10 |
# define the text summarizer function
|
11 |
def text_prompt(request, page_url, contraseña, temp):
|
12 |
try:
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
|
15 |
page.parse()
|
|
|
16 |
except Exception as e:
|
17 |
return "", f"--- Ha ocurrido un error al procesar la URL: {e} ---", ""
|
18 |
-
|
19 |
-
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
20 |
-
|
21 |
-
tokens = tokenizer.tokenize(page.text)
|
22 |
|
23 |
-
|
24 |
-
|
25 |
|
26 |
-
|
27 |
-
|
28 |
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
if num_tokens > 10:
|
32 |
openai.api_key = contraseña
|
@@ -41,9 +58,11 @@ def text_prompt(request, page_url, contraseña, temp):
|
|
41 |
)
|
42 |
# get the response text
|
43 |
response_text = response.choices[0].text
|
|
|
|
|
44 |
# clean the response text
|
45 |
response_text = re.sub(r'\s+', ' ', response_text)
|
46 |
-
return page.text, response_text,
|
47 |
except Exception as e:
|
48 |
return page.text, f"--- Ha ocurrido un error al procesar la solicitud: {e} ---", num_tokens
|
49 |
return page.text, "--- Min number of tokens ---", num_tokens
|
@@ -56,7 +75,7 @@ iface = gr.Interface(
|
|
56 |
gr.Textbox(lines=1, placeholder="Enter your API-key here...", label="API-Key:", type="password"),
|
57 |
gr.Slider(0.0,1.0, value=0.3, label="Temperature:")
|
58 |
],
|
59 |
-
outputs=[gr.Textbox(label="Input:"), gr.Textbox(label="Output:"), gr.Textbox(label="Tokens:")],
|
60 |
examples=[["Summarize the following text as a list:","https://blog.google/outreach-initiatives/google-org/our-commitment-on-using-ai-to-accelerate-progress-on-global-development-goals/","",0.3],
|
61 |
["Generate a summary of the following text. Give me an overview of main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:", "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html","",0.7],
|
62 |
["Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):","https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/","",0.3]
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
import openai
|
4 |
+
from newspaper import Article
|
5 |
import json
|
6 |
import re
|
7 |
from transformers import GPT2Tokenizer
|
8 |
+
import nltk
|
9 |
+
from nltk.tokenize import sent_tokenize
|
10 |
+
import requests
|
11 |
+
|
12 |
+
nltk.download('punkt')
|
13 |
|
14 |
|
15 |
# define the text summarizer function
|
16 |
def text_prompt(request, page_url, contraseña, temp):
|
17 |
try:
|
18 |
+
headers = {'User-Agent': 'Chrome/83.0.4103.106'}
|
19 |
+
response = requests.get(page_url, headers=headers)
|
20 |
+
html = response.text
|
21 |
+
|
22 |
+
page = Article('')
|
23 |
+
page.set_html(html)
|
24 |
page.parse()
|
25 |
+
|
26 |
except Exception as e:
|
27 |
return "", f"--- Ha ocurrido un error al procesar la URL: {e} ---", ""
|
|
|
|
|
|
|
|
|
28 |
|
29 |
+
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
30 |
+
sentences = sent_tokenize(page.text)
|
31 |
|
32 |
+
tokens = []
|
33 |
+
page_text = ""
|
34 |
|
35 |
+
for sentence in sentences:
|
36 |
+
tokens.extend(tokenizer.tokenize(sentence))
|
37 |
+
|
38 |
+
# Recortar el texto a un máximo de 1800 tokens
|
39 |
+
if len(tokens) > 1800:
|
40 |
+
break
|
41 |
+
page_text += sentence + " "
|
42 |
+
|
43 |
+
# Eliminar el ultimo espacio
|
44 |
+
page_text = page_text.strip()
|
45 |
+
|
46 |
+
num_tokens = len(tokens)
|
47 |
|
48 |
if num_tokens > 10:
|
49 |
openai.api_key = contraseña
|
|
|
58 |
)
|
59 |
# get the response text
|
60 |
response_text = response.choices[0].text
|
61 |
+
total_tokens = response["usage"]["total_tokens"]
|
62 |
+
|
63 |
# clean the response text
|
64 |
response_text = re.sub(r'\s+', ' ', response_text)
|
65 |
+
return page.text, response_text, total_tokens
|
66 |
except Exception as e:
|
67 |
return page.text, f"--- Ha ocurrido un error al procesar la solicitud: {e} ---", num_tokens
|
68 |
return page.text, "--- Min number of tokens ---", num_tokens
|
|
|
75 |
gr.Textbox(lines=1, placeholder="Enter your API-key here...", label="API-Key:", type="password"),
|
76 |
gr.Slider(0.0,1.0, value=0.3, label="Temperature:")
|
77 |
],
|
78 |
+
outputs=[gr.Textbox(label="Input:"), gr.Textbox(label="Output:"), gr.Textbox(label="Total Tokens:")],
|
79 |
examples=[["Summarize the following text as a list:","https://blog.google/outreach-initiatives/google-org/our-commitment-on-using-ai-to-accelerate-progress-on-global-development-goals/","",0.3],
|
80 |
["Generate a summary of the following text. Give me an overview of main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:", "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html","",0.7],
|
81 |
["Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):","https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/","",0.3]
|