dromerosm committed on
Commit
3578b4b
·
1 Parent(s): 8b1d3b8

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import openai
4
+ from newspaper import Article
5
+ import json
6
+ import re
7
+ from transformers import GPT2Tokenizer
8
+ import requests
9
+
10
+
11
+ # define the text summarizer function
12
# Upper bound on the GPT-2 tokens of article text forwarded to the model.
_MAX_PAGE_TOKENS = 1800
# At or below this many tokens the extraction is treated as too short to query.
_MIN_PAGE_TOKENS = 10
# Seconds to wait for the remote site; without a timeout requests can block forever.
_HTTP_TIMEOUT_SECONDS = 30
# Lazily created GPT-2 tokenizer shared by all calls (see _get_tokenizer).
_TOKENIZER = None


def _get_tokenizer():
    """Return the shared GPT-2 tokenizer, loading it on first use only."""
    global _TOKENIZER
    if _TOKENIZER is None:
        _TOKENIZER = GPT2Tokenizer.from_pretrained("gpt2")
    return _TOKENIZER


def _trim_to_token_limit(text, tokenizer, max_tokens=_MAX_PAGE_TOKENS):
    """Keep whole sentences of *text* until *max_tokens* would be exceeded.

    Returns a ``(trimmed_text, token_count)`` pair where ``token_count`` counts
    only the tokens of the sentences that were actually kept.
    """
    kept = []
    token_count = 0
    for fragment in text.split('.'):
        sentence = fragment.strip()
        if not sentence:
            # Splitting on '.' leaves empty fragments (e.g. after the final
            # period); skipping them avoids stray ". ." in the output.
            continue
        sentence_tokens = len(tokenizer.tokenize(sentence))
        if token_count + sentence_tokens > max_tokens:
            break
        kept.append(sentence)
        token_count += sentence_tokens

    trimmed = ". ".join(kept)
    if trimmed:
        trimmed += "."
    return trimmed, token_count


def text_prompt(request, system_role, page_url, contraseña, temp):
    """Download an article, trim it, and query gpt-3.5-turbo about it.

    Args:
        request: The user prompt placed before the article text.
        system_role: Content of the "system" chat message.
        page_url: URL of the article to download and parse.
        contraseña: OpenAI API key; the model is only called when it starts
            with "sk-".
        temp: Sampling temperature passed to the chat completion.

    Returns:
        A 3-tuple of strings ``(article_text, output, token_info)``:
        the full parsed article text, the model answer (or a ``--- ... ---``
        status/error message), and either the total tokens with an estimated
        cost or the number of article tokens kept.
    """
    if not page_url or not page_url.strip():
        return "", "--- An error occurred while processing the URL: no URL was provided ---", ""

    try:
        headers = {'User-Agent': 'Chrome/83.0.4103.106'}
        response = requests.get(
            page_url.strip(), headers=headers, timeout=_HTTP_TIMEOUT_SECONDS
        )
        # Report HTTP errors as errors instead of parsing the error page.
        response.raise_for_status()

        page = Article('')
        page.set_html(response.text)
        page.parse()
    except Exception as e:
        # UI boundary: any fetch/parse failure becomes a message for the user.
        return "", f"--- An error occurred while processing the URL: {e} ---", ""

    page_text, num_tokens = _trim_to_token_limit(page.text, _get_tokenizer())

    api_key = contraseña or ""
    if num_tokens <= _MIN_PAGE_TOKENS or not api_key.startswith("sk-"):
        return page.text, "--- Check API-Key or Min number of tokens:", str(num_tokens)

    try:
        # The key is passed per request rather than through the process-wide
        # openai.api_key, so concurrent users never share each other's key.
        response = openai.ChatCompletion.create(
            api_key=api_key,
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_role},
                {"role": "user", "content": request + "\n\n" + 'Text:\n\n"' + page_text + '\n"'},
            ],
            max_tokens=2048,
            temperature=temp,
            top_p=0.9,
        )
        response_text = response['choices'][0]['message']['content']
        total_tokens = response["usage"]["total_tokens"]

        # Collapse all whitespace runs and prepend the article title as a heading.
        response_text = re.sub(r'\s+', ' ', response_text)
        response_text = "#### " + page.title + "\n\n" + response_text.strip()
        # Four decimals: at $0.002 per 1K tokens a single call costs well under
        # one cent, which two decimals would round to $0.00.
        total_tokens_str = str(total_tokens) + " (${:.4f} USD)".format(total_tokens / 1000 * 0.002)

        return page.text, response_text, total_tokens_str
    except Exception as e:
        return page.text, f"--- An error occurred while processing the request: {e} ---", str(num_tokens)
72
+
73
+ # define the gradio interface
74
+ iface = gr.Interface(
75
+ fn=text_prompt,
76
+ inputs=[gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:", type="text"),
77
+ gr.Textbox(lines=1, placeholder="Enter your system-role description here...", label="System:", type="text"),
78
+ gr.Textbox(lines=1, placeholder="Enter the Article's URL here...", label="URL to parse:", type="text"),
79
+ gr.Textbox(lines=1, placeholder="Enter your API-key here...", label="API-Key:", type="password"),
80
+ gr.Slider(0.0,1.0, value=0.3, label="Temperature:")
81
+ ],
82
+ outputs=[gr.Textbox(label="Input:"), gr.Markdown(label="Output:"), gr.Markdown(label="Total Tokens:")],
83
+ examples=[["Summarize the following text as a list:", "Act as a Business Consultant", "https://blog.google/outreach-initiatives/google-org/our-commitment-on-using-ai-to-accelerate-progress-on-global-development-goals/","",0.3],
84
+ ["Generate a summary of the following text. Give me an overview of main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:", "Act as a Business Consultant", "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html","",0.7],
85
+ ["Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):","Act as a Business Consultant", "https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/","",0.3]
86
+ ],
87
+ title="ChatGPT info extraction from URL",
88
+ description="This tool allows querying the text retrieved from the URL with newspaper3k lib and using OpenAI's [gpt-3.5-turbo] engine.\nThe URL text can be referenced in the prompt as \"following text\".\nA GPT2 tokenizer is included to ensure that the 1.800 token limit for OpenAI queries is not exceeded. Provide a prompt with your request, the url for text retrieval, your api-key and temperature to process the text."
89
+ )
90
+
91
+ # error capturing in integration as a component
92
+
93
# Start the app. A launch failure is reported on stderr instead of being
# written into an output component: the interface is not being served at that
# point, so a component value would never be shown to anyone.
error_message = ""

try:
    iface.queue(concurrency_count=20)
    iface.launch()
except Exception as e:
    import sys

    error_message = "An error occurred: " + str(e)
    print(error_message, file=sys.stderr, flush=True)