dromerosm committed on
Commit
0a06e99
·
1 Parent(s): 1772966
Files changed (1) hide show
  1. app.py +81 -65
app.py CHANGED
@@ -9,83 +9,99 @@ import requests
9
 
10
 
11
  # define the text summarizer function
12
# Maximum number of GPT-2 tokens of article text forwarded to the model.
_MAX_PROMPT_TOKENS = 3100
# Seconds to wait for the article host before giving up.
_FETCH_TIMEOUT = 30


def text_prompt(request, system_role, page_url, contraseña, temp):
    """Download an article, trim it to the token budget and query gpt-3.5-turbo.

    Args:
        request: User prompt; the trimmed article text is appended after it.
        system_role: Content of the system message.
        page_url: URL of the article to download and parse.
        contraseña: OpenAI API key; only keys starting with "sk-" are accepted.
        temp: Sampling temperature forwarded to the API.

    Returns:
        A 3-tuple of strings ``(article_text, answer_markdown, token_info)``.
        Failures are reported through the second element instead of being
        raised, so the UI always receives three displayable values.
    """
    try:
        headers = {'User-Agent': 'Chrome/83.0.4103.106'}
        # Without a timeout an unresponsive host would block the worker forever.
        response = requests.get(page_url, headers=headers, timeout=_FETCH_TIMEOUT)

        page = Article('')
        page.set_html(response.text)
        page.parse()
    except Exception as e:
        return "", f"--- An error occurred while processing the URL: {e} ---", ""

    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

    # Keep whole '.'-delimited fragments while they fit in the token budget.
    # Only fragments that are actually kept are counted, so num_tokens
    # describes the text that is sent rather than text that was discarded.
    kept = []
    num_tokens = 0
    truncated = False
    for fragment in page.text.split('.'):
        fragment_tokens = len(tokenizer.tokenize(fragment))
        if num_tokens + fragment_tokens > _MAX_PROMPT_TOKENS:
            truncated = True
            break
        num_tokens += fragment_tokens
        kept.append(fragment)

    # Joining with '.' restores the source text exactly (split/join round
    # trip) instead of doubling spaces and appending a dangling " .".
    page_text = ".".join(kept)
    if truncated and kept:
        page_text += "."
    page_text = page_text.strip()

    if num_tokens <= 10 or not contraseña.startswith("sk-"):
        return page.text, "--- Check API-Key or Min number of tokens:", str(num_tokens)

    openai.api_key = contraseña
    # get the response from openai API
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_role},
                {"role": "user", "content": request + "\n\n" + 'Text:\n\n"' + page_text + '\n"'},
            ],
            max_tokens=512,
            temperature=temp,
            top_p=1.0,
        )
        # get the response text
        response_text = response['choices'][0]['message']['content']
        total_tokens = response["usage"]["total_tokens"]

        # clean the response text: collapse whitespace runs and add a title link
        response_text = re.sub(r'\s+', ' ', response_text)
        response_text = f"#### [{page.title}]({page_url})\n\n{response_text.strip()}"
        total_tokens_str = str(total_tokens) + " (${:.2f} USD)".format(total_tokens / 1000 * 0.002)

        return page.text, response_text, total_tokens_str
    except Exception as e:
        # str() keeps the third element a string like on every other path.
        return page.text, f"--- An error occurred while processing the request: {e} ---", str(num_tokens)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  # define the gradio interface
74
  iface = gr.Interface(
75
  fn=text_prompt,
76
  inputs=[gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:", type="text"),
77
  gr.Textbox(lines=1, placeholder="Enter your system-role description here...", label="System Role:", type="text"),
78
- gr.Textbox(lines=1, placeholder="Enter the Article's URL here...", label="Article's URL to parse:", type="text"),
79
  gr.Textbox(lines=1, placeholder="Enter your API-key here...", label="API-Key:", type="password"),
80
- gr.Slider(0.0,1.0, value=0.3, label="Temperature:")
 
 
81
  ],
82
  outputs=[gr.Textbox(label="Input:"), gr.Markdown(label="Output:"), gr.Markdown(label="Total Tokens:")],
83
- examples=[["Resumen el siguiente texto en un máximo de 100 palabras.", "Actuar como consultor de negocio. La respuesta deberá aparentar ser novedosa. Formatea la respuesta en Markdown. El texto deberá ser traducido siempre al español. Deberás añadir al final una lista de topics del texto en forma de lista separada por comas.", "https://blog.google/outreach-initiatives/google-org/our-commitment-on-using-ai-to-accelerate-progress-on-global-development-goals/","",0.3],
84
- ["Generate a summary of the following text. Give me an overview of the main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:", "Act as a Business Consultant", "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html","",0.7],
85
- ["Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):","Act as a Business Consultant", "https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/","",0.3]
86
- ],
87
  title="ChatGPT info extraction from URL",
88
- description="This tool allows querying the text retrieved from the URL with newspaper3k lib and using OpenAI's [gpt-3.5-turbo] engine.\nThe URL text can be referenced in the prompt as \"following text\".\nA GPT2 tokenizer is included to ensure that the 1.800 token limit for OpenAI queries is not exceeded. Provide a prompt with your request, the description for the system role, the url for text retrieval, your api-key and temperature to process the text."
89
  )
90
 
91
  # error capturing in integration as a component
@@ -94,7 +110,7 @@ error_message = ""
94
 
95
  try:
96
  iface.queue(concurrency_count=20)
97
- iface.launch()
98
  except Exception as e:
99
  error_message = "An error occurred: " + str(e)
100
  iface.outputs[1].value = error_message
 
9
 
10
 
11
  # define the text summarizer function
12
# Maximum number of GPT-2 tokens of article text forwarded per request.
_MAX_PROMPT_TOKENS = 3100
# The URL textbox is labelled "one per line up to 10"; enforce that limit.
_MAX_URLS = 10
# Seconds to wait for an article host before giving up.
_FETCH_TIMEOUT = 30


def _split_urls(page_urls_str):
    """Return the non-blank lines of *page_urls_str* with whitespace stripped."""
    stripped = (line.strip() for line in (page_urls_str or "").splitlines())
    return [url for url in stripped if url]


def _trim_to_token_budget(text, tokenizer, max_tokens):
    """Cut *text* at a '.' boundary so it fits in *max_tokens* tokens.

    Returns ``(trimmed_text, token_count)`` where the count covers only the
    fragments that were kept.
    """
    kept = []
    num_tokens = 0
    truncated = False
    for fragment in text.split('.'):
        fragment_tokens = len(tokenizer.tokenize(fragment))
        if num_tokens + fragment_tokens > max_tokens:
            truncated = True
            break
        num_tokens += fragment_tokens
        kept.append(fragment)

    # Joining with '.' restores the source text exactly (split/join round
    # trip) instead of doubling spaces and appending a dangling " .".
    trimmed = ".".join(kept)
    if truncated and kept:
        trimmed += "."
    return trimmed.strip(), num_tokens


def text_prompt(request, system_role, page_urls_str, contrasenya, api_base, deployment_id, temp):
    """Run the prompt against every article URL through Azure OpenAI.

    Args:
        request: User prompt; each trimmed article text is appended after it.
        system_role: Content of the system message.
        page_urls_str: Article URLs, one per line (blank lines are ignored).
        contrasenya: Azure OpenAI API key (must be longer than 6 characters).
        api_base: Azure OpenAI endpoint URL.
        deployment_id: Azure deployment name (must be longer than 6 characters).
        temp: Sampling temperature forwarded to the API.

    Returns:
        A 3-tuple of strings ``(article_texts, answers_markdown, token_info)``.
        Problems are reported through the second element instead of being
        raised; processing stops at the first URL that fails.
    """
    # Validate the cheap inputs before loading the tokenizer or touching the
    # network.
    page_urls = _split_urls(page_urls_str)
    if not page_urls:
        return "", "urls not found", ""
    if len(page_urls) > _MAX_URLS:
        return "", f"--- Too many URLs: {len(page_urls)} given, at most {_MAX_URLS} are allowed ---", ""

    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

    # The credential check does not depend on the URL, so evaluate it and
    # configure the client once instead of on every iteration.
    credentials_ok = len(contrasenya) > 6 and len(deployment_id) > 6
    if credentials_ok:
        openai.api_type = "azure"
        openai.api_version = "2023-03-15-preview"
        openai.api_base = api_base
        openai.api_key = contrasenya

    page_texts = []
    response_texts = []
    total_tokens = 0
    for page_url in page_urls:
        try:
            headers = {'User-Agent': 'Chrome/83.0.4103.106'}
            # Without a timeout an unresponsive host would block the worker forever.
            response = requests.get(page_url, headers=headers, timeout=_FETCH_TIMEOUT)

            page = Article('')
            page.set_html(response.text)
            page.parse()
        except Exception as e:
            return "", f"--- An error occurred while processing the URL: {e} ---", ""

        page_text, num_tokens = _trim_to_token_budget(page.text, tokenizer, _MAX_PROMPT_TOKENS)

        if num_tokens <= 10 or not credentials_ok:
            return page.text, "--- Check API-Key or deployment_id or Min number of tokens:", str(num_tokens)

        # get the response from openai API
        try:
            # NOTE(review): max_tokens=4000 on top of a prompt of up to 3100
            # tokens needs a deployment with a large enough context window;
            # confirm against the deployed model.
            response = openai.ChatCompletion.create(
                deployment_id=deployment_id,
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": request + "\n\n" + 'Text:\n\n""""' + page_text + '\n""""'},
                ],
                max_tokens=4000,
                temperature=temp,
                top_p=1.0,
            )
            # get the response text
            response_text = response['choices'][0]['message']['content']
            total_tokens += response["usage"]["total_tokens"]

            # clean the response text: collapse whitespace runs and add a title link
            response_text = re.sub(r'\s+', ' ', response_text)
            response_text = f"#### [{page.title}]({page_url})\n\n{response_text.strip()}"

            page_texts.append(page.text)
            response_texts.append(response_text)
        except Exception as e:
            # str() keeps the third element a string like on every other path.
            return page.text, f"--- An error occurred while processing the request: {e} ---", str(num_tokens)

    page_texts_str = "\n\n============\n".join(page_texts)
    response_texts_str = "\n\n".join(response_texts)
    total_tokens_str = str(total_tokens) + " (${:.2f} USD)".format(total_tokens / 1000 * 0.03)
    return page_texts_str, response_texts_str, total_tokens_str
89
+
90
 
91
  # define the gradio interface
92
  iface = gr.Interface(
93
  fn=text_prompt,
94
  inputs=[gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:", type="text"),
95
  gr.Textbox(lines=1, placeholder="Enter your system-role description here...", label="System Role:", type="text"),
96
+ gr.Textbox(lines=10, placeholder="Enter the Articles' URLs here...", label="Articles' URLs to parse (one per line up to 10):", type="text"),
97
  gr.Textbox(lines=1, placeholder="Enter your API-key here...", label="API-Key:", type="password"),
98
+ gr.Textbox(lines=1, placeholder="Enter your Azure OpenAI endpoint here...", label="Endpoint:", type="text"),
99
+ gr.Textbox(lines=1, placeholder="Enter your model name here...", label="Deployment ID:", type="text"),
100
+ gr.Slider(0.0, 1.0, value=0.0, label="Temperature:")
101
  ],
102
  outputs=[gr.Textbox(label="Input:"), gr.Markdown(label="Output:"), gr.Markdown(label="Total Tokens:")],
 
 
 
 
103
  title="ChatGPT info extraction from URL",
104
+ description="This tool allows querying the text retrieved from the URL with newspaper3k lib and using MSFT Azure OpenAI's [gpt-3.5-turbo] engine.\nThe URL text can be referenced in the prompt as \"following text\".\nA GPT2 tokenizer is included to ensure that the 1.800 token limit for OpenAI queries is not exceeded. Provide a prompt with your request, the description for the system role, the url for text retrieval, your api-key and temperature to process the text."
105
  )
106
 
107
  # error capturing in integration as a component
 
110
 
111
  try:
112
  iface.queue(concurrency_count=20)
113
+ iface.launch(debug=True)
114
  except Exception as e:
115
  error_message = "An error occurred: " + str(e)
116
  iface.outputs[1].value = error_message