Matt-CB committed on
Commit
fe23a12
·
1 Parent(s): d6c617c
Files changed (2) hide show
  1. app.py +94 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import openai
3
+ import streamlit as st
4
+ import trafilatura
5
+ from langchain import PromptTemplate, OpenAI, LLMChain
6
+
7
# Definition of the style.
# The CSS is injected verbatim into the page, so the literal (including its
# embedded Spanish CSS comment, "changes the subtitle color to a lighter
# blue") is runtime text and is kept unchanged.
custom_css = """
<style>
.title-text {
color: #0077B6; /* Cambia el color del subtítulo a un azul más claro */
}
</style>
"""

st.sidebar.write("This program uses the OpenAI API to function. Please enter your API key below to use the program. ")
# The key is a secret: render the field as a password input so it is not
# shown in clear text on screen, and strip the stray whitespace that often
# comes along when a key is pasted.
OPENAI_API_KEY = st.sidebar.text_input("API KEY:", type="password").strip()
18
+
19
class WebContentSummarizerApp:
    """Streamlit page that summarizes the main text of a web page.

    The page text is extracted with trafilatura, split into fixed-size
    character chunks, and each chunk is rewritten by an OpenAI model through
    a LangChain ``LLMChain``. When more than one chunk was needed, the
    partial results are joined and passed through the same chain once more.
    """

    # Maximum number of characters sent to the model in a single request.
    MAX_CHUNK_CHARS = 4096

    def __init__(self):
        """Render the static widgets and capture the user's input."""
        st.title("WebContent Summarizer")
        # Insert the custom CSS into the application
        st.markdown(custom_css, unsafe_allow_html=True)
        # Use a CSS class for the subtitle
        st.markdown('<p class="title-text">With just a simple URL, simplify your online research and content consumption. '
                    'This AI tool extracts and condenses the essential information from web pages into a concise, easy-to-read paragraph.</p>', unsafe_allow_html=True)
        self.url = st.text_input("Paste or type the URL you want to summarize:")
        self.run_button = st.button("Summarize")
        # Filled in by run(); initialised here so llm_text_insight() never
        # hits a missing attribute.
        self.text = None

    @staticmethod
    def scrape_website(url):
        """Download ``url`` and return its extracted main text.

        Returns ``None`` when the page could not be downloaded or no text
        could be extracted from it.
        """
        downloaded = trafilatura.fetch_url(url)
        # fetch_url yields None for a failed download (per the trafilatura
        # docs); do not hand that on to the extractor.
        if downloaded is None:
            return None
        return trafilatura.extract(downloaded)

    @staticmethod
    def _split_text(text, max_length):
        """Split ``text`` into consecutive pieces of at most ``max_length`` characters."""
        return [text[i:i + max_length] for i in range(0, len(text), max_length)]

    def run(self):
        """Handle a click on the "Summarize" button."""
        if not self.run_button:
            return

        url = self.url.strip() if self.url else ""
        # The warning text mentions both inputs, so check both of them.
        if not (url and OPENAI_API_KEY):
            st.warning("Please enter a valid URL and your OpenAI API Key.")
            return

        with st.spinner("Getting information from the page..."):
            self.text = self.scrape_website(url)

        # Stop early instead of failing inside the summarizer when the page
        # yielded nothing usable.
        if not self.text:
            st.error("Could not extract any readable text from that URL.")
            return

        with st.spinner("Analyzing and organizing..."):
            result = self.llm_text_insight()
        st.write("Summarized content:")
        st.write(result)

    def llm_text_insight(self):
        """Return the model's rewritten version of ``self.text``.

        Returns an empty string when there is no text to process.
        """
        text = getattr(self, "text", None)
        # Nothing was scraped (None) or the page was empty: there is nothing
        # to send to the model.
        if not text:
            return ""

        openai.api_key = OPENAI_API_KEY

        # Split the text into smaller fragments
        chunks = self._split_text(text, self.MAX_CHUNK_CHARS)

        template = """
        Please take the following text interprete and transform it into an organized paragraph for a study.
        Ensure that the paragraph is clear and coherent. If necessary, add or rearrange ideas to achieve a logical structure.
        The text is as follows:

        text: {input}
        """

        # One chain serves both the per-chunk pass and the final pass: the
        # model settings and the prompt are the same for both.
        llm = OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
        prompt_template = PromptTemplate.from_template(template=template)
        chain = LLMChain(llm=llm, prompt=prompt_template)

        results = [chain.predict(input=chunk) for chunk in chunks]

        # Several partial results: merge them into one final paragraph.
        if len(results) > 1:
            return chain.predict(input=" ".join(results))

        return results[0]
90
+
91
+
92
if __name__ == "__main__":
    # Script entry point: build the page, then react to the button state.
    WebContentSummarizerApp().run()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ openai
2
+ streamlit
3
+ trafilatura
4
+ langchain