File size: 3,419 Bytes
fe23a12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eac13ac
 
fe23a12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os
import openai
import streamlit as st
import trafilatura
from langchain import PromptTemplate, OpenAI, LLMChain

# Style sheet injected into the page; it defines the `title-text` class used
# for the subtitle. The embedded CSS comment (in Spanish) reads: "changes the
# subtitle color to a lighter blue". The string is emitted verbatim at runtime.
custom_css = """
<style>
.title-text {
    color: #0077B6;  /* Cambia el color del subtítulo a un azul más claro */
}
</style>
"""

st.sidebar.write("This program uses the OpenAI API to function. Please enter your API key below to use the program. ")
# The key is a secret: render the field as a password input so it is masked
# on screen instead of being displayed in clear text.
OPENAI_API_KEY = st.sidebar.text_input("API KEY:", type="password")

class WebContentSummarizerApp:
    """Streamlit page that scrapes a URL and condenses its text into one paragraph.

    Instantiating the class renders the title, the styled subtitle, the URL
    input and the "Summarize" button. Calling ``run`` afterwards reacts to a
    button press by scraping the page and summarizing it with the OpenAI LLM.
    """

    def __init__(self):
        """Render the static page widgets and capture the user's input."""
        st.title("WebContent Summarizer")
        # Insert the custom CSS into the application
        st.markdown(custom_css, unsafe_allow_html=True)
        # Use a CSS class for the subtitle
        st.markdown('<p class="title-text">With just a simple URL, simplify your online research and content consumption. '
                    'This AI tool extracts and condenses the essential information from web pages into a concise, easy-to-read paragraph.</p>', unsafe_allow_html=True)
        self.url = st.text_input("Paste or type the URL you want to summarize:")
        self.run_button = st.button("Summarize")

    @staticmethod
    def scrape_website(url):
        """Download ``url`` and return its main text.

        Returns ``None`` when the page could not be downloaded or no text
        could be extracted from it.
        """
        downloaded = trafilatura.fetch_url(url)
        if downloaded is None:
            # Download failed (bad URL, network error, ...): nothing to extract.
            return None
        return trafilatura.extract(downloaded)

    def run(self):
        """Handle a click on "Summarize": validate input, scrape, summarize, display."""
        if not self.run_button:
            return

        url = self.url.strip()
        # An empty sidebar field is tolerated when OPENAI_API_KEY is set in the
        # environment. NOTE(review): this assumes the langchain OpenAI wrapper
        # falls back to that variable - confirm against the pinned version.
        api_key = OPENAI_API_KEY or os.environ.get("OPENAI_API_KEY")
        if not url or not api_key:
            st.warning("Please enter a valid URL and your OpenAI API Key.")
            return

        with st.spinner("Getting information from the page..."):
            self.text = self.scrape_website(url)

        if not self.text:
            # Without this guard a failed scrape (None) crashed the summarizer.
            st.error("Could not extract any readable text from that URL.")
            return

        with st.spinner("Analyzing and organizing..."):
            result = self.llm_text_insight()
        st.write("Summarized content:")
        st.write(result)

    def llm_text_insight(self):
        """Summarize ``self.text`` into a single paragraph.

        The text is cut into fixed-size character chunks, each chunk is
        rewritten by the LLM, and when there is more than one chunk the
        partial results are joined and rewritten once more.

        Returns an empty string when ``self.text`` is empty or ``None``.
        """
        text = self.text
        if not text:
            # Nothing to summarize; avoid building a client or calling the API.
            return ""

        openai.api_key = OPENAI_API_KEY

        # Split the text into smaller fragments
        max_length = 4096
        chunks = [text[i:i + max_length] for i in range(0, len(text), max_length)]

        template = """
        Please take the following text interprete and transform it into an organized paragraph for a study. 
        Ensure that the paragraph is clear and coherent. If necessary, add or rearrange ideas to achieve a logical structure. 
        The text is as follows:

        text: {input}

        Make sure the text is only one paragraph
        """

        # One chain serves both the per-chunk pass and the final merge pass;
        # the configuration is identical for both.
        llm = OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
        prompt_template = PromptTemplate.from_template(template=template)
        chain = LLMChain(llm=llm, prompt=prompt_template)

        results = [chain.predict(input=chunk) for chunk in chunks]

        if len(results) == 1:
            return results[0]

        # Several partial paragraphs: merge them into one final paragraph.
        # NOTE(review): the joined partial results are sent in one request and
        # are not re-chunked; very long pages may need a recursive merge.
        return chain.predict(input=" ".join(results))


if __name__ == "__main__":
    # Build the page widgets, then process a pending "Summarize" click.
    WebContentSummarizerApp().run()