Spaces:

mishrasahil934
/

Team_skulk

Sleeping

mishrasahil934 committed on Jan 13

Commit

baec4bc

verified ·

1 Parent(s): 648594a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -27,26 +27,20 @@ def extract_text_from_pdf(pdf_path):
 # Web Scraping Function
 def scrape_article(url):
-    response = requests.get(url, timeout=10)
-    response.raise_for_status()  # Raise an error if the request fails
-    soup = BeautifulSoup(response.content, 'html.parser')
-    # Extract the main content (common tags for articles)
-    paragraphs = soup.find_all('p')
-    article_text = "\n".join([para.get_text() for para in paragraphs])
-    if not article_text.strip():
-        raise ValueError("Unable to extract content from the page.")
-    return article_text
-# Function to split long text into smaller chunks
-def split_text(input_text, max_tokens=512):
-    words = input_text.split()
-    return [" ".join(words[i:i + max_tokens]) for i in range(0, len(words), max_tokens)]
 # LLM pipeline for summarization
 def llm_pipeline(input_text):
-    chunks = split_text(input_text, max_tokens=512)  # Split text into chunks
     pipe_sum = pipeline(
         'summarization',
         model=base_model,
@@ -54,11 +48,8 @@ def llm_pipeline(input_text):
         max_length=500,
         min_length=50,
     )
-    summaries = []
-    for chunk in chunks:
-        result = pipe_sum(chunk)
-        summaries.append(result[0]['summary_text'])
-    return " ".join(summaries)
 @st.cache_data
 # Function to display the PDF

 # Web Scraping Function
 def scrape_article(url):
+     response = requests.get(url, timeout=10)
+     response.raise_for_status()  # Raise an error if the request fails
+     soup = BeautifulSoup(response.content, 'html.parser')
+        # Extract the main content (common tags for articles)
+     paragraphs = soup.find_all('p')
+     article_text = "\n".join([para.get_text() for para in paragraphs])
+     if not article_text.strip():
+         raise ValueError("Unable to extract content from the page.")
+     return article_text
 # LLM pipeline for summarization
 def llm_pipeline(input_text):
     pipe_sum = pipeline(
         'summarization',
         model=base_model,
         max_length=500,
         min_length=50,
     )
+    result = pipe_sum(input_text)
+    return result[0]['summary_text']
 @st.cache_data
 # Function to display the PDF