Spaces: Runtime error
Update 1app.py
1app.py CHANGED
@@ -56,7 +56,7 @@ class SmartWebScraper:
         self.text_generator = DialoGPTModel()
         self.lemmatizer = WordNetLemmatizer()
         self.stop_words = set(stopwords.words('english'))
-
+
     def process_query(self, query: str) -> Tuple[str, List[str]]:
         tokens = word_tokenize(query.lower())
         tokens = [self.lemmatizer.lemmatize(word) for word in tokens if word not in self.stop_words]
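
For context, process_query (unchanged here) only lowercases, tokenizes, lemmatizes and strips English stopwords before any matching happens. A minimal standalone sketch of that preprocessing, assuming the NLTK punkt, wordnet and stopwords resources are already downloaded; the sample query is made up:

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Illustrative query, not taken from the app
query = "How many images are on the page?"

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

tokens = word_tokenize(query.lower())
tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
print(tokens)  # roughly ['many', 'image', 'page', '?'] with NLTK's default English stopword list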
@@ -111,90 +111,79 @@ class SmartWebScraper:
             return {"error": str(e)}

     def format_response(self, data: dict, query: str) -> str:
-            return "I couldn't find any relevant information based on your query."
-
-        elif "show" in query_lower or "list" in query_lower:
-            if "image" in query_lower and "images" in data:
-                return f"Images found:\n" + "\n".join([f"- {img.get('alt', 'No description')} ({img.get('src', 'No source')})" for img in data['images'][:5]])
-            elif "link" in query_lower and "links" in data:
-                return f"Links found:\n" + "\n".join([f"- {link.get('text', 'No text')} ({link.get('href', 'No URL')})" for link in data['links'][:5]])
-
-        # If no specific pattern matches, create a general summary
-        summary_text = "Here's what I found on the webpage:\n" + "\n".join(summary)
-
-        if len(summary) == 0:
-            return "I couldn't find any relevant information based on your query."
-
-        return summary_text
+        if "error" in data:
+            return f"I encountered an error while processing your request: {data['error']}"
+
+        # Create a structured summary of the data
+        summary = []
+        query_lower = query.lower()
+
+        # First, collect summary information
+        if "images" in data:
+            summary.append(f"Found {len(data['images'])} images")
+        if "links" in data:
+            summary.append(f"Found {len(data['links'])} links")
+        if "text" in data:
+            summary.append(f"Found {len(data['text'])} text blocks")
+        if "prices" in data:
+            summary.append(f"Found {len(data['prices'])} price mentions")
+
+        # Handle specific query types
+        if "how many" in query_lower:
+            if "image" in query_lower and "images" in data:
+                return f"There are {len(data['images'])} images on the webpage."
+            elif "link" in query_lower and "links" in data:
+                return f"There are {len(data['links'])} links on the webpage."
+            elif "price" in query_lower and "prices" in data:
+                return f"There are {len(data['prices'])} prices mentioned on the webpage."
+            elif "text" in query_lower and "text" in data:
+                return f"There are {len(data['text'])} text blocks on the webpage."
+
+        if "show" in query_lower or "list" in query_lower:
+            if "image" in query_lower and "images" in data:
+                images = data['images'][:5]  # Limit to 5 images
+                return "Here are up to 5 images found:\n" + "\n".join([f"- {img['alt'] or 'No description'} ({img['src']})" for img in images])
+
+            elif "link" in query_lower and "links" in data:
+                links = data['links'][:5]  # Limit to 5 links
+                return "Here are up to 5 links found:\n" + "\n".join([f"- {link['text'] or 'No text'} ({link['href']})" for link in links])
+
+            elif "text" in query_lower and "text" in data:
+                texts = data['text'][:3]  # Limit to 3 text blocks
+                return "Here are up to 3 text blocks found:\n" + "\n".join([f"- {text[:100]}..." for text in texts])
+
+        # If no specific handling matched, return general summary
+        if summary:
+            return "Here's what I found on the webpage:\n" + "\n".join(summary)
+
         return "I couldn't find any relevant information based on your query."

-    def
-    scraper = SmartWebScraper()
-    def process_request(query: str, url: str) -> str:
+    def handle_query(self, query: str, url: str) -> str:
         if not url:
             return "Please provide a URL to analyze."
         try:
             parsed_url = urlparse(url)
             if not all([parsed_url.scheme, parsed_url.netloc]):
                 return "Please provide a valid URL (including http:// or https://)."
-
+
             # Add timeout to prevent hanging
-            data =
-            response =
-
+            data = self.extract_data(url, query)
+            response = self.format_response(data, query)
+
             # Validate response
             if not response or response.isspace():
                 return "I couldn't generate a meaningful response based on the available data."
-
+
             return response
         except Exception as e:
             logging.error(f"Error processing request: {str(e)}")
             return f"An error occurred while processing your request: {str(e)}"
-
+
+def create_interface():
+    scraper = SmartWebScraper()
+    def process_request(query: str, url: str) -> str:
+        return scraper.handle_query(query, url)
+
     with gr.Blocks() as demo:
         gr.Markdown("# Smart Web Scraper")
         gr.Markdown("Ask me anything about a webpage, and I'll try to find the information you need!")
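
The new format_response can be sanity-checked in isolation by handing it a dict in the shape it expects. The snippet below is only a sketch: the data dict is hand-made, importlib is used because the module name 1app starts with a digit, and constructing SmartWebScraper assumes the app's dependencies (the DialoGPT model and the NLTK data) are available locally:

import importlib

app = importlib.import_module("1app")  # "import 1app" would be a syntax error
scraper = app.SmartWebScraper()

# Hand-made sample data; in the app, this dict comes from the scraper's extraction step.
data = {
    "images": [{"alt": "Logo", "src": "https://example.com/logo.png"}],
    "links": [{"text": "Docs", "href": "https://example.com/docs"}],
    "text": ["Welcome to the example page."],
}

print(scraper.format_response(data, "How many images are there?"))
# -> There are 1 images on the webpage.
print(scraper.format_response(data, "Show me the links"))
# -> "Here are up to 5 links found:" followed by "- Docs (https://example.com/docs)"
print(scraper.format_response({"error": "timeout"}, "anything"))
# -> I encountered an error while processing your request: timeout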
@@ -217,7 +206,7 @@ def create_interface():
         - "List all forms"
         """)
     return demo
-
+
 if __name__ == "__main__":
     demo = create_interface()  # Assign the returned Gradio interface to 'demo'
     demo.launch(debug=True)
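
handle_query also calls self.extract_data(url, query), which is outside this diff. Judging purely from how format_response consumes its result, that method returns a dict with optional "images", "links", "text" and "prices" keys, or an "error" key on failure. The stub below only illustrates that inferred shape; it is not the implementation in 1app.py:

from typing import Any, Dict

def extract_data_stub(url: str, query: str) -> Dict[str, Any]:
    """Illustrates the shape format_response expects; not the real extract_data.

    On failure, the real method appears to return {"error": "<message>"} instead.
    """
    return {
        "images": [{"alt": "Logo", "src": "https://example.com/logo.png"}],  # list of dicts with alt/src
        "links": [{"text": "Docs", "href": "https://example.com/docs"}],     # list of dicts with text/href
        "text": ["First text block...", "Second text block..."],             # list of plain strings
        "prices": ["$9.99", "$19.99"],                                       # list of price mentions
    }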
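
The middle of create_interface (the input components and the click handler) is unchanged and therefore not shown in this diff. Purely as a sketch of how the visible pieces typically fit together in Gradio, with invented component names and labels; only the two Markdown headers, process_request and return demo come from the diff:

import gradio as gr

def create_interface_sketch(process_request):
    # Hypothetical wiring; not the actual body of create_interface in 1app.py.
    with gr.Blocks() as demo:
        gr.Markdown("# Smart Web Scraper")
        gr.Markdown("Ask me anything about a webpage, and I'll try to find the information you need!")
        query_box = gr.Textbox(label="Your question")   # invented component
        url_box = gr.Textbox(label="URL to analyze")    # invented component
        answer_box = gr.Textbox(label="Answer")         # invented component
        ask = gr.Button("Ask")                          # invented component
        # process_request takes (query, url) in that order, matching the inner function in the diff
        ask.click(fn=process_request, inputs=[query_box, url_box], outputs=answer_box)
    return demo

Launching the returned Blocks is then just demo.launch(debug=True), as the __main__ block above does.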