Spaces:

xuyingliKepler
/

hackernews_bot

Sleeping

App Files Files Community

xuyingliKepler committed on Dec 3, 2023

Commit

c869a11

1 Parent(s): 8dab0c7

Upload 4 files

Browse files

Files changed (4) hide show

app/__init__.py +0 -0
app/functions.py +157 -0
app/models.py +28 -0
app/tools.py +55 -0

app/__init__.py ADDED Viewed

File without changes

app/functions.py ADDED Viewed

	@@ -0,0 +1,157 @@

+from bs4 import BeautifulSoup
+import asyncio
+import aiohttp
+from typing import List, Dict, Union
+import json
+# Base endpoint of the Hacker News Firebase API (v0); item and story-list paths are appended to it.
+BASE_URL = "https://hacker-news.firebaseio.com/v0"
+async def fetch_item(session: aiohttp.ClientSession, item_id: int):
+    """
+    Asynchronously fetches details of a story by its ID.
+    Args:
+        session: Aiohttp ClientSession for making HTTP requests.
+        item_id (int): The ID of the item to fetch.
+    Returns:
+        dict: Details of the story.
+    """
+    url = f"{BASE_URL}/item/{item_id}.json"
+    async with session.get(url) as response:
+        return await response.json()
+async def fetch_story_ids(story_type: str = "top", limit: int = None):
+    """
+    Asynchronously fetches the top story IDs.
+    Args:
+        story_type: The story type. Defaults to top (`topstories.json`)
+        limit: The limit of stories to be fetched.
+    Returns:
+        List[int]: A list of top story IDs.
+    """
+    url = f"{BASE_URL}/{story_type}stories.json"
+    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(verify_ssl=False)) as session:
+        async with session.get(url) as response:
+            story_ids = await response.json()
+    if limit:
+        story_ids = story_ids[:limit]
+    return story_ids
+async def fetch_text(session, url):
+    """
+    Fetches the text from a URL (if there's text to be fetched). If it fails,
+    it will return an informative message to the LLM.
+    Args:
+        session: `aiohttp` session
+        url: The story URL
+    Returns:
+        A string representing whether the story text or an informative error (represented as a string)
+    """
+    try:
+        async with session.get(url) as response:
+            if response.status == 200:
+                html_content = await response.text()
+                soup = BeautifulSoup(html_content, 'html.parser')
+                text_content = soup.get_text()
+                return text_content
+            else:
+                return f"Unable to fetch content from {url}. Status code: {response.status}"
+    except Exception as e:
+        return f"An error occurred: {e}"
+async def get_hn_stories(limit: int = 5, keywords: List[str] = None, story_type: str = "top"):
+    """
+    Asynchronously fetches the top Hacker News stories based on the provided parameters.
+    Args:
+        limit (int): The number of top stories to retrieve. Default is 10.
+        keywords (List[str]): A list of keywords to filter the top stories.
+        story_type (str): The story type
+    Returns:
+        List[Dict[str, Union[str, int]]]: A list of dictionaries containing
+        'story_id', 'title', 'url', and 'score' of the stories.
+    """
+    if limit and keywords is None:
+        story_ids = await fetch_story_ids(story_type, limit)
+    else:
+        story_ids = await fetch_story_ids(story_type)
+    async def fetch_and_filter_stories(story_id):
+        async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(verify_ssl=False)) as session:
+            story = await fetch_item(session, story_id)
+        return story
+    tasks = [fetch_and_filter_stories(story_id) for story_id in story_ids]
+    stories = await asyncio.gather(*tasks)
+    filtered_stories = []
+    for story in stories:
+        story_info = {
+            "title": story.get("title"),
+            "url": story.get("url"),
+            "score": story.get("score"),
+            "story_id": story.get("id"),
+        }
+        if keywords is None or any(keyword.lower() in story['title'].lower() for keyword in keywords):
+            filtered_stories.append(story_info)
+    return filtered_stories[:limit]
+async def get_relevant_comments(story_id: int, limit: int =10):
+    """
+    Get the most relevant comments for a Hacker News item.
+    Args:
+        story_id: The ID of the Hacker News item.
+        limit: The number of comments to retrieve (default is 10).
+    Returns:
+        A list of dictionaries, each containing comment details.
+    """
+    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(verify_ssl=False)) as session:
+        story = await fetch_item(session, story_id)
+        if 'kids' not in story:
+            return "This item doesn't have comments."
+        comment_ids = story['kids']
+        comment_details = await asyncio.gather(*[fetch_item(session, cid) for cid in comment_ids])
+        comment_details.sort(key=lambda comment: comment.get('score', 0), reverse=True)
+        relevant_comments = comment_details[:limit]
+        relevant_comments = [comment["text"] for comment in relevant_comments]
+        return json.dumps(relevant_comments)
+async def get_story_content(story_url: str):
+    """
+    Gets the content of the story using BeautifulSoup.
+    Args:
+        story_url: A string representing the story URL
+    Returns:
+        The content of the story
+    """
+    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(verify_ssl=False)) as session:
+        story_content = await fetch_text(session, story_url)
+        return story_content

app/models.py ADDED Viewed

	@@ -0,0 +1,28 @@

+from typing import List, Optional
+from pydantic import BaseModel, Field
+class Stories(BaseModel):
+    """A model representing stories from Hacker News"""
+    limit: int = Field(default=5, description="The number of stories to return. Defaults to 5.")
+    keywords: List[str] = Field(default=None, description="The list of keywords to filter the stories. "
+                                                          "Defaults to None")
+    story_type: str = Field(default="top", description="The story type. It can be one of the following: "
+                                                       "'top', 'new', 'best', 'ask', 'show', 'job'. Defaults to 'top'")
+class Comments(BaseModel):
+    """A model representing the highest scored comments from a story"""
+    story_id: int = Field(..., description="The story id")
+    limit: int = Field(default=10, description="The number of comments to return. Defaults to 10.")
+class Content(BaseModel):
+    """A model representing the content of a story fetched from the URL"""
+    story_url: str = Field(..., description="The story URL")
+class Item(BaseModel):
+    """A model representing a story, comment, job, Ask HN and even a poll"""
+    item_id: str = Field(..., description="The item's unique id")

app/tools.py ADDED Viewed

	@@ -0,0 +1,55 @@

+from typing import Optional, Type, List
+from langchain.tools import BaseTool
+from pydantic import BaseModel
+from app.models import Stories, Comments, Content
+from app.functions import get_hn_stories
+from app.functions import get_relevant_comments
+from app.functions import get_story_content
+class StoriesTool(BaseTool):
+    name = "get_stories"
+    description = "Gets stories from Hacker News. The stories are described by a 'story_id', a 'title', a 'url' and" \
+                  " a 'score'."
+    def _run(self, limit: int = 5, keywords: List[str] = None, story_type: str = "top"):
+        stories = get_hn_stories(limit, keywords, story_type)
+        return stories
+    def _arun(self, limit: int = 5, keywords: List[str] = None, story_type: str = "top"):
+        stories = get_hn_stories(limit, keywords, story_type)
+        return stories
+    args_schema: Optional[Type[BaseModel]] = Stories
+class CommentsTool(BaseTool):
+    name = "get_comments"
+    description = "Gets comments from a specific Hacker News story"
+    def _run(self, story_id: int, limit: int = 10):
+        comments = get_relevant_comments(story_id, limit)
+        return comments
+    def _arun(self, story_id: int, limit: int = 10):
+        comments = get_relevant_comments(story_id, limit)
+        return comments
+    args_schema: Optional[Type[BaseModel]] = Comments
+class ContentTool(BaseTool):
+    name = "get_content"
+    description = "Gets the Hacker News story content from a URL"
+    def _run(self, story_url: str):
+        story_content = get_story_content(story_url)
+        return story_content
+    def _arun(self, story_url: str):
+        story_content = get_story_content(story_url)
+        return story_content
+    args_schema: Optional[Type[BaseModel]] = Content