Spaces:
Sleeping
Sleeping
Commit
·
c869a11
1
Parent(s):
8dab0c7
Upload 4 files
Browse files- app/__init__.py +0 -0
- app/functions.py +157 -0
- app/models.py +28 -0
- app/tools.py +55 -0
app/__init__.py
ADDED
File without changes
|
app/functions.py
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from bs4 import BeautifulSoup
|
2 |
+
import asyncio
|
3 |
+
import aiohttp
|
4 |
+
from typing import List, Dict, Union
|
5 |
+
import json
|
6 |
+
|
7 |
+
# Root of the Hacker News Firebase REST API (v0). The helpers below append
# "/item/<id>.json" or "/<story_type>stories.json" to it.
BASE_URL = "https://hacker-news.firebaseio.com/v0"
|
8 |
+
|
9 |
+
|
10 |
+
async def fetch_item(session: aiohttp.ClientSession, item_id: int):
    """
    Asynchronously downloads a single Hacker News item and decodes its JSON body.

    Args:
        session: Aiohttp ClientSession used to issue the GET request.
        item_id (int): The ID of the item to fetch.

    Returns:
        The decoded JSON payload served at `{BASE_URL}/item/{item_id}.json`.
    """
    endpoint = f"{BASE_URL}/item/{item_id}.json"
    async with session.get(endpoint) as response:
        payload = await response.json()
    return payload
|
24 |
+
|
25 |
+
|
26 |
+
async def fetch_story_ids(story_type: str = "top", limit: int = None):
    """
    Asynchronously fetches the story IDs of one Hacker News story list.

    Args:
        story_type: The story list to read. It is interpolated into the endpoint
            name, so "top" requests `topstories.json`. Defaults to "top".
        limit: The maximum number of IDs to return. A falsy value (None or 0)
            returns every ID the endpoint sends back.

    Returns:
        The decoded JSON response (a list of story IDs on success), cut down to
        the first `limit` entries when `limit` is set.
    """
    url = f"{BASE_URL}/{story_type}stories.json"

    # `ssl=False` replaces the deprecated `verify_ssl=False` spelling.
    # NOTE(review): this disables TLS certificate verification - confirm it is
    # still required in the deployment environment.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.get(url) as response:
            story_ids = await response.json()

    if limit:
        story_ids = story_ids[:limit]

    return story_ids
|
46 |
+
|
47 |
+
|
48 |
+
async def fetch_text(session, url):
    """
    Downloads a page and returns its visible text, or a readable error message.

    The function never raises: any failure is turned into a string so the
    caller (an LLM tool) always receives something it can show.

    Args:
        session: `aiohttp` session
        url: The story URL

    Returns:
        The text extracted from the HTML on a 200 response, otherwise a string
        describing the HTTP status or the exception that occurred.
    """
    try:
        async with session.get(url) as response:
            # Guard clause: anything but 200 is reported back as a message.
            if response.status != 200:
                return f"Unable to fetch content from {url}. Status code: {response.status}"

            markup = await response.text()
            return BeautifulSoup(markup, 'html.parser').get_text()
    except Exception as e:
        return f"An error occurred: {e}"
|
73 |
+
|
74 |
+
|
75 |
+
async def get_hn_stories(limit: int = 5, keywords: List[str] = None, story_type: str = "top"):
    """
    Asynchronously fetches Hacker News stories, optionally filtered by title keywords.

    Args:
        limit (int): The maximum number of stories to return. Default is 5.
            A falsy value disables the early cut on the ID list.
        keywords (List[str]): Keywords to look for in the story titles
            (case-insensitive substring match). None disables filtering.
        story_type (str): The story list to read, passed on to `fetch_story_ids`.

    Returns:
        List[Dict[str, Union[str, int]]]: A list of dictionaries containing
        'title', 'url', 'score' and 'story_id' of the stories.
    """
    # Without keywords the first `limit` IDs are enough. With keywords the whole
    # list is fetched, because matching titles can sit anywhere in it.
    if limit and keywords is None:
        story_ids = await fetch_story_ids(story_type, limit)
    else:
        story_ids = await fetch_story_ids(story_type)

    # One shared session for all item requests instead of one session per story.
    # `ssl=False` replaces the deprecated `verify_ssl=False` spelling.
    # NOTE(review): this disables TLS certificate verification - confirm it is
    # still required.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        stories = await asyncio.gather(
            *(fetch_item(session, story_id) for story_id in story_ids)
        )

    # Lower-case the keywords once rather than once per story.
    lowered_keywords = None if keywords is None else [keyword.lower() for keyword in keywords]

    filtered_stories = []
    for story in stories:
        # NOTE(review): the item endpoint can apparently answer `null`
        # (presumably for unknown or removed items) - skip instead of crashing.
        if not story:
            continue

        if lowered_keywords is not None:
            # Items without a title are treated as having an empty title.
            lowered_title = (story.get("title") or "").lower()
            if not any(keyword in lowered_title for keyword in lowered_keywords):
                continue

        filtered_stories.append({
            "title": story.get("title"),
            "url": story.get("url"),
            "score": story.get("score"),
            "story_id": story.get("id"),
        })

    return filtered_stories[:limit]
|
115 |
+
|
116 |
+
|
117 |
+
async def get_relevant_comments(story_id: int, limit: int =10):
    """
    Get the most relevant comments for a Hacker News item.

    Args:
        story_id: The ID of the Hacker News item.
        limit: The number of comments to retrieve (default is 10).

    Returns:
        A JSON-encoded list with the text of up to `limit` top-level comments,
        or the plain message "This item doesn't have comments." when the item
        is missing or has no 'kids'.
    """
    # `ssl=False` replaces the deprecated `verify_ssl=False` spelling.
    # NOTE(review): this disables TLS certificate verification - confirm it is
    # still required.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        story = await fetch_item(session, story_id)

        # A null item (e.g. an unknown ID) is reported like an item without comments.
        if not story or 'kids' not in story:
            return "This item doesn't have comments."

        comment_details = await asyncio.gather(
            *[fetch_item(session, cid) for cid in story['kids']]
        )

    # Keep only comments that actually carry text. Null or text-less entries
    # (presumably deleted comments) would otherwise raise on `comment["text"]`.
    comments = [
        comment for comment in comment_details
        if comment is not None and "text" in comment
    ]

    # NOTE(review): Hacker News comment items do not appear to expose a 'score'
    # field. If so, this stable sort leaves the API order unchanged - confirm.
    comments.sort(key=lambda comment: comment.get('score', 0), reverse=True)

    relevant_comments = [comment["text"] for comment in comments[:limit]]

    return json.dumps(relevant_comments)
|
143 |
+
|
144 |
+
|
145 |
+
async def get_story_content(story_url: str):
    """
    Gets the content of the story using BeautifulSoup.

    Opens a short-lived aiohttp session and delegates to `fetch_text`.

    Args:
        story_url: A string representing the story URL

    Returns:
        Whatever `fetch_text` returns for the URL: the extracted page text,
        or a message describing why it could not be fetched.
    """
    # `ssl=False` replaces the deprecated `verify_ssl=False` spelling.
    # NOTE(review): certificate verification is disabled for arbitrary
    # third-party URLs here - confirm this is intended.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        return await fetch_text(session, story_url)
|
app/models.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List, Optional
|
2 |
+
from pydantic import BaseModel, Field
|
3 |
+
|
4 |
+
|
5 |
+
class Stories(BaseModel):
    """A model representing stories from Hacker News"""

    # Maximum number of stories handed back to the caller.
    limit: int = Field(default=5, description="The number of stories to return. Defaults to 5.")

    # Optional because the default is None: an explicit `None` must validate too.
    keywords: Optional[List[str]] = Field(default=None, description="The list of keywords to filter the stories. "
                                                                    "Defaults to None")

    # Selects which story list is queried.
    story_type: str = Field(default="top", description="The story type. It can be one of the following: "
                                                       "'top', 'new', 'best', 'ask', 'show', 'job'. Defaults to 'top'")
|
12 |
+
|
13 |
+
|
14 |
+
class Comments(BaseModel):
    """A model representing the highest scored comments from a story"""

    # Required: there is no default story to fall back on.
    story_id: int = Field(
        ...,
        description="The story id",
    )

    # Upper bound on how many comments are returned.
    limit: int = Field(
        default=10,
        description="The number of comments to return. Defaults to 10.",
    )
|
18 |
+
|
19 |
+
|
20 |
+
class Content(BaseModel):
    """A model representing the content of a story fetched from the URL"""

    # Required: the address whose content should be fetched.
    story_url: str = Field(
        ...,
        description="The story URL",
    )
|
23 |
+
|
24 |
+
|
25 |
+
class Item(BaseModel):
    """A model representing a story, comment, job, Ask HN and even a poll"""

    # NOTE(review): typed as str here while `Comments.story_id` is an int -
    # confirm which type callers are expected to send.
    item_id: str = Field(
        ...,
        description="The item's unique id",
    )
|
28 |
+
|
app/tools.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Optional, Type, List
|
2 |
+
|
3 |
+
from langchain.tools import BaseTool
|
4 |
+
from pydantic import BaseModel
|
5 |
+
from app.models import Stories, Comments, Content
|
6 |
+
|
7 |
+
from app.functions import get_hn_stories
|
8 |
+
from app.functions import get_relevant_comments
|
9 |
+
from app.functions import get_story_content
|
10 |
+
|
11 |
+
|
12 |
+
class StoriesTool(BaseTool):
    """LangChain tool that exposes `get_hn_stories`, with `Stories` as its argument schema."""

    name: str = "get_stories"
    description: str = (
        "Gets stories from Hacker News. The stories are described by a 'story_id', a 'title', a 'url' and"
        " a 'score'."
    )

    def _run(self, limit: int = 5, keywords: List[str] = None, story_type: str = "top"):
        """Synchronous entry point: runs the coroutine to completion and returns its result.

        Raises:
            RuntimeError: if called from a thread that already runs an event
                loop (`asyncio.run` limitation). Use `_arun` there instead.
        """
        # Local import: this module does not import asyncio at the top.
        import asyncio

        # `get_hn_stories` is a coroutine function. Calling it without driving
        # it would hand the agent an un-awaited coroutine object.
        return asyncio.run(get_hn_stories(limit, keywords, story_type))

    async def _arun(self, limit: int = 5, keywords: List[str] = None, story_type: str = "top"):
        """Asynchronous entry point: awaits `get_hn_stories` and returns its result."""
        return await get_hn_stories(limit, keywords, story_type)

    args_schema: Optional[Type[BaseModel]] = Stories
|
26 |
+
|
27 |
+
|
28 |
+
class CommentsTool(BaseTool):
    """LangChain tool that exposes `get_relevant_comments`, with `Comments` as its argument schema."""

    name: str = "get_comments"
    description: str = "Gets comments from a specific Hacker News story"

    def _run(self, story_id: int, limit: int = 10):
        """Synchronous entry point: runs the coroutine to completion and returns its result.

        Raises:
            RuntimeError: if called from a thread that already runs an event
                loop (`asyncio.run` limitation). Use `_arun` there instead.
        """
        # Local import: this module does not import asyncio at the top.
        import asyncio

        # `get_relevant_comments` is a coroutine function. Calling it without
        # driving it would hand the agent an un-awaited coroutine object.
        return asyncio.run(get_relevant_comments(story_id, limit))

    async def _arun(self, story_id: int, limit: int = 10):
        """Asynchronous entry point: awaits `get_relevant_comments` and returns its result."""
        return await get_relevant_comments(story_id, limit)

    args_schema: Optional[Type[BaseModel]] = Comments
|
41 |
+
|
42 |
+
|
43 |
+
class ContentTool(BaseTool):
    """LangChain tool that exposes `get_story_content`, with `Content` as its argument schema."""

    name: str = "get_content"
    description: str = "Gets the Hacker News story content from a URL"

    def _run(self, story_url: str):
        """Synchronous entry point: runs the coroutine to completion and returns its result.

        Raises:
            RuntimeError: if called from a thread that already runs an event
                loop (`asyncio.run` limitation). Use `_arun` there instead.
        """
        # Local import: this module does not import asyncio at the top.
        import asyncio

        # `get_story_content` is a coroutine function. Calling it without
        # driving it would hand the agent an un-awaited coroutine object.
        return asyncio.run(get_story_content(story_url))

    async def _arun(self, story_url: str):
        """Asynchronous entry point: awaits `get_story_content` and returns its result."""
        return await get_story_content(story_url)

    args_schema: Optional[Type[BaseModel]] = Content
|