# Spaces: Running
import requests | |
import re | |
from bs4 import BeautifulSoup | |
import logging | |
import os | |
# Configure the root logger once at import time: INFO and above, with each
# record rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
class AirPortCollector:
    """Scrape "airport" entries from a public Telegram channel preview page.

    Each message on the page is searched for bullet-style fields (name,
    official website(s), Telegram channel, Telegram group). Every message
    that yields at least one field becomes a dict appended to
    ``self.airports``.

    Attributes are plain and public:
        url: Page that ``fetch_content`` downloads.
        airports: List of dicts accumulated by ``parse_content``. Possible
            keys are ``'name'`` (str), ``'websites'`` (list of str),
            ``'channel'`` (str) and ``'group'`` (str); a key is present only
            when the corresponding field was found.
        proxy: Proxy URL used for both http and https, or ``None``.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
    """

    DEFAULT_URL = "https://t.me/s/jichang_list?before=457"
    DEFAULT_TIMEOUT = 30

    # Compiled once instead of on every message. The patterns match the
    # Chinese field labels used in the channel's posts.
    _NAME_RE = re.compile(r'⦁ 名称:\s*(.*)')
    _WEBSITE_RE = re.compile(r'⦁ 官网:\s*(https?://\S+)')
    _CHANNEL_RE = re.compile(r'⦁ 频道:\s*(@\S+)')
    _GROUP_RE = re.compile(r'⦁ 群组:\s*(@\S+)')

    def __init__(self, url=None, proxy=None, timeout=None):
        """Create a collector.

        Args:
            url: Page to scrape. Defaults to ``DEFAULT_URL``.
            proxy: Proxy URL. Defaults to the ``PROXY`` environment variable
                (which may itself be unset, meaning no proxy).
            timeout: Request timeout in seconds. Defaults to
                ``DEFAULT_TIMEOUT``.
        """
        self.url = url if url is not None else self.DEFAULT_URL
        self.airports = []
        self.proxy = proxy if proxy is not None else os.getenv('PROXY')
        self.timeout = timeout if timeout is not None else self.DEFAULT_TIMEOUT

    def fetch_content(self):
        """Download ``self.url`` and return the response body as text.

        Returns:
            The response text, or ``None`` if the request failed or the
            server answered with an HTTP error status (the error is logged).
        """
        proxies = {'http': self.proxy, 'https': self.proxy} if self.proxy else None
        try:
            # NOTE(review): verify=False disables TLS certificate checking.
            # Kept as in the original because it may be required by the
            # proxy in use -- confirm, and drop it if not needed.
            # The timeout prevents a stalled connection from hanging forever.
            response = requests.get(
                self.url,
                verify=False,
                proxies=proxies,
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.text
        except requests.RequestException as e:
            logging.error("Error fetching content: %s", e)
            return None

    def parse_content(self, content):
        """Extract airport entries from page HTML into ``self.airports``.

        Args:
            content: HTML text of the channel preview page. Falsy values
                (``None``, ``""``) are ignored.

        Returns:
            None. Results are appended to ``self.airports``; calling this
            more than once accumulates entries.
        """
        if not content:
            return

        soup = BeautifulSoup(content, 'html.parser')
        messages = soup.find_all('div', class_='tgme_widget_message_text')

        for message in messages:
            airport = self._parse_message(message.get_text())
            # Messages with none of the known fields are skipped.
            if airport:
                self.airports.append(airport)

    def _parse_message(self, text):
        """Return a dict of the fields found in one message's plain text."""
        airport = {}

        # Airport name: rest of the line after the label.
        name_match = self._NAME_RE.search(text)
        if name_match:
            airport['name'] = name_match.group(1).strip()

        # Official website(s): a message may list several.
        website_matches = self._WEBSITE_RE.findall(text)
        if website_matches:
            airport['websites'] = website_matches

        # Telegram channel handle (starts with '@').
        channel_match = self._CHANNEL_RE.search(text)
        if channel_match:
            airport['channel'] = channel_match.group(1)

        # Telegram group handle (starts with '@').
        group_match = self._GROUP_RE.search(text)
        if group_match:
            airport['group'] = group_match.group(1)

        return airport

    def collect(self):
        """Fetch the page and parse it, logging the outcome."""
        content = self.fetch_content()
        if content:
            self.parse_content(content)
            logging.info(f"Collected {len(self.airports)} airports")
        else:
            logging.warning("Failed to fetch content")

    def get_airports(self):
        """Return the list of airport dicts collected so far."""
        return self.airports
def main():
    """Collect airports and write every found website URL to a text file.

    The output file ``/app/subscribes.txt`` is overwritten on each run and
    contains one URL per line. Airports without a ``'websites'`` entry
    contribute nothing. If collection fails, the file ends up empty.
    """
    collector = AirPortCollector()
    collector.collect()
    airports = collector.get_airports()

    # Flatten the per-airport website lists so the count that is logged
    # matches the number of lines actually written.
    websites = []
    for airport in airports:
        websites.extend(airport.get('websites', []))

    # Explicit UTF-8 so a non-ASCII URL cannot fail under a non-UTF-8 locale.
    with open('/app/subscribes.txt', 'w', encoding='utf-8') as f:
        f.writelines(f"{website}\n" for website in websites)

    logging.info(f"Wrote {len(websites)} airport websites to subscribes.txt")


if __name__ == "__main__":
    main()