File size: 2,718 Bytes
94be149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import requests
import re
from bs4 import BeautifulSoup
import logging
import os

# Configure the root logger once at import time: timestamped INFO-level
# output shared by AirPortCollector and main().
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

class AirPortCollector:
    """Scrape airport (proxy-provider) listings from a Telegram channel's
    public web preview and extract structured entries from each message.

    Each collected entry is a dict that may contain the keys
    ``name``, ``websites`` (list of URLs), ``channel`` and ``group``.
    """

    # Default listing page: public web preview of the @jichang_list channel.
    DEFAULT_URL = "https://t.me/s/jichang_list?before=457"

    # Seconds before a fetch is abandoned; without this requests.get can
    # block forever on an unresponsive host.
    REQUEST_TIMEOUT = 30

    def __init__(self, url=None):
        """Create a collector.

        Args:
            url: Listing page to scrape. Defaults to ``DEFAULT_URL`` so
                existing callers keep their original behavior.
        """
        self.url = url if url is not None else self.DEFAULT_URL
        self.airports = []
        # Optional HTTP(S) proxy taken from the environment; None if unset.
        self.proxy = os.getenv('PROXY')

    def fetch_content(self):
        """Download the listing page.

        Returns:
            The page HTML as text, or None if the request failed
            (the error is logged rather than raised).
        """
        try:
            proxies = {'http': self.proxy, 'https': self.proxy} if self.proxy else None
            # NOTE(review): verify=False disables TLS certificate checking.
            # Kept to preserve existing behavior, but it exposes the fetch
            # to man-in-the-middle attacks — worth revisiting.
            response = requests.get(
                self.url,
                verify=False,
                proxies=proxies,
                timeout=self.REQUEST_TIMEOUT,  # fix: previously no timeout
            )
            response.raise_for_status()
            return response.text
        except requests.RequestException as e:
            logging.error(f"Error fetching content: {e}")
            return None

    def parse_content(self, content):
        """Parse the page HTML and append one entry per Telegram message.

        Args:
            content: HTML text from ``fetch_content``; None/empty is a no-op.
        """
        if not content:
            return

        soup = BeautifulSoup(content, 'html.parser')
        # Telegram renders each post body in this widget class.
        messages = soup.find_all('div', class_='tgme_widget_message_text')

        for message in messages:
            airport = {}
            text = message.get_text()

            # Extract airport name ("⦁ 名称:" = "name:")
            name_match = re.search(r'⦁ 名称:\s*(.*)', text)
            if name_match:
                airport['name'] = name_match.group(1).strip()

            # Extract official website(s) ("⦁ 官网:" = "official site:");
            # a message may list several URLs, so collect all of them.
            website_matches = re.findall(r'⦁ 官网:\s*(https?://\S+)', text)
            if website_matches:
                airport['websites'] = website_matches

            # Extract Telegram channel handle ("⦁ 频道:" = "channel:")
            channel_match = re.search(r'⦁ 频道:\s*(@\S+)', text)
            if channel_match:
                airport['channel'] = channel_match.group(1)

            # Extract Telegram group handle ("⦁ 群组:" = "group:")
            group_match = re.search(r'⦁ 群组:\s*(@\S+)', text)
            if group_match:
                airport['group'] = group_match.group(1)

            # Skip messages that matched none of the fields.
            if airport:
                self.airports.append(airport)

    def collect(self):
        """Fetch the page and parse it, logging the outcome."""
        content = self.fetch_content()
        if content:
            self.parse_content(content)
            logging.info(f"Collected {len(self.airports)} airports")
        else:
            logging.warning("Failed to fetch content")

    def get_airports(self):
        """Return the list of entries collected so far."""
        return self.airports

def main():
    """Collect airport entries and write every official-website URL,
    one per line, to ``/app/subscribes.txt``.
    """
    collector = AirPortCollector()
    collector.collect()
    airports = collector.get_airports()

    written = 0
    # Explicit encoding so URLs are written identically regardless of locale.
    with open('/app/subscribes.txt', 'w', encoding='utf-8') as f:
        for airport in airports:
            # Entries without a 'websites' key contribute nothing.
            for website in airport.get('websites', []):
                f.write(f"{website}\n")
                written += 1

    # Fix: previously logged len(airports) (number of entries), which did
    # not match the number of website lines actually written.
    logging.info(f"Wrote {written} airport websites to subscribes.txt")

if __name__ == "__main__":
    main()