# Trafic_Marseille / rtm_scraper.py
# tyriaa
# Third commit
# 1caf2c7
from __future__ import unicode_literals
import requests
import json
from datetime import datetime
import re
from bs4 import BeautifulSoup
class RTMTraffic:
    """Small client for the RTM traffic-alert endpoint (api.rtm.fr).

    Fetches the raw alert payload over HTTP and converts each alert into a
    plain dictionary with a cleaned-up, HTML-free description.
    """

    # The last path segment selects the scope: 'All' or a line type.
    _BASE_URL = 'https://api.rtm.fr/front/getAlertes/FR'

    # Compiled once; used to collapse any run of whitespace to one space.
    _WHITESPACE_RE = re.compile(r'\s+')

    def __init__(self, timeout=10):
        """Prepare the HTTP headers sent with every request.

        Args:
            timeout: Seconds to wait for the server before giving up. Without
                a timeout a stalled connection would block the caller
                indefinitely.
        """
        self.headers = {
            'User-Agent': 'RTM API Python Client',
            'From': 'https://github.com/your-username/your-repo'
        }
        self.timeout = timeout

    def _fetch_data(self, scope):
        """Download the alert payload for *scope* and return its 'data' part.

        Args:
            scope: Last URL path segment ('All' or a line type).

        Returns:
            dict: The 'data' object of the JSON response, or an empty dict
            when the response carries no data (missing or null).

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        url = f'{self._BASE_URL}/{scope}'
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        # Surface HTTP failures here instead of failing later on a
        # confusing JSON/key error.
        response.raise_for_status()
        return response.json().get('data') or {}

    def get_alerts(self, period='all', line_type=None):
        """Retrieve RTM traffic alerts.

        Args:
            period: 'today', 'coming' or 'all'. Only used when *line_type*
                is None; any value other than 'today'/'coming' means 'all'.
            line_type: None, 'bus', 'metro' or 'tram'. When given, alerts
                for that line type are requested and *period* is ignored.

        Returns:
            A list of formatted alerts when *line_type* is given or when
            *period* is 'today' or 'coming'; otherwise a dict with the keys
            'AlertesToday' and 'AlertesComing', each mapping to such a list.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        if line_type is not None:
            data = self._fetch_data(line_type)
            return self._process_alerts(data.get('Alertes', []))

        data = self._fetch_data('All')
        alerts_today = self._process_alerts(data.get('AlertesToday', []))
        alerts_coming = self._process_alerts(data.get('AlertesComing', []))

        if period == 'today':
            return alerts_today
        if period == 'coming':
            return alerts_coming
        return {
            'AlertesToday': alerts_today,
            'AlertesComing': alerts_coming
        }

    def _extract_description(self, html_content):
        """Extract a plain-text description from an alert's HTML content.

        Args:
            html_content: HTML content of the alert; may be empty or None.

        Returns:
            str: The text with <style> blocks removed and whitespace
            normalised, or "" when there is no content.
        """
        if not html_content:
            return ""

        soup = BeautifulSoup(html_content, 'html.parser')

        # Drop <style> elements together with their content.
        for style in soup.find_all('style'):
            style.decompose()

        description = soup.get_text(separator=' ', strip=True)

        # Replace literal "\n" / "\r" escape sequences *before* collapsing
        # whitespace, so the spaces they introduce get collapsed as well.
        description = description.replace('\\n', ' ').replace('\\r', '')
        return self._WHITESPACE_RE.sub(' ', description).strip()

    def _process_alerts(self, alerts):
        """Convert raw alerts into a more readable format.

        Args:
            alerts: List of raw alert dicts; None is treated as empty.

        Returns:
            list: One dict per alert with the keys 'title', 'description',
            'validity', 'type', 'valid_until' and 'affected_lines'.
        """
        processed_alerts = []
        for alert in alerts or []:
            # Prefer the HTML message (MessageD); fall back to MessageB.
            description = self._extract_description(alert.get('MessageD'))
            if not description:
                # `or ''` guards against an explicit JSON null.
                description = (alert.get('MessageB') or '').strip()

            # Keep only the entries that actually expose a public code.
            affected_lines = [
                line['PublicCode']
                for line in alert.get('AffectedLine') or []
                if 'PublicCode' in line
            ]

            processed_alerts.append({
                'title': alert.get('MessageA', ''),
                'description': description,
                'validity': alert.get('MessageC', ''),
                'type': alert.get('type', ''),
                'valid_until': alert.get('ValidUntilTime', ''),
                'affected_lines': affected_lines
            })
        return processed_alerts
def get_rtm_traffic():
    """Collect all RTM traffic disruptions into a single flat list.

    Today's alerts come first, followed by upcoming alerts whose title is
    prefixed with "[À venir] ".

    Returns:
        list: One dict per disruption with the keys 'title', 'description',
        'affected_lines', 'validity' and 'type'.
    """
    def _summary(alert, title):
        # Keep only the fields exposed to callers, under the given title.
        return {
            'title': title,
            'description': alert['description'],
            'affected_lines': alert['affected_lines'],
            'validity': alert['validity'],
            'type': alert['type']
        }

    grouped = RTMTraffic().get_alerts()
    disruptions = []

    # Alerts valid today keep their title unchanged.
    if 'AlertesToday' in grouped:
        disruptions += [
            _summary(alert, alert['title'])
            for alert in grouped['AlertesToday']
        ]

    # Upcoming alerts are flagged in the title.
    if 'AlertesComing' in grouped:
        disruptions += [
            _summary(alert, f"[À venir] {alert['title']}")
            for alert in grouped['AlertesComing']
        ]

    return disruptions
if __name__ == "__main__":
    # Script entry point: print every disruption, or a notice if none exist.
    traffic_info = get_rtm_traffic()
    if not traffic_info:
        print("\nAucune perturbation en cours")
    else:
        for info in traffic_info:
            print("\nPerturbation :")
            print(f"Titre : {info['title']}")
            # The description is optional and skipped when empty.
            if info['description']:
                print(f"Description : {info['description']}")
            print(f"Type : {info['type']}")
            print(f"Validité : {info['validity']}")
            # The line list is shown only when at least one line is affected.
            if info['affected_lines']:
                print(f"Lignes affectées : {', '.join(info['affected_lines'])}")
            # Visual separator between two disruptions.
            print("-" * 50)