Spaces:
Runtime error
Runtime error
# Created by Leandro Carneiro on 19/01/2024
# Description: Google Custom Search lookup, page-text download and local text-base helpers.
# ------------------------------------------------
import os.path | |
import time | |
from googleapiclient.discovery import build | |
import requests | |
from bs4 import BeautifulSoup | |
import constants | |
def google_search_api(search_term, api_key, cse_id, **kwargs):
    """Run one Google Custom Search query and return its result items.

    Args:
        search_term: Query string sent as the ``q`` parameter.
        api_key: Developer key handed to the API client builder.
        cse_id: Custom search engine id sent as the ``cx`` parameter.
        **kwargs: Extra keyword arguments forwarded to ``cse().list``.

    Returns:
        The value stored under ``'items'`` in the API response, or ``-1``
        when anything raises along the way (building the client, executing
        the request, or a response that has no ``'items'`` key).
    """
    try:
        client = build("customsearch", "v1", developerKey=api_key)
        request = client.cse().list(q=search_term, cx=cse_id, **kwargs)
        response = request.execute()
        return response['items']
    except Exception:
        # Failures are reported through the -1 sentinel rather than raised.
        return -1
def search_google(subject, sites):
    """Collect result links for ``subject``, searching one domain at a time.

    For every entry in ``sites`` a ``"<subject> site:<domain>"`` query is
    sent through ``google_search_api`` using the ``GOOGLE_KEY`` and
    ``GOOGLE_SEARCH`` environment variables and ``constants.num_sites`` as
    the ``num`` argument.

    Args:
        subject: Search subject placed at the start of each query.
        sites: Iterable of domain strings to restrict the search to.

    Returns:
        A list with the ``'link'`` of every item found. When a lookup comes
        back as the ``-1`` failure sentinel, the domain itself is added to
        the list instead. If an exception is raised while processing, its
        message is printed and returned as a string (not a list).
    """
    try:
        links = []
        for domain in sites:
            print(' Buscando notícias no domínio: ' + domain)
            search_query = f"{subject} site:{domain}"
            hits = google_search_api(
                search_query,
                os.environ['GOOGLE_KEY'],
                os.environ['GOOGLE_SEARCH'],
                num=constants.num_sites,
            )
            if hits == -1:
                # Lookup failed: fall back to the bare domain.
                links.append(domain)
                continue
            links.extend(hit['link'] for hit in hits)
        print(' Total de sites encontrados: ' + str(len(links)))
        return links
    except Exception as err:
        message = str(err)
        print(message)
        return message
def retrieve_text_from_site(sites, timeout=30):
    """Download every URL in ``sites`` and extract its text content.

    Args:
        sites: Iterable of URL strings to fetch.
        timeout: Value passed as ``timeout`` to ``requests.get`` (seconds).
            Without it a server that never answers would block the whole
            run indefinitely.

    Returns:
        A list with one string per URL, in input order: the result of
        ``BeautifulSoup(...).get_text()`` for the page, or, when fetching or
        parsing raises, ``'Erro na recuperação do texto: '`` followed by the
        exception message.
    """
    texts = []
    for site in sites:
        print(' Baixando texto do site: ' + site)
        try:
            response = requests.get(site, timeout=timeout)
            # Turn HTTP 4xx/5xx answers into exceptions so they are reported
            # as errors instead of being parsed as page text.
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            texts.append(soup.get_text())
        except Exception as e:
            # Best effort: keep one entry per URL so the output stays
            # aligned with the input list.
            texts.append('Erro na recuperação do texto: ' + str(e))
    return texts
def delete_base(local_base):
    """Remove every entry found directly inside ``local_base``.

    Args:
        local_base: Path of the directory to empty.

    Returns:
        ``0`` when every entry was removed, otherwise the message of the
        exception raised while listing or removing. Entries removed before
        the failure stay removed.
    """
    try:
        for entry_name in os.listdir(local_base):
            os.remove(os.path.join(local_base, entry_name))
    except Exception as err:
        return str(err)
    return 0
def save_on_base(sites, texts, local_base):
    """Store each text in its own file and record which URL it came from.

    For position ``i`` the text ``texts[i]`` is written to ``news<i>.txt``
    inside ``local_base`` (overwriting any existing file), and a line
    ``news<i>.txt;<sites[i]>`` is appended to ``filename_url.csv`` in the
    same directory.

    Args:
        sites: Sequence of URL strings; its length drives the loop.
        texts: Sequence of texts, indexed in parallel with ``sites``.
        local_base: Directory that receives the files.

    Returns:
        ``0`` when everything was written, otherwise the message of the
        exception that interrupted the process. Files written before the
        failure are left in place.
    """
    try:
        for position in range(len(sites)):
            text_name = f'news{position}.txt'
            text_path = os.path.join(local_base, text_name)
            with open(text_path, 'w', encoding='utf-8') as text_file:
                text_file.write(texts[position])

            index_path = os.path.join(local_base, 'filename_url.csv')
            with open(index_path, 'a', encoding='utf-8') as index_file:
                index_file.write(text_name + ';' + sites[position] + '\n')
    except Exception as err:
        return str(err)
    return 0