Gosse Minnema
Add sociofillmore code, load dataset via private dataset repo
b11ac48
raw
history blame
No virus
654 Bytes
import langdetect
import json
# Path of the JSON dataset that get_texts() opens; it is a relative path, so
# it is resolved against the current working directory.
DATA_FILE = "data/thecrashes_data.json"
def main():
    """Print every article text that langdetect classifies as English.

    Texts are obtained from get_texts(). Each text detected as "en" is
    written to stdout between two arrow-shaped delimiter lines. Texts for
    which langdetect cannot determine a language are skipped.
    """
    for text in get_texts():
        try:
            language = langdetect.detect(text)
        except langdetect.LangDetectException:
            # detect() raises when the text offers no usable features (for
            # example it is empty or contains only digits/punctuation);
            # skip that text instead of aborting the whole run.
            continue
        if language == "en":
            print("\n<-------------------------------")
            print(text)
            print("------------------------------>\n")
def get_texts():
    """Load the dataset at DATA_FILE and return one text per article.

    The file is parsed as JSON and iterated as a sequence of events; for
    every entry in an event's "articles" list, the article's "title" and
    "summary" are joined with a blank line between them.

    Returns:
        list: the combined title/summary strings, in the order the articles
        appear in the file.
    """
    with open(DATA_FILE, encoding="utf-8") as handle:
        events = json.load(handle)
    return [
        article["title"] + "\n\n" + article["summary"]
        for event in events
        for article in event["articles"]
    ]
# Run the dump only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()