Spaces:
Runtime error
Runtime error
tensorwitz
committed on
Commit
·
0506f5f
1
Parent(s):
889f06f
Integrated trend topic analysis
Browse files
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
-
import datetime
|
2 |
-
import hmac
|
3 |
import os
|
4 |
import uuid
|
5 |
|
@@ -10,6 +9,10 @@ from azure.cosmos import ContainerProxy, CosmosClient
|
|
10 |
from bs4 import BeautifulSoup, NavigableString
|
11 |
from dotenv import load_dotenv
|
12 |
from st_copy_to_clipboard import st_copy_to_clipboard
|
|
|
|
|
|
|
|
|
13 |
|
14 |
load_dotenv()
|
15 |
|
@@ -270,7 +273,7 @@ def get_article_summary(article: str) -> str:
|
|
270 |
os.environ.get("SUMMARY_API"),
|
271 |
headers={
|
272 |
"Content-Type": "application/json",
|
273 |
-
"Authorization": os.environ.get("SUMMARY_API_KEY"),
|
274 |
"azureml-model-deployment": "heute-summary-api",
|
275 |
},
|
276 |
data={"article": article},
|
@@ -369,10 +372,14 @@ def on_click_handler_generate_article(**kwargs):
|
|
369 |
kwargs["webpage_option"],
|
370 |
)
|
371 |
headline = create_headline(created_article, kwargs["webpage_option"])
|
|
|
|
|
|
|
|
|
372 |
db_analytics_item = {
|
373 |
"id": str(uuid.uuid4()),
|
374 |
"oparation": "article_generation",
|
375 |
-
"timestamp": str(datetime.
|
376 |
}
|
377 |
client: ContainerProxy = st.session_state["db_container"]
|
378 |
client.create_item(body=db_analytics_item)
|
@@ -397,7 +404,7 @@ def on_click_handler_generate_generate_article_keywords(**kwargs):
|
|
397 |
db_analytics_item = {
|
398 |
"id": str(uuid.uuid4()),
|
399 |
"oparation": "article_generation",
|
400 |
-
"timestamp": str(datetime.
|
401 |
}
|
402 |
client: ContainerProxy = st.session_state["db_container"]
|
403 |
client.create_item(body=db_analytics_item)
|
@@ -415,7 +422,291 @@ def reset_session_state():
|
|
415 |
st.session_state["generated_article"] = ""
|
416 |
st.session_state["studie_links"] = []
|
417 |
st.session_state["article_summary"] = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
418 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
419 |
|
420 |
if "extracted_articles" not in st.session_state:
|
421 |
st.session_state["extracted_articles"] = []
|
@@ -433,8 +724,6 @@ if "selected_page" not in st.session_state:
|
|
433 |
st.session_state["selected_page"] = 0
|
434 |
if "generated_article" not in st.session_state:
|
435 |
st.session_state["generated_article"] = ""
|
436 |
-
if "function_state" not in st.session_state:
|
437 |
-
st.session_state["function_state"] = True
|
438 |
if "generated_headline" not in st.session_state:
|
439 |
st.session_state["generated_headline"] = ""
|
440 |
if "webpage_option" not in st.session_state:
|
@@ -450,12 +739,26 @@ if "db_container" not in st.session_state:
|
|
450 |
db_analytics_item = {
|
451 |
"id": str(uuid.uuid4()),
|
452 |
"oparation": "page_load",
|
453 |
-
"timestamp": str(datetime.
|
454 |
}
|
455 |
client.create_item(body=db_analytics_item)
|
456 |
st.session_state["db_container"] = client
|
457 |
if "article_summary" not in st.session_state:
|
458 |
st.session_state["article_summary"] = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
459 |
|
460 |
PROCESS_STEPS = [
|
461 |
"Artikel Extraktion",
|
@@ -464,6 +767,20 @@ PROCESS_STEPS = [
|
|
464 |
"Artikel Ausgabe",
|
465 |
]
|
466 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
467 |
|
468 |
# def check_password():
|
469 |
# """Returns `True` if the user had the correct password."""
|
@@ -520,16 +837,23 @@ with st.sidebar:
|
|
520 |
"Artikel Generierung mit Links",
|
521 |
key="article_gen_btn",
|
522 |
use_container_width=True,
|
523 |
-
on_click=lambda: st.session_state.update({"
|
524 |
)
|
525 |
st.button(
|
526 |
"Artikel Generierung mit Stichpunkten",
|
527 |
key="headline_gen_btn",
|
528 |
use_container_width=True,
|
529 |
-
on_click=lambda: st.session_state.update({"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
530 |
)
|
531 |
|
532 |
-
if st.session_state["
|
533 |
tab_col1, tab_col2, tab_col3, tab_col4 = st.columns([1, 1, 1, 1])
|
534 |
|
535 |
tab_col1.button(
|
@@ -719,7 +1043,8 @@ if st.session_state["function_state"]:
|
|
719 |
st.button(
|
720 |
"Neuen Artikel generieren", key="reset_btn", on_click=reset_session_state
|
721 |
)
|
722 |
-
|
|
|
723 |
st.write(
|
724 |
"Bitte trage die Stichpunkte ein, die Du in den Artikel einbauen möchtest. Der Textinput ist essenziell für die Generierung des Artikels."
|
725 |
)
|
@@ -780,3 +1105,154 @@ else:
|
|
780 |
st.button(
|
781 |
"Neuen Artikel generieren", key="reset_btn", on_click=reset_session_state
|
782 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datetime import datetime
|
|
|
2 |
import os
|
3 |
import uuid
|
4 |
|
|
|
9 |
from bs4 import BeautifulSoup, NavigableString
|
10 |
from dotenv import load_dotenv
|
11 |
from st_copy_to_clipboard import st_copy_to_clipboard
|
12 |
+
from pytrends.request import TrendReq
|
13 |
+
import pytz
|
14 |
+
import xml.etree.ElementTree as ET
|
15 |
+
import re
|
16 |
|
17 |
load_dotenv()
|
18 |
|
|
|
273 |
os.environ.get("SUMMARY_API"),
|
274 |
headers={
|
275 |
"Content-Type": "application/json",
|
276 |
+
"Authorization": "Bearer " + os.environ.get("SUMMARY_API_KEY"),
|
277 |
"azureml-model-deployment": "heute-summary-api",
|
278 |
},
|
279 |
data={"article": article},
|
|
|
372 |
kwargs["webpage_option"],
|
373 |
)
|
374 |
headline = create_headline(created_article, kwargs["webpage_option"])
|
375 |
+
|
376 |
+
print(headline)
|
377 |
+
print(created_article)
|
378 |
+
|
379 |
db_analytics_item = {
|
380 |
"id": str(uuid.uuid4()),
|
381 |
"oparation": "article_generation",
|
382 |
+
"timestamp": str(datetime.now()),
|
383 |
}
|
384 |
client: ContainerProxy = st.session_state["db_container"]
|
385 |
client.create_item(body=db_analytics_item)
|
|
|
404 |
db_analytics_item = {
|
405 |
"id": str(uuid.uuid4()),
|
406 |
"oparation": "article_generation",
|
407 |
+
"timestamp": str(datetime.now()),
|
408 |
}
|
409 |
client: ContainerProxy = st.session_state["db_container"]
|
410 |
client.create_item(body=db_analytics_item)
|
|
|
422 |
st.session_state["generated_article"] = ""
|
423 |
st.session_state["studie_links"] = []
|
424 |
st.session_state["article_summary"] = ""
|
425 |
+
st.session_state["selection_content_trends_ressort"] = "Alle"
|
426 |
+
st.session_state["trends_realtime_all"] = {}
|
427 |
+
st.session_state["trends_today"] = {}
|
428 |
+
st.session_state["trends_yesterday"] = []
|
429 |
+
st.session_state["content_trend_articles_extracted"] = []
|
430 |
+
st.session_state["content_trend_article_links"] = []
|
431 |
+
st.session_state["webpage_option"] = "Boulevard"
|
432 |
+
|
433 |
+
## Trends
|
434 |
+
def fetch_trends(**kwargs):
    """Load the trend data for the time span selected in the UI.

    Expects a ``timespan`` keyword argument. The values "Echtzeit", "Heute"
    and "Gestern" trigger the matching fetch function; any other value is
    ignored.
    """
    selected = kwargs["timespan"]

    if selected == "Echtzeit":
        fetch_trends_realtime()
    elif selected == "Heute":
        fetch_trends_today()
    elif selected == "Gestern":
        fetch_trends_yesterday()
|
444 |
+
|
445 |
+
|
446 |
+
def fetch_trends_realtime():
    """Fetch the realtime trending searches once per ressort.

    Each result is stored in ``st.session_state`` under the key
    ``"trends_realtime_" + <ressort code>``, using the codes from ``RESSORTS``.
    """
    # NOTE(review): in pytrends ``tz`` is an offset in minutes; 360 looks like
    # the library's US example value -- confirm it is intended for 'AT'.
    client = TrendReq(hl='de-AT', tz=360, timeout=(10, 50))

    for code in RESSORTS.values():
        st.session_state["trends_realtime_" + code] = client.realtime_trending_searches(
            pn='AT', cat=code, count=5
        )
|
453 |
+
|
454 |
+
|
455 |
+
def fetch_trends_today():
    """Fetch today's searches (``pn="AT"``) and store them in session state.

    The result is written to ``st.session_state["trends_today"]``.
    """
    client = TrendReq(hl='de-AT', tz=360, timeout=(10, 50))
    st.session_state["trends_today"] = client.today_searches(pn="AT")
|
460 |
+
|
461 |
+
|
462 |
+
def _find_text(parent, path, namespaces=None):
    """Return the text of the first child matching *path*, or None if absent."""
    node = parent.find(path, namespaces)
    return node.text if node is not None else None


def fetch_trends_yesterday():
    """Download the daily-trends RSS feed and keep every entry not from today.

    The feed at ``TRENDS_YESTERDAY_FEED_URL`` is parsed and each ``<item>``
    whose publication date (in Europe/Vienna time) differs from today's date
    is converted to a dict with the keys ``title``, ``pubDate``,
    ``approx_traffic`` and ``news_items``. The resulting list is stored in
    ``st.session_state["trends_yesterday"]``.

    Raises:
        requests.RequestException: if the feed cannot be downloaded.
        xml.etree.ElementTree.ParseError: if the response is not valid XML.
        ValueError: if a ``pubDate`` does not match the expected format.
    """
    vienna = pytz.timezone('Europe/Vienna')
    today = datetime.now(vienna).date()

    # Same (connect, read) timeout as the pytrends client so a stalled
    # request cannot block the app indefinitely.
    response = requests.get(TRENDS_YESTERDAY_FEED_URL, timeout=(10, 50))
    response.raise_for_status()
    feed = ET.fromstring(response.content)
    ns = {'ht': 'https://trends.google.de/trends/trendingsearches/daily'}  # feed namespace

    trends = []

    for item in feed.findall(".//item"):
        pub_date_text = _find_text(item, 'pubDate')
        if pub_date_text is None:
            # Without a date the entry cannot be classified; skip it.
            continue

        # Convert to Vienna time before taking the date so the comparison
        # with ``today`` uses the same time zone on both sides.
        pub_date = (
            datetime.strptime(pub_date_text, '%a, %d %b %Y %H:%M:%S %z')
            .astimezone(vienna)
            .date()
        )
        # Filter: skip entries published today.
        if pub_date == today:
            continue

        news_items = [
            {
                'title': _find_text(news_item, 'ht:news_item_title', ns),
                'snippet': _find_text(news_item, 'ht:news_item_snippet', ns),
                'url': _find_text(news_item, 'ht:news_item_url', ns),
                'source': _find_text(news_item, 'ht:news_item_source', ns),
            }
            for news_item in item.findall('ht:news_item', ns)
        ]

        trends.append(
            {
                'title': _find_text(item, 'title'),
                'pubDate': pub_date_text,
                'approx_traffic': _find_text(item, 'ht:approx_traffic', ns),
                'news_items': news_items,
            }
        )

    st.session_state["trends_yesterday"] = trends
|
495 |
+
|
496 |
+
|
497 |
+
def render_trends_realtime(container):
    """Render the realtime trends of the selected ressort into *container*.

    Reads the selected ressort and the matching ``trends_realtime_<code>``
    entry from ``st.session_state``. If no trends are loaded an info box is
    shown; otherwise a ressort selectbox and one expander per trend with a
    checkbox per article are rendered.

    Args:
        container: Streamlit container/column that receives the widgets.
    """
    ressort = st.session_state["selection_content_trends_ressort"]
    # Only "trends_realtime_all" is pre-initialised; fall back to "not loaded"
    # for ressort keys that have not been fetched yet.
    trends_realtime = st.session_state.get("trends_realtime_" + RESSORTS[ressort], {})

    if trends_realtime == {}:
        container.info(
            body="Die Echtzeit-Trends wurden noch nicht geladen. Bitte verwende zunächst die Suche auf der rechten Seite!",
            icon="ℹ️"
        )
        return

    container.selectbox(
        label="Ressort auswählen",
        options=RESSORTS,
        placeholder="Bitte auswählen",
        key="selection_content_trends_ressort",
    )

    for trend_count, trend in enumerate(trends_realtime, start=1):
        with container.expander(f"{trend_count} -- {trend['title']}"):
            articles = extract_article_details_realtime(trend['articles'])

            for article_count, article in enumerate(articles, start=1):
                key = f"selection_trends_realtime_{ressort}_{trend_count}_{article_count}"

                st.checkbox(
                    f"{article_count} -- {article['articleTitle']} [Go To →]({article['url']})",
                    key=key,
                    disabled=disable_checkbox(f"selection_trends_realtime_{ressort}", key),
                    # Pass the callable plus its arguments; calling it here
                    # would run it during rendering and register None.
                    on_change=update_trend_article_list,
                    args=(key, article['url']),
                )
|
527 |
+
|
528 |
|
529 |
+
def render_trends_today(container):
    """Render today's trends from session state into *container*.

    Shows an info box while ``st.session_state["trends_today"]`` is still the
    empty placeholder; otherwise renders one expander per trend with a
    checkbox per article.

    Args:
        container: Streamlit container/column that receives the widgets.
    """
    trends_today = st.session_state["trends_today"]

    if trends_today == {}:
        container.info(
            body="Die heutigen Trends wurden noch nicht geladen. Bitte verwende zunächst die Suche auf der rechten Seite!",
            icon="ℹ️"
        )
        return

    for trend_count, trend in enumerate(trends_today, start=1):
        with container.expander(f"{trend_count} -- {trend['title']['query']} | Generated Traffic: {trend['formattedTraffic']}"):
            articles = extract_article_details_today(trend['articles'])

            for article_count, article in enumerate(articles, start=1):
                key = f"selection_trends_today_{trend_count}_{article_count}"

                st.checkbox(
                    f"{article_count} -- {article['articleTitle']} [Go To →]({article['url']})",
                    key=key,
                    disabled=disable_checkbox("selection_trends_today", key),
                    # Pass the callable plus its arguments; calling it here
                    # would run it during rendering and register None.
                    on_change=update_trend_article_list,
                    args=(key, article['url']),
                )
|
551 |
+
|
552 |
+
|
553 |
+
def render_trends_yesterday(container):
    """Render yesterday's trends from session state into *container*.

    Shows an info box while ``st.session_state["trends_yesterday"]`` is
    empty; otherwise renders one expander per trend containing its
    publication date and a checkbox per news item.

    Args:
        container: Streamlit container/column that receives the widgets.
    """
    trends_yesterday = st.session_state["trends_yesterday"]

    if trends_yesterday == []:
        container.info(
            body="Die gestrigen Trends wurden noch nicht geladen. Bitte verwende zunächst die Suche auf der rechten Seite!",
            icon="ℹ️"
        )
        return

    for trend_count, trend in enumerate(trends_yesterday, start=1):
        with container.expander(f"{trend_count}• {trend['title']} | Generated Traffic: {trend['approx_traffic']}"):
            st.write(f"Veröffentlichungsdatum : {trend['pubDate']}")

            for article_count, article in enumerate(trend['news_items'], start=1):
                key = f"selection_trends_yesterday_{trend_count}_{article_count}"

                st.checkbox(
                    label=f"{article_count} -- {article['title']} [Go To →]({article['url']})",
                    key=key,
                    disabled=disable_checkbox("selection_trends_yesterday", key),
                    # Pass the callable plus its arguments; calling it here
                    # would run it during rendering and register None.
                    on_change=update_trend_article_list,
                    args=(key, article['url']),
                )
|
575 |
+
|
576 |
+
|
577 |
+
def get_checkbox_states(pattern: str):
    """Return all session-state entries whose key matches *pattern*.

    *pattern* is applied with ``re.search``, i.e. it is treated as a regular
    expression that may match anywhere inside the key.
    """
    matching = {}
    for state_key, state_value in st.session_state.items():
        if re.search(pattern, state_key):
            matching[state_key] = state_value
    return matching
|
581 |
+
|
582 |
+
|
583 |
+
def disable_checkbox(pattern: str, session_key: str) -> bool:
    """Tell whether the checkbox stored under *session_key* must be disabled.

    A checkbox is disabled when it is currently unchecked and the number of
    checked boxes whose keys match *pattern* has reached
    ``LINKS_MAX_CHECKED``. Checkboxes that have no session-state entry yet
    are never disabled.

    Args:
        pattern: Regular expression selecting the checkbox group.
        session_key: Session-state key of the checkbox in question.

    Returns:
        True if the checkbox should be rendered disabled, else False.
    """
    if session_key not in st.session_state:
        return False

    cb_states = get_checkbox_states(pattern)

    # A checked box must stay enabled so the user can uncheck it again.
    # ``.get`` avoids a KeyError if the key does not match the group pattern.
    if cb_states.get(session_key, False):
        return False

    return sum(cb_states.values()) >= LINKS_MAX_CHECKED
|
590 |
+
|
591 |
+
|
592 |
+
def update_trend_article_list(session_key, article_url):
    """Sync the list of selected article links with one checkbox state.

    If the checkbox stored under *session_key* is checked, *article_url* is
    appended to ``st.session_state["content_trend_article_links"]`` unless it
    is already there; if it is unchecked, the URL is removed when present.
    Nothing happens while the checkbox has no session-state entry.
    """
    if session_key not in st.session_state:
        return

    is_checked = st.session_state[session_key]
    selected_links = st.session_state["content_trend_article_links"]
    already_listed = article_url in selected_links

    if is_checked and not already_listed:
        selected_links.append(article_url)
    elif not is_checked and already_listed:
        selected_links.remove(article_url)
|
601 |
+
|
602 |
+
|
603 |
+
## Content extraction
|
604 |
+
def extract_text_from_element(element):
    """Collect the text of all <p>, <ul> and <ol> tags below *element*.

    The tree is walked recursively. For every <p>, <ul> or <ol> tag the
    complete text of that tag (``get_text()``) plus a newline is emitted.

    Args:
        element: A BeautifulSoup tag or text node.

    Returns:
        The concatenated text; an empty string for plain text nodes.
    """
    # Text nodes have no children and contribute nothing on their own.
    if isinstance(element, NavigableString):
        return ""

    if element.name in ("p", "ul", "ol"):
        # get_text() already contains the text of every descendant, so do
        # not descend further: nested <p>/<ul>/<ol> tags would otherwise be
        # emitted a second time.
        return element.get_text() + "\n"

    # Any other tag: gather the text from each child in document order.
    return "".join(extract_text_from_element(child) for child in element.children)
|
620 |
+
|
621 |
+
|
622 |
+
def filter_empty_lines(text: str) -> str:
    """Remove all empty and whitespace-only lines from *text*.

    The text is split on "\\n"; the remaining lines are kept unchanged
    (including their own leading/trailing whitespace) and joined with "\\n".
    """
    return "\n".join(line for line in text.split("\n") if line.strip())
|
633 |
+
|
634 |
+
|
635 |
+
def extract_article(url):
    """Download *url* and return the text of its <article> tag.

    Args:
        url: Address of the web page to fetch.

    Returns:
        The article text with empty lines removed, or None if the request
        fails, the status code is not 200, or the page has no <article> tag.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Download the page. The (connect, read) timeout keeps a stalled server
    # from blocking the app; network errors follow the same "None on failure"
    # contract as the other error paths below.
    try:
        response = requests.get(url, headers=headers, timeout=(10, 50))
    except requests.RequestException as exc:
        print(f"Fehler: {exc}")
        return None

    # Anything other than status code 200 counts as a failed request.
    if response.status_code != 200:
        print(f"Fehler: {response.status_code}")
        return None

    # Parse the HTML and look for the first <article> tag.
    soup = BeautifulSoup(response.text, "html.parser")
    article_tag = soup.find("article")

    if not article_tag:
        print("Kein <article>-Tag gefunden.")
        return None

    # Recursively collect the paragraph/list text, then drop empty lines.
    extracted_text = extract_text_from_element(article_tag)
    return filter_empty_lines(extracted_text)
|
662 |
+
|
663 |
+
|
664 |
+
def extract_links(**kwargs):
    """Extract the article text for every non-empty link.

    Expects the keyword arguments ``links`` (iterable of URLs) and ``key``
    (session-state key). The list of ``extract_article`` results is stored in
    ``st.session_state[key]`` while a spinner is displayed.
    """
    with st.spinner("Extrahiere Informationen aus den Links..."):
        extracted = [
            extract_article(url) for url in kwargs["links"] if url != ''
        ]
        st.session_state[kwargs["key"]] = extracted
|
672 |
+
|
673 |
+
|
674 |
+
def extract_article_details_realtime(articles):
    """Reduce realtime-trend articles to the fields used by the UI.

    Args:
        articles: Iterable of mappings that each provide the keys ``url``,
            ``snippet``, ``articleTitle`` and ``time``.

    Returns:
        A list of new dicts containing exactly those four keys.

    Raises:
        KeyError: if an article lacks one of the required keys.
    """
    return [
        {
            'url': article['url'],
            'snippet': article['snippet'],
            'articleTitle': article['articleTitle'],
            'time': article['time'],
        }
        for article in articles
    ]
|
690 |
+
|
691 |
+
|
692 |
+
def extract_article_details_today(articles):
    """Reduce today's-trend articles to the fields used by the UI.

    The source key ``title`` is exposed as ``articleTitle`` so the result has
    the same title key as ``extract_article_details_realtime``.

    Args:
        articles: Iterable of mappings that each provide the keys ``url``,
            ``snippet`` and ``title``.

    Returns:
        A list of new dicts with the keys ``url``, ``snippet`` and
        ``articleTitle``.

    Raises:
        KeyError: if an article lacks one of the required keys.
    """
    return [
        {
            'url': article['url'],
            'snippet': article['snippet'],
            'articleTitle': article['title'],
        }
        for article in articles
    ]
|
705 |
+
|
706 |
+
def get_final_articles():
    """Return the values of all session-state entries for final trend articles.

    An entry is included when its key contains a match for the pattern
    ``content_trend_article_final``; values are returned in the iteration
    order of ``st.session_state``.
    """
    collected = []
    for state_key, state_value in st.session_state.items():
        if re.search("content_trend_article_final", state_key):
            collected.append(state_value)
    return collected
|
710 |
|
711 |
if "extracted_articles" not in st.session_state:
|
712 |
st.session_state["extracted_articles"] = []
|
|
|
724 |
st.session_state["selected_page"] = 0
|
725 |
if "generated_article" not in st.session_state:
|
726 |
st.session_state["generated_article"] = ""
|
|
|
|
|
727 |
if "generated_headline" not in st.session_state:
|
728 |
st.session_state["generated_headline"] = ""
|
729 |
if "webpage_option" not in st.session_state:
|
|
|
739 |
db_analytics_item = {
|
740 |
"id": str(uuid.uuid4()),
|
741 |
"oparation": "page_load",
|
742 |
+
"timestamp": str(datetime.now()),
|
743 |
}
|
744 |
client.create_item(body=db_analytics_item)
|
745 |
st.session_state["db_container"] = client
|
746 |
if "article_summary" not in st.session_state:
|
747 |
st.session_state["article_summary"] = ""
|
748 |
+
if "article_generation_mode" not in st.session_state:
|
749 |
+
st.session_state["article_generation_mode"] = "links"
|
750 |
+
if "selection_content_trends_ressort" not in st.session_state:
|
751 |
+
st.session_state["selection_content_trends_ressort"] = "Alle"
|
752 |
+
if "trends_realtime_all" not in st.session_state:
|
753 |
+
st.session_state["trends_realtime_all"] = {}
|
754 |
+
if "trends_today" not in st.session_state:
|
755 |
+
st.session_state["trends_today"] = {}
|
756 |
+
if "trends_yesterday" not in st.session_state:
|
757 |
+
st.session_state["trends_yesterday"] = []
|
758 |
+
if "content_trend_articles_extracted" not in st.session_state:
|
759 |
+
st.session_state["content_trend_articles_extracted"] = []
|
760 |
+
if "content_trend_article_links" not in st.session_state:
|
761 |
+
st.session_state["content_trend_article_links"] = []
|
762 |
|
763 |
PROCESS_STEPS = [
|
764 |
"Artikel Extraktion",
|
|
|
767 |
"Artikel Ausgabe",
|
768 |
]
|
769 |
|
770 |
+
RESSORTS = {
|
771 |
+
"Alle": "all",
|
772 |
+
"Gesundheit": "m",
|
773 |
+
"Business": "b",
|
774 |
+
"Headlines": "h",
|
775 |
+
"Sport": "s",
|
776 |
+
"Entertainment": "e",
|
777 |
+
"Technik": "t",
|
778 |
+
}
|
779 |
+
|
780 |
+
TRENDS_YESTERDAY_FEED_URL = 'https://trends.google.de/trends/trendingsearches/daily/rss?geo=AT'
|
781 |
+
|
782 |
+
LINKS_MAX_CHECKED = 3
|
783 |
+
|
784 |
|
785 |
# def check_password():
|
786 |
# """Returns `True` if the user had the correct password."""
|
|
|
837 |
"Artikel Generierung mit Links",
|
838 |
key="article_gen_btn",
|
839 |
use_container_width=True,
|
840 |
+
on_click=lambda: st.session_state.update({"article_generation_mode": "links"}),
|
841 |
)
|
842 |
st.button(
|
843 |
"Artikel Generierung mit Stichpunkten",
|
844 |
key="headline_gen_btn",
|
845 |
use_container_width=True,
|
846 |
+
on_click=lambda: st.session_state.update({"article_generation_mode": "keywords"}),
|
847 |
+
)
|
848 |
+
st.button(
|
849 |
+
label="Artikelgenerierung mit Trendthemenanalyse",
|
850 |
+
key="trends_gen_btn",
|
851 |
+
use_container_width=True,
|
852 |
+
on_click=lambda: st.session_state.update({"article_generation_mode": "trends"})
|
853 |
+
|
854 |
)
|
855 |
|
856 |
+
if st.session_state["article_generation_mode"] == "links":
|
857 |
tab_col1, tab_col2, tab_col3, tab_col4 = st.columns([1, 1, 1, 1])
|
858 |
|
859 |
tab_col1.button(
|
|
|
1043 |
st.button(
|
1044 |
"Neuen Artikel generieren", key="reset_btn", on_click=reset_session_state
|
1045 |
)
|
1046 |
+
|
1047 |
+
if st.session_state["article_generation_mode"] == "keywords":
|
1048 |
st.write(
|
1049 |
"Bitte trage die Stichpunkte ein, die Du in den Artikel einbauen möchtest. Der Textinput ist essenziell für die Generierung des Artikels."
|
1050 |
)
|
|
|
1105 |
st.button(
|
1106 |
"Neuen Artikel generieren", key="reset_btn", on_click=reset_session_state
|
1107 |
)
|
1108 |
+
|
1109 |
+
if st.session_state["article_generation_mode"] == "trends":
|
1110 |
+
|
1111 |
+
trends_left, trends_right = st.columns([0.8, 0.2])
|
1112 |
+
|
1113 |
+
trends_right.radio(
|
1114 |
+
label="Zeitraum auswählen",
|
1115 |
+
options=[
|
1116 |
+
"Echtzeit",
|
1117 |
+
"Heute",
|
1118 |
+
"Gestern"
|
1119 |
+
],
|
1120 |
+
key="selection_content_trends_timespan"
|
1121 |
+
)
|
1122 |
+
|
1123 |
+
trends_right.button(
|
1124 |
+
label="Suchen",
|
1125 |
+
type="primary",
|
1126 |
+
on_click=fetch_trends,
|
1127 |
+
kwargs={
|
1128 |
+
"timespan": st.session_state["selection_content_trends_timespan"]
|
1129 |
+
},
|
1130 |
+
use_container_width=True
|
1131 |
+
)
|
1132 |
+
|
1133 |
+
trends_timespan = st.session_state["selection_content_trends_timespan"]
|
1134 |
+
|
1135 |
+
match trends_timespan:
|
1136 |
+
case "Echtzeit":
|
1137 |
+
render_trends_realtime(trends_left)
|
1138 |
+
case "Heute":
|
1139 |
+
render_trends_today(trends_left)
|
1140 |
+
case "Gestern":
|
1141 |
+
render_trends_yesterday(trends_left)
|
1142 |
+
|
1143 |
+
try:
|
1144 |
+
st.button(
|
1145 |
+
label="Informationen aus Links extrahieren",
|
1146 |
+
on_click=extract_links,
|
1147 |
+
use_container_width=True,
|
1148 |
+
type="secondary",
|
1149 |
+
key="btn_extract_trend_links",
|
1150 |
+
kwargs={
|
1151 |
+
"key": "content_trend_articles_extracted",
|
1152 |
+
"links": st.session_state["content_trend_article_links"]
|
1153 |
+
},
|
1154 |
+
)
|
1155 |
+
except Exception as e:
|
1156 |
+
print(f"Fehler beim Extrahieren der Informationen: {str(e)}")
|
1157 |
+
st.error(
|
1158 |
+
body=f"Sie haben einen oder mehrere Links in einem inkorrekten Format angegeben. Bitte lade diese Seite neu und verwende valide URLs: {str(e)}",
|
1159 |
+
icon="🚨",
|
1160 |
+
)
|
1161 |
+
|
1162 |
+
st.write()
|
1163 |
+
|
1164 |
+
if st.session_state["content_trend_article_links"] != []:
|
1165 |
+
st.write("Folgende Informationen konnten aus ihren Artikeln extrahiert werden:")
|
1166 |
+
|
1167 |
+
for i, link_content in enumerate(st.session_state["content_trend_articles_extracted"]):
|
1168 |
+
with st.expander(f"Link {i+1}"):
|
1169 |
+
if link_content:
|
1170 |
+
st.text_area(
|
1171 |
+
label="Bitte bearbeiten Sie die Informationen falls notwendig:",
|
1172 |
+
value=link_content,
|
1173 |
+
key="content_trend_article_final_" + str(i + 1)
|
1174 |
+
)
|
1175 |
+
else:
|
1176 |
+
st.info(
|
1177 |
+
body="Die Webseite Ihres Artikels blockiert das automatische Extrahieren des Artikels. Wenn Sie den Artikel dennoch verwenden möchten, dann können Sie diesen kopieren und in das untenstehende Textfeld einfügen.",
|
1178 |
+
icon="ℹ️",
|
1179 |
+
)
|
1180 |
+
st.text_area(
|
1181 |
+
"Bitte fügen Sie den Artikel ein:",
|
1182 |
+
value=link_content,
|
1183 |
+
key="content_trend_article_final_" + str(i + 1)
|
1184 |
+
)
|
1185 |
+
|
1186 |
+
st.write("Artikellänge")
|
1187 |
+
st.radio(
|
1188 |
+
"Optionen",
|
1189 |
+
["Kurz", "Mittel", "Lang", "SEO", "SEO Plus"],
|
1190 |
+
key="length_option",
|
1191 |
+
)
|
1192 |
+
|
1193 |
+
st.text_area(
|
1194 |
+
"Füge weitere Informationen für den Prompt hinzu, falls nötig:",
|
1195 |
+
key="add_info",
|
1196 |
+
)
|
1197 |
+
|
1198 |
+
st.button(
|
1199 |
+
"Artikel generieren",
|
1200 |
+
key="article_btn",
|
1201 |
+
on_click=on_click_handler_generate_article,
|
1202 |
+
kwargs={
|
1203 |
+
"length_option": st.session_state["length_option"],
|
1204 |
+
"final_articles": get_final_articles(),
|
1205 |
+
"add_info": st.session_state["add_info"],
|
1206 |
+
"webpage_option": st.session_state["webpage_option"],
|
1207 |
+
},
|
1208 |
+
)
|
1209 |
+
|
1210 |
+
|
1211 |
+
if st.session_state["generated_headline"] != "" and st.session_state["generated_article"] != "":
|
1212 |
+
st.write(f"**{st.session_state['generated_headline']}**")
|
1213 |
+
st.write(st.session_state["generated_article"])
|
1214 |
+
|
1215 |
+
st.write("**Zusammenfassung:**")
|
1216 |
+
st.write(st.session_state["article_summary"])
|
1217 |
+
|
1218 |
+
st.write("Kopieren Sie den Artikel: ")
|
1219 |
+
st_copy_to_clipboard(
|
1220 |
+
st.session_state["generated_headline"]
|
1221 |
+
+ "\n"
|
1222 |
+
+ st.session_state["generated_article"]
|
1223 |
+
)
|
1224 |
+
|
1225 |
+
if st.session_state["studie_links"]:
|
1226 |
+
st.write("Hier sind einige Studien, die relevant sein könnten:")
|
1227 |
+
for result in st.session_state["studie_links"]:
|
1228 |
+
st.write(f"- [{result['title']}]({result['link']})")
|
1229 |
+
else:
|
1230 |
+
st.write("Keine relevanten Studien gefunden.")
|
1231 |
+
|
1232 |
+
if "takeaways" in st.session_state:
|
1233 |
+
st.write("Hier sind einige Takeaways die wichtig sein könnten:")
|
1234 |
+
st.write(st.session_state["takeaways"])
|
1235 |
+
|
1236 |
+
if "faq" in st.session_state:
|
1237 |
+
st.write("Hier sind FAQs zu dem Artikel:")
|
1238 |
+
st.write(st.session_state["faq"])
|
1239 |
+
|
1240 |
+
st.button(
|
1241 |
+
"Relevante Studien finden",
|
1242 |
+
on_click=get_related_studies,
|
1243 |
+
args=(st.session_state["generated_article"],),
|
1244 |
+
)
|
1245 |
+
|
1246 |
+
st.button(
|
1247 |
+
"Key Takeaways generieren",
|
1248 |
+
on_click=lambda: get_takeaways(st.session_state["generated_article"]),
|
1249 |
+
)
|
1250 |
+
|
1251 |
+
st.button(
|
1252 |
+
"FAQ generieren",
|
1253 |
+
on_click=lambda: get_faq(st.session_state["generated_article"]),
|
1254 |
+
)
|
1255 |
+
|
1256 |
+
st.button(
|
1257 |
+
"Neuen Artikel generieren", key="reset_btn", on_click=reset_session_state
|
1258 |
+
)
|