xuyingli committed · Commit 61448e5 · Parent: 5634922

Create streamlit_audio.py

streamlit_audio.py ADDED (+1059 -0)
@@ -0,0 +1,1059 @@
import os
import pprint
import requests
from bs4 import BeautifulSoup
from gnews import GNews
from datetime import datetime
import edge_tts
import arxiv
import subprocess
import base64
import openai
import streamlit as st
from langchain.utilities import GoogleSerperAPIWrapper
from langchain.llms.openai import OpenAI
from youtubesearchpython import Playlist, playlist_from_channel_id
from youtube_transcript_api import YouTubeTranscriptApi
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.document_loaders import WebBaseLoader

os.environ["SERPER_API_KEY"] = st.secrets["SERPER_API_KEY"]
os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]
openai.api_key = os.environ["OPENAI_API_KEY"]

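# Configuration note: st.secrets is read from the Space's secret settings or,
# when running locally, from .streamlit/secrets.toml. A minimal local file
# would look like this (placeholder values, not real keys):
#
#   SERPER_API_KEY = "your-serper-key"
#   OPENAI_API_KEY = "sk-your-openai-key"
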
system_message = '''
You are a very talented editor, skilled at consolidating
fragmented information and introductions into a cohesive script, without missing any details.
Compile the news article based on the information in 【】.
'''

system_message_2 = '''
You are a linguist, skilled in summarizing textual content and presenting it in 3 bullet points using markdown.
'''

# Prompt for the 中文 mode. In English: "You are a linguist, skilled at
# translating English into Chinese. Keep the wording fluent and idiomatic in
# Chinese. Do not return anything extra; only translate the text."
system_message_3 = '''
你是个语言学家,擅长把英文翻译成中文。要注意表达的流畅和使用中文的表达习惯。不要返回多余的信息,只把文字翻译成中文。
'''

def find_next_link_text(url, target_link, target_text):
    """
    Find the first link and text after the given target link and text on the specified URL.

    Parameters:
    url (str): The URL of the webpage to scrape.
    target_link (str): The specific link to be found.
    target_text (str): The specific link text to be found.

    Returns:
    tuple: A tuple containing the next link and its text. Returns (None, None) if not found.
    """
    # Send a GET request
    response = requests.get(url)
    response.raise_for_status()  # Raises an exception on HTTP errors

    # Parse the content using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all the <ul> elements
    ul_elems = soup.find_all('ul')

    # Collect every link/text pair from all <ul> elements, in document order
    all_links = []
    for ul_elem in ul_elems:
        links = [(link.get('href'), link.text) for link in ul_elem.find_all('a')]
        all_links.extend(links)

    # Return the first link/text pair that follows the target pair
    found = False
    for link, text in all_links:
        if found:
            return link, text
        if link == target_link and text == target_text:
            found = True

    return None, None

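# Usage sketch (mirrors the call made in compute_page below): on the NVIDIA
# AI Podcast index page, the entry that follows the "AI Podcast" nav item is
# the newest episode:
#
#   next_link, title = find_next_link_text(
#       "https://blogs.nvidia.com/ai-podcast/",
#       "https://blogs.nvidia.com/ai-podcast/",
#       "AI Podcast",
#   )
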
def is_link_accessible(url):
    """Check if a link is accessible."""
    try:
        response = requests.get(url, timeout=10)  # set a timeout to avoid waiting indefinitely
        # Treat any 4xx or 5xx status code as inaccessible
        if 400 <= response.status_code < 600:
            return False
        return True
    except requests.RequestException:
        return False

def get_latest_aws_ml_blog():
    """Return (title, link) of the newest post on the AWS Machine Learning blog."""
    url = 'https://aws.amazon.com/blogs/machine-learning/'

    response = requests.get(url)
    if response.status_code != 200:
        print(f"Failed to retrieve webpage. Status code: {response.status_code}")
        return None, None

    soup = BeautifulSoup(response.text, 'html.parser')
    articles = soup.find_all('div', class_='lb-col lb-mid-18 lb-tiny-24')

    if not articles:
        print("No articles found.")
        return None, None

    title = articles[0].find('h2').text
    link = articles[0].find('a')['href']
    return title, link

def fetch_videos_from_channel(channel_id):
    """Return the full list of videos for a YouTube channel via its uploads playlist."""
    playlist = Playlist(playlist_from_channel_id(channel_id))
    while playlist.hasMoreVideos:
        playlist.getNextVideos()
    return playlist.videos

def get_h1_text(url):
    """Fetches the text content of the first h1 element from the given URL."""
    # Get the HTML content of the URL
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the first h1 element and return its text
    h1_element = soup.find('h1', class_='entry-title')
    if h1_element:
        return h1_element.text.strip()  # Remove any extra whitespace
    else:
        return None

def get_transcript(video_id):
    """Return a YouTube video's transcript as one space-joined string."""
    raw_data = YouTubeTranscriptApi.get_transcript(video_id)
    texts = [item['text'] for item in raw_data]
    return ' '.join(texts)

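# Usage sketch (the video ID here is a placeholder):
#
#   transcript_text = get_transcript("VIDEO_ID")
#
# Note: get_transcript is not wired into the pipeline below, which summarizes
# the video pages themselves instead of their captions.
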
def extract_data_from_url(url, class_name):
    """
    Extract the href attribute and text content of an <a> tag with a given class from a URL.

    Parameters:
    - url (str): The webpage URL to extract data from.
    - class_name (str): The class name of the <a> tag to look for.
    """
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        target_a = soup.find('a', class_=class_name)

        if target_a:
            data_mrf_link = target_a.get('href')
            text = target_a.get_text().strip()
            return (data_mrf_link, text)
        else:
            raise ValueError("Target element not found.")
    else:
        raise ConnectionError("Request failed.")

def split_text_into_documents(long_string, max_docs=20):
    # First pass: character-based split, capped at max_docs chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=20,
        length_function=len,
    )
    texts = text_splitter.split_text(long_string)
    docs = [Document(page_content=t) for t in texts[:max_docs]]

    # Second pass: re-split by tiktoken token count so chunks fit a token budget
    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=500, chunk_overlap=0
    )
    split_docs = text_splitter.split_documents(docs)
    return split_docs

def autoplay_audio(file_path: str):
    """Embed an MP3 as a base64 data URI inside a raw HTML <audio> tag so it autoplays."""
    with open(file_path, "rb") as f:
        data = f.read()
        b64 = base64.b64encode(data).decode()
        md = f"""
            <audio controls autoplay style="width: 100%;">
            <source src="data:audio/mp3;base64,{b64}" type="audio/mp3">
            </audio>
            """
        st.markdown(
            md,
            unsafe_allow_html=True,
        )

def get_h1_from_url(url):
    """Return the text of the <h1 class="f-display-2"> tag on a page, if present."""
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')

        # Look up the <h1> tag by class
        h1_tag = soup.find("h1", class_="f-display-2")
        if h1_tag:
            return h1_tag.text
        else:
            print("Couldn't find the <h1> tag with the specified class on the page.")
            return None
    else:
        print(f"Failed to fetch the webpage. Status code: {response.status_code}")
        return None

def summarize_documents(split_docs):
    """Summarize a list of documents with a map-reduce summarization chain."""
    llm = ChatOpenAI(temperature=1, model_name="gpt-3.5-turbo-16k")
    chain = load_summarize_chain(llm, chain_type="map_reduce")
    summary = chain.run(split_docs)
    return summary

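# Usage sketch: the two helpers above are designed to be chained on any long
# string (`raw_text` stands in for text you already fetched):
#
#   docs = split_text_into_documents(raw_text)
#   summary = summarize_documents(docs)
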
def get_completion_from_messages(messages,
                                 model="gpt-3.5-turbo-16k",
                                 temperature=1.5, max_tokens=7000):
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    return response.choices[0].message["content"]

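# Usage sketch (the message shape matches the calls made throughout
# compute_page below):
#
#   reply = get_completion_from_messages([
#       {'role': 'system', 'content': system_message_2},
#       {'role': 'user', 'content': '【text to summarize】'},
#   ])
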
def fetch_gnews_links(query, language='en', country='US', period='1d', start_date=None, end_date=None, max_results=5, exclude_websites=None):
    """
    Fetch news links from Google News based on the provided query.

    Parameters:
    - query (str): The search query for fetching news.
    - ... (other params): Additional parameters for customizing the news fetch.

    Returns:
    - dict: Parallel lists under 'title', 'summary', and 'url' for the matching articles.
    """
    content = {'title': [], 'summary': [], 'url': []}

    # Initialize GNews
    google_news = GNews(language=language, country=country, period=period, start_date=start_date, end_date=end_date, max_results=max_results, exclude_websites=exclude_websites)

    # Fetch news based on the query
    news_items = google_news.get_news(query)
    print(news_items)

    # Extract URLs and titles, then summarize each article in turn
    urls = [item['url'] for item in news_items]
    content['title'] = [item['title'] for item in news_items]

    for url in urls:
        content['url'].append(url)
        content['summary'].append(summarize_website_content(url))

    return content

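# Usage sketch (mirrors the call in compute_page):
#
#   news = fetch_gnews_links(query='AI, LLM, Machine learning', max_results=4)
#   news['title'][0], news['url'][0]  # headline and link of the first hit
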
def summarize_website_content(url, temperature=1, model_name="gpt-3.5-turbo-16k", chain_type="stuff"):
    """
    Summarize the content of a given website URL.

    Parameters:
    - url (str): The website URL to fetch and summarize.
    - temperature (float, optional): Temperature parameter for the ChatOpenAI model. Default is 1.
    - model_name (str, optional): The model name for ChatOpenAI. Default is "gpt-3.5-turbo-16k".
    - chain_type (str, optional): The type of summarization chain to use. Default is "stuff".

    Returns:
    - The summarized content, or 'No result' if fetching or summarizing fails.
    """
    try:
        # Load the content from the given URL
        loader = WebBaseLoader(url)
        docs = loader.load()

        # Initialize the ChatOpenAI model
        llm = ChatOpenAI(temperature=temperature, model_name=model_name)

        # Load the summarization chain
        chain = load_summarize_chain(llm, chain_type=chain_type)

        # Run the chain on the loaded documents
        summarized_content = chain.run(docs)

        return summarized_content
    except Exception:
        return 'No result'

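# Design note: chain_type="stuff" places the whole page into a single prompt,
# which suits individual articles on a 16k-context model; summarize_documents
# above uses map_reduce instead, the usual choice for inputs that exceed the
# context window.
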
def get_transcript_link(url):
    """Fetches the first 'Transcript' link from the given URL."""
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    transcript_link_element = soup.find('a', string="Transcript")

    if transcript_link_element:
        return transcript_link_element['href']
    else:
        return None

def get_youtube_link(url):
    """Fetches the first 'Video' link from the given URL."""
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    video_link_element = soup.find('a', string="Video")

    if video_link_element:
        return video_link_element['href']
    else:
        return None

def get_latest_openai_blog_url():
    base_url = "https://openai.com"
    blog_url = f"{base_url}/blog"

    response = requests.get(blog_url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find the <a> tag with the specific class used for blog post cards
        target_link = soup.find("a", class_="ui-link group relative cursor-pointer")
        if target_link:
            # Combine the base URL with the relative path
            post_url = base_url + target_link['href']
            return post_url
        else:
            print("Couldn't find the target post URL.")
            return None
    else:
        print(f"Failed to fetch the webpage. Status code: {response.status_code}")
        return None

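# Caveat: like the other scrapers in this file, this relies on a hard-coded
# CSS class on a third-party page; if OpenAI restyles its blog the selector
# stops matching and the function returns None, which compute_page falls back
# on with placeholder content.
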
def extract_blog_link_info(url):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.3'
    }

    response = requests.get(url, headers=headers)

    if response.status_code != 200:
        return None, None

    soup = BeautifulSoup(response.content, 'html.parser')

    # The site may have several such links; take only the first match
    link_element = soup.find('a', class_='f-post-link')

    if link_element:
        text_content = link_element.h3.text.strip()
        href_link = link_element['href']
        return text_content, href_link
    else:
        return None, None

def get_all_text_from_url(url):
    # Fetch the content using requests
    response = requests.get(url)
    response.raise_for_status()  # Raise an error if the request failed

    # Parse the HTML using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract all text; `stripped_strings` yields strings with extra whitespace removed
    return ' '.join(soup.stripped_strings)

def contains_keywords(s):
    keywords = ["AI", "GPT", "LLM"]
    return any(keyword in s for keyword in keywords)

def input_page(st, **state):
    # Include Font Awesome CSS
    st.markdown(
        """
        <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css">
        """,
        unsafe_allow_html=True,
    )

    # Style the GitHub and Twitter icons
    st.markdown(
        """
        <style>
        .social-icons {
            gap: 10px; /* Space between icons */
        }
        .social-icons a i {
            color: #6c6c6c;
        }
        </style>
        """,
        unsafe_allow_html=True,
    )

    # Page title plus GitHub and Twitter icons with hyperlinks
    st.markdown("""
        <h1 style='text-align: center; color: black;'>
        Your Personal <span style='color: #FF4B4B; font-size: 1.25em;'>AI News</span> Podcast
        </h1>
        <div class="social-icons" style='text-align: center; color: black;'>
            <a href="https://github.com/xl631212/llm_newsletter/tree/main" target="_blank"><i class="fab fa-github fa-2x"></i></a>
            <a href="https://twitter.com/li_xuying" target="_blank"><i class="fab fa-twitter fa-2x"></i></a>
        </div>
        """,
        unsafe_allow_html=True
    )
    st.markdown("<h3 style='text-align: center; color: black;'>Empower Your Day with Real-Time Insights: Leveraging AI for Instant News <br> and Podcast Updates.</h3>", unsafe_allow_html=True)
    st.markdown("""
        <h4 style='text-align: center; color: #6C6C6C;'>
        Choose your preferred options🔘 at the bottom, then double-click👆 the button below to initiate.
        <br>
        Sit back and relax while we craft your personalized LLM podcast within <span style='color: #2859C0; font-size: 1.15em;'>3 mins</span>.
        </h4>
        <br><br>
        """,
        unsafe_allow_html=True)

    button_placeholder = st.empty()
    st.markdown("<br>", unsafe_allow_html=True)

    with st.container():
        col3a, col4a, col5a = st.columns([4, 7, 4])
        with col3a:
            pass
        with col4a:
            col1a, col2a, col8a = st.columns([3, 1, 3])
            with col1a:
                st.write("**Options🔘:**")
        with col5a:
            pass

    st.markdown("""
    <style>
    .stButton > button {
        font-size: 100px;
        width: 35%;   /* fixed width */
        height: 50px; /* fixed height */
        color: white;
        background-color: #FF4B4B;
        border: none;
        border-radius: 15px;
        margin: auto;
        font-weight: bold;
        display: flex;
        justify-content: center;
        align-items: center;
    }

    .stButton > button:hover {
        background-color: #EFEFEF; /* simple hover effect */
        color: #9A9A9A;
    }

    .stButton > button div p {
        font-size: 24px; /* button label size */
        margin: 0;       /* drop the paragraph's default margin */
    }

    .stButton > button div p:hover {
        font-size: 20px;
    }
    </style>
    """, unsafe_allow_html=True)

    with st.container():
        col3, col4, col5 = st.columns([4, 7, 4])
        with col3:
            pass
        with col4:
            col1, col2, col8 = st.columns([4, 2, 4])
            with col1:
                language = st.selectbox(
                    "Language",
                    ("English", "中文"),
                    key='ahaha'
                )
                audio_length_adjust = st.select_slider('Audio length', options=['short', 'medium', 'long'], value='medium')
                if audio_length_adjust == 'short':
                    audio_length = 200
                elif audio_length_adjust == 'medium':
                    audio_length = 350
                else:
                    audio_length = 500
                st.session_state.audio_length = audio_length

            with col8:
                options_2 = st.selectbox(
                    'In a tone of',
                    ['Informal', 'Professional', 'Humorous'],
                    key='opt3'
                )
                day = st.select_slider('Information volume', options=['small', 'medium', 'large'], value='medium')
                if day == 'small':
                    st.session_state.day = 2
                    st.session_state.arxiv = 2
                elif day == 'medium':
                    st.session_state.day = 4
                    st.session_state.arxiv = 3
                else:
                    st.session_state.day = 6
                    st.session_state.arxiv = 4

        with col5:
            pass

    with button_placeholder:
        # Create the button that kicks off generation
        if st.button("👆 Double-Click Generation"):
            st.session_state.page = "two"
            st.session_state.language = language
            if options_2 == 'Informal':
                st.session_state.tone = """read news and present them in a casual and conversational tone.
                You should use everyday language, contractions, and slang to engage the audience and make the news more relatable. """
            elif options_2 == 'Humorous':
                st.session_state.tone = """read news and present in a comical and amusing tone.
                You should be able to recognize and exaggerate humorous elements of each article along with jokes and deliver them in a way
                that will make the audience laugh."""
            # 'Professional' falls through to the neutral default tone

    st.markdown("""
    <style>
    .footer {
        position: fixed;
        bottom: 0;
        left: 10px;
        width: auto;
        background-color: transparent;
        text-align: right;
        padding-right: 10px;
        padding-bottom: 10px;
    }
    </style>
    <div class="footer">Made with ❤️ by Xuying Li</div>
    """, unsafe_allow_html=True)

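# Flow note: input_page stores every choice in st.session_state and flips
# st.session_state.page to "two"; Streamlit then reruns the script and main()
# routes to compute_page below.
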
def compute_page(st, **state):
    # Include Font Awesome CSS
    st.markdown(
        """
        <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css">
        """,
        unsafe_allow_html=True,
    )

    # Style the GitHub and Twitter icons
    st.markdown(
        """
        <style>
        .social-icons {
            gap: 10px; /* Space between icons */
        }
        .social-icons a i {
            color: #6c6c6c;
        }
        </style>
        """,
        unsafe_allow_html=True,
    )

    # Page title plus GitHub and Twitter icons with hyperlinks
    st.markdown("""
        <h1 style='text-align: center; color: black;'>
        Your Personal <span style='color: #FF4B4B; font-size: 1.25em;'>AI News</span> Podcast
        </h1>
        <div class="social-icons" style='text-align: center; color: black;'>
            <a href="https://github.com/xl631212/llm_newsletter/tree/main" target="_blank"><i class="fab fa-github fa-2x"></i></a>
            <a href="https://twitter.com/li_xuying" target="_blank"><i class="fab fa-twitter fa-2x"></i></a>
        </div>
        """,
        unsafe_allow_html=True
    )

    st.markdown("""
    <style>
    /* This styles the main content excluding h1 and h2 */
    #root .block-container {
        width: 75%;
        margin: auto;
    }
    </style>
    """, unsafe_allow_html=True)
    radio_placeholder = st.empty()
    progress_placeholder = st.empty()
    progress_text = "Searching for OpenAI Blog..."
    my_bar = progress_placeholder.progress(0, text=progress_text)
    openai_blog_url = get_latest_openai_blog_url()
    if openai_blog_url:
        openai_title = get_h1_from_url(openai_blog_url)
        openai_blog = summarize_website_content(openai_blog_url)
    else:
        # Fallbacks so the newsletter assembly below does not fail if scraping breaks
        openai_title, openai_blog = "OpenAI Blog", ""

    my_bar.progress(10, text="Searching for Microsoft Blog...")
    url = "https://blogs.microsoft.com/"
    M_title, Microsoft_link = extract_blog_link_info(url)
    microsoft_blog = summarize_website_content(Microsoft_link)

    my_bar.progress(20, text="Searching for Amazon Blog...")
    A_title, A_link = get_latest_aws_ml_blog()
    aws_blog = summarize_website_content(A_link)

    my_bar.progress(30, text="Searching for Apple Blog...")
    url = 'https://machinelearning.apple.com/'
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Locate the newest article's title and link
    article = soup.select_one('h3.post-title a')
    apple_link = 'https://machinelearning.apple.com' + article['href']
    Apple_blog_title = article.text
    Apple_blog = summarize_website_content(apple_link)

    my_bar.progress(35, text='Searching for Machine Learning Street Talk...')
    channel_id = "UCMLtBahI5DMrt0NPvDSoIRQ"
    playlist = Playlist(playlist_from_channel_id(channel_id))
    while playlist.hasMoreVideos:
        playlist.getNextVideos()

    machine_title = playlist.videos[0]['title']
    machine_link = playlist.videos[0]['link']
    machine_learning_broadcast = summarize_website_content(machine_link)

    my_bar.progress(40, text='Searching for the Lex Fridman podcast...')
    url = "https://lexfridman.com/podcast/"
    link = get_transcript_link(url)
    L_title = get_h1_text(link)
    youtube_link = get_youtube_link(url)
    lex_broadcast = summarize_website_content(youtube_link)

    my_bar.progress(50, text="Searching arXiv...")
    search = arxiv.Search(
        query="AI, LLM, machine learning, NLP",
        max_results=st.session_state.arxiv,
        sort_by=arxiv.SortCriterion.SubmittedDate
    )
    arxiv_essay = ''
    for result in search.results():
        arxiv_essay += result.summary

    my_bar.progress(60, text="Searching Google News...")
    google_news = fetch_gnews_links(query='AI, LLM, Machine learning', max_results=st.session_state.day)

    my_bar.progress(70, text="Searching TechCrunch...")
    url = 'https://techcrunch.com/category/artificial-intelligence/'
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    articles = soup.select('.post-block__title a')

    data_mrf_link, h_title = articles[0]['href'], articles[0].text
    h_content = summarize_website_content(data_mrf_link)

    my_bar.progress(75, text="Searching for the NVIDIA AI Podcast...")
    url = "https://blogs.nvidia.com/ai-podcast/"
    target_link = "https://blogs.nvidia.com/ai-podcast/"
    target_text = "AI Podcast"
    next_link, Nvidia_title = find_next_link_text(url, target_link, target_text)
    n_content = summarize_website_content(next_link)

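    # Note: the two podcast entries above hand YouTube watch URLs straight to
    # summarize_website_content, so WebBaseLoader summarizes the page HTML
    # (title/description) rather than the spoken content; get_transcript could
    # be swapped in to summarize the actual captions.
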
    my_bar.progress(80, text="Writing Newsletter...")

    query = n_content + str(google_news['summary']) + str(aws_blog) + str(h_content)\
            + openai_blog + 'new arxiv essay' + arxiv_essay

    query = query.replace('<|endoftext|>', '')
    messages = [
        {'role': 'system',
         'content': system_message + "keep it equal to {} words.".format(st.session_state.audio_length) + st.session_state.tone},
        {'role': 'user',
         'content': f"【{query}】"},
    ]
    response = get_completion_from_messages(messages)

    my_bar.progress(90, text="Generating Podcast...")
    if st.session_state.language == 'English':
        updated = response.replace('-', '').replace('--', '').replace('"', '').replace('“', '')
        command = f'edge-tts --text "{updated}" --write-media hello.mp3'
        subprocess.run(command, shell=True)
        my_bar.progress(95, text="Generating Summary...")

        query = response
        messages = [
            {'role': 'system',
             'content': system_message_2},
            {'role': 'user',
             'content': f"【{query}】"},
        ]
        summary = get_completion_from_messages(messages)

    else:
        before = response
        before = before.replace('<|endoftext|>', '')
        messages = [
            {'role': 'system',
             'content': system_message_3},
            {'role': 'user',
             'content': f"【{before}】"},
        ]
        after = get_completion_from_messages(messages)
        # Build the edge-tts command with a Chinese voice
        command = f'edge-tts --voice zh-CN-XiaoyiNeural --text "{after}" --write-media hello2.mp3'
        # Run the command via subprocess
        subprocess.run(command, shell=True)

    my_bar.progress(100, text="Almost there...")

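    # Note: speech is generated through the edge-tts command-line interface
    # (installed with the edge-tts package) rather than its Python API; the
    # text is interpolated into a shell command, which is why quote characters
    # are stripped from the English script before the call.
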
    with radio_placeholder:
        if st.session_state.language == 'English':
            autoplay_audio("hello.mp3")
        else:
            autoplay_audio("hello2.mp3")

    my_bar.empty()

    def news_card(href, title, badge):
        """Render a headline link followed by a small source badge."""
        st.markdown(
            f'<a href="{href}" style="color: #2859C0; text-decoration: none; '
            f'font-size: 20px; font-weight: bold;">{title}</a>'
            f'<span style="margin-left: 10px; background-color: white; padding: 0px 7px; '
            f'border: 1px solid rgb(251, 88, 88); border-radius: 20px; font-size: 7px; '
            f'color: rgb(251, 88, 88)">{badge}</span>',
            unsafe_allow_html=True,
        )

    if st.session_state.language == 'English':
        st.subheader('Summary and Commentary', divider='rainbow')
        st.markdown(summary)

        st.subheader('Technology News', divider='red')
        for i in range(len(google_news['title'])):
            if len(google_news['summary'][i]) > 100:
                news_card(google_news["url"][i], google_news["title"][i], 'Google News')
                st.markdown(google_news['summary'][i])

        news_card(data_mrf_link, h_title, 'Techcrunch')
        st.markdown(h_content)

        st.subheader('Podcast and Speeches', divider='orange')

        news_card(youtube_link, L_title, 'Lex Fridman')
        st.markdown(lex_broadcast)

        news_card(next_link, Nvidia_title, 'Nvidia')
        st.markdown(n_content)

        news_card(machine_link, machine_title, 'Machine Learning Street Talk')
        st.markdown(machine_learning_broadcast)

        st.subheader('Technology Blogs', divider='green')
        news_card(openai_blog_url, openai_title, 'Openai')
        st.markdown(openai_blog)

        news_card(Microsoft_link, M_title, 'Microsoft')
        st.markdown(microsoft_blog)

        news_card('https://aws.amazon.com/blogs/machine-learning/', A_title, 'Amazon')
        st.markdown(aws_blog)

        news_card(apple_link, Apple_blog_title, 'Apple')
        st.markdown(Apple_blog)

        st.subheader('Cutting-edge Papers', divider='green')
        for result in search.results():
            news_card(result.entry_id, result.title, result.primary_category)
            st.markdown(result.summary)

    elif st.session_state.language == '中文':

        def zh(text, wrap=True):
            """Translate text into Chinese via system_message_3 (wrap=True encloses it in 【】)."""
            content = f"【{text}】" if wrap else f"{text}"
            messages = [
                {'role': 'system', 'content': system_message_3},
                {'role': 'user', 'content': content},
            ]
            return get_completion_from_messages(messages)

        st.subheader('摘要与评论', divider='rainbow')
        summary = after.replace('<|endoftext|>', '')
        st.markdown(summary)

        st.subheader('科技新闻', divider='rainbow')
        for i in range(len(google_news['title'])):
            title = zh(google_news['title'][i])
            news_summary = zh(google_news['summary'][i])
            news_card(google_news["url"][i], title, 'Google News')
            st.markdown(news_summary)

        h_title = zh(h_title)
        news_card(data_mrf_link, h_title, 'Techcrunch')
        h_content = zh(h_content)
        st.markdown(h_content)

        st.subheader('播客与博客', divider='orange')
        L_title = zh(L_title)
        news_card(youtube_link, L_title, 'Lex Fridman')
        lex_broadcast = zh(lex_broadcast)
        st.markdown(lex_broadcast)

        Nvidia_title = zh(Nvidia_title)
        news_card(next_link, Nvidia_title, 'Nvidia')
        n_content = zh(n_content)
        st.markdown(n_content)

        machine_title = zh(machine_title)
        news_card(machine_link, machine_title, 'Machine Learning Street Talk')
        machine_learning_broadcast = zh(machine_learning_broadcast)
        st.markdown(machine_learning_broadcast)

        st.subheader('科技博客', divider='green')
        openai_blog = zh(openai_blog.replace('<|endoftext|>', ''), wrap=False)
        openai_title = zh(openai_title)
        news_card(openai_blog_url, openai_title, 'Openai')
        st.markdown(openai_blog)

        microsoft_blog = zh(microsoft_blog.replace('<|endoftext|>', ''))
        M_title = zh(M_title, wrap=False)
        news_card(Microsoft_link, M_title, 'Microsoft')
        st.markdown(microsoft_blog)

        aws_blog = zh(aws_blog.replace('<|endoftext|>', ''))
        A_title = zh(A_title, wrap=False)
        news_card(A_link, A_title, 'Amazon')
        st.markdown(aws_blog)

        st.subheader('尖端论文', divider='green')
        for result in search.results():
            result_title = zh(result.title, wrap=False)
            result_summary = zh(result.summary, wrap=False)
            news_card(result.entry_id, result_title, result.primary_category)
            st.markdown(result_summary)

    st.markdown("""
    <style>
    .footer {
        position: fixed;
        bottom: 0;
        left: 10px;
        width: auto;
        background-color: transparent;
        text-align: left;
        padding-left: 10px;
        padding-top: 10px;
    }
    </style>
    <div class="footer">Made with ❤️ by Xuying Li</div>
    """, unsafe_allow_html=True)

def page_one():
    input_page(st)


def page_two():
    compute_page(st)


def main():
    # Initialize session state
    if "page" not in st.session_state:
        st.session_state.page = "one"

    if "choice" not in st.session_state:
        st.session_state.choice = ""

    if "language" not in st.session_state:
        st.session_state.language = "English"

    if "audio_length" not in st.session_state:
        st.session_state.audio_length = 350  # word budget; matches the 'medium' option

    if "day" not in st.session_state:
        st.session_state.day = 0
        st.session_state.arxiv = 0

    if "tone" not in st.session_state:
        st.session_state.tone = ''

    # Render the page based on session state
    if st.session_state.page == "one":
        page_one()
    elif st.session_state.page == "two":
        page_two()


if __name__ == "__main__":
    st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
    main()
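# Run locally with:
#   streamlit run streamlit_audio.py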