test1 / pages /26_Web_Scraping.py
dvaz's picture
Upload 26_Web_Scraping.py
8413ddb
raw
history blame
1.63 kB
import json
import os

import requests
import streamlit as st
import streamlit.components.v1 as components
from requests_html import HTMLSession
from trafilatura import fetch_url, extract
# Configure the browser tab (title, shark icon, wide layout) and the text of
# the "About" entry in the app menu.
st.set_page_config(
    page_title='Scraping',
    page_icon=":shark:",
    layout="wide",
    menu_items={'About': "This is an *extremely* cool app!"},
)

# Sidebar selector: `ch` holds the name of the tool the user picked and is
# what the sections below test to decide which UI to draw.
menu = ['Twitter', 'Trafilatura']
ch = st.sidebar.selectbox('Menu', menu)
# --- Trafilatura page -------------------------------------------------------
# Ask for a URL, download it with trafilatura and show the extracted main
# content (links and images included).
if 'Trafilatura' in ch:
    with st.container():
        st.title('Trafilatura library')
        page_url = st.text_input('Input url').strip()
        if page_url:
            # fetch_url returns the downloaded document, or None when the
            # download fails (bad URL, network error, non-200 response).
            downloaded = fetch_url(page_url)
            if downloaded is None:
                st.error(f'Could not download {page_url}')
            else:
                extracted = extract(
                    downloaded,
                    include_links=True,
                    include_images=True,
                )
                # extract returns None when no main content is found; show a
                # message instead of rendering the literal "None".
                if extracted:
                    st.write(extracted)
                else:
                    st.warning('No extractable content found on that page.')
# RapidAPI endpoint used by the Twitter search section.
url = "https://twitter135.p.rapidapi.com/Search/"

# Request headers RapidAPI expects on every call.
# SECURITY NOTE(review): the API key used to be hard-coded only. It can now be
# supplied through the RAPIDAPI_KEY environment variable; the literal below is
# kept purely as a backward-compatible fallback. It has been published in
# source control and should be rotated, after which the fallback can be dropped.
headers = {
    "X-RapidAPI-Key": os.environ.get(
        "RAPIDAPI_KEY",
        "70652cf94dmsh09357b76b162df8p153fbajsn7e40a4b838cf",
    ),
    "X-RapidAPI-Host": "twitter135.p.rapidapi.com",
}
# --- Twitter page -----------------------------------------------------------
# Ask for a hashtag / search term, query the RapidAPI Twitter endpoint (using
# the module-level `url` and `headers`) and list each returned tweet's
# creation date and full text.
if 'Twitter' in ch:
    with st.container():
        st.title('Search key in Twitter')
        search_term = st.text_input('Put Hashtag').strip()
        if search_term:
            querystring = {"q": search_term, "count": "20"}
            try:
                # A timeout keeps the page from hanging on a stalled request.
                response = requests.get(
                    url,
                    headers=headers,
                    params=querystring,
                    timeout=15,
                )
                response.raise_for_status()
                payload = response.json()
            except (requests.RequestException, ValueError) as exc:
                # RequestException: network / HTTP failure.
                # ValueError: response body was not valid JSON.
                st.error(f'Twitter search failed: {exc}')
            else:
                # Error payloads may lack 'globalObjects' / 'tweets', so fall
                # back to an empty mapping instead of raising KeyError.
                global_objects = {}
                if isinstance(payload, dict):
                    global_objects = payload.get('globalObjects') or {}
                tweets = global_objects.get('tweets') or {}
                if not tweets:
                    st.warning('No tweets found for that search.')
                # Only the tweet objects are needed; the id keys are unused.
                for tweet in tweets.values():
                    st.success(tweet['created_at'])
                    st.subheader(tweet['full_text'])
                    st.write('---')
# An older copy of the request code used to sit here inside a bare
# triple-quoted string. Such a string is not a docstring, so Streamlit's
# "magic" would render it on the page as text; it has been removed.