dvaz committed on
Commit
8413ddb
·
1 Parent(s): 53da065

Upload 26_Web_Scraping.py

Browse files
Files changed (1) hide show
  1. pages/26_Web_Scraping.py +68 -0
pages/26_Web_Scraping.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import streamlit as st
3
+ import streamlit.components.v1 as components
4
+ from requests_html import HTMLSession
5
+ from trafilatura import fetch_url , extract
6
+ import json
7
+
8
+
9
+
10
# Page-level configuration: browser-tab title, icon, wide layout and a custom
# "About" entry in the app menu.
st.set_page_config(
    page_title='Scraping',
    page_icon=":shark:",
    layout="wide",
    menu_items={'About': "This is an *extremely* cool app!"},
)

# Sidebar selector; the chosen entry (`ch`) decides which tool is rendered below.
menu = ['Twitter', 'Trafilatura']
ch = st.sidebar.selectbox('Menu', menu)
15
+
16
+
17
# Trafilatura page: download the URL typed by the user and display the main
# text that trafilatura extracts from it (links and images included).
if 'Trafilatura' in ch:
    with st.container():
        st.title('Trafilatura library')
        ti = st.text_input('Input url')
        # Ignore empty / whitespace-only input so nothing is fetched until the
        # user has actually typed a URL.
        if ti and ti.strip():
            # fetch_url yields the downloaded document, or None when the
            # download fails (bad URL, network error, ...).
            downloaded = fetch_url(ti.strip())
            if downloaded is None:
                st.error('Could not download the page. Please check the URL.')
            else:
                x = extract(downloaded, include_links=True, include_images=True)
                # extract gives None when no usable content was found; show a
                # message instead of writing a bare "None" to the page.
                if x is None:
                    st.warning('No content could be extracted from this page.')
                else:
                    st.write(x)
25
+
26
+
27
+
28
+
29
+
30
+
31
import os  # needed only for the API-key lookup below

# RapidAPI endpoint used by the Twitter search page.
url = "https://twitter135.p.rapidapi.com/Search/"

# Request headers expected by RapidAPI.
# SECURITY NOTE(review): the API key was committed in plain text. Prefer
# supplying it through the RAPIDAPI_KEY environment variable; the literal is
# kept only as a fallback so existing deployments keep working. It should be
# rotated and removed from the source.
headers = {
    "X-RapidAPI-Key": os.environ.get(
        "RAPIDAPI_KEY",
        "70652cf94dmsh09357b76b162df8p153fbajsn7e40a4b838cf",
    ),
    "X-RapidAPI-Host": "twitter135.p.rapidapi.com",
}
36
+
37
# Twitter page: send the user's search term to the RapidAPI endpoint and list
# the creation date and full text of every tweet in the response.
if 'Twitter' in ch:
    with st.container():
        st.title('Search key in Twitter')
        ti = st.text_input('Put Hashtag')
        if ti:
            querystring = {"q": ti, "count": "20"}

            try:
                # A timeout keeps the page from hanging forever on a stalled
                # connection; raise_for_status surfaces HTTP errors (bad key,
                # quota exceeded, ...) instead of parsing an error body.
                r = requests.get(
                    url, headers=headers, params=querystring, timeout=30
                )
                r.raise_for_status()
                x = r.json()
            except (requests.RequestException, ValueError) as exc:
                # ValueError covers a response body that is not valid JSON.
                st.error(f'Twitter search failed: {exc}')
            else:
                # Error payloads may lack these keys; treat that as "no tweets"
                # rather than crashing with a KeyError.
                options = {}
                if isinstance(x, dict):
                    options = (x.get('globalObjects') or {}).get('tweets') or {}
                if not options:
                    st.info('No tweets found.')
                # Only the tweet objects are needed, not their ids.
                for j in options.values():
                    st.success(j['created_at'])
                    st.subheader(j['full_text'])
                    st.write('---')
52
+
53
# Alternative version of the Twitter search, kept for reference only.
# It is written as `#` comments rather than a bare triple-quoted string
# because Streamlit's "magic" feature renders standalone string literals
# into the page.
#
# response = requests.request("GET", url, headers=headers, params=querystring)
# x = response.text
# x = json.loads(x)
# options = x['globalObjects']['tweets']
# for k, v in options.items():
#     st.success(v['created_at'])
#     st.subheader(v['full_text'])
#     st.write('---')
63
+
64
+
65
+
66
+
67
+
68
+