navpan2 committed on
Commit
003e243
·
1 Parent(s): f352079

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +20 -0
  2. main.py +35 -0
  3. requirements.txt +6 -0
  4. temp.py +21 -0
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the FastAPI + Selenium scraping service (main.py).
FROM python:3.10

# System packages require root, so install Chrome before dropping privileges.
# This layer does not depend on the application source, so it stays cached
# across code changes. The apt package lists are removed to keep the layer small.
RUN apt-get update && \
    apt-get install -y wget unzip && \
    wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb && \
    apt-get install -y ./google-chrome-stable_current_amd64.deb && \
    rm google-chrome-stable_current_amd64.deb && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Run the application as an unprivileged user with UID 1000.
RUN useradd -m -u 1000 user
USER user
# pip installs into ~/.local for a non-root user; expose its scripts (uvicorn).
ENV PATH="/home/user/.local/bin:${PATH}"
WORKDIR /app

# Copy only the dependency list first so the pip layer is rebuilt only when
# requirements.txt changes, not on every source edit.
COPY --chown=user requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Application source goes last: it changes most often.
COPY --chown=user . /app

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from fastapi import FastAPI
3
+ import time
4
+ from bs4 import BeautifulSoup
5
+ from selenium import webdriver
6
+ from selenium.webdriver.chrome.service import Service
7
+ from selenium.webdriver.chrome.options import Options
8
+ from webdriver_manager.chrome import ChromeDriverManager
9
# Shared Chrome options reused for every WebDriver created by this module.
# The flags run Chrome headless; presumably --no-sandbox and
# --disable-dev-shm-usage are there for the container runtime — TODO confirm.
chrome_options = Options()
for _flag in (
    '--no-sandbox',
    '--headless',
    '--disable-gpu',
    '--disable-dev-shm-usage',
):
    chrome_options.add_argument(_flag)
del _flag  # keep the module namespace free of the loop variable

app = FastAPI()
# NOTE(review): not referenced anywhere in the visible code.
favicon_path = 'https://www.iconarchive.com/download/i49313/martin-berube/sport/Soccer.ico'
17
+
18
@app.get("/")
def read_root():
    """Return a fixed greeting payload for the root path."""
    greeting = {"Hello": "World"}
    return greeting
21
def _collect_tag_texts(reqUrl, tag):
    """Load ``reqUrl`` in headless Chrome and return the text of each ``tag`` element.

    The driver is always shut down, even when navigation or parsing raises,
    so a failed request does not leave a Chrome process behind.
    """
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=chrome_options,
    )
    try:
        driver.get(reqUrl)
        # Fixed pause before reading the page source; presumably to let
        # client-side scripts finish rendering.
        time.sleep(3)
        soup = BeautifulSoup(driver.page_source, 'lxml')
        return [element.getText() for element in soup.find_all(name=tag)]
    finally:
        driver.quit()


@app.get("/selenium/")
async def read_item(reqUrl, tag):
    """Scrape ``reqUrl`` and return the text of every ``tag`` element.

    Args:
        reqUrl: URL to open in the browser (query parameter).
        tag: HTML tag name whose element texts are collected (query parameter).

    Returns:
        A dict ``{"Hello": [...]}`` holding the list of extracted texts.
    """
    # NOTE(security): reqUrl comes straight from the caller and is fetched
    # without validation, so this endpoint can be pointed at internal hosts
    # (SSRF). Restrict the allowed schemes/hosts before exposing it publicly.
    import asyncio

    # Selenium and time.sleep are blocking; run them in a worker thread so
    # the event loop keeps serving other requests in the meantime.
    texts = await asyncio.to_thread(_collect_tag_texts, reqUrl, tag)
    return {"Hello": texts}
34
+
35
+
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ selenium
2
+ beautifulsoup4
3
+ webdriver_manager
4
+ lxml
5
+ fastapi
6
+ uvicorn
temp.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from bs4 import BeautifulSoup
3
+ from selenium import webdriver
4
+ from selenium.webdriver.chrome.service import Service
5
+ from selenium.webdriver.chrome.options import Options
6
+ from webdriver_manager.chrome import ChromeDriverManager
7
# Standalone smoke test: open the deployed Space in headless Chrome and print
# the text of every <span> element found on the page.
chrome_options = Options()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options,
)
try:
    url = 'https://navpan2-dockerbot.hf.space/'
    driver.get(url)
    # Fixed pause before reading the page source; presumably to let the page
    # finish rendering.
    time.sleep(5)
    soup = BeautifulSoup(driver.page_source, 'lxml')
    headings = soup.find_all(name='span')
    for heading in headings:
        print(heading.getText())
    # NOTE(review): the original indentation was lost; this pause is assumed
    # to run once after the loop, not per element — confirm.
    time.sleep(2)
finally:
    # Always shut the browser down, even if loading or parsing failed.
    driver.quit()