mugiwarafx committed on
Commit
51cb5f1
1 Parent(s): 9c8d3cb
Files changed (3) hide show
  1. Dockerfile +13 -0
  2. app.py +45 -0
  3. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for serving the FastAPI app in app.py with uvicorn on port 7860.
FROM python:3.9

# Dedicated unprivileged account (uid 1000) that will own and run the app.
RUN useradd -m -u 1000 user

WORKDIR /app

# Copy the dependency list on its own first so the install layer below is
# reused from cache when only application code changes.
COPY --chown=user ./requirements.txt requirements.txt

RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app

# Drop root privileges: the account created above was previously never
# selected, so the server process ran as root.
USER user

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ from bs4 import BeautifulSoup
4
+ from typing import List, Dict
5
+
6
+ app = FastAPI()
7
+
8
# Reference set of HTML element names that submitted markup is compared
# against; grouped by initial letter to make additions easy to place.
all_html_tags = {
    "a", "abbr", "address", "area", "article", "aside", "audio",
    "b", "base", "bdi", "bdo", "blockquote", "body", "br", "button",
    "canvas", "caption", "cite", "code", "col", "colgroup",
    "data", "datalist", "dd", "del", "details", "dfn", "dialog", "div",
    "dl", "dt",
    "em", "embed",
    "fieldset", "figcaption", "figure", "footer", "form",
    "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hr", "html",
    "i", "iframe", "img", "input", "ins",
    "kbd",
    "label", "legend", "li", "link",
    "main", "map", "mark", "meta", "meter",
    "nav", "noscript",
    "object", "ol", "optgroup", "option", "output",
    "p", "param", "picture", "pre", "progress",
    "q",
    "rp", "rt", "ruby",
    "s", "samp", "script", "section", "select", "small", "source", "span",
    "strong", "style", "sub", "summary", "sup",
    "table", "tbody", "td", "template", "textarea", "tfoot", "th", "thead",
    "time", "title", "tr", "track",
    "u", "ul",
    "var", "video",
    "wbr",
}
18
+
19
+
20
class HTMLInput(BaseModel):
    # Request payload: a single string field holding the HTML markup
    # whose tag names are to be extracted.
    html_code: str
22
+
23
+
24
class HTMLOutput(BaseModel):
    # Response payload with two lists of tag names.
    # tags_used: names of the tags found in the submitted markup.
    tags_used: List[str]
    # tags_not_used: names from the reference set that were not found.
    tags_not_used: List[str]
27
+
28
+
29
def extract_html_tags(html_code: str) -> Dict[str, List[str]]:
    """Report which tags occur in *html_code* and which known tags do not.

    Args:
        html_code: HTML markup to parse with BeautifulSoup's "html.parser".

    Returns:
        A dict with two keys:

        * ``"tags_used"`` - every distinct tag name found in the markup
          (not restricted to ``all_html_tags``).
        * ``"tags_not_used"`` - the names in ``all_html_tags`` that were
          not found.

        Both lists are sorted alphabetically so that the same input always
        produces the same output; plain set-to-list conversion has no
        guaranteed order.
    """
    soup = BeautifulSoup(html_code, "html.parser")
    tags_used = {tag.name for tag in soup.find_all()}
    tags_not_used = all_html_tags - tags_used
    return {
        "tags_used": sorted(tags_used),
        "tags_not_used": sorted(tags_not_used),
    }
37
+
38
+
39
@app.post("/extract_tags", response_model=HTMLOutput)
async def extract_tags(input: HTMLInput):
    """Handle POST /extract_tags: list the tags used and unused in the body.

    Args:
        input: Request body carrying the HTML markup in ``html_code``.

    Returns:
        An ``HTMLOutput`` built from the result of ``extract_html_tags``.

    Raises:
        HTTPException: With status 500 and the error text as detail when
            extraction or response construction fails.
    """
    try:
        result = extract_html_tags(input.html_code)
        return HTMLOutput(**result)
    except Exception as e:
        # Chain the original error so its traceback is kept as the cause
        # instead of being reported as a failure inside the handler.
        raise HTTPException(status_code=500, detail=str(e)) from e
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pydantic
4
+ beautifulsoup4