Upload 8 files

- app.py +50 -0
- lib/data_entry.py +16 -0
- lib/lang.py +6 -0
- lib/model.py +40 -0
- lib/result.py +9 -0
- lib/telemetry.py +29 -0
- requirements.txt +73 -0
- schemas/request.py +8 -0
app.py
ADDED
@@ -0,0 +1,50 @@
import os

import gradio as gr
from huggingface_hub import login, CommitScheduler

from lib.result import Result
from lib.data_entry import DataEntry
from lib.telemetry import TelemetryManager
from lib.model import Model
from lib.lang import Language
from schemas.request import Request

login(os.environ["HF_TOKEN"])

models: dict[Language, Model] = {
    Language.ENGLISH: Model.get_english_model(),
    Language.SPANISH: Model.get_spanish_model()
}

telemetry = TelemetryManager()

async def app_func(text: str, language: str) -> int:
    try:
        request = Request(text=text, language=language)
    except ValueError as e:
        # Pydantic's ValidationError is a ValueError; surface it as a Gradio error message.
        raise gr.Error(str(e))

    result = models[request.language].analyze(request.text)
    telemetry.write_data(DataEntry(text, result))

    # Clamp the model score to a 0-100 percentage for display.
    percentage = round(result.percentage * 100)
    percentage = max(percentage, 0)
    percentage = min(percentage, 100)

    return percentage


demo = gr.Interface(
    fn=app_func,
    inputs=[gr.Text(label="Texto"), gr.Radio(label="Idioma", choices=[Language.ENGLISH.value, Language.SPANISH.value])],
    outputs=gr.Label(num_top_classes=1, label="Probabilidad de phishing"),
    title="ConfIA Model Demo",
    description="Demo que te permite probar nuestros modelos de forma muy sencilla",
    examples=[["You have just Woned a free iPhone 16!! FOR FREE!!!", Language.ENGLISH.value], ["When will you review that PR? It's kinda urgent", Language.ENGLISH.value]],
    cache_examples=True
)

if __name__ == "__main__":
    demo.queue(max_size=5)
    demo.launch(share=True, debug=True)
lib/data_entry.py
ADDED
@@ -0,0 +1,16 @@
from dataclasses import dataclass

from lib.result import Result


@dataclass(frozen=True, slots=True)
class DataEntry:
    text: str
    result: Result

    def to_dict(self) -> dict[str, str | float | bool]:
        return {
            "text": self.text,
            "is_phishing": self.result.is_phishing(),
            "percentage": self.result.percentage
        }
lib/lang.py
ADDED
@@ -0,0 +1,6 @@
from enum import Enum


class Language(Enum):
    ENGLISH = "Inglés"
    SPANISH = "Español"
lib/model.py
ADDED
@@ -0,0 +1,40 @@
import os

import tensorflow as tf
import keras
from huggingface_hub import login, hf_hub_download

from lib.result import Result

# Note: this assignment has no effect here, since the backend must be set before keras is
# imported; Keras therefore runs on its default TensorFlow backend, which the tf.constant
# call below (and the requirements list, which does not include jax) assumes.
os.environ["KERAS_BACKEND"] = "jax"

login(token=os.getenv("HF_TOKEN"))


class Model:

    @staticmethod
    def get_english_model() -> "Model":
        return Model("elsamueldev/confia-97-english", "confia-97-english.keras")

    @staticmethod
    def get_spanish_model() -> "Model":
        return Model("elsamueldev/confia-97-spanish", "confia-97-spanish.keras")

    def __init__(self, repo_id: str, filename: str) -> None:
        # Download the .keras weights from the Hub into the working directory.
        path = hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            local_dir="./",
            local_dir_use_symlinks=False
        )
        if path is None:
            raise RuntimeError("Model could not be downloaded")

        self.__model = keras.saving.load_model(path)

    def analyze(self, text: str) -> Result:
        # The model outputs a single phishing probability for the input text.
        raw_result = self.__model.predict(tf.constant([text]))
        result = round(float(raw_result[0][0]), 3)

        return Result(percentage=result)
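For reference, a minimal sketch of how Model can be exercised on its own, outside the Gradio app (assuming HF_TOKEN grants read access to the elsamueldev/confia-97-english repo; the repo and file names come from the factory methods above):

from lib.model import Model

# Downloads confia-97-english.keras on first use, then scores a single text.
model = Model.get_english_model()
result = model.analyze("You have just Woned a free iPhone 16!! FOR FREE!!!")
print(result.percentage, result.is_phishing())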
lib/result.py
ADDED
@@ -0,0 +1,9 @@
from dataclasses import dataclass


@dataclass(frozen=True, slots=True)
class Result:
    percentage: float

    def is_phishing(self) -> bool:
        return self.percentage >= 0.5
lib/telemetry.py
ADDED
@@ -0,0 +1,29 @@
import os
from uuid import uuid4
import json

from huggingface_hub import login, CommitScheduler

from lib.data_entry import DataEntry

login(os.environ["HF_TOKEN"])


class TelemetryManager:
    def __init__(self) -> None:
        # Each process writes to its own JSONL file inside data/; the scheduler
        # commits that folder to the private dataset repo roughly every 5 minutes.
        self.__file_path = f"data/{uuid4()}.jsonl"
        self.__scheduler = CommitScheduler(
            repo_id="elsamueldev/confia-demo-data",
            repo_type="dataset",
            private=True,
            folder_path="data/",
            path_in_repo="/",
            every=5
        )

    def write_data(self, data: DataEntry) -> None:
        content = json.dumps(data.to_dict())

        # Hold the scheduler lock so a background upload never reads a half-written line.
        with self.__scheduler.lock:
            with open(self.__file_path, "a") as file:
                file.write(f"{content}\n")
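A minimal sketch of how TelemetryManager and DataEntry fit together (this assumes write access to the elsamueldev/confia-demo-data dataset; CommitScheduler pushes the data/ folder from a background thread on its every=5 minute interval):

from lib.data_entry import DataEntry
from lib.result import Result
from lib.telemetry import TelemetryManager

telemetry = TelemetryManager()

# Appends one JSON line to this process's data/<uuid>.jsonl file;
# the scheduler commits the folder to the dataset repo on its own timer.
telemetry.write_data(DataEntry(text="example message", result=Result(percentage=0.87)))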
requirements.txt
ADDED
@@ -0,0 +1,73 @@
absl-py==2.1.0
aiofiles==23.2.1
annotated-types==0.7.0
anyio==4.8.0
astunparse==1.6.3
certifi==2024.12.14
charset-normalizer==3.4.1
click==8.1.8
fastapi==0.115.6
ffmpy==0.5.0
filelock==3.16.1
flatbuffers==24.12.23
fsspec==2024.12.0
gast==0.6.0
google-pasta==0.2.0
gradio==5.12.0
gradio_client==1.5.4
grpcio==1.69.0
h11==0.14.0
h5py==3.12.1
httpcore==1.0.7
httpx==0.28.1
huggingface-hub==0.27.1
idna==3.10
Jinja2==3.1.5
keras==3.8.0
libclang==18.1.1
Markdown==3.7
markdown-it-py==3.0.0
MarkupSafe==2.1.5
mdurl==0.1.2
ml-dtypes==0.4.1
namex==0.0.8
numpy==2.0.2
opt_einsum==3.4.0
optree==0.14.0
orjson==3.10.15
packaging==24.2
pandas==2.2.3
pillow==11.1.0
protobuf==5.29.3
pydantic==2.10.5
pydantic_core==2.27.2
pydub==0.25.1
Pygments==2.19.1
python-dateutil==2.9.0.post0
python-multipart==0.0.20
pytz==2024.2
PyYAML==6.0.2
requests==2.32.3
rich==13.9.4
ruff==0.9.2
safehttpx==0.1.6
semantic-version==2.10.0
shellingham==1.5.4
six==1.17.0
sniffio==1.3.1
starlette==0.41.3
tensorboard==2.18.0
tensorboard-data-server==0.7.2
tensorflow==2.18.0
tensorflow-io-gcs-filesystem==0.37.1
termcolor==2.5.0
tomlkit==0.13.2
tqdm==4.67.1
typer==0.15.1
typing_extensions==4.12.2
tzdata==2024.2
urllib3==2.3.0
uvicorn==0.34.0
websockets==14.2
Werkzeug==3.1.3
wrapt==1.17.2
schemas/request.py
ADDED
@@ -0,0 +1,8 @@
from pydantic import BaseModel, Field

from lib.lang import Language


class Request(BaseModel):
    text: str = Field(min_length=3, max_length=500)
    language: Language
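A quick sketch of what the Request schema accepts and rejects (pydantic resolves the Language field from the enum's display value, and its ValidationError subclasses ValueError, which is why app.py catches ValueError):

from pydantic import ValidationError

from lib.lang import Language
from schemas.request import Request

req = Request(text="Hola, ¿puedes revisar este enlace?", language="Español")
assert req.language is Language.SPANISH

try:
    Request(text="hi", language="Inglés")  # rejected: shorter than min_length=3
except ValidationError as e:
    print(e)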