elsamueldev committed on
Commit fe87150 · verified · 1 Parent(s): 7967c62

Upload 8 files

Files changed (8)
  1. app.py +50 -0
  2. lib/data_entry.py +16 -0
  3. lib/lang.py +6 -0
  4. lib/model.py +40 -0
  5. lib/result.py +9 -0
  6. lib/telemetry.py +29 -0
  7. requirements.txt +73 -0
  8. schemas/request.py +8 -0
app.py ADDED
@@ -0,0 +1,50 @@
+ import os
+
+ import gradio as gr
+ from huggingface_hub import login, CommitScheduler
+
+ from lib.result import Result
+ from lib.data_entry import DataEntry
+ from lib.telemetry import TelemetryManager
+ from lib.model import Model
+ from lib.lang import Language
+ from schemas.request import Request
+
+ login(os.environ["HF_TOKEN"])
+
+ models: dict[Language, Model] = {
+     Language.ENGLISH: Model.get_english_model(),
+     Language.SPANISH: Model.get_spanish_model()
+ }
+
+ telemetry = TelemetryManager()
+
+ async def app_func(text: str, language: str) -> int:
+     try:
+         request = Request(text=text, language=language)
+     except ValueError as e:
+         raise gr.Error(str(e))
+
+     result = models[request.language].analyze(request.text)
+     telemetry.write_data(DataEntry(text, result))
+
+     percentage = round(result.percentage * 100)
+     percentage = max(percentage, 0)
+     percentage = min(percentage, 100)
+
+     return percentage
+
+
+ demo = gr.Interface(
+     fn=app_func,
+     inputs=[gr.Text(label="Texto"), gr.Radio(label="Idioma", choices=[Language.ENGLISH.value, Language.SPANISH.value])],
+     outputs=gr.Label(num_top_classes=1, label="Probabilidad de phishing"),
+     title="ConfIA Model Demo",
+     description="Demo que te permite probar nuestros modelos de forma muy sencilla",
+     examples=[["You have just Woned a free iPhone 16!! FOR FREE!!!", Language.ENGLISH.value], ["When will you review that PR? It's kinda urgent", Language.ENGLISH.value]],
+     cache_examples=True
+ )
+
+ if __name__ == "__main__":
+     demo.queue(max_size=5)
+     demo.launch(share=True, debug=True)
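The try/except in app_func works because pydantic's ValidationError subclasses ValueError, so any rejected input surfaces in the UI as a gr.Error. A minimal sketch of that validation path, assuming the Request and Language definitions from schemas/request.py and lib/lang.py later in this commit:

from pydantic import ValidationError

from schemas.request import Request
from lib.lang import Language

# Too short: Field(min_length=3) rejects it. ValidationError is a ValueError,
# which is exactly what app_func catches and re-raises as gr.Error.
try:
    Request(text="hi", language="Inglés")
except ValueError as err:
    print(err)

# Valid input: the Radio string "Español" is coerced to Language.SPANISH,
# which app.py then uses to pick a model from the `models` dict.
req = Request(text="When will you review that PR?", language="Español")
print(req.language is Language.SPANISH)  # True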
lib/data_entry.py ADDED
@@ -0,0 +1,16 @@
+ from dataclasses import dataclass
+
+ from lib.result import Result
+
+
+ @dataclass(frozen=True, slots=True)
+ class DataEntry:
+     text: str
+     result: Result
+
+     def to_dict(self) -> dict[str, str | float | bool]:
+         return {
+             "text": self.text,
+             "is_phishing": self.result.is_phishing(),
+             "percentage": self.result.percentage
+         }
lib/lang.py ADDED
@@ -0,0 +1,6 @@
+ from enum import Enum
+
+
+ class Language(Enum):
+     ENGLISH = "Inglés"
+     SPANISH = "Español"
lib/model.py ADDED
@@ -0,0 +1,40 @@
+ import os
+
+ os.environ["KERAS_BACKEND"] = "jax"  # must be set before keras is imported
+
+ import tensorflow as tf
+ import keras
+ from huggingface_hub import login, hf_hub_download
+
+ from lib.result import Result
+
+ login(token=os.getenv("HF_TOKEN"))
+
+
+ class Model:
+
+     @staticmethod
+     def get_english_model() -> "Model":
+         return Model("elsamueldev/confia-97-english", "confia-97-english.keras")
+
+     @staticmethod
+     def get_spanish_model() -> "Model":
+         return Model("elsamueldev/confia-97-spanish", "confia-97-spanish.keras")
+
+     def __init__(self, repo_id: str, filename: str) -> None:
+         path = hf_hub_download(
+             repo_id=repo_id,
+             filename=filename,
+             local_dir="./",
+             local_dir_use_symlinks=False
+         )
+         if path is None:
+             raise RuntimeError("Model could not be downloaded")
+
+         self.__model = keras.saving.load_model(path)
+
+     def analyze(self, text: str) -> Result:
+         raw_result = self.__model.predict(tf.constant([text]))
+         result = round(float(raw_result[0][0]), 3)
+
+         return Result(percentage=result)
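A hypothetical usage sketch of the Model wrapper above, not part of the commit itself: the factory methods download the listed .keras files at construction time, and analyze() returns a Result whose percentage feeds the 0.5 phishing threshold in lib/result.py.

from lib.model import Model

# Downloads confia-97-english.keras from elsamueldev/confia-97-english;
# requires HF_TOKEN, since the module calls login() at import time.
model = Model.get_english_model()

result = model.analyze("Verify your account now or it will be suspended!")
print(result.percentage)     # e.g. 0.97 (analyze() rounds to 3 decimals)
print(result.is_phishing())  # True whenever percentage >= 0.5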
lib/result.py ADDED
@@ -0,0 +1,9 @@
+ from dataclasses import dataclass
+
+
+ @dataclass(frozen=True, slots=True)
+ class Result:
+     percentage: float
+
+     def is_phishing(self) -> bool:
+         return self.percentage >= 0.5
lib/telemetry.py ADDED
@@ -0,0 +1,29 @@
+ import os
+ from uuid import uuid4
+ import json
+
+ from huggingface_hub import login, CommitScheduler
+
+ from lib.data_entry import DataEntry
+
+ login(os.environ["HF_TOKEN"])
+
+
+ class TelemetryManager:
+     def __init__(self) -> None:
+         self.__file_path = f"data/{uuid4()}.jsonl"
+         self.__scheduler = CommitScheduler(
+             repo_id="elsamueldev/confia-demo-data",
+             repo_type="dataset",
+             private=True,
+             folder_path="data/",
+             path_in_repo="/",
+             every=5
+         )
+
+     def write_data(self, data: DataEntry) -> None:
+         content = json.dumps(data.to_dict())
+
+         with self.__scheduler.lock:
+             with open(self.__file_path, "a") as file:
+                 file.write(f"{content}\n")
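For reference, a sketch of the JSONL record that write_data() appends for each analyzed text, built from the DataEntry and Result classes in this commit (the 0.93 score is an illustrative value); CommitScheduler then pushes the data/ folder to the private dataset repo roughly every 5 minutes.

import json

from lib.result import Result
from lib.data_entry import DataEntry

# Illustrative entry: text plus the model score it received.
entry = DataEntry(text="You have just Woned a free iPhone 16!!", result=Result(percentage=0.93))
print(json.dumps(entry.to_dict()))
# {"text": "You have just Woned a free iPhone 16!!", "is_phishing": true, "percentage": 0.93}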
requirements.txt ADDED
@@ -0,0 +1,73 @@
+ absl-py==2.1.0
+ aiofiles==23.2.1
+ annotated-types==0.7.0
+ anyio==4.8.0
+ astunparse==1.6.3
+ certifi==2024.12.14
+ charset-normalizer==3.4.1
+ click==8.1.8
+ fastapi==0.115.6
+ ffmpy==0.5.0
+ filelock==3.16.1
+ flatbuffers==24.12.23
+ fsspec==2024.12.0
+ gast==0.6.0
+ google-pasta==0.2.0
+ gradio==5.12.0
+ gradio_client==1.5.4
+ grpcio==1.69.0
+ h11==0.14.0
+ h5py==3.12.1
+ httpcore==1.0.7
+ httpx==0.28.1
+ huggingface-hub==0.27.1
+ idna==3.10
+ Jinja2==3.1.5
+ keras==3.8.0
+ libclang==18.1.1
+ Markdown==3.7
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ mdurl==0.1.2
+ ml-dtypes==0.4.1
+ namex==0.0.8
+ numpy==2.0.2
+ opt_einsum==3.4.0
+ optree==0.14.0
+ orjson==3.10.15
+ packaging==24.2
+ pandas==2.2.3
+ pillow==11.1.0
+ protobuf==5.29.3
+ pydantic==2.10.5
+ pydantic_core==2.27.2
+ pydub==0.25.1
+ Pygments==2.19.1
+ python-dateutil==2.9.0.post0
+ python-multipart==0.0.20
+ pytz==2024.2
+ PyYAML==6.0.2
+ requests==2.32.3
+ rich==13.9.4
+ ruff==0.9.2
+ safehttpx==0.1.6
+ semantic-version==2.10.0
+ shellingham==1.5.4
+ six==1.17.0
+ sniffio==1.3.1
+ starlette==0.41.3
+ tensorboard==2.18.0
+ tensorboard-data-server==0.7.2
+ tensorflow==2.18.0
+ tensorflow-io-gcs-filesystem==0.37.1
+ termcolor==2.5.0
+ tomlkit==0.13.2
+ tqdm==4.67.1
+ typer==0.15.1
+ typing_extensions==4.12.2
+ tzdata==2024.2
+ urllib3==2.3.0
+ uvicorn==0.34.0
+ websockets==14.2
+ Werkzeug==3.1.3
+ wrapt==1.17.2
schemas/request.py ADDED
@@ -0,0 +1,8 @@
+ from pydantic import BaseModel, Field
+
+ from lib.lang import Language
+
+
+ class Request(BaseModel):
+     text: str = Field(min_length=3, max_length=500)
+     language: Language