Spaces:
Runtime error
Runtime error
Salif SAWADOGO
committed on
Commit
·
7204409
1
Parent(s):
f27ce21
⚡️ improve code quality and use dcc instead of global variables
Browse files- .gitignore +1 -1
- app/global_vars.py +0 -1
- app/helpers/__init__.py +0 -0
- app/helpers/abstracts.py +40 -0
- app/helpers/models.py +12 -0
- app/helpers/processor.py +99 -0
- app/helpers/s3.py +72 -0
- app/helpers/utils.py +5 -0
- app/pages/Annotations/callbacks.py +117 -88
- app/pages/Annotations/layout.py +199 -212
- app/pages/Annotations/state.py +20 -0
- app/pages/base_page.py +2 -2
.gitignore
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
.venv
|
2 |
**__pycache__**
|
3 |
**.pyc**
|
4 |
-
|
5 |
**/.venv/**
|
6 |
**/*.egg-info
|
7 |
**/*parquet
|
|
|
1 |
.venv
|
2 |
**__pycache__**
|
3 |
**.pyc**
|
4 |
+
**/*env/**
|
5 |
**/.venv/**
|
6 |
**/*.egg-info
|
7 |
**/*parquet
|
app/global_vars.py
CHANGED
@@ -4,7 +4,6 @@ from clients import s3_loader
|
|
4 |
from utils import extract_audio_identifier
|
5 |
|
6 |
DATA_FILE = "sawadogosalif/MooreFRCollections_BibleOnlyText"
|
7 |
-
audio_paths, possible_values = [], []
|
8 |
data = load_dataset(DATA_FILE, split="train").to_pandas()
|
9 |
data[["chapter", "page"]] = data["moore_source_url"].apply(
|
10 |
lambda x: pd.Series(extract_audio_identifier(x))
|
|
|
4 |
from utils import extract_audio_identifier
|
5 |
|
6 |
DATA_FILE = "sawadogosalif/MooreFRCollections_BibleOnlyText"
|
|
|
7 |
data = load_dataset(DATA_FILE, split="train").to_pandas()
|
8 |
data[["chapter", "page"]] = data["moore_source_url"].apply(
|
9 |
lambda x: pd.Series(extract_audio_identifier(x))
|
app/helpers/__init__.py
ADDED
File without changes
|
app/helpers/abstracts.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from abc import ABC, abstractmethod
|
2 |
+
from typing import List
|
3 |
+
import pandas as pd
|
4 |
+
|
5 |
+
class AbstractS3Client(ABC):
    """Interface for the S3 operations used by the application.

    Every method is abstract, so the class cannot be instantiated directly;
    a concrete client has to implement all four operations.
    """

    @abstractmethod
    def upload_file(self, local_path: str, s3_key: str) -> None:
        """Upload the file at ``local_path`` to S3 under ``s3_key``."""

    @abstractmethod
    def download_file(self, local_path: str, s3_key: str) -> None:
        """Download the S3 object ``s3_key`` to ``local_path``."""

    @abstractmethod
    def list_files(self, prefix: str = "") -> List[str]:
        """Return the files stored in S3 under ``prefix``."""

    @abstractmethod
    def load_json_files(
        self,
        files: List[str],
        unique_columns: List[str] = None,
    ) -> pd.DataFrame:
        """Load the given JSON files and combine them into one DataFrame."""
|
25 |
+
|
26 |
+
class AbstractProcessor(ABC):
    """Interface for the text and audio processing helpers.

    All three methods are abstract; concrete processors must implement them.
    """

    @abstractmethod
    def get_audio_paths(self, folder: str) -> list[str]:
        """Return the sorted audio paths found in ``folder``."""

    @abstractmethod
    def process_text(self, text: str) -> str:
        """Clean ``text`` and return the processed string."""

    @abstractmethod
    def splitter(self, text: str) -> list[str]:
        """Split ``text`` into segments."""
|
app/helpers/models.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# helpers/models.py
|
2 |
+
from dataclasses import dataclass
|
3 |
+
from typing import Optional
|
4 |
+
|
5 |
+
@dataclass
class S3Config:
    """Connection settings for an S3-compatible object store.

    NOTE(review): the dataclass-generated ``__repr__`` includes every field,
    ``secret_key`` among them, so avoid logging instances as-is.
    """

    # Name of the bucket the settings apply to.
    bucket_name: str
    # URL of the S3 endpoint to connect to.
    endpoint_url: str
    # Access key used to authenticate.
    access_key: str
    # Secret key paired with ``access_key``.
    secret_key: str
    # Optional region name; ``None`` when no region is configured.
    region_name: Optional[str] = None
|
app/helpers/processor.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
import urllib.parse
|
5 |
+
from pathlib import Path
|
6 |
+
import pandas as pd
|
7 |
+
from loguru import logger
|
8 |
+
from .abstracts import AbstractProcessor
|
9 |
+
from .s3 import S3Client # Si besoin d'utiliser des fonctions S3
|
10 |
+
|
11 |
+
class Processor(AbstractProcessor):
    """Text, audio-path and persistence helpers used by the annotation pages."""

    def get_audio_paths(self, folder: str) -> list[str]:
        """Return the ``*.mp3`` files of ``folder`` ordered by segment number.

        Paths are sorted on the integer that follows ``segment_`` in the
        path; files without such a number are placed last.
        """

        def extract_number(file_path: str) -> float:
            # ``inf`` pushes files without a segment number to the end.
            match = re.search(r"segment_(\d+)", file_path)
            return int(match.group(1)) if match else float("inf")

        audio_paths = sorted(
            (audio_path.as_posix() for audio_path in Path(folder).glob("*.mp3")),
            key=extract_number,
        )
        # NOTE(review): the first three sorted segments are always skipped;
        # the reason is not visible in this module - confirm it is intended.
        return audio_paths[3:]

    def process_text(self, text: str) -> str:
        """Remove the ``+`` / ``*`` markers and double quotes from ``text``."""
        text = re.sub(r"\+\s*\.", ".", text)
        text = re.sub(r"\*\s*\+\s*;", ";", text)
        text = re.sub(r"\*\s*\+", "", text)
        text = text.replace(" + ", " ").replace(" * ", " ").replace("+", " ")
        text = re.sub(r'["“”]', "", text)
        return text.strip()

    def splitter(self, text: str) -> list[str]:
        """Clean ``text`` and split it on ``,``, ``:``, ``;`` and ``.``."""
        return re.split(r"[,:;.]", self.process_text(text))

    def flatten_nested_values(self, nested_values: pd.Series) -> list[str]:
        """Flatten groups of strings, dropping leading digits and empty items."""
        cleaned_items = (
            re.sub(r"^\d+\s*", "", item).strip()
            for group in nested_values
            for item in group
        )
        return [item for item in cleaned_items if item]

    def load_persistent_data(self, file: str) -> list:
        """Return the JSON content of ``file``, or ``[]`` if it does not exist."""
        try:
            with open(file, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            return []

    def save_persistent_data(self, data: list, file: str) -> None:
        """Write ``data`` to ``file`` as indented, non-ASCII-escaped JSON."""
        with open(file, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

    def extract_audio_identifier(self, url: str) -> tuple[str, int]:
        """Return ``(unquoted second-to-last segment, int(last segment))`` of ``url``.

        Raises:
            IndexError: if ``url`` has fewer than two ``/``-separated segments.
            ValueError: if the last segment is not an integer.
        """
        parts = url.strip("/").split("/")
        return urllib.parse.unquote(parts[-2]), int(parts[-1])

    def find_and_return_after_last(self, long_list: list, short_list: list) -> list:
        """Return the items of ``long_list`` located after the last match.

        A match is any item of ``long_list`` that is also in ``short_list``.
        When there is no match, or ``short_list`` is empty or ``None``,
        ``long_list`` is returned unchanged.
        """
        if not short_list:
            return long_list
        # Scan from the end so the first hit is the last matching position.
        for index in range(len(long_list) - 1, -1, -1):
            if long_list[index] in short_list:
                return long_list[index + 1:]
        return long_list

    def load_page_verses_and_audios(
        self,
        s3_client,
        page: str,
        df_verses: pd.DataFrame,
        local_file: str = "result.json",
    ) -> tuple[list[str], list[str]]:
        """Return the remaining ``(possible_values, audio_paths)`` for ``page``.

        Args:
            s3_client: object exposing ``download_file(local_path, s3_key)``.
            page: path made of exactly three components separated by ``/`` or
                ``\\``; the last two are used as chapter and ``page_<n>``.
            df_verses: frame with ``chapter``, ``page`` and
                ``moore_verse_text`` columns.
            local_file: where the remote results file is downloaded.
                NOTE(review): the default differs from the ``results.json``
                name used in the S3 key - confirm which local name callers
                expect.

        Returns:
            The candidate transcriptions and audio paths. When a previous
            results file can be downloaded, both lists are cut so they start
            after its last recorded entry.

        Raises:
            ValueError: if ``page`` does not have three components or its
                page number is not an integer.
        """
        audio_paths = self.get_audio_paths(page)
        # Accept both Windows and POSIX separators.
        _, chapter, page_str = page.replace("\\", "/").split("/")
        s3_key = f"labelling/{chapter}/{page_str}/results.json"
        page_int = int(page_str.replace("page_", ""))
        tmp = df_verses[(df_verses.chapter == chapter) & (df_verses.page == page_int)]
        possible_values = self.flatten_nested_values(
            tmp["moore_verse_text"].apply(self.splitter)
        )
        try:
            s3_client.download_file(local_file, s3_key)
            transcriptions = self.load_persistent_data(local_file)
            if not transcriptions:
                # Nothing recorded yet: no position to resume from.
                logger.info(f"No previous transcription found for {s3_key}")
                return possible_values, audio_paths
            latest_entry = transcriptions[-1]
            # An entry without transcriptions must not break the resume logic.
            latest_transcription = latest_entry.get("transcriptions") or []
            latest_audio = [latest_entry.get("segment_path")]
            audio_paths = self.find_and_return_after_last(audio_paths, latest_audio)
            possible_values = self.find_and_return_after_last(
                possible_values, latest_transcription
            )
            logger.info(f"Latest transcription: {latest_audio} / {latest_transcription}")
        except Exception as e:
            # Best effort: if the previous results cannot be fetched or read,
            # continue with whatever has been computed so far.
            logger.error(f"An error occurred: {e}")
        return possible_values, audio_paths

    def get_contribution_data(self, s3_client) -> pd.DataFrame:
        """Return the contributions stored under ``labelling`` as a DataFrame.

        The ``segment_path`` column is split on ``/`` into ``chapter``,
        ``page`` and ``segment`` columns, and rows are sorted by chapter and
        page. An empty DataFrame is returned when there is no data or when
        loading or reshaping fails (the error is logged).
        """
        files = [
            file for file in s3_client.list_files("labelling") if file.endswith("json")
        ]
        try:
            df = s3_client.load_json_files(
                files=files, unique_columns=["segment_path", "user_id"]
            )
            # Check emptiness first: an empty frame has no ``segment_path``.
            if df.empty:
                return pd.DataFrame()
            df[["tmp1", "chapter", "page", "segment"]] = df.segment_path.str.split(
                "/", expand=True
            )
            return df.sort_values(["chapter", "page"]).drop(
                columns=["tmp1", "segment_path"]
            )
        except Exception as e:
            logger.error(f"Error in get_contribution_data: {e}")
            return pd.DataFrame()
|
app/helpers/s3.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
import boto3
|
4 |
+
import s3fs
|
5 |
+
from typing import List, Optional
|
6 |
+
from loguru import logger
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
from .models import S3Config
|
9 |
+
from .abstracts import AbstractS3Client
|
10 |
+
import pandas as pd
|
11 |
+
|
12 |
+
load_dotenv()
|
13 |
+
|
14 |
+
class S3Client(AbstractS3Client):
    """S3 client using ``boto3`` for transfers/listing and ``s3fs`` for reads."""

    def __init__(self, config: S3Config):
        """Create the boto3 client and the s3fs filesystem from ``config``."""
        self.config = config
        # Built once and shared so both backends use the same endpoint and
        # region; the region is only passed when one is configured.
        client_kwargs = {"endpoint_url": config.endpoint_url}
        if config.region_name:
            client_kwargs["region_name"] = config.region_name
        self._fs = s3fs.S3FileSystem(
            key=config.access_key,
            secret=config.secret_key,
            client_kwargs=dict(client_kwargs),
        )
        self._client = boto3.client(
            "s3",
            aws_access_key_id=config.access_key,
            aws_secret_access_key=config.secret_key,
            **client_kwargs,
        )

    def upload_file(self, local_path: str, s3_key: str) -> None:
        """Upload ``local_path`` to the bucket under ``s3_key``.

        A missing local file is logged as an error and nothing is uploaded.
        """
        if not os.path.exists(local_path):
            logger.error(f"File {local_path} does not exist.")
            return
        self._client.upload_file(local_path, self.config.bucket_name, s3_key)
        logger.info(f"Uploaded {local_path} to s3://{self.config.bucket_name}/{s3_key}")

    def download_file(self, local_path: str, s3_key: str) -> None:
        """Download the bucket object ``s3_key`` to ``local_path``."""
        self._client.download_file(self.config.bucket_name, s3_key, local_path)
        logger.info(f"Downloaded {s3_key} to {local_path}")

    def list_files(self, prefix: str = "") -> List[str]:
        """Return the keys of every object in the bucket under ``prefix``.

        Raises:
            Exception: if listing fails; the original error is chained as the
                cause.
        """
        paginator = self._client.get_paginator("list_objects_v2")
        try:
            return [
                obj["Key"]
                for page in paginator.paginate(
                    Bucket=self.config.bucket_name, Prefix=prefix
                )
                # Pages without objects carry no "Contents" entry.
                for obj in page.get("Contents", [])
            ]
        except Exception as e:
            raise Exception(
                f"Error listing files in bucket {self.config.bucket_name}: {str(e)}"
            ) from e

    def load_json_files(
        self, files: List[str], unique_columns: Optional[List[str]] = None
    ) -> pd.DataFrame:
        """Load the given JSON objects and concatenate them into one DataFrame.

        Args:
            files: keys of the JSON objects inside the configured bucket.
            unique_columns: when given, rows duplicated on these columns are
                dropped.

        Returns:
            The combined frame, or an empty DataFrame when ``files`` is empty.

        Raises:
            Exception: if a file cannot be read or parsed; the original error
                is chained as the cause.
        """
        frames = []
        for file in files:
            try:
                # Text mode, so the requested encoding is actually applied.
                with self._fs.open(
                    f"s3://{self.config.bucket_name}/{file}", "r", encoding="utf-8"
                ) as f:
                    frames.append(pd.json_normalize(json.load(f)))
            except Exception as e:
                raise Exception(f"Error processing file {file}: {str(e)}") from e
        if not frames:
            return pd.DataFrame()
        result = pd.concat(frames, ignore_index=True)
        if unique_columns:
            result = result.drop_duplicates(subset=unique_columns)
        return result
|
app/helpers/utils.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import urllib
|
2 |
+
|
3 |
+
def extract_audio_identifier(url: str) -> tuple:
    """Return ``(unquoted second-to-last segment, int(last segment))`` of ``url``.

    Leading and trailing ``/`` are ignored before the URL is split.

    Raises:
        IndexError: if ``url`` has fewer than two ``/``-separated segments.
        ValueError: if the last segment is not an integer.
    """
    # A bare ``import urllib`` does not load the ``urllib.parse`` submodule,
    # so import the function explicitly instead of relying on another module
    # having imported it first.
    from urllib.parse import unquote

    parts = url.strip("/").split("/")
    return unquote(parts[-2]), int(parts[-1])
|
app/pages/Annotations/callbacks.py
CHANGED
@@ -1,24 +1,43 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
2 |
from dash import Input, Output, State, no_update
|
3 |
import datetime
|
4 |
import os
|
5 |
from pathlib import Path
|
6 |
-
from
|
7 |
-
from
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
9 |
|
|
|
|
|
|
|
10 |
PERSIST_FILE = "results.json"
|
11 |
|
|
|
|
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
def get_page_paths(chapter_path):
|
15 |
-
"""Retrieve all page folders in the chapter folder."""
|
16 |
-
pages_dir = chapter_path
|
17 |
-
if pages_dir.exists():
|
18 |
-
return [d for d in pages_dir.iterdir() if d.is_dir()]
|
19 |
-
return []
|
20 |
|
21 |
-
#
|
|
|
|
|
22 |
@app.callback(
|
23 |
Output("chapter-section", "style"),
|
24 |
Output("pseudo-continue-button", "style"),
|
@@ -26,11 +45,12 @@ def get_page_paths(chapter_path):
|
|
26 |
State("user-info", "value")
|
27 |
)
|
28 |
def show_chapter_section(n_clicks, user_info):
|
|
|
29 |
if n_clicks and user_info:
|
30 |
return {"display": "block"}, {"display": "none"}
|
31 |
return {"display": "none"}, {"display": "block"}
|
32 |
|
33 |
-
|
34 |
@app.callback(
|
35 |
Output("page-section", "style"),
|
36 |
Output("chapter-continue-button", "style"),
|
@@ -38,11 +58,12 @@ def show_chapter_section(n_clicks, user_info):
|
|
38 |
State("chapter-dropdown", "value")
|
39 |
)
|
40 |
def show_page_section(n_clicks, chapter_value):
|
|
|
41 |
if n_clicks and chapter_value:
|
42 |
return {"display": "block"}, {"display": "none"}
|
43 |
return {"display": "none"}, {"display": "block"}
|
44 |
|
45 |
-
|
46 |
@app.callback(
|
47 |
Output("transcription-section", "style"),
|
48 |
Output("start-button", "style"),
|
@@ -50,56 +71,67 @@ def show_page_section(n_clicks, chapter_value):
|
|
50 |
State("page-dropdown", "value")
|
51 |
)
|
52 |
def show_transcription_section(n_clicks, page_value):
|
|
|
53 |
if n_clicks and page_value:
|
54 |
return {"display": "block"}, {"display": "none"}
|
55 |
return {"display": "none"}, {"display": "block"}
|
56 |
|
57 |
-
|
|
|
|
|
|
|
58 |
@app.callback(
|
59 |
Output("page-dropdown", "options"),
|
60 |
Input("chapter-dropdown", "value")
|
61 |
)
|
62 |
def update_pages(chapter_value):
|
|
|
63 |
if chapter_value:
|
64 |
chapter_path = Path(chapter_value)
|
65 |
-
pages =
|
66 |
-
|
67 |
-
return options
|
68 |
return []
|
69 |
|
70 |
-
|
|
|
|
|
|
|
71 |
@app.callback(
|
|
|
|
|
72 |
Output("audio-player", "src"),
|
73 |
Output("suggestion-checklist", "options"),
|
74 |
-
Output("hidden-message", "style"),
|
75 |
-
|
76 |
Input("page-dropdown", "value"),
|
77 |
State("chapter-dropdown", "value")
|
78 |
)
|
79 |
-
|
80 |
def update_audio_and_suggestions(page_value, chapter_value):
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
|
|
85 |
if page_value and chapter_value:
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
|
|
92 |
os.remove(PERSIST_FILE)
|
93 |
-
|
94 |
-
pass
|
95 |
-
return no_update, no_update, hidden_message_style
|
96 |
|
97 |
-
return no_update, no_update, hidden_message_style
|
98 |
|
|
|
|
|
|
|
99 |
@app.callback(
|
|
|
|
|
100 |
Output("audio-player", "src", allow_duplicate=True),
|
101 |
Output("suggestion-checklist", "options", allow_duplicate=True),
|
102 |
-
Output("suggestion-checklist", "value", allow_duplicate=True),
|
103 |
Output("confirmation-message", "children"),
|
104 |
Output("transcription-store", "data"),
|
105 |
Input("submit-button", "n_clicks"),
|
@@ -107,54 +139,51 @@ def update_audio_and_suggestions(page_value, chapter_value):
|
|
107 |
State("user-info", "value"),
|
108 |
State("page-dropdown", "value"),
|
109 |
State("audio-player", "src"),
|
|
|
|
|
110 |
State("transcription-store", "data"),
|
111 |
prevent_initial_call=True
|
112 |
)
|
113 |
-
def update_transcription(n_clicks, selected_transcriptions, user_info, page_value,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
if n_clicks > 0 and page_value and current_audio:
|
115 |
-
# Use the audio path as the unique segment identifier
|
116 |
-
segment_path = current_audio
|
117 |
-
|
118 |
-
# Create a new transcription entry with a timestamp
|
119 |
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
120 |
-
|
121 |
-
|
|
|
122 |
"transcriptions": selected_transcriptions,
|
123 |
"timestamp": timestamp,
|
124 |
"user_id": user_info
|
125 |
-
}
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
audio_paths.pop(0) # Remove the current audio
|
131 |
-
next_audio = audio_paths[0] if audio_paths else ""
|
132 |
else:
|
133 |
next_audio = ""
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
return next_audio, next_options, [], confirmation_message, stored_transcriptions
|
152 |
-
|
153 |
-
|
154 |
-
return no_update, no_update, no_update, no_update, no_update
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
@app.callback(
|
159 |
Output("confirmation-message", "children", allow_duplicate=True),
|
160 |
Input("save-results-button", "n_clicks"),
|
@@ -163,21 +192,21 @@ def update_transcription(n_clicks, selected_transcriptions, user_info, page_valu
|
|
163 |
prevent_initial_call=True
|
164 |
)
|
165 |
def save_results(n_clicks, page_value, stored_transcriptions):
|
|
|
|
|
|
|
|
|
166 |
if n_clicks > 0 and page_value:
|
167 |
-
|
168 |
try:
|
169 |
-
initial_transcriptions = load_persistent_data(PERSIST_FILE)
|
170 |
except Exception as e:
|
171 |
-
|
172 |
initial_transcriptions = []
|
173 |
-
combined_transcriptions = initial_transcriptions + stored_transcriptions
|
174 |
-
save_persistent_data(combined_transcriptions, PERSIST_FILE)
|
175 |
-
|
176 |
-
s3_key = f"labelling/{
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
return confirmation_message
|
182 |
-
|
183 |
-
return no_update
|
|
|
1 |
+
"""
|
2 |
+
callbacks.py
|
3 |
+
-------------
|
4 |
+
Ce module définit les callbacks de l'application Dash en utilisant les abstractions
|
5 |
+
définies dans le sous-package helpers."""
|
6 |
+
|
7 |
from dash import Input, Output, State, no_update
|
8 |
import datetime
|
9 |
import os
|
10 |
from pathlib import Path
|
11 |
+
from loguru import logger
|
12 |
+
from dotenv import load_dotenv
|
13 |
+
|
14 |
+
load_dotenv()
|
15 |
+
|
16 |
+
from helpers.processor import Processor
|
17 |
+
from helpers.s3 import S3Client
|
18 |
+
from helpers.models import S3Config
|
19 |
|
20 |
+
from app import app
|
21 |
+
|
22 |
+
from global_vars import data, BUCKET_NAME
|
23 |
PERSIST_FILE = "results.json"
|
24 |
|
25 |
+
# Instanciation du Processor
|
26 |
+
processor = Processor()
|
27 |
|
28 |
+
# Instanciation du client S3 à partir de la configuration
|
29 |
+
s3_config = S3Config(
|
30 |
+
bucket_name=BUCKET_NAME,
|
31 |
+
endpoint_url=os.getenv("AWS_ENDPOINT_URL_S3"),
|
32 |
+
access_key=os.getenv("AWS_ACCESS_KEY_ID"),
|
33 |
+
secret_key=os.getenv("AWS_SECRET_ACCESS_KEY")
|
34 |
+
)
|
35 |
+
s3_client = S3Client(s3_config)
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
+
# -----------------------------------------------------------------------------
|
39 |
+
# CALLBACKS D'AFFICHAGE DES SECTIONS
|
40 |
+
# -----------------------------------------------------------------------------
|
41 |
@app.callback(
|
42 |
Output("chapter-section", "style"),
|
43 |
Output("pseudo-continue-button", "style"),
|
|
|
45 |
State("user-info", "value")
|
46 |
)
|
47 |
def show_chapter_section(n_clicks, user_info):
|
48 |
+
"""Affiche la section chapitre après saisie d'une information utilisateur."""
|
49 |
if n_clicks and user_info:
|
50 |
return {"display": "block"}, {"display": "none"}
|
51 |
return {"display": "none"}, {"display": "block"}
|
52 |
|
53 |
+
|
54 |
@app.callback(
|
55 |
Output("page-section", "style"),
|
56 |
Output("chapter-continue-button", "style"),
|
|
|
58 |
State("chapter-dropdown", "value")
|
59 |
)
|
60 |
def show_page_section(n_clicks, chapter_value):
|
61 |
+
"""Affiche la section page après sélection d'un chapitre."""
|
62 |
if n_clicks and chapter_value:
|
63 |
return {"display": "block"}, {"display": "none"}
|
64 |
return {"display": "none"}, {"display": "block"}
|
65 |
|
66 |
+
|
67 |
@app.callback(
|
68 |
Output("transcription-section", "style"),
|
69 |
Output("start-button", "style"),
|
|
|
71 |
State("page-dropdown", "value")
|
72 |
)
|
73 |
def show_transcription_section(n_clicks, page_value):
|
74 |
+
"""Affiche la section transcription après sélection d'une page."""
|
75 |
if n_clicks and page_value:
|
76 |
return {"display": "block"}, {"display": "none"}
|
77 |
return {"display": "none"}, {"display": "block"}
|
78 |
|
79 |
+
|
80 |
+
# -----------------------------------------------------------------------------
|
81 |
+
# CALLBACK DE MISE À JOUR DU DROPDOWN DES PAGES
|
82 |
+
# -----------------------------------------------------------------------------
|
83 |
@app.callback(
|
84 |
Output("page-dropdown", "options"),
|
85 |
Input("chapter-dropdown", "value")
|
86 |
)
|
87 |
def update_pages(chapter_value):
|
88 |
+
"""Met à jour les options du dropdown de pages selon le chapitre sélectionné."""
|
89 |
if chapter_value:
|
90 |
chapter_path = Path(chapter_value)
|
91 |
+
pages = [d for d in chapter_path.iterdir() if d.is_dir()] if chapter_path.exists() else []
|
92 |
+
return [{"label": d.name, "value": str(d)} for d in pages]
|
|
|
93 |
return []
|
94 |
|
95 |
+
|
96 |
+
# -----------------------------------------------------------------------------
|
97 |
+
# CALLBACK DE MISE À JOUR AUDIO ET DES SUGGESTIONS (via dcc.Store)
|
98 |
+
# -----------------------------------------------------------------------------
|
99 |
@app.callback(
|
100 |
+
Output("audio-store", "data"),
|
101 |
+
Output("values-store", "data"),
|
102 |
Output("audio-player", "src"),
|
103 |
Output("suggestion-checklist", "options"),
|
104 |
+
Output("hidden-message", "style"),
|
|
|
105 |
Input("page-dropdown", "value"),
|
106 |
State("chapter-dropdown", "value")
|
107 |
)
|
|
|
108 |
def update_audio_and_suggestions(page_value, chapter_value):
|
109 |
+
"""
|
110 |
+
Met à jour les stores pour les chemins audio et les suggestions.
|
111 |
+
Affiche le premier segment audio et les 6 premières suggestions.
|
112 |
+
"""
|
113 |
+
hidden_style = {"display": "none"}
|
114 |
if page_value and chapter_value:
|
115 |
+
# Utilise la méthode abstraite pour charger les transcriptions et extraire l'état
|
116 |
+
possible_values, audio_paths = processor.load_page_verses_and_audios(s3_client, page_value, data)
|
117 |
+
options = [{"label": t, "value": t} for t in possible_values[:6]]
|
118 |
+
audio_src = audio_paths[0] if audio_paths else no_update
|
119 |
+
return audio_paths, possible_values, audio_src, options, hidden_style
|
120 |
+
else:
|
121 |
+
if os.path.exists(PERSIST_FILE):
|
122 |
os.remove(PERSIST_FILE)
|
123 |
+
return no_update, no_update, no_update, no_update, {"display": "block"}
|
|
|
|
|
124 |
|
|
|
125 |
|
126 |
+
# -----------------------------------------------------------------------------
|
127 |
+
# CALLBACK POUR LE TRAITEMENT DE LA TRANSCRIPTION
|
128 |
+
# -----------------------------------------------------------------------------
|
129 |
@app.callback(
|
130 |
+
Output("audio-store", "data", allow_duplicate=True),
|
131 |
+
Output("values-store", "data", allow_duplicate=True),
|
132 |
Output("audio-player", "src", allow_duplicate=True),
|
133 |
Output("suggestion-checklist", "options", allow_duplicate=True),
|
134 |
+
Output("suggestion-checklist", "value", allow_duplicate=True),
|
135 |
Output("confirmation-message", "children"),
|
136 |
Output("transcription-store", "data"),
|
137 |
Input("submit-button", "n_clicks"),
|
|
|
139 |
State("user-info", "value"),
|
140 |
State("page-dropdown", "value"),
|
141 |
State("audio-player", "src"),
|
142 |
+
State("audio-store", "data"),
|
143 |
+
State("values-store", "data"),
|
144 |
State("transcription-store", "data"),
|
145 |
prevent_initial_call=True
|
146 |
)
|
147 |
+
def update_transcription(n_clicks, selected_transcriptions, user_info, page_value,
|
148 |
+
current_audio, audio_store, values_store, stored_transcriptions):
|
149 |
+
"""
|
150 |
+
Traite la soumission d'une transcription :
|
151 |
+
- Ajoute l'entrée avec timestamp dans le store de transcription.
|
152 |
+
- Retire le segment audio traité et les suggestions utilisées.
|
153 |
+
- Met à jour l'audio et les options de la checklist.
|
154 |
+
"""
|
155 |
if n_clicks > 0 and page_value and current_audio:
|
|
|
|
|
|
|
|
|
156 |
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
157 |
+
stored_transcriptions = stored_transcriptions if stored_transcriptions is not None else []
|
158 |
+
stored_transcriptions.append({
|
159 |
+
"segment_path": current_audio,
|
160 |
"transcriptions": selected_transcriptions,
|
161 |
"timestamp": timestamp,
|
162 |
"user_id": user_info
|
163 |
+
})
|
164 |
+
# Mise à jour du store audio
|
165 |
+
if audio_store and isinstance(audio_store, list):
|
166 |
+
audio_store.pop(0)
|
167 |
+
next_audio = audio_store[0] if audio_store else ""
|
|
|
|
|
168 |
else:
|
169 |
next_audio = ""
|
170 |
+
# Mise à jour du store de suggestions
|
171 |
+
if values_store and isinstance(values_store, list):
|
172 |
+
for val in selected_transcriptions:
|
173 |
+
if val in values_store:
|
174 |
+
values_store.remove(val)
|
175 |
+
next_options = [{"label": t, "value": t} for t in (values_store[:6] + ["autre transcription"])] if values_store else []
|
176 |
+
confirmation_message = (f"Transcriptions sélectionnées : {', '.join(selected_transcriptions)}"
|
177 |
+
if selected_transcriptions else "Aucune transcription sélectionnée.")
|
178 |
+
# Réinitialisation de la checklist
|
179 |
+
if len(next_options)>1:
|
180 |
+
return audio_store, values_store, next_audio, next_options, [], confirmation_message, stored_transcriptions
|
181 |
+
return no_update, no_update, no_update, no_update, no_update, no_update, no_update
|
182 |
+
|
183 |
+
|
184 |
+
# -----------------------------------------------------------------------------
|
185 |
+
# CALLBACK POUR LA SAUVEGARDE DES RÉSULTATS
|
186 |
+
# -----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
@app.callback(
|
188 |
Output("confirmation-message", "children", allow_duplicate=True),
|
189 |
Input("save-results-button", "n_clicks"),
|
|
|
192 |
prevent_initial_call=True
|
193 |
)
|
194 |
def save_results(n_clicks, page_value, stored_transcriptions):
|
195 |
+
"""
|
196 |
+
Sauvegarde les transcriptions en combinant les données persistantes existantes
|
197 |
+
avec les nouvelles et en les uploadant sur S3.
|
198 |
+
"""
|
199 |
if n_clicks > 0 and page_value:
|
|
|
200 |
try:
|
201 |
+
initial_transcriptions = processor.load_persistent_data(PERSIST_FILE)
|
202 |
except Exception as e:
|
203 |
+
logger.error(f"Erreur lors du chargement des données persistantes : {e}")
|
204 |
initial_transcriptions = []
|
205 |
+
combined_transcriptions = initial_transcriptions + (stored_transcriptions if stored_transcriptions else [])
|
206 |
+
processor.save_persistent_data(combined_transcriptions, PERSIST_FILE)
|
207 |
+
cleaned_page = page_value.replace("\\", "/").replace("assets/", "")
|
208 |
+
s3_key = f"labelling/{cleaned_page}/{PERSIST_FILE}"
|
209 |
+
s3_client.upload_file(PERSIST_FILE, s3_key)
|
210 |
+
|
211 |
+
return "Les résultats ont été sauvegardés avec succès."
|
212 |
+
return no_update
|
|
|
|
|
|
app/pages/Annotations/layout.py
CHANGED
@@ -1,237 +1,224 @@
|
|
1 |
-
from pathlib import Path
|
2 |
import dash_bootstrap_components as dbc
|
3 |
from dash import dcc, html
|
4 |
-
from
|
5 |
-
|
6 |
from dotenv import load_dotenv
|
7 |
|
8 |
load_dotenv()
|
9 |
BUCKET_NAME = "moore-collection"
|
10 |
|
11 |
-
#
|
12 |
-
AUDIO_FOLDER = Path("./assets/assets")
|
13 |
-
PERSIST_FILE = "results.json"
|
14 |
-
|
15 |
-
|
16 |
-
# Helper functions
|
17 |
def get_chapter_paths(base_folder):
|
18 |
-
"""Retrieve all chapter folders in the base folder."""
|
19 |
base = Path(base_folder)
|
20 |
return [d for d in base.iterdir() if d.is_dir()]
|
21 |
|
22 |
-
|
23 |
chapters = get_chapter_paths("assets")
|
24 |
chapter_options = [{"label": d.name, "value": str(d)} for d in chapters]
|
25 |
|
|
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
dbc.Col(
|
35 |
-
html.H1(
|
36 |
-
"Outil de transcription audio",
|
37 |
-
className="text-center my-4 text-primary",
|
38 |
-
),
|
39 |
-
width=12,
|
40 |
-
)
|
41 |
-
]
|
42 |
),
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
)
|
95 |
-
|
96 |
),
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
placeholder="Choisissez une page",
|
112 |
-
),
|
113 |
-
dbc.Button(
|
114 |
-
"Démarrer la transcription",
|
115 |
-
id="start-button",
|
116 |
-
color="primary",
|
117 |
-
className="w-100 mt-2",
|
118 |
-
),
|
119 |
-
]
|
120 |
-
),
|
121 |
-
]
|
122 |
-
)
|
123 |
-
],
|
124 |
-
width=12,
|
125 |
-
)
|
126 |
-
],
|
127 |
),
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
[
|
140 |
-
dcc.Loading(
|
141 |
-
html.Audio(
|
142 |
-
id="audio-player",
|
143 |
-
controls=True,
|
144 |
-
autoPlay=False,
|
145 |
-
className="w-100",
|
146 |
-
)
|
147 |
-
)
|
148 |
-
]
|
149 |
-
),
|
150 |
-
],
|
151 |
-
className="mb-4 shadow",
|
152 |
-
)
|
153 |
-
],
|
154 |
-
width=12,
|
155 |
-
),
|
156 |
-
dbc.Col(
|
157 |
-
[
|
158 |
-
dbc.Card(
|
159 |
-
[
|
160 |
-
dbc.CardHeader("Suggestions de transcriptions"),
|
161 |
-
dbc.CardBody(
|
162 |
-
[
|
163 |
-
dcc.Checklist(
|
164 |
-
id="suggestion-checklist",
|
165 |
-
options=[
|
166 |
-
{"label": t, "value": t}
|
167 |
-
for t in possible_values[:6]
|
168 |
-
],
|
169 |
-
value=[],
|
170 |
-
style={
|
171 |
-
"columns": "3",
|
172 |
-
"column-gap": "1rem",
|
173 |
-
},
|
174 |
-
)
|
175 |
-
]
|
176 |
-
),
|
177 |
-
],
|
178 |
-
className="mb-4 shadow",
|
179 |
-
)
|
180 |
-
],
|
181 |
-
width=12,
|
182 |
-
),
|
183 |
-
dbc.Col([
|
184 |
-
html.Div(
|
185 |
-
id="hidden-message",
|
186 |
-
style={"display": "none"}, # Initially hidden
|
187 |
-
children=[
|
188 |
-
html.P("Traitement de la page actuelle terminée, vous devez changer de page pour continuer. N'oubliez pas de sauvegarder.", style={"color": "red"})
|
189 |
-
]
|
190 |
)
|
191 |
-
],
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
"Sauvegarder résultats",
|
204 |
-
id="save-results-button",
|
205 |
-
n_clicks=0,
|
206 |
-
color="success",
|
207 |
-
className="w-100",
|
208 |
-
style={"marginTop": "20px"},
|
209 |
-
),
|
210 |
-
],
|
211 |
-
width=12,
|
212 |
-
),
|
213 |
-
dbc.Col(
|
214 |
-
[
|
215 |
-
html.Div(
|
216 |
-
id="confirmation-message",
|
217 |
-
className="text-success text-center mt-3",
|
218 |
-
)
|
219 |
-
],
|
220 |
-
width=12,
|
221 |
-
),
|
222 |
-
dbc.Row(
|
223 |
-
[
|
224 |
-
dcc.Store(
|
225 |
-
id="transcription-store", data=[]
|
226 |
-
), # Store for temporary transcriptions
|
227 |
-
]
|
228 |
-
),
|
229 |
-
],
|
230 |
),
|
231 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
)
|
233 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
234 |
|
235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
236 |
|
237 |
-
|
|
|
|
|
|
|
|
1 |
import dash_bootstrap_components as dbc
|
2 |
from dash import dcc, html
|
3 |
+
from pathlib import Path
|
|
|
4 |
from dotenv import load_dotenv
|
5 |
|
6 |
load_dotenv()
|
7 |
BUCKET_NAME = "moore-collection"
|
8 |
|
9 |
+
# --- Helper pour récupérer les chapitres ---
|
|
|
|
|
|
|
|
|
|
|
10 |
def get_chapter_paths(base_folder):
    """Return the immediate sub-directories of ``base_folder``, sorted by path.

    Args:
        base_folder: Directory (string or path-like) whose direct children
            are scanned.

    Returns:
        A list of ``Path`` objects, one per direct sub-directory. The list is
        empty when ``base_folder`` does not exist or is not a directory.
    """
    base = Path(base_folder)
    if not base.is_dir():
        # This helper runs while the module is imported; a missing folder
        # should yield no chapters instead of aborting the import.
        return []
    # iterdir() yields entries in arbitrary order; sort so the result is stable.
    return sorted(d for d in base.iterdir() if d.is_dir())
|
13 |
|
|
|
14 |
# Chapter folders found under the relative "assets" directory (evaluated at import time).
chapters = get_chapter_paths("assets")
|
15 |
# Dropdown options: the folder name is the label, the folder path (as a string) is the value.
chapter_options = [{"label": d.name, "value": str(d)} for d in chapters]
|
16 |
|
17 |
+
# --- Fonctions de création des différentes cards ---
|
18 |
|
19 |
+
def header_card():
    """Build the header row that displays the application title."""
    title = html.H1(
        "Outil de transcription audio",
        className="text-center my-4 text-primary",
    )
    return dbc.Row(dbc.Col(title, width=12))
|
30 |
+
|
31 |
+
def user_info_card():
    """Build the row holding the user-identification input and its continue button."""
    identity_input = dbc.Input(
        id="user-info",
        placeholder="Entrez votre email, pseudonyme ou nom pour qu'on vous crédite",
        type="text",
        className="mb-3",
    )
    continue_button = dbc.Button(
        "Continuer",
        id="pseudo-continue-button",
        color="primary",
        className="w-100",
    )
    column = dbc.Col([identity_input, continue_button], width=12)
    return dbc.Row(column)
|
52 |
+
|
53 |
+
def chapter_card():
    """Build the chapter-selection row; it is created with ``display: none``."""
    chapter_picker = dcc.Dropdown(
        id="chapter-dropdown",
        options=chapter_options,
        placeholder="Choisissez un chapitre",
    )
    continue_button = dbc.Button(
        "Continuer",
        id="chapter-continue-button",
        color="primary",
        className="w-100 mt-2",
    )
    card = dbc.Card(
        [
            dbc.CardHeader("Sélectionnez un chapitre"),
            dbc.CardBody([chapter_picker, continue_button]),
        ]
    )
    return dbc.Row(
        id="chapter-section",
        style={"display": "none"},
        children=[dbc.Col(card, width=12)],
    )
|
84 |
+
|
85 |
+
def page_card():
    """Build the page-selection row; it is created with ``display: none``."""
    page_picker = dcc.Dropdown(
        id="page-dropdown",
        placeholder="Choisissez une page",
    )
    start_button = dbc.Button(
        "Démarrer la transcription",
        id="start-button",
        color="primary",
        className="w-100 mt-2",
    )
    card = dbc.Card(
        [
            dbc.CardHeader("Sélectionnez une page"),
            dbc.CardBody([page_picker, start_button]),
        ]
    )
    return dbc.Row(
        id="page-section",
        style={"display": "none"},
        children=[dbc.Col(card, width=12)],
    )
|
115 |
+
|
116 |
+
def transcription_card():
    """Build the transcription row: audio player, suggestions, actions and stores.

    The row is created with ``display: none`` and contains, in order: the
    audio card, the suggestion checklist card, the end-of-page notice, the
    submit/save buttons, the confirmation message area, and three
    ``dcc.Store`` components (``transcription-store``, ``audio-store``,
    ``values-store``), each initialised with an empty list.

    Returns:
        A ``dbc.Row`` with id ``transcription-section``.
    """
    audio_card = dbc.Card(
        [
            dbc.CardHeader("Lecture audio"),
            dbc.CardBody(
                dcc.Loading(
                    html.Audio(
                        id="audio-player",
                        controls=True,
                        autoPlay=False,
                        className="w-100",
                    )
                )
            ),
        ],
        className="mb-4 shadow",
    )

    suggestion_card = dbc.Card(
        [
            dbc.CardHeader("Suggestions de transcriptions"),
            dbc.CardBody(
                dcc.Checklist(
                    id="suggestion-checklist",
                    options=[],  # Initially empty; will be updated via callback
                    value=[],
                    # Dash style dictionaries use camelCased CSS property
                    # names (as "marginTop" below), hence "columnGap".
                    style={"columns": "3", "columnGap": "1rem"},
                )
            ),
        ],
        className="mb-4 shadow",
    )

    # Notice shown in red once the current page has been fully processed;
    # created hidden.
    hidden_message = html.Div(
        id="hidden-message",
        style={"display": "none"},
        children=[
            html.P(
                "Traitement de la page actuelle terminé, vous devez changer de page pour continuer. N'oubliez pas de sauvegarder.",
                style={"color": "red"},
            )
        ],
    )

    action_buttons = dbc.Col(
        [
            dbc.Button(
                "Soumettre",
                id="submit-button",
                n_clicks=0,
                color="secondary",
                className="w-100",
                style={"marginTop": "20px"},
            ),
            dbc.Button(
                "Sauvegarder résultats",
                id="save-results-button",
                n_clicks=0,
                color="success",
                className="w-100",
                style={"marginTop": "20px"},
            ),
        ],
        width=12,
    )

    confirmation_message = dbc.Col(
        html.Div(
            id="confirmation-message",
            className="text-success text-center mt-3",
        ),
        width=12,
    )

    # The transcription row groups the visual components together with the
    # dcc.Store components that hold the application state.
    return dbc.Row(
        id="transcription-section",
        style={"display": "none"},
        children=[
            dbc.Col(audio_card, width=12),
            dbc.Col(suggestion_card, width=12),
            dbc.Col(hidden_message, width=12),
            action_buttons,
            confirmation_message,
            # Stores for the application state
            dcc.Store(id="transcription-store", data=[]),
            dcc.Store(id="audio-store", data=[]),
            dcc.Store(id="values-store", data=[]),
        ],
    )
|
207 |
|
208 |
+
def create_layout():
    """Assemble the main page container from the individual section builders."""
    section_builders = (
        header_card,
        user_info_card,
        chapter_card,
        page_card,
        transcription_card,
    )
    sections = [build() for build in section_builders]
    return dbc.Container(sections, fluid=True, className="p-4")
|
221 |
|
222 |
+
# Initialisation du layout
|
223 |
+
layout = create_layout()
|
224 |
+
# NOTE(review): star-import placed after `layout` is built; presumably imported
# only for its side effects (callback registration) — confirm.
from .callbacks import *
|
app/pages/Annotations/state.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass
|
2 |
+
from typing import List, Optional
|
3 |
+
import pandas as pd
|
4 |
+
|
5 |
+
@dataclass
class AppState:
    """Container for the annotation page's working state.

    Every field defaults to ``None``. ``initialize`` returns an instance in
    which only ``verses_data`` is populated.
    """

    # All fields default to None, so they are annotated as Optional.
    audio_paths: Optional[List[str]] = None
    possible_values: Optional[List[str]] = None
    current_chapter: Optional[str] = None
    current_page: Optional[str] = None
    verses_data: Optional[pd.DataFrame] = None

    @classmethod
    def initialize(cls, dataset_name: str) -> "AppState":
        """Load a dataset and return a state holding it as a DataFrame.

        The ``train`` split of ``dataset_name`` is loaded and converted to a
        pandas DataFrame. Two columns, ``chapter`` and ``page``, are added by
        applying ``extract_audio_identifier`` to each ``moore_source_url``.

        Args:
            dataset_name: Dataset identifier passed to ``load_dataset``.

        Returns:
            An ``AppState`` whose ``verses_data`` is the enriched DataFrame;
            all other fields keep their ``None`` default.
        """
        from datasets import load_dataset

        # The helper was previously referenced without being imported, which
        # raised NameError. NOTE(review): import path mirrors the one used in
        # app/global_vars.py — confirm it resolves from this package.
        from utils import extract_audio_identifier

        data = load_dataset(dataset_name, split="train").to_pandas()
        data[["chapter", "page"]] = data["moore_source_url"].apply(
            lambda x: pd.Series(extract_audio_identifier(x))
        )
        return cls(verses_data=data)
|
app/pages/base_page.py
CHANGED
@@ -17,7 +17,7 @@ class PageMeta:
|
|
17 |
parts = self.module_name.split(".")
|
18 |
if len(parts) < 2:
|
19 |
raise ValueError(f"Invalid module name: {self.module_name}")
|
20 |
-
self._id = parts[
|
21 |
return self._id
|
22 |
|
23 |
@property
|
@@ -29,7 +29,7 @@ class PageMeta:
|
|
29 |
@property
|
30 |
def path(self) -> str:
|
31 |
if not self._path:
|
32 |
-
self._path = route_prefix
|
33 |
return self._path
|
34 |
|
35 |
@classmethod
|
|
|
17 |
parts = self.module_name.split(".")
|
18 |
if len(parts) < 2:
|
19 |
raise ValueError(f"Invalid module name: {self.module_name}")
|
20 |
+
self._id = parts[1]
|
21 |
return self._id
|
22 |
|
23 |
@property
|
|
|
29 |
@property
|
30 |
def path(self) -> str:
|
31 |
if not self._path:
|
32 |
+
self._path = route_prefix + self.id_
|
33 |
return self._path
|
34 |
|
35 |
@classmethod
|