Olivier CARON committed on
Commit
f87d000
1 Parent(s): c49547e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -12
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import os # Add this import to use os.path.splitext
2
  import csv
3
  import streamlit as st
4
  import polars as pl
@@ -16,19 +16,18 @@ def load_data(file):
16
  if file_ext.lower() in ['.xls', '.xlsx']:
17
  return pl.read_excel(file)
18
  elif file_ext.lower() == '.csv':
19
- file.seek(0) # Retour au début du fichier
20
  try:
21
- sample = file.read(4096).decode('utf-8') # Essayer de décoder l'échantillon en UTF-8
22
  encoding = 'utf-8'
23
  except UnicodeDecodeError:
24
- encoding = 'latin1' # Basculer sur 'latin1' si UTF-8 échoue
25
  file.seek(0)
26
  sample = file.read(4096).decode(encoding)
27
 
28
  file.seek(0)
29
- dialect = csv.Sniffer().sniff(sample) # Détecter le dialecte/délimiteur
30
 
31
- # Convertir le fichier en StringIO pour simuler un fichier texte, si nécessaire
32
  file.seek(0)
33
  if encoding != 'utf-8':
34
  file_content = file.read().decode(encoding)
@@ -49,10 +48,10 @@ def perform_ner(filtered_df, selected_column, labels_list):
49
  progress_bar = st.progress(0)
50
  progress_text = st.empty()
51
 
52
- start_time = time.time() # Enregistrer le temps de début pour le temps d'exécution total
53
 
54
  for index, row in enumerate(filtered_df.to_pandas().itertuples(), 1):
55
- iteration_start_time = time.time() # Temps de début pour cette itération
56
 
57
  if st.session_state.stop_processing:
58
  progress_text.text("Process stopped by the user.")
@@ -69,13 +68,13 @@ def perform_ner(filtered_df, selected_column, labels_list):
69
  progress = index / filtered_df.height
70
  progress_bar.progress(progress)
71
 
72
- iteration_time = time.time() - iteration_start_time # Calculer le temps d'exécution pour cette itération
73
- total_time = time.time() - start_time # Calculer le temps total écoulé jusqu'à présent
74
 
75
  progress_text.text(f"Progress: {index}/{filtered_df.height} - {progress * 100:.0f}% (Iteration: {iteration_time:.2f}s, Total: {total_time:.2f}s)")
76
 
77
- end_time = time.time() # Enregistrer le temps de fin
78
- total_execution_time = end_time - start_time # Calculer le temps d'exécution total
79
 
80
  progress_text.text(f"Processing complete! Total execution time: {total_execution_time:.2f}s")
81
 
 
1
+ import os
2
  import csv
3
  import streamlit as st
4
  import polars as pl
 
16
  if file_ext.lower() in ['.xls', '.xlsx']:
17
  return pl.read_excel(file)
18
  elif file_ext.lower() == '.csv':
19
+ file.seek(0) # Go back to the beginning of the file
20
  try:
21
+ sample = file.read(4096).decode('utf-8') # Try to decode the sample in UTF-8
22
  encoding = 'utf-8'
23
  except UnicodeDecodeError:
24
+ encoding = 'latin1' # Switch to 'latin1' if UTF-8 fails
25
  file.seek(0)
26
  sample = file.read(4096).decode(encoding)
27
 
28
  file.seek(0)
29
+ dialect = csv.Sniffer().sniff(sample) # Detect the delimiter
30
 
 
31
  file.seek(0)
32
  if encoding != 'utf-8':
33
  file_content = file.read().decode(encoding)
 
48
  progress_bar = st.progress(0)
49
  progress_text = st.empty()
50
 
51
+ start_time = time.time() # Record start time for total runtime
52
 
53
  for index, row in enumerate(filtered_df.to_pandas().itertuples(), 1):
54
+ iteration_start_time = time.time() # Start time for this iteration
55
 
56
  if st.session_state.stop_processing:
57
  progress_text.text("Process stopped by the user.")
 
68
  progress = index / filtered_df.height
69
  progress_bar.progress(progress)
70
 
71
+ iteration_time = time.time() - iteration_start_time # Calculate runtime for this iteration
72
+ total_time = time.time() - start_time # Calculate total elapsed time so far
73
 
74
  progress_text.text(f"Progress: {index}/{filtered_df.height} - {progress * 100:.0f}% (Iteration: {iteration_time:.2f}s, Total: {total_time:.2f}s)")
75
 
76
+ end_time = time.time() # Record end time
77
+ total_execution_time = end_time - start_time # Calculate total runtime
78
 
79
  progress_text.text(f"Processing complete! Total execution time: {total_execution_time:.2f}s")
80