lhoestq HF staff committed on
Commit
fe1b9ba
1 Parent(s): db651f9

Update analyze.py

Browse files
Files changed (1) hide show
  1. analyze.py +3 -5
analyze.py CHANGED
@@ -11,7 +11,8 @@ Row = dict[str, Any]
11
  T = TypeVar("T")
12
  BATCH_SIZE = 1
13
  MAX_TEXT_LENGTH = 500
14
- batch_analyzer: Optional[BatchAnalyzerEngine] = None
 
15
 
16
 
17
  class PresidioEntity(TypedDict):
@@ -121,16 +122,13 @@ def analyze(
121
  def presidio_scan_entities(
122
  rows: Iterable[Row], scanned_columns: list[str], columns_descriptions: list[str]
123
  ) -> Iterable[PresidioEntity]:
124
- global batch_analyzer
125
  cache: dict[str, list[RecognizerResult]] = {}
126
- if batch_analyzer is None:
127
- batch_analyser = BatchAnalyzerEngine(AnalyzerEngine())
128
  rows_with_scanned_columns_only = (
129
  {column_name: get_strings(row[column_name])[:MAX_TEXT_LENGTH] for column_name in scanned_columns} for row in rows
130
  )
131
  for indices, batch in batched(rows_with_scanned_columns_only, BATCH_SIZE, with_indices=True):
132
  yield from analyze(
133
- batch_analyzer=batch_analyser,
134
  batch=batch,
135
  indices=indices,
136
  scanned_columns=scanned_columns,
 
11
  T = TypeVar("T")
12
  BATCH_SIZE = 1
13
  MAX_TEXT_LENGTH = 500
14
+ analyzer = AnalyzerEngine()
15
+ batch_analyzer = BatchAnalyzerEngine(analyzer)
16
 
17
 
18
  class PresidioEntity(TypedDict):
 
122
  def presidio_scan_entities(
123
  rows: Iterable[Row], scanned_columns: list[str], columns_descriptions: list[str]
124
  ) -> Iterable[PresidioEntity]:
 
125
  cache: dict[str, list[RecognizerResult]] = {}
 
 
126
  rows_with_scanned_columns_only = (
127
  {column_name: get_strings(row[column_name])[:MAX_TEXT_LENGTH] for column_name in scanned_columns} for row in rows
128
  )
129
  for indices, batch in batched(rows_with_scanned_columns_only, BATCH_SIZE, with_indices=True):
130
  yield from analyze(
131
+ batch_analyzer=batch_analyzer,
132
  batch=batch,
133
  indices=indices,
134
  scanned_columns=scanned_columns,