lhoestq HF staff committed on
Commit
9d8efc1
1 Parent(s): 2e2e9ca

small optim

Browse files
Files changed (1) hide show
  1. generate.py +7 -6
generate.py CHANGED
@@ -107,12 +107,13 @@ def stream_json_objects_from_batched_tokens_generator(batched_tokens_generator:
107
  for tokens_batch in chain([first_batch], batched_tokens_generator):
108
  for stream_idx, token in enumerate(tokens_batch):
109
  streams[stream_idx] += token
110
- try:
111
- for stream_sample in islice(ijson.items(StringIteratorIO(streams[stream_idx].__iter__()), json_field + ".item", buf_size=1), skips[stream_idx], None):
112
- yield stream_sample
113
- skips[stream_idx] = +1
114
- except ijson.IncompleteJSONError:
115
- pass
 
116
 
117
 
118
  def stream_jsonl_file(filename: str, prompt: str, columns: list[str], seed: int, size: int) -> Iterator[str]:
 
107
  for tokens_batch in chain([first_batch], batched_tokens_generator):
108
  for stream_idx, token in enumerate(tokens_batch):
109
  streams[stream_idx] += token
110
+ if '"' in token or "}" in token:
111
+ try:
112
+ for stream_sample in islice(ijson.items(StringIteratorIO(streams[stream_idx].__iter__()), json_field + ".item", buf_size=1), skips[stream_idx], None):
113
+ yield stream_sample
114
+ skips[stream_idx] = +1
115
+ except ijson.IncompleteJSONError:
116
+ pass
117
 
118
 
119
  def stream_jsonl_file(filename: str, prompt: str, columns: list[str], seed: int, size: int) -> Iterator[str]: