Spaces:
Runtime error
Runtime error
from stoke.src.data.util import GenerationConfig, split_data, conll_prompts | |
from stoke.src.data.generation import DataGenerator, FlairNERModel | |
# generation parameters | |
generation_kwargs = {"max_new_tokens": 100, "repetition_penalty": 1.2} | |
# Creating TrainConfig object with default values | |
config = GenerationConfig(language_model="gpt2", output_path="data/", dataset_name="test", cuda=False, generation_kwargs=generation_kwargs) | |
# create annotation model | |
reference_model = FlairNERModel(config.language_model, "flair/ner-english-ontonotes-large") | |
# create DataGenerator | |
generator = DataGenerator(config, reference_model) | |
# run generator | |
generated_texts = generator.generate_text(conll_prompts()[:10], generation_kwargs) | |
# annotate text with reference model | |
annotated_texts = generator.annotate_text(generated_texts) | |
# save data in correct format | |
generator.save_data(annotated_texts) | |
# split dataset | |
split_data(config.path_data) | |