Spaces:
Runtime error
Runtime error
from datasets import load_dataset | |
import streamlit as st | |
from clarin_datasets.dataset_to_show import DatasetToShow | |
class KpwrNerDataset(DatasetToShow): | |
def __init__(self):
    """Initialise the base class, then set this dataset's name and description.

    ``dataset_name`` is the identifier later passed to ``load_dataset``;
    ``description`` is the text rendered by ``show_dataset``.
    """
    # Zero-argument super() avoids hard-coding the base class name.
    super().__init__()
    self.dataset_name = "clarin-pl/kpwr-ner"
    self.description = """
KPWR-NER is a part of the Polish Corpus of Wrocław University of Technology (Korpus Języka
Polskiego Politechniki Wrocławskiej). Its objective is named entity recognition for fine-grained categories
of entities. It is the ‘n82’ version of the KPWr, which means that number of classes is restricted to 82 (
originally 120). During corpus creation, texts were annotated by humans from various sources, covering many
domains and genres.
Tasks (input, output and metrics)
Named entity recognition (NER) - tagging entities in text with their corresponding type.
Input ('tokens' column): sequence of tokens
Output ('ner' column): sequence of predicted tokens’ classes in BIO notation (82 possible classes, described
in detail in the annotation guidelines)
example:
[‘Roboty’, ‘mają’, ‘kilkanaście’, ‘lat’, ‘i’, ‘pochodzą’, ‘z’, ‘USA’, ‘,’, ‘Wysokie’, ‘napięcie’, ‘jest’,
‘dużo’, ‘młodsze’, ‘,’, ‘powstało’, ‘w’, ‘Niemczech’, ‘.’] → [‘B-nam_pro_title’, ‘O’, ‘O’, ‘O’, ‘O’, ‘O’,
‘O’, ‘B-nam_loc_gpe_country’, ‘O’, ‘B-nam_pro_title’, ‘I-nam_pro_title’, ‘O’, ‘O’, ‘O’, ‘O’, ‘O’, ‘O’,
‘B-nam_loc_gpe_country’, ‘O’]
"""
def load_data(self):
    """Load the dataset and store each selected split as a pandas DataFrame.

    Fills ``self.data_dict`` with one ``{split name: DataFrame}`` entry for
    every name in ``self.subsets``. The visible part of this class never
    assigns ``self.subsets``; when it is missing or ``None``, every split
    present in the loaded dataset is used instead of raising.
    """
    raw_dataset = load_dataset(self.dataset_name)
    subsets = getattr(self, "subsets", None)
    if subsets is None:
        # No explicit selection available: fall back to all loaded splits.
        # Iterating the mapping returned by load_dataset yields split names.
        subsets = list(raw_dataset)
    self.data_dict = {
        subset: raw_dataset[subset].to_pandas() for subset in subsets
    }
def show_dataset(self): | |
header = st.container() | |
description = st.container() | |
dataframe_head = st.container() | |
with header: | |
st.title(self.dataset_name) | |
with description: | |
st.header("Dataset description") | |
st.write(self.description) | |