Spaces:

clarin-pl
/

datasets-explorer

Runtime error

App Files Community

Mariusz Kossakowski committed on Sep 14, 2022

Commit

2b9022f

1 Parent(s): abb1c69

Add datasets links

Browse files

Files changed (7) hide show

clarin_datasets/abusive_clauses_dataset.py +3 -1
clarin_datasets/aspectemo_dataset.py +3 -1
clarin_datasets/cst_wikinews_dataset.py +2 -2
clarin_datasets/kpwr_ner_datasets.py +3 -1
clarin_datasets/nkjp_pos_dataset.py +3 -1
clarin_datasets/polemo_dataset.py +4 -1
clarin_datasets/punctuation_restoration_dataset.py +3 -1

clarin_datasets/abusive_clauses_dataset.py CHANGED Viewed

@@ -15,7 +15,9 @@ class AbusiveClausesDataset(DatasetToShow):
         DatasetToShow.__init__(self)
         self.dataset_name = "laugustyniak/abusive-clauses-pl"
         self.subsets = ["train", "validation", "test"]
-        self.description = """
         ''I have read and agree to the terms and conditions'' is one of the biggest lies on the Internet.
         Consumers rarely read the contracts they are required to accept. We conclude agreements over the Internet daily.
         But do we know the content of these agreements? Do we check potential unfair statements? On the Internet,

         DatasetToShow.__init__(self)
         self.dataset_name = "laugustyniak/abusive-clauses-pl"
         self.subsets = ["train", "validation", "test"]
+        self.description = f"""
+        Dataset link: https://huggingface.co/datasets/{self.dataset_name}
         ''I have read and agree to the terms and conditions'' is one of the biggest lies on the Internet.
         Consumers rarely read the contracts they are required to accept. We conclude agreements over the Internet daily.
         But do we know the content of these agreements? Do we check potential unfair statements? On the Internet,

clarin_datasets/aspectemo_dataset.py CHANGED Viewed

@@ -10,7 +10,9 @@ class AspectEmoDataset(DatasetToShow):
         DatasetToShow.__init__(self)
         self.dataset_name = "clarin-pl/aspectemo"
         self.description = [
-            """
             AspectEmo Corpus is an extended version of a publicly available PolEmo 2.0
             corpus of Polish customer reviews used in many projects on the use of different methods in sentiment
             analysis. The AspectEmo corpus consists of four subcorpora, each containing online customer reviews from the

         DatasetToShow.__init__(self)
         self.dataset_name = "clarin-pl/aspectemo"
         self.description = [
+            f"""
+            Dataset link: https://huggingface.co/datasets/{self.dataset_name}
             AspectEmo Corpus is an extended version of a publicly available PolEmo 2.0
             corpus of Polish customer reviews used in many projects on the use of different methods in sentiment
             analysis. The AspectEmo corpus consists of four subcorpora, each containing online customer reviews from the

clarin_datasets/cst_wikinews_dataset.py CHANGED Viewed

@@ -9,8 +9,8 @@ class CSTWikinewsDataset(DatasetToShow):
     def __init__(self):
         DatasetToShow.__init__(self)
         self.dataset_name = "clarin-pl/cst-wikinews"
-        self.description = """
         """
     def load_data(self):

     def __init__(self):
         DatasetToShow.__init__(self)
         self.dataset_name = "clarin-pl/cst-wikinews"
+        self.description = f"""
+        Dataset link: https://huggingface.co/datasets/{self.dataset_name}
         """
     def load_data(self):

clarin_datasets/kpwr_ner_datasets.py CHANGED Viewed

@@ -11,7 +11,9 @@ class KpwrNerDataset(DatasetToShow):
         self.data_dict_named = None
         self.dataset_name = "clarin-pl/kpwr-ner"
         self.description = [
-            """
             KPWR-NER is a part the Polish Corpus of Wrocław University of Technology (Korpus Języka
             Polskiego Politechniki Wrocławskiej). Its objective is named entity recognition for fine-grained categories
             of entities. It is the ‘n82’ version of the KPWr, which means that number of classes is restricted to 82 (

         self.data_dict_named = None
         self.dataset_name = "clarin-pl/kpwr-ner"
         self.description = [
+            f"""
+            Dataset link: https://huggingface.co/datasets/{self.dataset_name}
             KPWR-NER is a part the Polish Corpus of Wrocław University of Technology (Korpus Języka
             Polskiego Politechniki Wrocławskiej). Its objective is named entity recognition for fine-grained categories
             of entities. It is the ‘n82’ version of the KPWr, which means that number of classes is restricted to 82 (

clarin_datasets/nkjp_pos_dataset.py CHANGED Viewed

@@ -11,7 +11,9 @@ class NkjpPosDataset(DatasetToShow):
         self.data_dict_named = None
         self.dataset_name = "clarin-pl/nkjp-pos"
         self.description = [
-            """
             NKJP-POS is a part the National Corpus of Polish (Narodowy Korpus Języka Polskiego).
             Its objective is part-of-speech tagging, e.g. nouns, verbs, adjectives, adverbs, etc. During the creation of
             corpus, texts of were annotated by humans from various sources, covering many domains and genres.

         self.data_dict_named = None
         self.dataset_name = "clarin-pl/nkjp-pos"
         self.description = [
+            f"""
+            Dataset link: https://huggingface.co/datasets/{self.dataset_name}
             NKJP-POS is a part the National Corpus of Polish (Narodowy Korpus Języka Polskiego).
             Its objective is part-of-speech tagging, e.g. nouns, verbs, adjectives, adverbs, etc. During the creation of
             corpus, texts of were annotated by humans from various sources, covering many domains and genres.

clarin_datasets/polemo_dataset.py CHANGED Viewed

@@ -16,7 +16,10 @@ class PolemoDataset(DatasetToShow):
         DatasetToShow.__init__(self)
         self.dataset_name = "clarin-pl/polemo2-official"
         self.subsets = ["train", "validation", "test"]
-        self.description = """The PolEmo2.0 is a dataset of online consumer reviews from four domains: medicine,
         hotels, products, and university. It is human-annotated on a level of full reviews and individual
         sentences. Current version (PolEmo 2.0) contains 8,216 reviews having 57,466 sentences. Each text and
         sentence was manually annotated with sentiment in the 2+1 scheme, which gives a total of 197,

         DatasetToShow.__init__(self)
         self.dataset_name = "clarin-pl/polemo2-official"
         self.subsets = ["train", "validation", "test"]
+        self.description = f"""
+        Dataset link: https://huggingface.co/datasets/{self.dataset_name}
+        The PolEmo2.0 is a dataset of online consumer reviews from four domains: medicine,
         hotels, products, and university. It is human-annotated on a level of full reviews and individual
         sentences. Current version (PolEmo 2.0) contains 8,216 reviews having 57,466 sentences. Each text and
         sentence was manually annotated with sentiment in the 2+1 scheme, which gives a total of 197,

clarin_datasets/punctuation_restoration_dataset.py CHANGED Viewed

@@ -11,7 +11,9 @@ class PunctuationRestorationDataset(DatasetToShow):
         self.data_dict_named = None
         self.dataset_name = "clarin-pl/2021-punctuation-restoration"
         self.description = [
-            """
             Speech transcripts generated by Automatic Speech Recognition (ASR) systems typically do
             not contain any punctuation or capitalization. In longer stretches of automatically recognized speech,
             the lack of punctuation affects the general clarity of the output text [1]. The primary purpose of

         self.data_dict_named = None
         self.dataset_name = "clarin-pl/2021-punctuation-restoration"
         self.description = [
+            f"""
+            Dataset link: https://huggingface.co/datasets/{self.dataset_name}
             Speech transcripts generated by Automatic Speech Recognition (ASR) systems typically do
             not contain any punctuation or capitalization. In longer stretches of automatically recognized speech,
             the lack of punctuation affects the general clarity of the output text [1]. The primary purpose of