furkanakkurt1618 committed on
Commit
f2d86ab
·
1 Parent(s): 2ecade9

add ner and pos_tagging

Browse files
Files changed (3) hide show
  1. app.py +5 -2
  2. apps/ner.py +60 -0
  3. apps/pos_tagging.py +60 -0
app.py CHANGED
@@ -8,6 +8,8 @@ import apps.paraphrasing
8
  import apps.title_generation
9
  import apps.sentiment
10
  import apps.categorization
 
 
11
 
12
  st.set_page_config(
13
  page_title="Turna",
@@ -16,15 +18,16 @@ st.set_page_config(
16
  )
17
 
18
  PAGES = {
19
- "Turna": apps.home,
20
  "Text Summarization": apps.summarization,
21
  "Text Paraphrasing": apps.paraphrasing,
22
  "News Title Generation": apps.title_generation,
23
  "Sentiment Classification": apps.sentiment,
24
  "Text Categorization": apps.categorization,
 
 
25
  }
26
 
27
-
28
  st.sidebar.title("Navigation")
29
  selection = st.sidebar.radio("Pages", list(PAGES.keys()))
30
 
 
8
  import apps.title_generation
9
  import apps.sentiment
10
  import apps.categorization
11
+ import apps.ner
12
+ import apps.pos_tagging
13
 
14
  st.set_page_config(
15
  page_title="Turna",
 
18
  )
19
 
20
  PAGES = {
21
+ "TURNA": apps.home,
22
  "Text Summarization": apps.summarization,
23
  "Text Paraphrasing": apps.paraphrasing,
24
  "News Title Generation": apps.title_generation,
25
  "Sentiment Classification": apps.sentiment,
26
  "Text Categorization": apps.categorization,
27
+ "Named Entity Recognition": apps.ner,
28
+ "Part-of-Speech Tagging": apps.pos_tagging
29
  }
30
 
 
31
  st.sidebar.title("Navigation")
32
  selection = st.sidebar.radio("Pages", list(PAGES.keys()))
33
 
apps/ner.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import streamlit as st
3
+ import time
4
+ from transformers import pipeline
5
+ import os
6
+ from .utils import query
7
+
8
+ HF_AUTH_TOKEN = os.getenv('HF_AUTH_TOKEN')
9
+ headers = {"Authorization": f"Bearer {HF_AUTH_TOKEN}"}
10
+
11
+ def write():
12
+
13
+ st.markdown("# Named Entity Recognition")
14
+ st.sidebar.header("Named Entity Recognition")
15
+ st.write(
16
+ '''Here, you can detect named entities in your text using the fine-tuned TURNA NER models.'''
17
+ )
18
+
19
+ # Sidebar
20
+
21
+ # Taken from https://huggingface.co/spaces/flax-community/spanish-gpt2/blob/main/app.py
22
+ st.sidebar.subheader("Configurable parameters")
23
+
24
+ model_name = st.sidebar.selectbox(
25
+ "Model Selector",
26
+ options=[
27
+ "turna_ner_wikiann",
28
+ "turna_ner_milliyet"
29
+ ],
30
+ index=0,
31
+ )
32
+ max_new_tokens = st.sidebar.number_input(
33
+ "Maximum length",
34
+ min_value=0,
35
+ max_value=64,
36
+ value=64,
37
+ help="The maximum length of the sequence to be generated.",
38
+ )
39
+
40
+ length_penalty = st.sidebar.number_input(
41
+ "Length penalty",
42
+ value=2.0,
43
+ help=" length_penalty > 0.0 promotes longer sequences, while length_penalty < 0.0 encourages shorter sequences. ",
44
+ )
45
+
46
+ no_repeat_ngram_size = st.sidebar.number_input(
47
+ "No Repeat N-Gram Size",
48
+ min_value=0,
49
+ value=3,
50
+ help="If set to int > 0, all ngrams of that size can only occur once.",
51
+ )
52
+
53
+ input_text = st.text_area(label='Enter a text: ', height=100,
54
+ value="Ecevit, Irak hükümetinin de Ankara Büyükelçiliği için agreman istediğini belirtti.")
55
+ url = ("https://api-inference.huggingface.co/models/boun-tabi-LMG/" + model_name.lower())
56
+ params = {"length_penalty": length_penalty, "no_repeat_ngram_size": no_repeat_ngram_size, "max_new_tokens": max_new_tokens }
57
+ if st.button("Generate"):
58
+ with st.spinner('Generating...'):
59
+ output = query(input_text, url, params)
60
+ st.success(output)
apps/pos_tagging.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import streamlit as st
3
+ import time
4
+ from transformers import pipeline
5
+ import os
6
+ from .utils import query
7
+
8
+ HF_AUTH_TOKEN = os.getenv('HF_AUTH_TOKEN')
9
+ headers = {"Authorization": f"Bearer {HF_AUTH_TOKEN}"}
10
+
11
+ def write():
12
+
13
+ st.markdown("# Part-of-Speech Tagging")
14
+ st.sidebar.header("Part-of-Speech Tagging")
15
+ st.write(
16
+ '''Here, you can detect part-of-speech tags in your text using the fine-tuned TURNA POS models.'''
17
+ )
18
+
19
+ # Sidebar
20
+
21
+ # Taken from https://huggingface.co/spaces/flax-community/spanish-gpt2/blob/main/app.py
22
+ st.sidebar.subheader("Configurable parameters")
23
+
24
+ model_name = st.sidebar.selectbox(
25
+ "Model Selector",
26
+ options=[
27
+ "turna_pos_boun",
28
+ "turna_pos_imst"
29
+ ],
30
+ index=0,
31
+ )
32
+ max_new_tokens = st.sidebar.number_input(
33
+ "Maximum length",
34
+ min_value=0,
35
+ max_value=64,
36
+ value=64,
37
+ help="The maximum length of the sequence to be generated.",
38
+ )
39
+
40
+ length_penalty = st.sidebar.number_input(
41
+ "Length penalty",
42
+ value=2.0,
43
+ help=" length_penalty > 0.0 promotes longer sequences, while length_penalty < 0.0 encourages shorter sequences. ",
44
+ )
45
+
46
+ no_repeat_ngram_size = st.sidebar.number_input(
47
+ "No Repeat N-Gram Size",
48
+ min_value=0,
49
+ value=3,
50
+ help="If set to int > 0, all ngrams of that size can only occur once.",
51
+ )
52
+
53
+ input_text = st.text_area(label='Enter a text: ', height=100,
54
+ value="Çünkü her kişinin bir başka yolu, bir başka yöntemi olmak gerektir.")
55
+ url = ("https://api-inference.huggingface.co/models/boun-tabi-LMG/" + model_name.lower())
56
+ params = {"length_penalty": length_penalty, "no_repeat_ngram_size": no_repeat_ngram_size, "max_new_tokens": max_new_tokens }
57
+ if st.button("Generate"):
58
+ with st.spinner('Generating...'):
59
+ output = query(input_text, url, params)
60
+ st.success(output)