# Spaces: Sleeping
import spacy | |
import json | |
from datetime import datetime | |
import streamlit as st | |
import pandas as pd | |
# Load the spaCy pipeline once at module level; tokenize_with_spacy() and
# annotate_text() reach it through the global name `nlp`.
# NOTE(review): 'la_core_web_lg' is presumably a separately installed Latin
# model package — confirm it is listed in the deployment requirements.
nlp = spacy.load('la_core_web_lg')
def tokenize_with_spacy(text):
    """Split ``text`` into a list of token strings.

    The text is run through the module-level ``nlp`` pipeline and the
    surface form (``.text``) of every resulting token is collected in order.
    """
    tokens = []
    for tok in nlp(text):
        tokens.append(tok.text)
    return tokens
def annotate_text(segmented_text):
    """Annotate each token string with POS, lemma and morphological features.

    Every token is passed through the module-level ``nlp`` pipeline on its
    own, and the first token of the resulting doc supplies the annotation.

    Args:
        segmented_text: Iterable of token strings.

    Returns:
        A list with one dict per input token, in input order. Each dict has
        the keys 'token', 'pos', 'lemma', 'aspect', 'tense', 'verbForm',
        'voice', 'mood', 'number', 'person', 'case' and 'gender'. A
        morphological value is the feature's values joined with ", ", or ""
        when the feature is absent.
    """
    # Output key -> morphological feature name, in output column order.
    morph_fields = (
        ('aspect', 'Aspect'),
        ('tense', 'Tense'),
        ('verbForm', 'VerbForm'),
        ('voice', 'Voice'),
        ('mood', 'Mood'),
        ('number', 'Number'),
        ('person', 'Person'),
        ('case', 'Case'),
        ('gender', 'Gender'),
    )

    annotated_tokens = []
    for token in segmented_text:
        # NOTE(review): each token is analysed without its sentence context,
        # which may affect tagging quality — confirm this is intended.
        doc = nlp(token)

        if len(doc) == 0:
            # The pipeline produced no tokens (e.g. an empty string), so
            # doc[0] would raise IndexError. Emit a blank row instead so the
            # output stays aligned one-to-one with the input.
            annotated_token = {'token': token, 'pos': '', 'lemma': ''}
            for key, _feature in morph_fields:
                annotated_token[key] = ''
        else:
            first = doc[0]
            annotated_token = {
                'token': token,
                'pos': str(first.pos_),
                'lemma': str(first.lemma_),
            }
            for key, feature in morph_fields:
                annotated_token[key] = ', '.join(
                    first.morph.get(feature, default=[""])
                )

        annotated_tokens.append(annotated_token)
    return annotated_tokens
def save_annotations_as_json(annotated_text, filename):
    """Write ``annotated_text`` to ``filename`` as pretty-printed JSON.

    The file is opened for writing as UTF-8 (replacing any existing
    content); non-ASCII characters are written as-is rather than escaped,
    and nested structures are indented by four spaces.
    """
    with open(filename, 'w', encoding='utf-8') as out:
        json.dump(
            annotated_text,
            out,
            indent=4,
            ensure_ascii=False,
        )
# --- Streamlit page -------------------------------------------------------
st.set_page_config(layout="wide")
st.title("Annotation Tool")

text = st.text_area("Text")

# A button only reports True on the single rerun triggered by its click.
# The results are therefore kept in st.session_state so that they (and the
# save button) are still available on the rerun caused by clicking
# "Save Modifications as JSON"; nesting that button inside the "Annotate"
# branch would make its handler unreachable.
if st.button("Annotate"):
    if text:
        segmented_text = tokenize_with_spacy(text)
        st.session_state["segmented_text"] = segmented_text
        st.session_state["annotated_text"] = annotate_text(segmented_text)
    else:
        # Drop results from an earlier run so only the warning is shown.
        st.session_state.pop("segmented_text", None)
        st.session_state.pop("annotated_text", None)
        st.warning("Please enter some text.")

if "annotated_text" in st.session_state:
    st.subheader("Segmented Text:")
    st.write(st.session_state["segmented_text"])
    st.subheader("Annotated Text:")
    # Create a DataFrame from the annotated text
    df = pd.DataFrame(st.session_state["annotated_text"])
    st.write(df)
    if st.button("Save Modifications as JSON"):
        save_annotations_as_json(df.to_dict(orient='records'), 'annotations.json')
        st.success("Annotations saved as annotations.json")