import json

import pandas as pd
import spacy
import streamlit as st

# LatinCy large Latin pipeline; must be installed in the active environment.
nlp = spacy.load('la_core_web_lg')


def tokenize_with_spacy(text):
    """Segment the input text into tokens with the spaCy pipeline."""
    doc = nlp(text)
    return [token.text for token in doc]


def morph_feature(token, feature):
    """Return a morphological feature as a comma-separated string ('' if absent)."""
    return ', '.join(token.morph.get(feature))


def annotate_text(segmented_text):
    """Annotate each token with its POS tag, lemma, and morphological features."""
    annotated_tokens = []
    for token in segmented_text:
        # Each token is re-analysed in isolation, mirroring the original design;
        # note that this discards sentence context.
        doc = nlp(token)
        tok = doc[0]
        annotated_tokens.append({
            'token': token,
            'pos': tok.pos_,
            'lemma': tok.lemma_,
            'aspect': morph_feature(tok, "Aspect"),
            'tense': morph_feature(tok, "Tense"),
            'verbForm': morph_feature(tok, "VerbForm"),
            'voice': morph_feature(tok, "Voice"),
            'mood': morph_feature(tok, "Mood"),
            'number': morph_feature(tok, "Number"),
            'person': morph_feature(tok, "Person"),
            'case': morph_feature(tok, "Case"),
            'gender': morph_feature(tok, "Gender"),
        })
    return annotated_tokens


def save_annotations_as_json(annotated_text, filename):
    """Write the annotations to disk as UTF-8 JSON."""
    with open(filename, 'w', encoding='utf-8') as json_file:
        json.dump(annotated_text, json_file, ensure_ascii=False, indent=4)


st.title("Annotation Tool")
text = st.text_area("Text")

if st.button("Annotate"):
    if text:
        segmented_text = tokenize_with_spacy(text)
        # Persist the results in session state so they survive the rerun that
        # Streamlit triggers when any other button (e.g. "Save") is clicked;
        # a button nested inside another button's branch never fires otherwise.
        st.session_state['segmented_text'] = segmented_text
        st.session_state['annotated_text'] = annotate_text(segmented_text)
    else:
        st.warning("Please enter some text.")

if 'annotated_text' in st.session_state:
    st.subheader("Segmented Text:")
    st.write(st.session_state['segmented_text'])

    st.subheader("Annotated Text:")
    # st.data_editor makes the table editable (st.dataframe is read-only),
    # so "Save Modifications" actually captures the user's changes.
    df = pd.DataFrame(st.session_state['annotated_text'])
    edited_df = st.data_editor(df)

    if st.button("Save Modifications as JSON"):
        save_annotations_as_json(edited_df.to_dict(orient='records'), 'annotations.json')
        st.success("Annotations saved as annotations.json")
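
# Usage sketch (the filename 'annotation_tool.py' is an assumed placeholder,
# not taken from the original):
#   pip install streamlit pandas spacy
#   # la_core_web_lg is the LatinCy Latin model and must be installed separately.
#   streamlit run annotation_tool.py
#
# Each record saved to annotations.json has the shape (values depend on the
# model's analysis):
#   {"token": ..., "pos": ..., "lemma": ..., "aspect": ..., "tense": ...,
#    "verbForm": ..., "voice": ..., "mood": ..., "number": ..., "person": ...,
#    "case": ..., "gender": ...}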