"""Streamlit app that tokenizes text with spaCy and shows per-token annotations.

The user enters text, clicks "Annotate" to see the part of speech, lemma and
morphological features of every token, and can then save the annotations to a
timestamped JSON file in the current working directory.
"""

import json
from datetime import datetime

import spacy
import streamlit as st

MODEL_NAME = 'la_core_web_lg'

# Output key -> spaCy morphological feature name, in the order the keys
# appear in each annotation dict (and therefore in the saved JSON).
_MORPH_FEATURES = {
    'aspect': 'Aspect',
    'tense': 'Tense',
    'verbForm': 'VerbForm',
    'voice': 'Voice',
    'mood': 'Mood',
    'number': 'Number',
    'person': 'Person',
    'case': 'Case',
    'gender': 'Gender',
}

# (label shown in the UI, annotation key), in display order.
_DISPLAY_FIELDS = (
    ('Token', 'token'),
    ('POS', 'pos'),
    ('Lemma', 'lemma'),
    ('Aspect', 'aspect'),
    ('Tense', 'tense'),
    ('VerbForm', 'verbForm'),
    ('Voice', 'voice'),
    ('Mood', 'mood'),
    ('Number', 'number'),
    ('Person', 'person'),
    ('Case', 'case'),
    ('Gender', 'gender'),
)


@st.cache_resource
def _load_model():
    """Load the spaCy pipeline once and reuse it across Streamlit reruns."""
    # Streamlit re-executes this script on every widget interaction; without
    # caching, the model would be reloaded from disk each time.
    return spacy.load(MODEL_NAME)


nlp = _load_model()


def tokenize_with_spacy(text):
    """Return the list of token strings spaCy produces for *text*."""
    return [token.text for token in nlp(text)]


def annotate_text(segmented_text):
    """Annotate each token string with POS, lemma and morphological features.

    Every token is run through the pipeline on its own (as an isolated
    one-token text), and the first token of the resulting doc supplies the
    annotation values.

    Args:
        segmented_text: Iterable of token strings.

    Returns:
        A list with one dict per input token. Each dict has the keys
        'token', 'pos', 'lemma' and one key per entry of ``_MORPH_FEATURES``.
        Multi-valued features are joined with ', '; features that are absent
        are empty strings.
    """
    tokens = list(segmented_text)
    annotated_tokens = []
    # nlp.pipe processes the token strings in batches and yields docs in
    # input order, so zip() pairs each doc with its source token.
    for token, doc in zip(tokens, nlp.pipe(tokens)):
        annotation = {'token': token, 'pos': '', 'lemma': ''}
        annotation.update(dict.fromkeys(_MORPH_FEATURES, ''))
        if len(doc) > 0:  # guard: an empty string yields an empty doc
            first = doc[0]
            annotation['pos'] = str(first.pos_)
            annotation['lemma'] = str(first.lemma_)
            for key, feature in _MORPH_FEATURES.items():
                # morph.get() returns a (possibly empty) list of values, so
                # a missing feature joins to the empty string.
                annotation[key] = ', '.join(first.morph.get(feature))
        annotated_tokens.append(annotation)
    return annotated_tokens


def save_annotations_as_json(annotated_text, filename):
    """Write *annotated_text* to *filename* as indented UTF-8 JSON."""
    with open(filename, 'w', encoding='utf-8') as json_file:
        json.dump(annotated_text, json_file, ensure_ascii=False, indent=4)


def _render_annotations(segmented_text, annotated_text):
    """Display the token list followed by one labelled section per token."""
    st.subheader("Segmented Text:")
    st.write(segmented_text)
    st.subheader("Annotated Text:")
    for token in annotated_text:
        for label, key in _DISPLAY_FIELDS:
            # Values are already display-ready strings; joining them again
            # would insert ', ' between their individual characters.
            st.write(f"{label}: {token[key]}")
        st.write("-----")


st.title("Annotation Tool")

# Add Materialize CSS
# NOTE(review): the markup string is empty in this file, so nothing is
# injected here -- confirm whether a stylesheet tag was meant to be included.
st.markdown('', unsafe_allow_html=True)

text = st.text_area("Text")

if st.button("Annotate"):
    if text.strip():
        tokens = tokenize_with_spacy(text)
        # Keep results in session state: the script reruns from the top on
        # every button click, so plain local variables would be gone by the
        # time the save button is pressed.
        st.session_state["segmented_text"] = tokens
        st.session_state["annotated_text"] = annotate_text(tokens)
    else:
        st.warning("Please enter some text.")

segmented_text = st.session_state.get("segmented_text", [])
annotated_text = st.session_state.get("annotated_text", [])

if annotated_text:
    _render_annotations(segmented_text, annotated_text)

if st.button("Save Modifications as JSON"):
    if annotated_text:
        # Generate a unique file name using a timestamp
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        filename = f'annotations_{timestamp}.json'
        save_annotations_as_json(annotated_text, filename)
        st.success(f"Annotations saved as {filename}")
    else:
        st.warning("No annotations to save. Annotate some text first.")

# Add Materialize JavaScript
# NOTE(review): the markup string is empty here as well -- confirm intent.
st.markdown('', unsafe_allow_html=True)