"""Streamlit app that tokenizes text with spaCy and lists per-token morphology.

The user enters text, clicks "Annotate" to see the tokens and their
part-of-speech / lemma / morphological features, and can then save the
latest annotations to a timestamped JSON file.
"""

import json
from datetime import datetime

import spacy
import streamlit as st

try:
    from st_theme_select import st_theme_select
except ImportError:
    # NOTE(review): this theme-picker package could not be confirmed as
    # installable; treat it as optional so the rest of the app still runs.
    st_theme_select = None

# Preferred colours for the app.  st.set_page_config() has no ``theme``
# parameter (passing one raises TypeError), so these values are kept here for
# reference only; copy them into the ``[theme]`` section of
# ``.streamlit/config.toml`` to apply them.
custom_theme = {
    "primaryColor": "#009688",  # Change this to your preferred color
    "backgroundColor": "#f2f2f2",  # Change this to your preferred background color
    "secondaryBackgroundColor": "#ffffff",
    "textColor": "#000000",
    "font": "sans-serif",
}

# Must be the first Streamlit command executed by the script.
st.set_page_config(
    page_title="Annotation Tool",
    page_icon=":bookmark:",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Add Hugging Face's logo
st.image("huggingface_logo.png")  # Replace with the actual image URL or path

# Customize the Streamlit title
st.title("Hugging Face Annotation Tool")

# Theme selection widget (only when the optional package is importable).
selected_theme = "default"
if st_theme_select is not None:
    selected_theme = st_theme_select("Select Theme", theme="default")
    # NOTE(review): Streamlit's documented API has no ``st.set_theme``; only
    # call it if the installed build actually provides it.
    _apply_theme = getattr(st, "set_theme", None)
    if selected_theme != "default" and callable(_apply_theme):
        _apply_theme(selected_theme)

nlp = spacy.load('la_core_web_lg')

# (annotation dict key, morphological feature name), in output order.
MORPH_FEATURES = (
    ('aspect', "Aspect"),
    ('tense', "Tense"),
    ('verbForm', "VerbForm"),
    ('voice', "Voice"),
    ('mood', "Mood"),
    ('number', "Number"),
    ('person', "Person"),
    ('case', "Case"),
    ('gender', "Gender"),
)

# (label shown in the UI, annotation dict key), in display order.
DISPLAY_FIELDS = (
    ("Token", 'token'),
    ("POS", 'pos'),
    ("Lemma", 'lemma'),
) + tuple((feature, key) for key, feature in MORPH_FEATURES)


def tokenize_with_spacy(text):
    """Return the token texts that the loaded spaCy pipeline produces for *text*."""
    return [token.text for token in nlp(text)]


def annotate_text(segmented_text):
    """Annotate each token string with POS, lemma and morphological features.

    Every token is run through the pipeline on its own and the first token of
    the resulting Doc is described, so context-dependent tags may differ from
    tagging the full sentence.

    Args:
        segmented_text: Iterable of token strings.

    Returns:
        A list with one dict per input token, holding the keys ``token``,
        ``pos``, ``lemma`` and one key per entry in ``MORPH_FEATURES``.
        Multi-valued features are joined with ``", "``; missing features (and
        all fields of a token that yields an empty Doc) are empty strings.
    """
    tokens = list(segmented_text)
    annotated_tokens = []
    # nlp.pipe batches the per-token documents instead of one call per token.
    for token, doc in zip(tokens, nlp.pipe(tokens)):
        # An empty string produces an empty Doc; avoid indexing into it.
        first = doc[0] if len(doc) else None
        annotated_token = {
            'token': token,
            'pos': str(first.pos_) if first is not None else '',
            'lemma': str(first.lemma_) if first is not None else '',
        }
        for key, feature in MORPH_FEATURES:
            if first is None:
                annotated_token[key] = ''
            else:
                annotated_token[key] = ', '.join(
                    first.morph.get(feature, default=[""])
                )
        annotated_tokens.append(annotated_token)
    return annotated_tokens


def save_annotations_as_json(annotated_text, filename):
    """Write *annotated_text* to *filename* as indented, UTF-8 encoded JSON."""
    with open(filename, 'w', encoding='utf-8') as json_file:
        json.dump(annotated_text, json_file, ensure_ascii=False, indent=4)


st.title("Annotation Tool")

# Add Materialize CSS (the markup string is currently empty, so nothing is injected)
st.markdown('', unsafe_allow_html=True)

text = st.text_area("Text")

if st.button("Annotate"):
    if text:
        segmented_text = tokenize_with_spacy(text)
        annotated_text = annotate_text(segmented_text)
        # Streamlit reruns the whole script on every interaction, so keep the
        # result in session state for the "Save" button's later run.
        st.session_state["annotated_text"] = annotated_text

        st.subheader("Segmented Text:")
        st.write(segmented_text)

        st.subheader("Annotated Text:")
        for token in annotated_text:
            # The values are already display strings; joining them again
            # would insert ", " between every character.
            for label, key in DISPLAY_FIELDS:
                st.write(f"{label}: {token[key]}")
            st.write("-----")
    else:
        st.warning("Please enter some text.")

if st.button("Save Modifications as JSON"):
    annotated_text = st.session_state.get("annotated_text")
    if annotated_text:
        # Generate a unique file name using a timestamp
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        filename = f'annotations_{timestamp}.json'
        save_annotations_as_json(annotated_text, filename)
        st.success(f"Annotations saved as {filename}")
    else:
        st.warning("Please annotate some text before saving.")

# Add Materialize JavaScript (the markup string is currently empty, so nothing is injected)
st.markdown('', unsafe_allow_html=True)