# Page header captured together with this file: "Spaces: Build error" (not part of the program).
from functools import lru_cache
from typing import List

from zemberek import TurkishMorphology
# Build the Turkish morphology analyzer exactly once at import time and share
# it as a module-level constant. The previous code constructed it twice under
# two different factory names; zemberek-python documents
# `create_with_defaults()` as the default factory.
MORPHOLOGY = TurkishMorphology.create_with_defaults()

# Backward-compatible alias for code that still refers to the lowercase name;
# it points at the same instance instead of building a second analyzer.
morphology = MORPHOLOGY
# Cache results for better performance: repeated words skip re-analysis.
@lru_cache(maxsize=4096)
def _first_analysis_is_verb(word: str) -> bool:
    """Return True when the first Zemberek analysis of *word* is tagged "Verb".

    Kept separate from the public wrapper so that only successful lookups are
    memoised: ``lru_cache`` does not store calls that raise.
    """
    analyses = MORPHOLOGY.analyze(word)
    # Take the first analysis by iteration instead of truth-testing and
    # subscripting the result: iteration works for plain lists as well as for
    # result objects that are only iterable.
    first = next(iter(analyses or ()), None)
    if first is None:
        return False
    # NOTE(review): the `primary_pos.value` attribute path is carried over
    # unchanged from the previous implementation -- confirm it against the
    # installed zemberek version.
    return "Verb" in first.primary_pos.value


def is_verb_or_verbform_zemberek(word: str) -> bool:
    """
    Check if a word is a verb or verbform using Zemberek analysis.

    Only the first analysis returned for the word is inspected.

    Args:
        word (str): The word to analyze

    Returns:
        bool: True if the first analysis is tagged as a verb. False if there
        is no analysis, the first analysis is not a verb, or the analysis
        raised (the error is printed rather than propagated).
    """
    try:
        return _first_analysis_is_verb(word)
    except Exception as e:
        # Deliberate best-effort: any analyzer failure (including an
        # unhashable argument rejected by the cache) counts as "not a verb".
        print(f"Error analyzing word '{word}': {str(e)}")
        return False
def filter_verbs(words: List[str]) -> List[str]:
    """
    Keep only the words that are recognised as verbs or verbforms.

    Args:
        words (List[str]): Candidate words, checked one by one in order

    Returns:
        List[str]: The accepted words, in their original order
    """
    verbs = [
        candidate
        for candidate in words
        if is_verb_or_verbform_zemberek(candidate)
    ]
    return verbs
def main():
    """Demo entry point: filter a fixed sample of Turkish words and print the verbs."""
    filtered_words = filter_verbs(["geliyor", "gitti", "yapmak", "kitap"])
    print(f"Verbs found: {filtered_words}")


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()