def tokenize(buffer: str) -> list[str]:
    """Split a snippet of Python source into lexical tokens with a hand-written DFA.

    The automaton walks *buffer* one character at a time.  Transition keys are:
      - a literal character (' ', 'f', '"', "'") when the state cares about it,
      - 'split'  for any one-character delimiter/operator (see ``split`` below),
      - 'any'    as the fallback for every other character.

    States 17 and 18 are the accepting states: 17 means the delimiter that
    terminated the token is itself emitted as a token ("consume the split"),
    18 means the terminating character belongs to the next token.

    Args:
        buffer: Source text to tokenize.

    Returns:
        The tokens in order of appearance; delimiters/operators are emitted
        as their own one-character tokens.

    Raises:
        RuntimeError: if the input ends inside an unterminated string literal
            or the DFA falls into the trap state (-1).
    """
    # One-character tokens: brackets, punctuation, arithmetic operators, newline.
    split = {'(', ')', '{', '}', '[', ']', ',', ':', '+', '-', '*', '/', '%', '=', '\n'}
    dfa_table = {
        -1: {'any': -1},  # trap state: input is rejected
        0: {' ': 1, 'any': 5, 'split': 17, 'f': 7, '"': 8, "'": 9},
        # States 1-4 count leading spaces so a 4-space indent forms its own token.
        1: {' ': 2, 'f': 7, 'any': 5},
        2: {' ': 3, 'f': 7, 'any': 5},
        3: {' ': 4, 'f': 7, 'any': 5},
        4: {'any': 18},
        # States 5-6: an ordinary word token, optionally followed by spaces.
        5: {' ': 6, 'any': 5, 'split': 17},
        6: {' ': 6, 'any': 18, 'split': 17},
        7: {'any': 5, '"': 8, "'": 9},   # saw 'f': possible f-string prefix
        8: {'"': 16, 'any': 8},          # inside a double-quoted string
        9: {"'": 11, 'any': 10},         # just opened a single-quoted string
        10: {"'": 16, 'any': 10},        # inside a single-quoted string
        11: {' ': 16, "'": 12, 'any': -1, 'split': 17},  # saw '': empty or triple-quote start
        12: {"'": 13, 'any': 12},        # inside a '''...''' string
        13: {"'": 14, 'any': -1},
        14: {"'": 15, 'any': -1},
        15: {' ': 16, 'split': 17, 'any': -1},  # closed a triple-quoted string
        16: {' ': 16, 'any': -1, 'split': 17, '"': 18, "'": 18},  # string literal complete
        17: {'any': -1},  # final: consume split as token
        18: {'any': -1},  # final: not consume split as token
    }
    finals = (17, 18)
    # Hitting end-of-input in one of these states means the token was never
    # finished (unterminated string literal) or the input was already rejected.
    rejecting = (-1, 8, 9, 10, 12, 13, 14)

    tokens: list[str] = []
    cursor = 0
    while cursor < len(buffer):
        state = 0
        temp = ''  # characters accumulated for the current token
        while cursor < len(buffer):
            ch = buffer[cursor]
            if ch in split:
                ch = 'split'
            if ch not in dfa_table[state]:
                ch = 'any'
            state = dfa_table[state][ch]
            if state in finals:
                break
            temp += buffer[cursor]
            cursor += 1
        # BUG FIX: the original raised for every non-final end state except 5,
        # so valid inputs such as "'abc'" (end state 16), a bare "f" (state 7),
        # or trailing whitespace (states 1-4, 6) crashed.  Only genuinely
        # unterminated strings and the trap state are errors.
        if state not in finals and state in rejecting:
            raise RuntimeError(f"Rejected at state {state}")
        if temp != '':
            tokens.append(temp.strip() if temp != ' ' else temp)
        if state == finals[0]:
            # State 17: the delimiter itself is a token; emit and consume it.
            tokens.append(buffer[cursor])
            cursor += 1
    return tokens


def main() -> None:
    """Build and launch the Gradio demo UI for the tokenizer."""
    # Imported lazily so the tokenizer stays importable (and testable)
    # without gradio installed, and so importing this module has no
    # side effects.
    import gradio as gr

    interface = gr.Interface(
        fn=tokenize,
        title="Tokenizer",
        description="Tokenize the python code",
        theme="compact",
        inputs=gr.TextArea(label="Python code", value="print('Hello World!!')"),
        outputs=gr.TextArea(label="Tokenize output"),
    )
    interface.launch()


if __name__ == "__main__":
    main()