eaglelandsonce committed on
Commit
8dc67de
1 Parent(s): 4c13885

Update pages/15_Plus_Detokenizer.py

Browse files
Files changed (1) hide show
  1. pages/15_Plus_Detokenizer.py +0 -14
pages/15_Plus_Detokenizer.py CHANGED
@@ -145,20 +145,6 @@ if st.button("Detokenize"):
145
  st.write("Detokenized sentence:")
146
  st.write(detokenized_sentence)
147
 
148
- # Tokenization section
149
- st.header("Tokenization")
150
- sentence = st.text_input("Enter a sentence to tokenize:", "cr8 lg")
151
-
152
- def format_token_ids(token_ids):
153
- formatted_ids = [str(token_id).zfill(5) for token_id in token_ids]
154
- return ''.join(formatted_ids)
155
-
156
- if st.button("Tokenize"):
157
- input_ids = tokenizer(sentence, return_tensors='pt').input_ids
158
- token_ids_list = input_ids[0].tolist()
159
- formatted_token_ids = format_token_ids(token_ids_list)
160
- st.write("Tokenized input IDs (formatted):")
161
- st.write(formatted_token_ids)
162
 
163
  # Load the model
164
  gpt2 = AutoModelForCausalLM.from_pretrained('gpt2')
 
145
  st.write("Detokenized sentence:")
146
  st.write(detokenized_sentence)
147
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  # Load the model
150
  gpt2 = AutoModelForCausalLM.from_pretrained('gpt2')