eaglelandsonce
committed on
Commit
•
8dc67de
1
Parent(s):
4c13885
Update pages/15_Plus_Detokenizer.py
Browse files
- pages/15_Plus_Detokenizer.py +0 -14
pages/15_Plus_Detokenizer.py
CHANGED
@@ -145,20 +145,6 @@ if st.button("Detokenize"):
|
|
145 |
st.write("Detokenized sentence:")
|
146 |
st.write(detokenized_sentence)
|
147 |
|
148 |
-
# Tokenization section
|
149 |
-
st.header("Tokenization")
|
150 |
-
sentence = st.text_input("Enter a sentence to tokenize:", "cr8 lg")
|
151 |
-
|
152 |
-
def format_token_ids(token_ids):
|
153 |
-
formatted_ids = [str(token_id).zfill(5) for token_id in token_ids]
|
154 |
-
return ''.join(formatted_ids)
|
155 |
-
|
156 |
-
if st.button("Tokenize"):
|
157 |
-
input_ids = tokenizer(sentence, return_tensors='pt').input_ids
|
158 |
-
token_ids_list = input_ids[0].tolist()
|
159 |
-
formatted_token_ids = format_token_ids(token_ids_list)
|
160 |
-
st.write("Tokenized input IDs (formatted):")
|
161 |
-
st.write(formatted_token_ids)
|
162 |
|
163 |
# Load the model
|
164 |
gpt2 = AutoModelForCausalLM.from_pretrained('gpt2')
|
|
|
145 |
st.write("Detokenized sentence:")
|
146 |
st.write(detokenized_sentence)
|
147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
# Load the model
|
150 |
gpt2 = AutoModelForCausalLM.from_pretrained('gpt2')
|