山越貴耀 committed
Commit a962672 · 1 parent: d1ca986
updated app
app.py CHANGED
@@ -23,13 +23,13 @@ def load_model(model_name):
     model.eval()
     return tokenizer,model

-@st.cache
+@st.cache(show_spinner=False)
 def load_data(sentence_num):
     df = pd.read_csv('tsne_out.csv')
     df = df.loc[lambda d: (d['sentence_num']==sentence_num)&(d['iter_num']<1000)]
     return df

-@st.cache
+@st.cache(show_spinner=False)
 def mask_prob(model,mask_id,sentences,position,temp=1):
     masked_sentences = sentences.clone()
     masked_sentences[:, position] = mask_id
@@ -37,7 +37,7 @@ def mask_prob(model,mask_id,sentences,position,temp=1):
     logits = model(masked_sentences)[0]
     return F.log_softmax(logits[:, position] / temp, dim = -1)

-@st.cache
+@st.cache(show_spinner=False)
 def sample_words(probs,pos,sentences):
     candidates = [[tokenizer.decode([candidate]),torch.exp(probs)[0,candidate].item()]
                   for candidate in torch.argsort(probs[0],descending=True)[:10]]
@@ -53,7 +53,7 @@ def run_chains(tokenizer,model,mask_id,input_text,num_steps):
     sentence = init_sent.clone()
     data_list = []
     st.sidebar.write('Generating samples...')
-    st.sidebar.write('This takes ~
+    st.sidebar.write('This takes ~1 min for 1000 steps with ~10 token sentences')
     chain_progress = st.sidebar.progress(0)
     for step_id in range(num_steps):
         chain_progress.progress((step_id+1)/num_steps)
@@ -66,6 +66,7 @@ def run_chains(tokenizer,model,mask_id,input_text,num_steps):
 @st.cache(suppress_st_warning=True,show_spinner=False)
 def run_tsne(chain):
     st.sidebar.write('Running t-SNE...')
+    st.sidebar.write('This takes ~1 min for 1000 steps with ~10 token sentences')
     chain = chain.assign(cleaned_sentence=chain.sentence.str.replace(r'\[CLS\] ', '',regex=True).str.replace(r' \[SEP\]', '',regex=True))
     sentence_model = load_sentence_model()
     sentence_embeddings = sentence_model.encode(chain.cleaned_sentence.to_list(), show_progress_bar=False)
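
For context on the change itself: the bare @st.cache decorators become @st.cache(show_spinner=False), which hides Streamlit's "Running ..." spinner on cached calls, and run_tsne additionally passes suppress_st_warning=True because it writes progress text to the sidebar from inside a cached function. Below is a minimal sketch of that pattern using the legacy Streamlit st.cache API; load_data is taken from the diff, while slow_step is an illustrative stand-in rather than a function from the app.

# Minimal sketch of the caching pattern in app.py (slow_step is illustrative only).
import pandas as pd
import streamlit as st


@st.cache(show_spinner=False)
def load_data(sentence_num):
    # Cached per sentence_num; show_spinner=False suppresses the default spinner.
    df = pd.read_csv('tsne_out.csv')
    return df.loc[lambda d: (d['sentence_num'] == sentence_num) & (d['iter_num'] < 1000)]


@st.cache(suppress_st_warning=True, show_spinner=False)
def slow_step(num_steps):
    # Calling Streamlit APIs (e.g. st.sidebar.write) inside a cached function
    # triggers a CachedStFunctionWarning unless suppress_st_warning=True is set.
    st.sidebar.write('Working...')
    progress = st.sidebar.progress(0)
    for step_id in range(num_steps):
        progress.progress((step_id + 1) / num_steps)
    return num_steps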