0xrushi committed on
Commit
8b8062c
1 Parent(s): 7d40134
.streamlit/secrets.toml ADDED
@@ -0,0 +1 @@
 
 
1
# SECURITY: an API token was previously committed on this line in plain text.
# Treat that token as compromised and revoke it. Supply the real value through
# the deployment's secrets store and keep this file out of version control.
# Expected format: "Bearer <token>" (the app reads it as st.secrets['api_key']).
api_key = "Bearer <YOUR_HF_API_TOKEN>"
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_chat import message
3
+ import requests
4
+ from transformers import AutoModelWithLMHead, AutoTokenizer
5
+
6
# Tokenizer is fetched by its hub name; the model weights are loaded from the
# local 'output-small-save' directory.
# NOTE(review): both loads run at module top level, so they execute each time
# this script is run - confirm whether caching is wanted.
tokenizer = AutoTokenizer.from_pretrained('microsoft/DialoGPT-small')
model = AutoModelWithLMHead.from_pretrained('output-small-save')

# Page metadata is set before any other Streamlit call in this script.
st.set_page_config(page_title="COVID Doctor using DialoGPT", page_icon=":robot:")

# NOTE(review): API_URL and headers are not referenced by the code visible in
# this file (query() runs the local model) - confirm before removing.
API_URL = "https://api-inference.huggingface.co/models/microsoft/DialoGPT-small"
headers = {"Authorization": st.secrets['api_key']}

st.header("Hello - Welcome to COVID Doctor using DialoGPT")
st.markdown("[Github](https://github.com/rushic24/DialoGPT-Finetune)")

# Create the two chat-history lists on first use: 'generated' holds the bot
# replies and 'past' holds the user messages.
for _history_key in ('generated', 'past'):
    if _history_key not in st.session_state:
        st.session_state[_history_key] = []
25
+
26
def query(payload):
    """Generate a reply to the user's message with the locally loaded model.

    Args:
        payload: Either a dict shaped like
            ``{"inputs": {"text": <str>, ...}, "parameters": {...}}`` (what
            the caller in this file passes) or any object exposing the
            message as a ``text`` attribute.

    Returns:
        dict: ``{"generated_text": <str>}`` holding only the newly generated
        tokens, decoded with special tokens stripped.

    Raises:
        KeyError: If a dict payload lacks ``"inputs"`` or ``"text"``.
    """
    # The caller passes a nested dict; attribute access on a dict raises
    # AttributeError, so read the message by key in that case.
    if isinstance(payload, dict):
        text = payload["inputs"]["text"]
    else:
        text = payload.text

    # NOTE(review): payload["parameters"] and the history lists are not used
    # here; only the current message is fed to the model.
    bot_input_ids = tokenizer.encode(text + tokenizer.eos_token, return_tensors='pt')

    chat_history_ids = model.generate(
        bot_input_ids,
        max_length=100,
        pad_token_id=tokenizer.eos_token_id,
        no_repeat_ngram_size=3,
        do_sample=True,
        top_k=10,
        top_p=0.7,
        temperature=0.8,
    )

    # Slice off the prompt tokens so only the generated continuation is decoded.
    reply_ids = chat_history_ids[:, bot_input_ids.shape[-1]:][0]
    output = tokenizer.decode(reply_ids, skip_special_tokens=True)
    return {"generated_text": output}
40
+
41
def get_text():
    """Render the chat text box and return its current contents.

    The box is labelled "You: ", is pre-filled with a default greeting, and
    is registered under the widget key "input".
    """
    return st.text_input("You: ", "Hello, how are you?", key="input")
44
+
45
+
46
# Read whatever is currently in the chat text box.
user_input = get_text()

if user_input:
    # Bundle the running history with the new message, then ask for a reply.
    conversation = {
        "past_user_inputs": st.session_state.past,
        "generated_responses": st.session_state.generated,
        "text": user_input,
    }
    output = query({
        "inputs": conversation,
        "parameters": {"repetition_penalty": 1.33},
    })

    # Record both sides of the exchange so they persist in session state.
    st.session_state.past.append(user_input)
    st.session_state.generated.append(output["generated_text"])

# Draw the conversation newest-first; an empty history draws nothing.
# Each widget gets a key derived from its position in the history lists.
for i in reversed(range(len(st.session_state['generated']))):
    message(st.session_state["generated"][i], key=str(i))
    message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')
64
+
output-small-save/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/DialoGPT-small",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "conversational": {
31
+ "max_length": 1000
32
+ }
33
+ },
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.19.2",
36
+ "use_cache": true,
37
+ "vocab_size": 50257
38
+ }
output-small-save/eval_results.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ perplexity = tensor(14.1139)
output-small-save/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
output-small-save/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c5daa96e110d2152fc73fe678e43840bc3dfde47bebef78509e547998c6642c
3
+ size 510396521
output-small-save/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}}
output-small-save/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
output-small-save/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "microsoft/DialoGPT-small", "errors": "replace", "pad_token": null, "add_bos_token": false, "tokenizer_class": "GPT2Tokenizer"}
output-small-save/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd284c2fddc21cfdc730eadda9ac9e85effb9459b9eed4096cca7edb48d7cd41
3
+ size 1327
output-small-save/vocab.json ADDED
The diff for this file is too large to render. See raw diff