add feature: free memory
app.py CHANGED
@@ -8,30 +8,42 @@ from dearth_model import DearthForCausalLM
 
 import random
 import time
-model_states =
+import threading
+import asyncio
+
+
+
+tk = None
+model_states = None
+model = None
+lock_using_model = threading.Lock()
+recent_generate_timestamp = time.time()
+
+MODEL_LIVE_TIME = 15 * 60 # 15 minutes
+
+
+def load_model():
+    global tk, model_states, model
+
+    tk = transformers.AutoTokenizer.from_pretrained("./tk")
+    model_path = "./ts100-re2-h1-4000-model.pt"
+    states = torch.load(model_path, map_location="cpu")
+    model_states = states
+    unwanted_prefix_dueto_compile = '_orig_mod.'
+    unwanted_prefix_dueto_ddp = 'module.'
+    unwanted_prefix_dueto_ddp_compiled = 'module._orig_mod.'
+
+    for k,v in list(model_states.items()):
+        if k.startswith(unwanted_prefix_dueto_ddp_compiled):
+            new_key = k[len(unwanted_prefix_dueto_ddp_compiled):]
+            model_states[new_key] = model_states.pop(k)
+        elif k.startswith(unwanted_prefix_dueto_ddp):
+            new_key = k[len(unwanted_prefix_dueto_ddp):]
+            model_states[new_key] = model_states.pop(k)
+        elif k.startswith(unwanted_prefix_dueto_compile):
+            new_key = k[len(unwanted_prefix_dueto_compile):]
+            model_states[new_key] = model_states.pop(k)
+
     yml_path = "./ts100-re2-h1.yml"
     with open(yml_path, "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)['model']
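Note on the hunk above: the key-renaming loop in load_model() normalizes checkpoints saved from torch.compile'd and/or DDP-wrapped models, whose state-dict keys gain '_orig_mod.' and 'module.' prefixes. A minimal standalone sketch of the same cleanup (the helper name strip_state_dict_prefixes is illustrative, not part of this commit):

    def strip_state_dict_prefixes(state_dict):
        # DDP wraps keys with 'module.', torch.compile with '_orig_mod.';
        # a compiled DDP model carries both. Strip the longest matching prefix
        # so the plain model can load the weights.
        prefixes = ("module._orig_mod.", "module.", "_orig_mod.")
        cleaned = {}
        for key, value in state_dict.items():
            for prefix in prefixes:
                if key.startswith(prefix):
                    key = key[len(prefix):]
                    break
            cleaned[key] = value
        return cleaned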
@@ -43,8 +55,47 @@ def generate(input, num_more_tokens):
     model = DearthForCausalLM(config)
 
     model.load_state_dict(model_states)
+    model.eval()
+
+
+def main_free_mem():
+    event_loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(event_loop)
+    event_loop.call_later(MODEL_LIVE_TIME, free_mem)
+    event_loop.run_forever()
 
 
+def free_mem():
+    global tk, model_states, model, recent_generate_timestamp, lock_using_model
+    lock_using_model.acquire()
+    if time.time() - recent_generate_timestamp >= MODEL_LIVE_TIME and model is not None:
+        tk = None
+        model_states = None
+        model = None
+        print(f"free mem, {time.time()}")
+    lock_using_model.release()
+    try:
+        event_loop = asyncio.get_event_loop()
+        event_loop.call_later(MODEL_LIVE_TIME, free_mem)
+    except:
+        pass
+
+
+def generate(input, num_more_tokens):
+    global tk, model_states, model, recent_generate_timestamp, lock_using_model
+    lock_using_model.acquire()
+    time_start = time.time()
+    if model is None:
+        load_model()
+    elif time.time() - recent_generate_timestamp > MODEL_LIVE_TIME:
+        tk = None
+        model_states = None
+        model = None
+        load_model()
+    recent_generate_timestamp = time.time()
+    print(f"load model time: {time.time() - time_start}")
+
+    time_start = time.time()
     num_more_tokens = int(num_more_tokens)
     # print(input)
     input = input.strip()
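This hunk carries the whole feature: every touch of the model goes through lock_using_model, free_mem() drops the globals once the Space has sat idle for MODEL_LIVE_TIME seconds, and generate() lazily reloads them on the next request. The same idle-unload pattern can be written without a dedicated asyncio loop, using threading.Timer for the periodic check; a self-contained sketch under assumed names (_load_model is a stand-in for the real load, not this commit's code):

    import threading
    import time

    IDLE_SECONDS = 15 * 60            # unload after 15 idle minutes
    _lock = threading.Lock()
    _model = None                     # lazily created, freed when idle
    _last_used = time.time()

    def _load_model():
        return object()               # stand-in for the expensive load

    def _reaper():
        global _model
        with _lock:                   # never free while a request holds the model
            if _model is not None and time.time() - _last_used >= IDLE_SECONDS:
                _model = None         # drop the reference so GC can reclaim it
        timer = threading.Timer(IDLE_SECONDS, _reaper)
        timer.daemon = True           # do not keep the process alive on exit
        timer.start()                 # re-arm the periodic check

    def generate(prompt):
        global _model, _last_used
        with _lock:
            if _model is None:        # first call, or freed since the last one
                _model = _load_model()
            _last_used = time.time()
            return f"generated from {prompt!r}"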
@@ -52,7 +103,9 @@ def generate(input, num_more_tokens):
     input_ids = [tk.bos_token_id] + input_ids
     input_ids = torch.tensor(input_ids, dtype=torch.long).view(1, -1)
     # print(input_ids)
+    print(f"encode time: {time.time() - time_start}")
 
+    time_start = time.time()
     output_ids = input_ids.squeeze(0).tolist()
     for i in range(num_more_tokens):
         input = torch.tensor(output_ids, dtype=torch.long).view(1, -1)
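Small timing note: the instrumentation added here measures intervals with time.time(), which can jump if the wall clock is adjusted; time.perf_counter() is the monotonic choice for elapsed-time measurement. A hypothetical variant of the same lines:

    time_start = time.perf_counter()
    # ... encode ...
    print(f"encode time: {time.perf_counter() - time_start}")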
@@ -70,8 +123,12 @@ def generate(input, num_more_tokens):
     # print(output_ids)
     # print(tk.decode(output_ids))
     output_ids = output_ids[1:]
+    print(f"inference time: {time.time() - time_start}\n")
+
+    ret = tk.decode(output_ids)
+    lock_using_model.release()
+    return ret
 
-    return tk.decode(output_ids)
 
 example_input = ["Once upon a time, there was a little girl",
                  "John and Sarah were playing together in their backyard when",
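The ordering in this tail matters: tk.decode() still touches the tokenizer, so it has to run before the lock is given up, otherwise free_mem() could null out tk mid-decode. But release() sits only on the normal return path; if anything earlier in generate() raises, the lock is never released and every later request blocks on acquire(). A try/finally keeps both properties; an illustrative reshape of the same tail, not the commit's code:

    lock_using_model.acquire()
    try:
        # ... load model if needed, encode, sample ...
        ret = tk.decode(output_ids)   # still under the lock, tk cannot be freed here
    finally:
        lock_using_model.release()    # released on success and on error alike
    return ret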
@@ -86,21 +143,11 @@ The PPL on the validation set is 1.7, in comparison, the teacher model has a PPL
 """
 
 
-# demo = gr.Interface(
-#     fn=generate,
-#     title="Tinystories LM 11M",
-#     description=Description,
-#     inputs=[
-#         gr.Textbox(lines=5, label="Input Text", value=example_input[random.randint(0, len(example_input)-1)]),
-#         gr.Slider(16, 64, step=1.0, value=32, label="more tokens", info="")
-#     ],
-#     outputs="text"
-# )
-
-with open("./random_input_example.js" , "r") as f:
-    file_content = f.read()
-
 if __name__ == "__main__":
+    load_model()
+    thread_free_mem = threading.Thread(target=main_free_mem)
+    thread_free_mem.start()
+
     with gr.Blocks(
         title="Tinystories LM 11M",
         js="./random_input_example.js"
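On the startup wiring: main_free_mem() ends in run_forever(), and the thread is created without daemon=True, so the reaper thread will keep the Python process alive after the Gradio app shuts down. Marking it as a daemon lets the interpreter exit cleanly; an illustrative variant of the same block, not what the commit does:

    if __name__ == "__main__":
        load_model()    # warm start: the first request pays no load cost
        thread_free_mem = threading.Thread(target=main_free_mem, daemon=True)
        thread_free_mem.start()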