Spaces:
Runtime error
Runtime error
Vokturz
committed on
Commit
•
a0b9dac
1
Parent(s):
74c26d6
improved how memory is managed
Browse files - src/app.py +8 -2
src/app.py
CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
|
|
3 |
from utils import extract_from_url, get_model, calculate_memory
|
4 |
import plotly.express as px
|
5 |
import numpy as np
|
|
|
6 |
|
7 |
st.set_page_config(page_title='Can you run it? LLM version', layout="wide", initial_sidebar_state="expanded")
|
8 |
|
@@ -64,8 +65,13 @@ if not model_name:
|
|
64 |
|
65 |
model_name = extract_from_url(model_name)
|
66 |
if model_name not in st.session_state:
|
|
|
|
|
|
|
|
|
67 |
model = get_model(model_name, library="transformers", access_token=access_token)
|
68 |
-
st.session_state[model_name] =
|
|
|
69 |
|
70 |
|
71 |
gpu_vendor = st.sidebar.selectbox("GPU Vendor", ["NVIDIA", "AMD", "Intel"])
|
@@ -86,7 +92,7 @@ lora_pct = st.sidebar.slider("LoRa % trainable parameters", 0.1, 100.0, 2.0, ste
|
|
86 |
|
87 |
st.sidebar.dataframe(gpu_spec.T)
|
88 |
|
89 |
-
memory_table = pd.DataFrame(st.session_state[model_name])
|
90 |
memory_table['LoRA Fine-Tuning (GB)'] = (memory_table["Total Size (GB)"] +
|
91 |
(memory_table["Parameters (Billion)"]* lora_pct/100 * (16/8)*4)) * 1.2
|
92 |
|
|
|
3 |
from utils import extract_from_url, get_model, calculate_memory
|
4 |
import plotly.express as px
|
5 |
import numpy as np
|
6 |
+
import gc
|
7 |
|
8 |
st.set_page_config(page_title='Can you run it? LLM version', layout="wide", initial_sidebar_state="expanded")
|
9 |
|
|
|
65 |
|
66 |
model_name = extract_from_url(model_name)
|
67 |
if model_name not in st.session_state:
|
68 |
+
if 'actual_model' in st.session_state:
|
69 |
+
del st.session_state[st.session_state['actual_model']]
|
70 |
+
del st.session_state['actual_model']
|
71 |
+
gc.collect()
|
72 |
model = get_model(model_name, library="transformers", access_token=access_token)
|
73 |
+
st.session_state[model_name] = calculate_memory(model, ["float32", "float16/bfloat16", "int8", "int4"])
|
74 |
+
st.session_state['actual_model'] = model_name
|
75 |
|
76 |
|
77 |
gpu_vendor = st.sidebar.selectbox("GPU Vendor", ["NVIDIA", "AMD", "Intel"])
|
|
|
92 |
|
93 |
st.sidebar.dataframe(gpu_spec.T)
|
94 |
|
95 |
+
memory_table = pd.DataFrame(st.session_state[model_name]).set_index('dtype')
|
96 |
memory_table['LoRA Fine-Tuning (GB)'] = (memory_table["Total Size (GB)"] +
|
97 |
(memory_table["Parameters (Billion)"]* lora_pct/100 * (16/8)*4)) * 1.2
|
98 |
|