rphrp1985 committed
Commit 58a3a72
1 Parent(s): 23ab0e2

Update app.py

Files changed (1):
  1. app.py +30 -0

app.py CHANGED
@@ -18,11 +18,29 @@ subprocess.run(
 
     shell=True,
 )
+
+subprocess.run(
+    "pip install pynvml gpustat",
+
+    shell=True,
+)
 def print_s1ystem():
     ram_info = psutil.virtual_memory()
     print(f"Total RAM: {ram_info.total / (1024.0 ** 3)} GB")
     print(f"Available RAM: {ram_info.available / (1024.0 ** 3)} GB")
 
+import psutil
+import platform
+import gpustat
+from datetime import datetime
+
+def get_size(bytes, suffix="B"):
+    factor = 1024
+    for unit in ["", "K", "M", "G", "T", "P"]:
+        if bytes < factor:
+            return f"{bytes:.2f}{unit}{suffix}"
+        bytes /= factor
+
 subprocess.run(
     "pip install flash-attn --no-build-isolation",
     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
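Note on the hunk above: the `pip install pynvml gpustat` call runs at module import time, before the `import gpustat` statement that follows it, so the import should resolve on a fresh container (the `datetime` import appears unused in this diff). The `get_size` helper climbs a 1024-based unit ladder until the value drops below 1024. A standalone sketch of its behavior; the sample byte counts are illustrative, not taken from app.py:

    def get_size(bytes, suffix="B"):
        factor = 1024
        for unit in ["", "K", "M", "G", "T", "P"]:
            if bytes < factor:
                return f"{bytes:.2f}{unit}{suffix}"
            bytes /= factor

    print(get_size(1023))           # 1023.00B
    print(get_size(1536))           # 1.50KB
    print(get_size(8 * 1024 ** 3))  # 8.00GB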
 
@@ -84,6 +102,18 @@ def respond(
     top_p,
 ):
     print_s1ystem()
+    uname = platform.uname()
+    print(f"System: {uname.system}")
+    print(f"Node Name: {uname.node}")
+    print(f"Release: {uname.release}")
+    print(f"Version: {uname.version}")
+    print(f"Machine: {uname.machine}")
+    print(f"Processor: {uname.processor}")
+
+    # GPU Information
+    gpu_stats = gpustat.GPUStatCollection.new_query()
+    for gpu in gpu_stats:
+        print(f"GPU: {gpu.name} Mem Free: {get_size(gpu.memory_free)} Mem Used: {get_size(gpu.memory_used)} Mem Total: {get_size(gpu.memory_total)}")
     messages = [{"role": "user", "content": "Hello, how are you?"}]
     input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
     ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
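One caveat on the GPU readout added in this hunk: gpustat reports memory_free, memory_used, and memory_total in megabytes rather than bytes, so feeding them straight into the byte-oriented get_size understates the unit (a card with 16384 MB total would print as 16.00KB). A minimal sketch that scales the fields itself, assuming gpustat is installed and an NVIDIA driver is visible:

    import gpustat

    for gpu in gpustat.GPUStatCollection.new_query():
        # gpustat's memory fields are megabytes; divide by 1024 for GiB
        print(f"GPU: {gpu.name} "
              f"Mem Free: {gpu.memory_free / 1024:.2f}GiB "
              f"Mem Used: {gpu.memory_used / 1024:.2f}GiB "
              f"Mem Total: {gpu.memory_total / 1024:.2f}GiB")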