Spaces: Sleeping
0.20 subprocess implementing flash_attn
Browse files
- app.py +2 -1
- requirements.txt +1 -2
app.py
CHANGED
@@ -4,12 +4,13 @@ import torch
|
|
4 |
import gradio as gr
|
5 |
import logging
|
6 |
from huggingface_hub import login
|
7 |
-
from flash_attn.flash_attention import FlashAttention
|
8 |
|
9 |
import os
|
10 |
import traceback
|
11 |
|
12 |
from threading import Thread
|
|
|
|
|
13 |
|
14 |
# Status: Breaks during generation
|
15 |
|
|
|
4 |
import gradio as gr
|
5 |
import logging
|
6 |
from huggingface_hub import login
|
|
|
7 |
|
8 |
import os
|
9 |
import traceback
|
10 |
|
11 |
from threading import Thread
|
12 |
+
import subprocess
|
13 |
+
subprocess.run('pip install -U flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
14 |
|
15 |
# Status: Breaks during generation
|
16 |
|
requirements.txt
CHANGED
@@ -5,5 +5,4 @@ accelerate==0.33.0
|
|
5 |
sentencepiece==0.2.0
|
6 |
spaces==0.29.2
|
7 |
gradio==4.39.0
|
8 |
-
bitsandbytes==0.43.2
|
9 |
-
flash-attn
|
|
|
5 |
sentencepiece==0.2.0
|
6 |
spaces==0.29.2
|
7 |
gradio==4.39.0
|
8 |
+
bitsandbytes==0.43.2
|
|