5to9 committed
Commit 1c109f3 · Parent: 20d9962

0.20 subprocess implementing flash_attn

Files changed (2):
  1. app.py +2 -1
  2. requirements.txt +1 -2
app.py CHANGED
@@ -4,12 +4,13 @@ import torch
  import gradio as gr
  import logging
  from huggingface_hub import login
- from flash_attn.flash_attention import FlashAttention

  import os
  import traceback

  from threading import Thread
+ import subprocess
+ subprocess.run('pip install -U flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)

  # Status: Breaks during generation
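The added subprocess.run call installs flash-attn at runtime instead of pinning it in requirements.txt; FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE tells the flash-attn build to skip compiling the CUDA extension from source. Below is a minimal sketch of the same pattern; the install_flash_attn wrapper, the merge with os.environ, and check=True are illustrative additions and are not part of the committed code.

import os
import subprocess

def install_flash_attn() -> None:
    """Install flash-attn at runtime (illustrative wrapper, not in the commit)."""
    # Keep the parent environment and add the flag that skips the CUDA source build;
    # the committed call passes only the single variable via env={...}.
    env = {**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}
    subprocess.run(
        "pip install -U flash-attn --no-build-isolation",
        env=env,
        shell=True,
        check=True,  # fail loudly here rather than later at import time
    )

install_flash_attn()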
requirements.txt CHANGED
@@ -5,5 +5,4 @@ accelerate==0.33.0
  sentencepiece==0.2.0
  spaces==0.29.2
  gradio==4.39.0
- bitsandbytes==0.43.2
- flash-attn
+ bitsandbytes==0.43.2
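With the runtime install in place, flash-attn is dropped from requirements.txt. A hypothetical guard (not part of the commit) could confirm that the runtime install actually produced an importable package before the app relies on it:

import importlib.util

# Hypothetical check, not in the committed app.py: verify the runtime
# flash-attn install succeeded before relying on it.
HAS_FLASH_ATTN = importlib.util.find_spec("flash_attn") is not None
print(f"flash_attn available: {HAS_FLASH_ATTN}")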