Andreas Sünder committed on
Commit
fce98ea
0 Parent(s):

Add files from previous repo

Browse files
Files changed (7) hide show
  1. .gitattributes +35 -0
  2. .gitignore +1 -0
  3. README.md +14 -0
  4. app.py +91 -0
  5. datasets/lda_poe_topics.csv +40 -0
  6. model.py +25 -0
  7. requirements.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__/
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Topic Labelling Playground
3
+ emoji: 🚀
4
+ colorFrom: green
5
+ colorTo: green
6
+ sdk: streamlit
7
+ sdk_version: 1.27.2
8
+ app_file: app.py
9
+ pinned: false
10
+ license: other
11
+ models:
12
+ - textminr/llama-2-7b-4bit-gptq
13
+ - textminr/llama-2-7b-chat-4bit-gptq
14
+ ---
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import list_models
2
+ import streamlit as st
3
+ from model import ReplicateModel
4
+
5
+ import os
6
+ import pandas as pd
7
+
8
# Directory, relative to the working directory, that holds the topic-set CSV files.
DATASETS_PATH = 'datasets'

# Models offered in the UI, keyed by the name shown in the model select box.
# The Replicate identifier pins an exact model version (the hash after the colon).
models = {
    'mistral': ReplicateModel('mistralai/mistral-7b-instruct-v0.1:83b6a56e7c828e667f21fd596c338fd4f0039b46bcfa18d973e8e70e455fda70'),
}

# Prompt templates, keyed by the name shown in the prompt select box.
# "[KEYWORDS]" is replaced with the comma-joined keywords of the selected
# topic row before the prompt is sent to the model.
prompts = {
    'simple_prompt':
'''
I have a topic that is described by the following keywords: [KEYWORDS]

Based on the information above, extract a short topic label in the following format:
topic: <topic label>
''',
    # NOTE: the UI has a free-text branch for a 'custom_prompt' key; it stays
    # unreachable while this entry is disabled.
    # 'custom_prompt': ''
}

# Topic sets offered in the UI, mapping display name -> CSV path.
topicsets = {
    'lda_poe_topics': os.path.join(DATASETS_PATH, 'lda_poe_topics.csv'),
}
28
+
29
@st.cache_data(show_spinner=False)
def get_available_models():
    """Return the names of the models that can be selected in the UI.

    Returns:
        A list with the keys of the module-level ``models`` mapping.
    """
    # Hub discovery via list_models(author='textminr') is currently disabled;
    # only the locally configured models are offered.
    #
    # st.cache_data serializes the return value with pickle, and a dict_keys
    # view cannot be pickled, so hand back a plain list instead.
    return list(models)
33
+
34
@st.cache_resource(show_spinner='Loading model...')
def load_model(model_name: str):
    """Look up *model_name* in ``models`` and return the result of its ``load()``.

    The result is cached by Streamlit as a shared resource, so repeated
    script reruns with the same name reuse the same loaded object.

    Raises:
        KeyError: if *model_name* is not a key of ``models``.
    """
    selected = models[model_name]
    return selected.load()
39
+
40
# Page chrome: browser tab title/icon, wide layout and the visible heading.
st.set_page_config(page_title='TL playground', page_icon='🚀', layout='wide')
st.title('🚀 Topic Labelling playground')

# On screens at least 1500px wide, cap the main container at this percentage
# of the viewport by injecting a small stylesheet.
percentage_width_main = 70
wide_screen_css = f'''<style>
@media only screen and (min-width: 1500px) {{
.appview-container .main .block-container{{
max-width: {percentage_width_main}%;
}}
}}
</style>
'''
st.markdown(wide_screen_css, unsafe_allow_html=True)
55
+
56
# Two-column layout: model/dataset selection on the left, prompt and output
# on the right.
col1, col2 = st.columns(2, gap='medium')

# Defaults keep these names bound even when the user has not selected
# anything yet (both select boxes start empty because of index=None).
model = None
sel_dataset = None
sel_row_index = None

sel_model_name = col1.selectbox('Select a model', models, index=None, placeholder='Select a model')
if sel_model_name:
    model = load_model(sel_model_name)

sel_dataset_name = col1.selectbox('Select a dataset', topicsets.keys(), index=None)
if sel_dataset_name:
    # The topic CSVs have no header row: column 0 is the topic name and the
    # remaining columns are its keywords.
    sel_dataset = pd.read_csv(topicsets[sel_dataset_name], header=None)
    col1.dataframe(sel_dataset)

    sel_row_index = col1.selectbox('Select a row', sel_dataset.index)

sel_prompt = col2.selectbox('Select a prompt', prompts.keys())
if sel_prompt != 'custom_prompt':
    # Predefined prompt: show it read-only and use it as is.
    col2.code(prompts[sel_prompt], language='text')
    sel_prompt_text = prompts[sel_prompt]
else:
    # Free-text prompt: render the editor in the right column, next to its
    # caption and the other prompt widgets, instead of below both columns.
    sel_prompt_text = col2.text_area('Custom prompt', height=200)
    col2.caption('Make sure to use "[KEYWORDS]" to indicate where the keywords should be inserted.')
76
+
77
# The button stays disabled until both a model and a dataset are selected, so
# `model`, `sel_dataset` and `sel_row_index` are set whenever it can be clicked.
btn_generate = col2.button('Generate', disabled=(sel_model_name is None or sel_dataset_name is None))
if btn_generate:
    # Skip column 0 (the topic name) and keep only the keyword cells. Missing
    # cells (NaN from rows with fewer keywords) are dropped and every value is
    # converted to str so that join() cannot fail on non-string cells.
    keyword_cells = sel_dataset.iloc[sel_row_index].iloc[1:].dropna()
    keywords = ','.join(str(cell) for cell in keyword_cells)

    # Show a spinner in the right column while the model is queried.
    placeholder = col2.empty()
    with placeholder, st.spinner('Generating...'):
        prompt = sel_prompt_text.replace('[KEYWORDS]', keywords)
        result = model.generate(prompt)

    # Present the answer as a chat message, with the keywords used as caption.
    message = col2.chat_message("ai")
    message.write(result)
    message.caption('Keywords: ' + keywords)
90
+
91
+
datasets/lda_poe_topics.csv ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Topic 1,howev,even,found,period,altogeth,imposs,precis,perhap,short,prove
2
+ Topic 2,water,wind,black,vessel,larg,sea,river,white,heavi,float
3
+ Topic 3,beauti,shall,name,whose,new,tree,angel,find,flower,fair
4
+ Topic 4,will,say,mean,rememb,speak,know,none,jupit,dare,limb
5
+ Topic 5,call,may,doubt,matter,question,exist,mind,beyond,term,now
6
+ Topic 6,one,everi,thing,moment,just,instant,anoth,almost,inde,frame
7
+ Topic 7,point,must,certain,fact,case,view,captain,given,consid,import
8
+ Topic 8,immedi,found,near,saw,state,now,present,sever,discov,approach
9
+ Topic 9,made,make,way,attempt,get,forc,difficulti,account,escap,effort
10
+ Topic 10,two,three,feet,year,hour,half,four,minut,thousand,hundr
11
+ Topic 11,whole,form,earth,small,around,figur,stood,portion,surfac,vast
12
+ Topic 12,upon,eye,look,fell,face,floor,fall,spot,depend,tabl
13
+ Topic 13,one,hope,power,leav,follow,entir,scarc,consider,pass,mad
14
+ Topic 14,man,old,die,away,live,ladi,young,busi,gentleman,pass
15
+ Topic 15,time,first,long,second,keep,thought,circumst,instanc,letter,care
16
+ Topic 16,much,evid,gave,although,truth,express,mean,seem,sens,felt
17
+ Topic 17,like,light,ever,life,dream,dark,moon,wild,deep,appear
18
+ Topic 18,effect,appear,step,upon,except,found,caus,event,discov,produc
19
+ Topic 19,day,night,last,long,continu,cours,late,arriv,bring,raven
20
+ Topic 20,lie,side,full,went,length,peter,augustus,lay,deck,board
21
+ Topic 21,far,air,seem,heaven,high,sun,breath,grew,atmospher,rise
22
+ Topic 22,now,becam,soon,distinct,object,absolut,necessari,appar,render,felt
23
+ Topic 23,poem,poe,origin,work,first,read,poet,note,paper,line
24
+ Topic 24,great,degre,island,sea,reach,set,land,strong,measur,sight
25
+ Topic 25,natur,feel,excit,interest,true,differ,intens,result,principl,peculiar
26
+ Topic 26,less,part,appear,thus,regard,posit,person,number,greater,mention
27
+ Topic 27,hand,left,place,take,took,right,arm,hold,put,end
28
+ Topic 28,without,bodi,reason,believ,suppos,corps,becom,madam,murder,least
29
+ Topic 29,still,idea,think,fanci,human,dead,possess,impress,smile,wonder
30
+ Topic 30,death,voic,sound,bell,heard,ear,low,without,fire,proceed
31
+ Topic 31,word,let,thus,attent,utter,spoke,alon,gone,repeat,scene
32
+ Topic 32,head,turn,came,upon,back,extrem,sudden,come,near,round
33
+ Topic 33,mani,manner,charact,particular,subject,singular,success,weather,alway,articl
34
+ Topic 34,littl,mere,remain,purpos,better,longer,suffer,use,wish,home
35
+ Topic 35,now,thou,friend,sure,inde,say,repli,art,fear,sir
36
+ Topic 36,well,good,among,observ,world,general,known,men,may,knew
37
+ Topic 37,said,might,possibl,thought,taken,king,still,bird,yes,dupin
38
+ Topic 38,open,door,within,close,room,chamber,wall,enter,main,box
39
+ Topic 39,never,can,noth,see,yet,seen,even,done,know,eye
40
+ Topic 40,love,thi,heart,soul,spirit,thee,god,shadow,within,passion
model.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ import replicate
3
+
4
class GenericModel(ABC):
    """Abstract base class for the text-generation backends used by the app.

    Inheriting from ``ABC`` makes the ``@abstractmethod`` markers effective:
    the base class itself, and any subclass that does not implement both
    ``load`` and ``generate``, cannot be instantiated.

    Attributes:
        name: Identifier of the underlying model, as passed to the constructor.
    """

    def __init__(self, name: str):
        """Store the model identifier *name*."""
        self.name = name

    @abstractmethod
    def load(self):
        """Prepare the backend and return the object to generate text with."""

    @abstractmethod
    def generate(self, prompt: str):
        """Generate and return the model output for *prompt*."""
15
+
16
class ReplicateModel(GenericModel):
    """Model backend that runs prompts through ``replicate.run``."""

    def __init__(self, name: str):
        """Store *name*, the identifier later passed to ``replicate.run``."""
        super().__init__(name)

    def load(self):
        """Return this instance; no local loading step is performed here."""
        return self

    def generate(self, prompt: str):
        """Run *prompt* through the model and return the concatenated output.

        The pieces yielded by ``replicate.run`` are joined, in order, into a
        single string.
        """
        chunks = replicate.run(self.name, input={'prompt': prompt})
        return ''.join(chunks)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ transformers @ git+https://github.com/huggingface/transformers
2
+ replicate
3
+ auto_gptq