Spaces:
Running
Running
Update utils.py
Browse files
utils.py
CHANGED
@@ -11,6 +11,72 @@ from sshtunnel import SSHTunnelForwarder
|
|
11 |
# Port number parsed from the LOCAL_PORT environment variable.
# NOTE: os.getenv returns None when the variable is unset, so int() raises
# TypeError when this line runs without LOCAL_PORT defined.
local_port = int(os.getenv('LOCAL_PORT'))
|
12 |
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
class NoAudioException(Exception):
    """Exception type named for a missing-audio condition.

    Adds no behaviour on top of ``Exception``; it exists so callers can
    catch this case specifically. Where it is raised is not visible in
    this excerpt.
    """
|
16 |
|
|
|
11 |
# Port number parsed from the LOCAL_PORT environment variable.
# NOTE: os.getenv returns None when the variable is unset, so int() raises
# TypeError when this line runs without LOCAL_PORT defined.
local_port = int(os.getenv('LOCAL_PORT'))
|
12 |
|
13 |
|
14 |
+
# Instruction prompts that are not tied to any particular audio sample.
GENERAL_INSTRUCTIONS = [
    "Please transcribe this speech.",
    "Please summarise this speech.",
]
|
18 |
+
|
19 |
+
|
20 |
+
# Maps each audio sample key to the list of instruction prompts offered for it.
# Keys appear to be sample identifiers (numeric prefix + task/dataset tag);
# they are reproduced verbatim, including their original ordering, because
# dict iteration order follows insertion order.
AUDIO_SAMPLES_W_INSTRUCT = {
    # --- First batch of samples (keys tagged ASR) ---
    '1_ASR_IMDA_PART1_ASR_v2_141': ["Turn the spoken language into a text format.", "Please translate the content into Chinese."],
    '7_ASR_IMDA_PART3_30_ASR_v2_2269': ["Need this talk written down, please."],
    '17_ASR_IMDA_PART6_30_ASR_v2_1413': ["Record the spoken word in text form."],

    # --- Keys tagged ST / SI ---
    '25_ST_COVOST2_ZH-CN_EN_ST_V2_4567': ["Please translate the given speech to English."],
    '26_ST_COVOST2_EN_ZH-CN_ST_V2_5422': ["Please translate the given speech to Chinese."],
    '30_SI_ALPACA-GPT4-AUDIO_SI_V2_1454': ["Please follow the instruction in the speech."],

    # --- Keys tagged SQA / DS ---
    '32_SQA_CN_COLLEDGE_ENTRANCE_ENGLISH_TEST_SQA_V2_572': ["What does the man think the woman should do at 4:00."],
    '33_SQA_IMDA_PART3_30_SQA_V2_2310': ["Does Speaker2's wife cook for Speaker2 when they are at home."],
    '34_SQA_IMDA_PART3_30_SQA_V2_3621': ["Does the phrase \"#gai-gai#\" have a meaning in Chinese or Hokkien language."],
    '35_SQA_IMDA_PART3_30_SQA_V2_4062': ["What is the color of the vase mentioned in the dialogue."],
    '36_DS_IMDA_PART4_30_DS_V2_849': ["Condense the dialogue into a concise summary highlighting major topics and conclusions."],

    # --- Keys tagged Paralingual ---
    '39_Paralingual_IEMOCAP_ER_V2_91': ["Based on the speaker's speech patterns, what do you think they are feeling."],
    '40_Paralingual_IEMOCAP_ER_V2_567': ["Based on the speaker's speech patterns, what do you think they are feeling."],
    '42_Paralingual_IEMOCAP_GR_V2_320': ["Is it possible for you to identify whether the speaker in this recording is male or female."],
    '43_Paralingual_IEMOCAP_GR_V2_129': ["Is it possible for you to identify whether the speaker in this recording is male or female."],
    '45_Paralingual_IMDA_PART3_30_GR_V2_12312': ["So, who's speaking in the second part of the clip?", "So, who's speaking in the first part of the clip?"],
    '47_Paralingual_IMDA_PART3_30_NR_V2_10479': ["Can you guess which ethnic group this person is from based on their accent."],
    '49_Paralingual_MELD_ER_V2_676': ["What emotions do you think the speaker is expressing."],
    '50_Paralingual_MELD_ER_V2_692': ["Based on the speaker's speech patterns, what do you think they are feeling."],
    '51_Paralingual_VOXCELEB1_GR_V2_2148': ["May I know the gender of the speaker."],
    '53_Paralingual_VOXCELEB1_NR_V2_2286': ["What's the nationality identity of the speaker."],

    # --- Keys tagged SQA (PUBLIC_SPEECH_SG_TEST) ---
    '55_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_2': ["What impact would the growth of the healthcare sector have on the country's economy in terms of employment and growth."],
    '56_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_415': ["Based on the statement, can you summarize the speaker's position on the recent controversial issues in Singapore."],
    '57_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_460': ["How does the author respond to parents' worries about masks in schools."],

    # --- Further keys tagged ASR (IMDA PART1) ---
    '2_ASR_IMDA_PART1_ASR_v2_2258': ["Turn the spoken language into a text format.", "Please translate the content into Chinese."],
    '3_ASR_IMDA_PART1_ASR_v2_2265': ["Turn the spoken language into a text format."],

    # --- ASR (IMDA PART2) ---
    '4_ASR_IMDA_PART2_ASR_v2_999': ["Translate the spoken words into text format."],
    '5_ASR_IMDA_PART2_ASR_v2_2241': ["Translate the spoken words into text format."],
    '6_ASR_IMDA_PART2_ASR_v2_3409': ["Translate the spoken words into text format."],

    # --- ASR (IMDA PART3) ---
    '8_ASR_IMDA_PART3_30_ASR_v2_1698': ["Need this talk written down, please."],
    '9_ASR_IMDA_PART3_30_ASR_v2_2474': ["Need this talk written down, please."],

    # --- ASR (IMDA PART4); original key order (11, 12, 10) is kept as-is ---
    '11_ASR_IMDA_PART4_30_ASR_v2_3771': ["Write out the dialogue as text."],
    '12_ASR_IMDA_PART4_30_ASR_v2_103': ["Write out the dialogue as text."],
    '10_ASR_IMDA_PART4_30_ASR_v2_1527': ["Write out the dialogue as text."],

    # --- ASR (IMDA PART5) ---
    '13_ASR_IMDA_PART5_30_ASR_v2_1446': ["Translate this vocal recording into a textual format."],
    '14_ASR_IMDA_PART5_30_ASR_v2_2281': ["Translate this vocal recording into a textual format."],
    '15_ASR_IMDA_PART5_30_ASR_v2_4388': ["Translate this vocal recording into a textual format."],

    # --- ASR (IMDA PART6) ---
    '16_ASR_IMDA_PART6_30_ASR_v2_576': ["Record the spoken word in text form."],
    '18_ASR_IMDA_PART6_30_ASR_v2_2834': ["Record the spoken word in text form."],

    # --- ASR (AIShell / LIBRISPEECH) ---
    '19_ASR_AIShell_zh_ASR_v2_5044': ["Transform the oral presentation into a text document."],
    '20_ASR_LIBRISPEECH_CLEAN_ASR_V2_833': ["Please provide a written transcription of the speech."],

    # --- Further keys tagged ST / SI ---
    '27_ST_COVOST2_EN_ZH-CN_ST_V2_6697': ["Please translate the given speech to Chinese."],
    '28_SI_ALPACA-GPT4-AUDIO_SI_V2_299': ["Please follow the instruction in the speech."],
    '29_SI_ALPACA-GPT4-AUDIO_SI_V2_750': ["Please follow the instruction in the speech."],
}
|
78 |
+
|
79 |
+
|
80 |
class NoAudioException(Exception):
    """Exception type named for a missing-audio condition.

    Adds no behaviour on top of ``Exception``; it exists so callers can
    catch this case specifically. Where it is raised is not visible in
    this excerpt.
    """
|
82 |
|