File size: 2,513 Bytes
11dde70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from googleapiclient.discovery import build
from google.oauth2 import service_account
from googleapiclient.http import MediaFileUpload
import pdb

import gradio as gr

# JSON credentials file downloaded from the Google Cloud console.
credentials_file = "./src/peerless-window-254907-b386b71c0d99.json"
# Alternative credentials file, currently disabled:
# "./client_secret_576367903492-diuopf97kn9eh1gte3vh65errtca1o64.apps.googleusercontent.com.json"

# Google Drive API version.
api_version = "v3"

# Create the Drive service object: load the service-account credentials with
# the Drive scope, then build the client that the rest of the module uses.
credentials = service_account.Credentials.from_service_account_file(
    credentials_file,
    scopes=["https://www.googleapis.com/auth/drive"],
)
service = build("drive", api_version, credentials=credentials)


import gradio as gr
from transformers import pipeline
import numpy as np
import librosa
import torchaudio

import datetime

def generate_now_time_wav():
    """Build a WAV file name stamped with the current date and time.

    Returns:
        A name of the form ``audio_YYYY-MM-DD_HH-MM-SS.wav`` based on
        ``datetime.datetime.now()``.
    """
    stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    return "audio_" + stamp + ".wav"

# transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

def transcribe(audio_path):
    """Upload a recorded WAV file to Google Drive and return its file ID.

    Despite its name, this function performs no speech recognition: it
    loads the recording to log its tensor shape, then uploads the original
    file through the module-level Drive ``service`` under a timestamped
    name produced by ``generate_now_time_wav``.

    Args:
        audio_path: Filesystem path of the recording to upload.
            Presumably ``None`` when the UI submits without a recording.

    Returns:
        The ``id`` field of the Drive API response for the created file.

    Raises:
        ValueError: If ``audio_path`` is ``None`` or empty.
    """
    if not audio_path:
        # Fail fast with a clear message instead of handing a missing path
        # to the audio loader and the uploader.
        raise ValueError("No audio was provided; record a clip before submitting.")

    # Load the audio only to sanity-check and log it; the sample rate is unused.
    wav, _ = torchaudio.load(audio_path)
    if wav.shape[0] != 1:
        # More than one channel: keep just the first for the shape log.
        wav = wav[0, :]
    print(wav.shape)

    name = generate_now_time_wav()
    # Upload the file. Note that the original recording at audio_path is
    # uploaded, not the tensor loaded above.
    media = MediaFileUpload(audio_path, mimetype='audio/wav')
    request = service.files().create(
        media_body=media,
        body={'name': name},
    )
    response = request.execute()

    return response.get('id')
    
# Gradio UI: record from the microphone, hand the saved file path to
# `transcribe`, and show the returned value as text.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type='filepath'),
    outputs="text",
)

# Manual debugging scaffold (disabled):
# file_path = 'data/3_michael_20230619_100/1st_session_ZOOM0015_002.wav'
# x = gr.Audio(source="upload", type='filepath'),
# pdb.set_trace()
# x = transcribe(file_path)
# pdb.set_trace()

# Start the web app.
demo.launch()
    
# # File to upload
# file_name = '1st_session_ZOOM0015_001.wav'

# # Upload the file
# media = MediaFileUpload(file_path, mimetype='audio/wav')
# request = service.files().create(
#     media_body=media,
#     body={'name': file_name}
# )


# response = request.execute()

# # List files
# results = service.files().list().execute()
# files = results.get('files', [])
# pdb.set_trace()

# print('文件ID:%s' % response.get('id'))