Songhun committed on
Commit
60e91d9
·
verified ·
1 Parent(s): 073b742

Upload folder using huggingface_hub

Browse files
.ipynb_checkpoints/README-checkpoint.md CHANGED
@@ -37,17 +37,17 @@ feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
37
 
38
  3. Calculate Voice Similarity
39
  ```shell
40
- file_path1 = './sample_data/voice1.mp3'
41
- file_path2 = './sample_data/voice2.mp3'
42
 
43
  feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
44
  def load_and_process_audio(file_path, feature_extractor, max_length=4.0):
45
  audio, sampling_rate = librosa.load(file_path, sr=16000)
46
  inputs = feature_extractor(audio, sampling_rate=sampling_rate, return_tensors="pt", padding="max_length", truncation=True, max_length=int(max_length * sampling_rate))
47
- return inputs.input_values.to(device)
48
 
49
- audio_input1 = load_and_process_audio(file_path1, feature_extractor)
50
- audio_input2 = load_and_process_audio(file_path2, feature_extractor)
51
 
52
  embedding1 = model(audio_input1).last_hidden_state.mean(dim=1)
53
  embedding2 = model(audio_input2).last_hidden_state.mean(dim=1)
 
37
 
38
  3. Calculate Voice Similarity
39
  ```shell
40
+ file_path1 = './test1.wav'
41
+ file_path2 = './test2.wav'
42
 
43
  feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
44
  def load_and_process_audio(file_path, feature_extractor, max_length=4.0):
45
  audio, sampling_rate = librosa.load(file_path, sr=16000)
46
  inputs = feature_extractor(audio, sampling_rate=sampling_rate, return_tensors="pt", padding="max_length", truncation=True, max_length=int(max_length * sampling_rate))
47
+ return inputs.input_values
48
 
49
+ audio_input1 = load_and_process_audio(file_path1, feature_extractor).to(device)
50
+ audio_input2 = load_and_process_audio(file_path2, feature_extractor).to(device)
51
 
52
  embedding1 = model(audio_input1).last_hidden_state.mean(dim=1)
53
  embedding2 = model(audio_input2).last_hidden_state.mean(dim=1)
README.md CHANGED
@@ -37,17 +37,17 @@ feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
37
 
38
  3. Calculate Voice Similarity
39
  ```shell
40
- file_path1 = './sample_data/voice1.mp3'
41
- file_path2 = './sample_data/voice2.mp3'
42
 
43
  feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
44
  def load_and_process_audio(file_path, feature_extractor, max_length=4.0):
45
  audio, sampling_rate = librosa.load(file_path, sr=16000)
46
  inputs = feature_extractor(audio, sampling_rate=sampling_rate, return_tensors="pt", padding="max_length", truncation=True, max_length=int(max_length * sampling_rate))
47
- return inputs.input_values.to(device)
48
 
49
- audio_input1 = load_and_process_audio(file_path1, feature_extractor)
50
- audio_input2 = load_and_process_audio(file_path2, feature_extractor)
51
 
52
  embedding1 = model(audio_input1).last_hidden_state.mean(dim=1)
53
  embedding2 = model(audio_input2).last_hidden_state.mean(dim=1)
 
37
 
38
  3. Calculate Voice Similarity
39
  ```shell
40
+ file_path1 = './test1.wav'
41
+ file_path2 = './test2.wav'
42
 
43
  feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
44
  def load_and_process_audio(file_path, feature_extractor, max_length=4.0):
45
  audio, sampling_rate = librosa.load(file_path, sr=16000)
46
  inputs = feature_extractor(audio, sampling_rate=sampling_rate, return_tensors="pt", padding="max_length", truncation=True, max_length=int(max_length * sampling_rate))
47
+ return inputs.input_values
48
 
49
+ audio_input1 = load_and_process_audio(file_path1, feature_extractor).to(device)
50
+ audio_input2 = load_and_process_audio(file_path2, feature_extractor).to(device)
51
 
52
  embedding1 = model(audio_input1).last_hidden_state.mean(dim=1)
53
  embedding2 = model(audio_input2).last_hidden_state.mean(dim=1)