Nithya committed on
Commit d3c7fa1 · 1 Parent(s): 9272247

updated app to use GPUQuantileTransformer from github

Files changed (2)
  1. app.py +3 -30
  2. requirements.txt +1 -1
app.py CHANGED
@@ -5,7 +5,7 @@ debug_mode = '--debug' in sys.argv or os.environ.get('DEBUG') == 'True'
 
 if debug_mode:
     # Path to the local version of the package
-    local_package_path = "../../GaMaDHaNi"
+    local_package_path = "../../GaMaDHaNi-dev"
 
     # Add the local package path to sys.path
     sys.path.insert(0, local_package_path)
@@ -51,33 +51,6 @@ def debug_profile(func):
         return pp.profile(sort_by='cumulative', out_lines=10)(func)
     return func
 
-class GPUQuantileTransformer:
-    '''
-    Temporary hack class to perform inverse quantile transform on GPU
-    '''
-    def __init__(self, qt):
-        # Initialize the sklearn QuantileTransformer
-        self.cpu_transformer = qt
-        self.quantiles_ = torch.Tensor(qt.quantiles_).to(device)
-        self.references_ = torch.Tensor(qt.references_).to(device)
-
-    def transform(self, X):
-        return self.cpu_transformer.transform(X)
-
-    def inverse_transform(self, X):
-        # convert distribution to uniform
-        X = 0.5 * (1 + torch.erf(X / torch.sqrt(torch.tensor(2.0))))
-        # Interpolate using the quantiles and references on GPU
-        idxs = torch.searchsorted(self.references_.view(1, -1), X.view(1, -1)).to(torch.int64)
-        idxs = idxs.view(-1, 1)  # to match with the shape of quantiles
-        quantiles_low = torch.gather(self.quantiles_, 0, (idxs - 1).clamp(min=0))
-        quantiles_high = torch.gather(self.quantiles_, 0, idxs.clamp(max=self.quantiles_.size(0) - 1))
-
-        # Linear interpolation between quantiles
-        t = (X - self.references_[idxs - 1]) / (self.references_[idxs] - self.references_[idxs - 1] + 1e-10)
-        X_inv = quantiles_low + t * (quantiles_high - quantiles_low)
-        return X_inv.reshape(1, -1)
-
 def predict_voicing(confidence):
     # https://github.com/marl/crepe/pull/26
     """
@@ -162,7 +135,7 @@ def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp',
     logging.log(logging.INFO, 'Generate function')
     # load pitch values onto GPU
     pitch = torch.tensor(pitch).float().unsqueeze(0).unsqueeze(0).to(device)
-    pitch_qt = GPUQuantileTransformer(pitch_qt)
+    pitch_qt = p2a.GPUQuantileTransformer(pitch_qt, device=device)
     logging.log(logging.INFO, 'Generating pitch')
     if type == 'response':
         pitch, inverted_pitch = generate_pitch_response(pitch, pitch_model, invert_pitch_fn, num_samples=num_samples, num_steps=100)
@@ -175,7 +148,7 @@ def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp',
     if pitch_qt is not None:
         # if there is not pitch quantile transformer, undo the default quantile transformation that occurs
         def undo_qt(x, min_clip=200):
-            pitch = pitch_qt.inverse_transform(x.reshape(-1, 1)).reshape(1, -1)
+            pitch = pitch_qt.inverse_transform(x).squeeze(0)  # qt transform expects shape (bs, seq_len, 1)
             pitch = torch.round(pitch)  # round to nearest integer, done in preprocessing of pitch contour fed into model
             pitch[pitch < 200] = np.nan
             return pitch
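
For background on what this change removes: the local GPUQuantileTransformer above inverted a fitted sklearn QuantileTransformer (output_distribution='normal') directly on the GPU, applying the Gaussian CDF to map model outputs back to the uniform scale and then linearly interpolating between the learned quantiles. The commit swaps that local hack for the GPUQuantileTransformer shipped in the GaMaDHaNi package (called as p2a.GPUQuantileTransformer above). The self-contained sketch below illustrates the same interpolation idea; the helper name gpu_inverse_quantile_transform, the single-feature assumption, and the synthetic data are illustrative only and not the package's actual API.

import numpy as np
import torch
from sklearn.preprocessing import QuantileTransformer

def gpu_inverse_quantile_transform(x, qt, device="cpu"):
    # Learned quantiles of the data (single feature) and their uniform-scale references
    quantiles = torch.as_tensor(qt.quantiles_[:, 0], dtype=torch.float32, device=device)
    references = torch.as_tensor(qt.references_, dtype=torch.float32, device=device)
    # Gaussian CDF: map normal-distributed model outputs back to the uniform [0, 1] scale
    u = 0.5 * (1 + torch.erf(x.to(device) / torch.sqrt(torch.tensor(2.0, device=device))))
    # Locate each value between two references and linearly interpolate the quantiles
    idx = torch.searchsorted(references, u).clamp(1, references.numel() - 1)
    t = (u - references[idx - 1]) / (references[idx] - references[idx - 1] + 1e-10)
    return quantiles[idx - 1] + t * (quantiles[idx] - quantiles[idx - 1])

# Usage sketch: fit on the CPU with sklearn, invert on GPU-resident tensors.
qt = QuantileTransformer(output_distribution="normal", n_quantiles=100)
qt.fit(np.random.rand(1000, 1) * 400 + 100)       # e.g. pitch values in Hz
z = torch.randn(1, 200)                           # model output in the normal domain
pitch_hz = gpu_inverse_quantile_transform(z, qt)  # back to the data (Hz) domain

Keeping the quantile and reference tables as tensors on the model's device avoids the CPU round trip that sklearn's inverse_transform would otherwise force on every generated pitch contour.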
requirements.txt CHANGED
@@ -1,4 +1,4 @@
 crepe==0.0.15
 hmmlearn==0.3.2
 tensorflow==2.17.0
-GaMaDHaNi @ git+https://github.com/snnithya/GaMaDHaNi.git@43aa691e0df53f64e4bffbd7b8afc703593bb630
+GaMaDHaNi @ git+https://github.com/snnithya/GaMaDHaNi.git@37788f4c900fc425cd193052784e88afbfdd19e2