ujalaarshad17 committed
Commit 384e020 · 1 Parent(s): 18989b5

Added files
.gitignore ADDED
@@ -0,0 +1,3 @@
+ __pycache__
+ Fake
+ Real
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM python:3.9
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+ CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
app.py ADDED
@@ -0,0 +1,160 @@
+ from flask import Flask, request, render_template, redirect, url_for
+ import torch
+ import torchaudio
+ import numpy as np
+ import plotly.graph_objs as go
+ import os  # Import os for file operations
+ from model import BoundaryDetectionModel  # Assuming your model is defined here
+ from audio_dataset import pad_audio  # Assuming you have a function to pad audio
+
+ app = Flask(__name__)
+
+ # Load the pre-trained model
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model = BoundaryDetectionModel().to(device)
+ model.load_state_dict(torch.load("checkpoint_epoch_21_eer_0.24.pth", map_location=device)["model_state_dict"])
+ model.eval()
+
+ def preprocess_audio(audio_path, sample_rate=16000, target_length=8):
+     waveform, sr = torchaudio.load(audio_path)
+     if sr != sample_rate:
+         waveform = torchaudio.transforms.Resample(sr, sample_rate)(waveform)
+     waveform = pad_audio(waveform, sample_rate, target_length)
+     return waveform.to(device)
+
+ def infer_single_audio(audio_tensor):
+     with torch.no_grad():
+         output = model(audio_tensor).squeeze(-1).cpu().numpy()
+         prediction = (output > 0.5).astype(int)  # Binary prediction for fake/real frames
+     return output, prediction
+
+ @app.route('/')
+ def index():
+     return render_template('index.html')  # HTML page for file upload and results display
+
+ @app.route('/predict', methods=['POST'])
+ def predict():
+     if 'file' not in request.files:
+         return "No file uploaded", 400
+
+     file = request.files['file']
+     if file.filename == '':
+         return "No selected file", 400
+
+     file_path = "temp_audio.wav"  # Temporary file to store uploaded audio
+     file.save(file_path)
+
+     # Preprocess audio and perform inference
+     audio_tensor = preprocess_audio(file_path)
+     output, prediction = infer_single_audio(audio_tensor)
+
+     # Flatten the prediction array to handle 2D structure
+     prediction_flat = prediction.flatten()
+
+     # Calculate total frames, fake frames, and fake percentage (formatted to 4 decimal places)
+     total_frames = len(prediction_flat)
+     fake_frame_count = int(np.sum(prediction_flat))
+     fake_percentage = round((fake_frame_count / total_frames) * 100, 4)
+     result_type = 'Fake' if fake_frame_count >= 5 else 'Real'
+
+     # Check if audio is classified as real
+     if result_type == 'Real':
+         fake_frame_intervals = "No Frame"  # Set to "No Frame" if audio is real
+     else:
+         # Get precise fake frame timings with start and end times for fake frames
+         fake_frame_intervals = get_fake_frame_intervals(prediction_flat, frame_duration=20)
+
+     # Debug print to check intervals
+     print("Fake Frame Intervals:", fake_frame_intervals)
+
+     # Generate Plotly plot
+     plot_html = plot_fake_frames_waveform(output, prediction_flat, audio_tensor.cpu().numpy(), fake_frame_intervals)
+
+     # Render template with all results and plot
+     return render_template('result.html',
+                            fake_percentage=fake_percentage,
+                            result_type=result_type,
+                            fake_frame_count=fake_frame_count,
+                            total_frames=total_frames,
+                            fake_frame_intervals=fake_frame_intervals,
+                            plot_html=plot_html)
+
+ @app.route('/return', methods=['GET'])
+ def return_to_index():
+     # Delete temporary files before returning to index
+     try:
+         os.remove("temp_audio.wav")  # Remove the temporary audio file
+         # If you have any other temporary files (like plots), remove them here too.
+         # Example: os.remove("temp_plot.html") if you save plots as HTML files.
+     except OSError as e:
+         print(f"Error deleting temporary files: {e}")
+
+     return redirect(url_for('index'))  # Redirect back to the main page
+
+ def get_fake_frame_intervals(prediction, frame_duration=20):
+     """
+     Calculate start and end times in seconds for each consecutive fake frame interval.
+     """
+     intervals = []
+     start_time = None
+
+     for i, is_fake in enumerate(prediction):
+         if is_fake == 1:
+             if start_time is None:
+                 start_time = i * (frame_duration / 1000)  # Convert ms to seconds
+         else:
+             if start_time is not None:
+                 end_time = i * (frame_duration / 1000)  # End time of fake segment
+                 intervals.append((round(start_time, 4), round(end_time, 4)))
+                 start_time = None
+
+     # Append last interval if it ended on the last frame
+     if start_time is not None:
+         end_time = len(prediction) * (frame_duration / 1000)  # Final end time calculation
+         intervals.append((round(start_time, 4), round(end_time, 4)))
+
+     return intervals
+
+ def plot_fake_frames_waveform(output, prediction_flat, waveform, fake_frame_intervals, frame_duration=20, sample_rate=16000):
+     # Get actual audio duration from waveform for accurate x-axis scaling
+     actual_duration = waveform.shape[1] / sample_rate
+     num_samples = waveform.shape[1]  # Get number of samples from the actual waveform
+     time = np.linspace(0, actual_duration, num_samples)
+
+     # Plotly trace for the waveform with different colors for fake and real frames
+     frame_length = int(sample_rate * frame_duration / 1000)  # Samples per frame
+
+     traces = []
+     for i in range(len(prediction_flat)):
+         start = i * frame_length
+         end = min(start + frame_length, num_samples)  # Ensure we do not exceed the samples
+         color = 'rgba(255,0,0,0.8)' if prediction_flat[i] == 1 else 'rgba(0,128,0,0.5)'
+
+         traces.append(go.Scatter(
+             x=time[start:end],
+             y=waveform[0][start:end],
+             mode='lines',
+             line=dict(color=color),
+             showlegend=False
+         ))
+
+     # Full waveform view to show all fake and real segments
+     min_time, max_time = 0, actual_duration
+
+     # Layout settings for the plot
+     layout = go.Layout(
+         title="Audio Waveform with Fake Frames Highlighted",
+         xaxis=dict(title="Time (seconds)", range=[min_time, max_time]),
+         yaxis=dict(title="Amplitude"),
+         autosize=True,
+         template="plotly_white"
+     )
+
+     fig = go.Figure(data=traces, layout=layout)
+
+     # Convert Plotly figure to HTML
+     plot_html = fig.to_html(full_html=False)
+     return plot_html
+
+ if __name__ == '__main__':
+     app.run()
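The /predict route reduces the model's per-frame probabilities to a clip verdict (five or more fake frames yields "Fake") and, via get_fake_frame_intervals, converts runs of fake frames into time intervals, treating each frame as 20 ms. Below is a minimal sketch of that mapping with a hypothetical prediction array; note that importing app builds the model and loads the checkpoint, so it should be run from the repo root with the .pth file present.

import numpy as np
from app import get_fake_frame_intervals

# Hypothetical per-frame predictions: frames 1-2 and frame 4 flagged as fake.
toy_prediction = np.array([0, 1, 1, 0, 1])

# One (start, end) pair per consecutive run of fake frames, at 20 ms per frame:
# expected output is [(0.02, 0.06), (0.08, 0.1)].
print(get_fake_frame_intervals(toy_prediction, frame_duration=20))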
audio_dataset.py ADDED
@@ -0,0 +1,106 @@
+ import os
+ import random
+ import torch
+ from torch.utils.data import Dataset
+ import torchaudio
+ import numpy as np
+
+ # Modify to handle dynamic target duration (8s in this case)
+ # def pad_audio(audio, sample_rate=16000, target_duration=8.0):
+ #     target_length = int(sample_rate * target_duration)  # Calculate target length for 8 seconds
+ #     current_length = audio.shape[1]
+
+ #     if current_length < target_length:
+ #         padding = target_length - current_length
+ #         audio = torch.cat((audio, torch.zeros(audio.shape[0], padding)), dim=1)
+ #     else:
+ #         audio = audio[:, :target_length]
+
+ #     return audio
+ def pad_audio(audio, sample_rate=16000, target_duration=7.98):
+     target_length = int(sample_rate * target_duration)  # Target length in samples for the given duration
+     current_length = audio.shape[1]
+
+     if current_length < target_length:
+         padding = target_length - current_length
+         audio = torch.cat((audio, torch.zeros(audio.shape[0], padding)), dim=1)
+     elif current_length > target_length:
+         # Append one zero sample if the clip is exactly one sample longer than the target
+         if current_length - target_length == 1:
+             audio = torch.cat((audio, torch.zeros(audio.shape[0], 1)), dim=1)
+         else:
+             audio = audio[:, :target_length]
+
+     return audio
+
+ # Parse labels with 10ms frame intervals for 8-second audio
+ def parse_labels(file_path, audio_length, sample_rate, frame_duration=0.010):
+     frames_per_audio = int(audio_length / frame_duration)
+     labels = np.zeros(frames_per_audio, dtype=np.float32)
+
+     with open(file_path, 'r') as f:
+         lines = f.readlines()[1:]  # Skip header
+         for line in lines:
+             start, end, authenticity = line.strip().split('-')
+             start_time = float(start)
+             end_time = float(end)
+
+             if authenticity == 'F':
+                 start_frame = int(start_time / frame_duration)
+                 end_frame = int(end_time / frame_duration)
+                 labels[start_frame:end_frame] = 1
+
+                 # Mark 4 closest frames to boundaries
+                 for offset in range(1, 5):
+                     if start_frame - offset >= 0:
+                         labels[start_frame - offset] = 1
+                     if end_frame + offset < frames_per_audio:
+                         labels[end_frame + offset] = 1
+
+     return labels
+
+ class AudioDataset(Dataset):
+     def __init__(self, audio_files, label_dir, sample_rate=16000, target_length=7.98):
+         self.audio_files = audio_files
+         self.label_dir = label_dir
+         self.sample_rate = sample_rate
+         self.target_length = target_length * sample_rate
+         self.raw_target_length = target_length
+
+     def __len__(self):
+         return len(self.audio_files)
+
+     def __getitem__(self, idx):
+         audio_path = self.audio_files[idx]
+         try:
+             waveform, sr = torchaudio.load(audio_path)
+             waveform = torchaudio.transforms.Resample(sr, self.sample_rate)(waveform)
+             waveform = pad_audio(waveform, self.sample_rate, self.raw_target_length)
+
+             audio_filename = os.path.basename(audio_path).replace(".wav", "")
+             if audio_filename.startswith("RFP_R"):
+                 labels = np.zeros(int(self.raw_target_length / 0.010), dtype=np.float32)
+             else:
+                 label_path = os.path.join(self.label_dir, f"{audio_filename}.wav_labels.txt")
+                 labels = parse_labels(label_path, self.raw_target_length, self.sample_rate).astype(np.float32)
+
+             return waveform, torch.tensor(labels, dtype=torch.float32)
+
+         except (OSError, IOError) as e:
+             print(f"Error opening file {audio_path}: {e}")
+             new_idx = random.randint(0, len(self.audio_files) - 1)
+             return self.__getitem__(new_idx)
+
+
+ def get_audio_file_paths(extrinsic_dir, intrinsic_dir, real_dir):
+     extrinsic_files = [os.path.join(extrinsic_dir, f) for f in os.listdir(extrinsic_dir)
+                        if f.endswith(".wav") and not f.startswith("partial_fake")]
+     intrinsic_files = [os.path.join(intrinsic_dir, f) for f in os.listdir(intrinsic_dir)
+                        if f.endswith(".wav") and not f.startswith("partial_fake")]
+     real_files = [os.path.join(real_dir, f) for f in os.listdir(real_dir)
+                   if f.endswith(".wav") and not f.startswith("partial_fake")]
+
+     # Combine extrinsic and real files, then keep only basenames that start with "extrinsic"
+     audio_files = [f for f in extrinsic_files + real_files
+                    if os.path.basename(f).startswith(("extrinsic"))]
+     return audio_files
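A sketch of how these pieces are presumably wired together for training follows. The directory names below are placeholders rather than paths from this repo, the label directory is assumed to contain the *_labels.txt files parse_labels expects, and the .wav files are assumed to be mono.

from torch.utils.data import DataLoader
from audio_dataset import AudioDataset, get_audio_file_paths

# Placeholder directories -- substitute the actual extrinsic/intrinsic/real folders.
audio_files = get_audio_file_paths("extrinsic_dir", "intrinsic_dir", "Real")
dataset = AudioDataset(audio_files, label_dir="labels_dir")
loader = DataLoader(dataset, batch_size=8, shuffle=True)

waveforms, labels = next(iter(loader))
print(waveforms.shape)  # e.g. torch.Size([8, 1, 127680]) -- 7.98 s of 16 kHz mono audio
print(labels.shape)     # roughly 798 frame labels per clip at 10 ms resolution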
checkpoint_epoch_16_eer_0.25.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:75f0fc179f4f1bc0074dd874953ac233db9f86b58a0ca97d1e75472fefd29893
+ size 55028923
checkpoint_epoch_21_eer_0.24.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c2a3294c037664c8bd16cb9f3fefb15b8527538e2c185d66e5f365ad0e5199b0
+ size 55028923
checkpoint_epoch_24_eer_0.23.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:194d1b7e6bd18d8e059a833d59cc096f1693034383b9e45043b4dc57196adaa3
+ size 55028923
inference.py ADDED
@@ -0,0 +1,44 @@
+ import torch
+ import torchaudio
+ import numpy as np
+ from model import BoundaryDetectionModel  # Assume the model definition is in model.py
+ from audio_dataset import pad_audio  # Use the provided padding function
+
+
+ def load_model(checkpoint_path, device):
+     model = BoundaryDetectionModel().to(device)
+     model.load_state_dict(torch.load(checkpoint_path, map_location=device)["model_state_dict"])
+     model.eval()
+     return model
+
+
+ def preprocess_audio(audio_path, sample_rate=16000, target_length=8):
+     waveform, sr = torchaudio.load(audio_path)
+     waveform = torchaudio.transforms.Resample(sr, sample_rate)(waveform)
+     waveform = pad_audio(waveform, sample_rate, target_length)
+     print(waveform.shape)
+     return waveform
+
+ def infer_single_audio(model, audio_path, device):
+     audio_tensor = preprocess_audio(audio_path).to(device)
+
+     with torch.no_grad():
+         output = model(audio_tensor).squeeze(-1).cpu().numpy()  # Remove extra dimensions
+         prediction = (output > 0.5).astype(int)  # Round outputs for binary prediction if needed
+     return output, prediction
+
+
+ def main_inference(audio_path, checkpoint_path):
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     model = load_model(checkpoint_path, device)
+
+     print(f"Running inference on: {audio_path}")
+     output, prediction = infer_single_audio(model, audio_path, device)
+
+     print(f"Model Output: {output}")
+     print(f"Binary Prediction: {prediction}")
+
+ if __name__ == "__main__":
+     audio_path = "Real/RFP_R_24918.wav"  # Path to the audio file for inference (forward slash so it also resolves on Linux)
+     checkpoint_path = "checkpoint_epoch_21_eer_0.24.pth"  # Path to the trained model checkpoint
+     main_inference(audio_path, checkpoint_path)
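inference.py prints raw per-frame probabilities and binary predictions; a clip-level verdict can then be derived the same way app.py does it (five or more fake frames means "Fake"). A minimal sketch, assuming the checkpoint and the sample clip committed under uploads/ are present:

import torch
from inference import load_model, infer_single_audio

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = load_model("checkpoint_epoch_21_eer_0.24.pth", device)

output, prediction = infer_single_audio(model, "uploads/RFP_R_24918.wav", device)

fake_frames = int(prediction.sum())
print("fake frames:", fake_frames)
print("verdict:", "Fake" if fake_frames >= 5 else "Real")  # same 5-frame rule as app.py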
model.py ADDED
@@ -0,0 +1,95 @@
+ import torch
+ import torch.nn as nn
+ import torchaudio
+ from torchaudio.transforms import MelSpectrogram
+
+ class FeatureExtractor(nn.Module):
+     def __init__(self, n_mels=13, sample_rate=16000, frame_size_ms=20):
+         super(FeatureExtractor, self).__init__()
+         self.mel_spec = MelSpectrogram(
+             sample_rate=sample_rate,
+             n_mels=n_mels,
+             win_length=int(sample_rate * frame_size_ms / 2000),  # 160 samples = 10 ms at 16 kHz
+             hop_length=int(sample_rate * frame_size_ms / 2000),  # 160 samples = 10 ms at 16 kHz
+             normalized=True
+         )
+
+     def forward(self, audio):
+         # Convert to Mel spectrogram
+         mel_features = self.mel_spec(audio)
+         # Transpose to (batch_size, sequence_length, n_mels); FrameLevelEmbedding transposes back for Conv1d
+         mel_features = mel_features.transpose(1, 2)
+         return mel_features
+
+
+ # FrameLevelEmbedding and FrameLevelClassifier remain the same
+ class FrameLevelEmbedding(nn.Module):
+     def __init__(self):
+         super(FrameLevelEmbedding, self).__init__()
+         self.cnn1 = nn.Conv1d(in_channels=13, out_channels=512, kernel_size=5, padding=2)
+         self.res_blocks = nn.Sequential(*[ResBlock(512) for _ in range(6)])
+         self.cnn2 = nn.Conv1d(in_channels=512, out_channels=240, kernel_size=1)
+
+     def forward(self, x):
+         x = x.transpose(1, 2)  # (batch_size, seq_len, features) -> (batch_size, features, seq_len)
+         x = self.cnn1(x)
+         x = self.res_blocks(x)
+         x = self.cnn2(x)
+         x = x.transpose(1, 2)  # (batch_size, features, seq_len) -> (batch_size, seq_len, features)
+         return x
+
+ # Keep the other parts of the model unchanged (e.g., ResBlock, FrameLevelClassifier, BoundaryDetectionModel)
+ class ResBlock(nn.Module):
+     def __init__(self, channels):
+         super(ResBlock, self).__init__()
+         self.conv1 = nn.Conv1d(in_channels=channels, out_channels=channels, kernel_size=1, bias=False)
+         self.conv2 = nn.Conv1d(in_channels=channels, out_channels=channels, kernel_size=1, bias=False)
+         self.bn1 = nn.BatchNorm1d(channels)
+         self.bn2 = nn.BatchNorm1d(channels)
+         self.relu = nn.ReLU()
+
+     def forward(self, x):
+         identity = x
+         out = self.relu(self.bn1(self.conv1(x)))
+         out = self.bn2(self.conv2(out))
+         out += identity
+         return self.relu(out)
+
+ class FrameLevelClassifier(nn.Module):
+     def __init__(self):
+         super(FrameLevelClassifier, self).__init__()
+         self.transformer = nn.TransformerEncoder(
+             nn.TransformerEncoderLayer(d_model=240, nhead=4, dim_feedforward=1024), num_layers=2
+         )
+         self.bilstm = nn.LSTM(input_size=240, hidden_size=128, num_layers=2, bidirectional=True, batch_first=True)
+         self.fc = nn.Linear(256, 1)  # Bidirectional LSTM -> 2 * hidden_size
+
+     def forward(self, x):
+         # x = self.transformer(x)
+         x, _ = self.bilstm(x)
+         x = self.fc(x)
+         return torch.sigmoid(x)
+
+
+ class BoundaryDetectionModel(nn.Module):
+     def __init__(self):
+         super(BoundaryDetectionModel, self).__init__()
+         self.feature_extractor = FeatureExtractor()
+         self.frame_embedding = FrameLevelEmbedding()
+         self.classifier = FrameLevelClassifier()
+
+     def forward(self, audio):
+         features = self.feature_extractor(audio)
+         embeddings = self.frame_embedding(features)
+         output = self.classifier(embeddings)
+         return output
+
+
+ # model = BoundaryDetectionModel()
+ # audio, sr = torchaudio.load("new_files/Extrinsic_Partial_Fakes/extrinsic_partial_fake_RFP_R_00001.wav")
+ # if sr != 16000:
+ #     resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
+ #     audio = resampler(audio)
+ # # audio = audio.mean(dim=0).unsqueeze(0)  # Convert to mono and add batch dimension
+ # output = model(audio)
+ # print(output.squeeze(2).shape)
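A quick shape sanity check with a dummy input (the frame count below is my own arithmetic from the 160-sample hop, not a figure stated in the repo): 7.98 s at 16 kHz is 127,680 samples, and with centered STFT frames that gives 127680 / 160 + 1 = 799 frames, so the untrained model should emit one sigmoid score per frame.

import torch
from model import BoundaryDetectionModel

model = BoundaryDetectionModel().eval()

# Dummy mono clip: 7.98 s at 16 kHz = 127680 samples.
dummy = torch.randn(1, 127680)

with torch.no_grad():
    out = model(dummy)

print(out.shape)  # expected: torch.Size([1, 799, 1])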
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ torch
+ torchaudio
+ librosa
+ flask
+ gunicorn
+ uvicorn
static/prediction_plot.png ADDED
static/prediction_waveform.png ADDED
static/styles.css ADDED
@@ -0,0 +1,121 @@
+ /* General Reset */
+ * {
+     margin: 0;
+     padding: 0;
+     box-sizing: border-box;
+ }
+
+ body {
+     font-family: Arial, sans-serif;
+     color: #333;
+     background-color: #f9f9f9;
+     display: flex;
+     justify-content: center;
+     align-items: center;
+     min-height: 100vh;
+     height: 100%;
+ }
+
+ .container {
+     width: 100%;
+     height: 100vh;
+     display: grid;
+     place-items: center;
+     gap: 0;
+     padding: 20px;
+     background-color: #fff;
+     box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
+     border-radius: 8px;
+     text-align: center;
+     box-sizing: border-box; /* Ensure padding doesn't affect width */
+ }
+
+ .title {
+     font-size: 2em;
+     color: #333;
+     margin-bottom: 1rem;
+ }
+
+ .upload-form {
+     display: flex;
+     flex-direction: column;
+     gap: 1rem;
+     margin-bottom: 2rem;
+ }
+
+ .file-label {
+     font-size: 1.1em;
+     color: #555;
+ }
+
+ .file-input {
+     padding: 8px;
+     border-radius: 4px;
+     border: 1px solid #ccc;
+ }
+
+ .submit-button {
+     padding: 10px 20px;
+     font-size: 1em;
+     font-weight: bold;
+     color: #fff;
+     background-color: #4caf50;
+     border: none;
+     border-radius: 5px;
+     cursor: pointer;
+     transition: background-color 0.3s ease;
+ }
+
+ .submit-button:hover {
+     background-color: #45a049;
+ }
+
+ .result-section {
+     margin-top: 1.5rem;
+     padding: 1.5rem;
+     background-color: #f1f1f1;
+     border-radius: 8px;
+     box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+ }
+
+ .result-title {
+     font-size: 1.5em;
+     margin-bottom: 1rem;
+     color: #333;
+ }
+
+ .result-text {
+     font-size: 1.1em;
+     color: #666;
+     margin: 0.5rem 0;
+ }
+
+ .result-image {
+     margin-top: 1rem;
+     max-width: 100%;
+     height: auto;
+     border-radius: 8px;
+     box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+ }
+
+ .return-button {
+     padding: 10px 20px;
+     font-size: 1em;
+     font-weight: bold;
+     color: #fff;
+     background-color: #4caf50;
+     border: none;
+     border-radius: 5px;
+     cursor: pointer;
+     transition: background-color 0.3s ease;
+     text-decoration: none;
+ }
+
+ .intervals-list {
+     margin-bottom: 2rem;
+ }
+
+ ul {
+     list-style-type: none;
+     padding-left: 0; /* Optional: removes default left padding */
+ }
templates/index.html ADDED
@@ -0,0 +1,34 @@
+ <!DOCTYPE html>
+ <html lang="en">
+   <head>
+     <meta charset="UTF-8" />
+     <title>Audio Boundary Detection</title>
+     <link
+       rel="stylesheet"
+       href="{{ url_for('static', filename='styles.css') }}"
+     />
+   </head>
+   <body class="page">
+     <div class="container">
+       <div>
+         <h1 class="title">Audio Boundary Detection</h1>
+         <form
+           action="/predict"
+           method="post"
+           enctype="multipart/form-data"
+           class="upload-form"
+         >
+           <label for="file" class="file-label">Upload an audio file:</label>
+           <input
+             type="file"
+             name="file"
+             accept=".wav"
+             required
+             class="file-input"
+           />
+           <button type="submit" class="submit-button">Analyze</button>
+         </form>
+       </div>
+     </div>
+   </body>
+ </html>
templates/result.html ADDED
@@ -0,0 +1,36 @@
+ <!DOCTYPE html>
+ <html lang="en">
+   <head>
+     <meta charset="UTF-8" />
+     <title>Prediction Results</title>
+     <link
+       rel="stylesheet"
+       href="{{ url_for('static', filename='styles.css') }}"
+     />
+   </head>
+   <body class="page">
+     <div class="container">
+       <h1 class="title">Prediction Results</h1>
+       <p class="result-text">Fake Percentage: {{ fake_percentage }}%</p>
+       <p class="result-text">Result Type: {{ result_type }}</p>
+       <p class="result-text">Fake Frame Count: {{ fake_frame_count }}</p>
+       <div class="plot-container">
+         {{ plot_html|safe }}
+         <!-- Embed Plotly plot here -->
+       </div>
+       <div class="intervals-list">
+         <h2>Fake Frame Intervals:</h2>
+         {% if fake_frame_intervals == "No Frame" %}
+         <p>No Frame</p>
+         {% else %}
+         <ul>
+           {% for start, end in fake_frame_intervals %}
+           <li>{{ start }}s - {{ end }}s</li>
+           {% endfor %}
+         </ul>
+         {% endif %}
+       </div>
+       <a href="/" class="return-button">Analyze Another File</a>
+     </div>
+   </body>
+ </html>
uploads/RFP_R_24918.wav ADDED
Binary file (320 kB).