|
import html
import io
import time

import pandas as pd
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import HTMLResponse
|
|
|
# Application instance on which this module's route handlers are registered
# via the @app.get / @app.post decorators.
app = FastAPI()
|
|
|
# Static landing page: a single multipart form whose file field is named
# "file" and which posts to /upload/.
_UPLOAD_FORM_HTML = """
    <html>
        <body>
            <form action="/upload/" enctype="multipart/form-data" method="post">
                <input name="file" type="file">
                <input type="submit">
            </form>
        </body>
    </html>
    """


@app.get("/", response_class=HTMLResponse)
async def analyze_logs():
    """Serve the upload form.

    Returns:
        The HTML page containing the file-upload form; FastAPI wraps it in an
        ``HTMLResponse`` because of ``response_class`` on the route.
    """
    return _UPLOAD_FORM_HTML
|
|
|
def _build_report_html(logs_df):
    """Build the HTML report body for a parsed log frame.

    Args:
        logs_df: DataFrame read from the upload. The columns ``datetime``
            (parsed with the format ``%d/%m/%Y:%H:%M:%S``), ``method`` and
            ``client`` are read. The frame is modified in place: ``datetime``
            is converted and ``day``/``hour``/``minute`` columns are added.

    Returns:
        A string with the heading, three summary paragraphs and, for a
        non-empty frame, a styled table of the 1000 most frequent
        ``(method, client)`` pairs.
    """
    summary_template = (
        "<h1>Redundant IP Requests....</h1>"
        "<p>The Total API Requests from the sample logs are : {total_requests}</p>"
        "<p>The Redundant API Requests from the sample logs are : {redundant_requests}</p>"
        "<p>The percentage of Redundant API Requests from the sample logs is : {redundant_percentage:.2f}%</p>"
        "{dataframe_html}"
    )

    total_requests = logs_df.shape[0]
    if total_requests == 0:
        # Nothing to group or style, and the percentage below would divide by
        # zero; report an all-zero summary without a table instead.
        return summary_template.format(
            total_requests=0,
            redundant_requests=0,
            redundant_percentage=0.0,
            dataframe_html="",
        )

    # Vectorised .dt accessors instead of a Python-level lambda per row.
    # These derived columns are not consumed by the report below.
    timestamps = pd.to_datetime(logs_df['datetime'], format='%d/%m/%Y:%H:%M:%S')
    logs_df['datetime'] = timestamps
    logs_df['day'] = timestamps.dt.day
    logs_df['hour'] = timestamps.dt.hour
    logs_df['minute'] = timestamps.dt.minute

    # One row per (method, client) pair, most frequent first.
    pair_counts = (
        logs_df.groupby(['method', 'client'], as_index=False)
        .size()
        .rename(columns={'size': 'count'})
        .sort_values('count', ascending=False)
    )
    pair_counts = pair_counts.assign(
        perc=pair_counts['count'].div(pair_counts['count'].sum()),
        cum_perc=lambda df: df['perc'].cumsum(),
    )

    # Only the top 1000 rows are rendered; the summary figures use all rows.
    top_pairs = pair_counts.head(1000).copy()

    # 'method' and 'client' come straight from the uploaded file and Styler
    # does not escape cell text by default, so escape them before they are
    # embedded in the page.
    for column in ('method', 'client'):
        top_pairs[column] = top_pairs[column].astype(str).map(html.escape)

    styler = (
        top_pairs.style
        .background_gradient(subset=['count', 'perc', 'cum_perc'], cmap='cividis')
        .format({'count': '{:,}', 'perc': '{:.1%}', 'cum_perc': '{:.1%}'})
    )

    # Styler.render() was deprecated in pandas 1.4 and removed in 2.0;
    # to_html() is its replacement. Fall back only on versions without it.
    if hasattr(styler, 'to_html'):
        table_html = styler.to_html()
    else:
        table_html = styler.render()

    # NOTE(review): the figure labelled "Redundant API Requests" is the number
    # of distinct (method, client) pairs, not a count of repeated requests --
    # confirm this is the intended definition.
    distinct_pairs = pair_counts.shape[0]

    return summary_template.format(
        total_requests=total_requests,
        redundant_requests=distinct_pairs,
        redundant_percentage=(distinct_pairs / total_requests) * 100,
        dataframe_html=table_html,
    )


@app.post("/upload/")
async def upload_file(file: UploadFile = File(...)):
    """Accept an uploaded Parquet log file and return the request report.

    The upload is read fully into memory, parsed with ``pd.read_parquet`` and
    summarised by ``_build_report_html``. The handler contains no blocking
    sleep, so the event loop is not stalled for other requests.

    Args:
        file: The multipart upload from the form field named ``file``.

    Returns:
        An ``HTMLResponse`` whose body is the generated report.
    """
    contents = await file.read()
    logs_df = pd.read_parquet(io.BytesIO(contents))
    return HTMLResponse(content=_build_report_html(logs_df))
|
|