File size: 2,428 Bytes
b2a551b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12e1f42
 
 
 
 
 
 
 
 
b2a551b
 
 
 
 
 
 
 
 
 
12e1f42
b2a551b
 
12e1f42
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import asyncio
import io
import time

import pandas as pd
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.responses import HTMLResponse

app = FastAPI()

@app.get("/", response_class=HTMLResponse)
async def analyze_logs():
    """Serve the landing page: a bare HTML form that posts one file to /upload/."""
    upload_form_page = """
    <html>
        <body>
            <form action="/upload/" enctype="multipart/form-data" method="post">
                <input name="file" type="file">
                <input type="submit">
            </form>
        </body>
    </html>
    """
    return upload_form_page

def _summarize_requests_by_client(logs_df):
    """Count requests per (method, client) pair, largest first.

    Returns a DataFrame with the columns ``method``, ``client``, ``count``,
    ``perc`` (share of all requests) and ``cum_perc`` (running total of
    ``perc`` in descending-count order).
    """
    counts_df = (
        logs_df.groupby(['method', 'client'], as_index=False)
        .size()
        .rename(columns={'size': 'count'})
        .sort_values('count', ascending=False)
    )
    return counts_df.assign(
        perc=counts_df['count'].div(counts_df['count'].sum()),
        cum_perc=lambda df: df['perc'].cumsum(),
    )


def _render_counts_table(counts_df, max_rows=1000):
    """Render the first ``max_rows`` rows of ``counts_df`` as a styled HTML table."""
    styler = (
        counts_df.head(max_rows)
        .style.background_gradient(subset=['count', 'perc', 'cum_perc'], cmap='cividis')
        # escape="html": method/client values come from the uploaded file and
        # must not be able to inject markup into the response page.
        .format({'count': '{:,}', 'perc': '{:.1%}', 'cum_perc': '{:.1%}'}, escape="html")
    )
    # Styler.render() was removed in pandas 2.0; to_html() is its replacement.
    return styler.to_html()


@app.post("/upload/")
async def upload_file(file: UploadFile = File(...)):
    """Analyse an uploaded Parquet log file and return an HTML report.

    The file must contain the columns ``datetime`` (parsed with the format
    ``%d/%m/%Y:%H:%M:%S``), ``method`` and ``client``. The report shows the
    total number of rows, the number of distinct (method, client) pairs, their
    ratio, and a styled table of the per-pair request counts.

    Raises:
        HTTPException: status 400 when the upload cannot be read as Parquet,
            lacks a required column, contains no rows, or has timestamps
            that do not match the expected format.
    """
    contents = await file.read()
    try:
        logs_df = pd.read_parquet(io.BytesIO(contents))
    except (ValueError, OSError) as err:
        # NOTE(review): pyarrow's ArrowInvalid derives from ValueError; other
        # Parquet engines may raise different types - confirm for the engine in use.
        raise HTTPException(
            status_code=400,
            detail="Uploaded file could not be read as Parquet.",
        ) from err

    missing_columns = sorted({'datetime', 'method', 'client'} - set(logs_df.columns))
    if missing_columns:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is missing required column(s): " + ", ".join(missing_columns),
        )

    total_requests = logs_df.shape[0]
    if total_requests == 0:
        # Without this guard the percentage below would divide by zero.
        raise HTTPException(status_code=400, detail="Uploaded file contains no log rows.")

    # Simulate processing time (3 seconds) without blocking the event loop,
    # so other requests are still served while this one waits.
    await asyncio.sleep(3)

    try:
        logs_df['datetime'] = pd.to_datetime(logs_df['datetime'], format='%d/%m/%Y:%H:%M:%S')
    except (ValueError, TypeError) as err:
        raise HTTPException(
            status_code=400,
            detail="Column 'datetime' does not match the format %d/%m/%Y:%H:%M:%S.",
        ) from err

    # Derived time parts; not used by the report below.
    timestamp_parts = logs_df['datetime'].dt
    logs_df['day'] = timestamp_parts.day
    logs_df['hour'] = timestamp_parts.hour
    logs_df['minute'] = timestamp_parts.minute

    ip_address_count_df = _summarize_requests_by_client(logs_df)
    styled_html = _render_counts_table(ip_address_count_df)

    # NOTE(review): "redundant requests" is reported as the number of distinct
    # (method, client) pairs, not the number of repeated requests - confirm
    # that this is the intended metric.
    redundant_requests = ip_address_count_df.shape[0]

    result = (
        "<h1>Redundant IP Requests....</h1>"
        "<p>The Total API Requests from the sample logs are : {total_requests}</p>"
        "<p>The Redundant API Requests from the sample logs are : {redundant_requests}</p>"
        "<p>The percentage of Redundant API Requests from the sample logs is : {redundant_percentage:.2f}%</p>"
        "{dataframe_html}"
    ).format(
        total_requests=total_requests,
        redundant_requests=redundant_requests,
        redundant_percentage=(redundant_requests / total_requests) * 100,
        dataframe_html=styled_html,
    )

    return HTMLResponse(content=result)