File size: 3,147 Bytes
68e6513
 
fbdc657
 
68e6513
 
1580227
68e6513
fbdc657
 
 
 
1580227
fbdc657
 
529d871
fbdc657
68e6513
 
1580227
68e6513
 
 
fbdc657
 
 
3433b65
 
 
 
1580227
 
 
 
 
 
 
 
 
3433b65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1580227
3433b65
 
 
 
 
 
fbdc657
3433b65
 
fbdc657
 
3433b65
 
fbdc657
 
 
 
 
 
3433b65
 
68e6513
 
529d871
68e6513
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Utility functions for filtering the dataframe

import pandas as pd

def filter_cols(df):
    """Return ``df`` reduced to the fixed set of result columns, in a fixed order.

    Args:
        df: DataFrame that contains at least the nine columns listed below.

    Returns:
        A DataFrame holding only those columns, ordered as listed. Any other
        columns present in ``df`` are left out.
    """
    # Column order here is the column order of the returned frame.
    selected_columns = [
        'Model Name',
        'Input $/1M',
        'Output $/1M',
        'Average Clemscore',
        'Context Size (k)',
        'Average Latency (s)',
        'Parameter Size (B)',
        'Release Date',
        'License',
    ]
    return df[selected_columns]


def filter(df, language_list, parameters, input_price, output_price, multimodal,
           context, open_weight, start, end, license):
    """Apply every selected filter to ``df`` and return the result columns.

    Filters are applied in sequence; each one is skipped once the frame has
    become empty. The input frame is not modified.

    Args:
        df: DataFrame with the columns read below ('Languages', 'Open Weight',
            'Parameter Size (B)', 'Input $/1M', 'Output $/1M', the four
            'Multimodality ...' columns, 'Context Size (k)', 'License Name',
            'Temp Date') plus the columns required by ``filter_cols``.
        language_list: Languages that must ALL be contained in a row's
            'Languages' value.
        parameters: ``(min, max)`` bounds on 'Parameter Size (B)'.
        input_price: ``(min, max)`` inclusive bounds on 'Input $/1M'.
        output_price: ``(min, max)`` inclusive bounds on 'Output $/1M'.
        multimodal: Container of modality labels ("Image", "Multi-Image",
            "Audio", "Video"); each label present requires the matching
            'Multimodality ...' column to be True.
        context: ``(min, max)`` inclusive bounds on 'Context Size (k)'.
        open_weight: Container that may hold "Open" and/or "Commercial".
            Only one of them present restricts 'Open Weight' to True / False;
            both or neither applies no restriction.
        start: Earliest accepted date (anything ``pd.to_datetime`` parses).
        end: Latest accepted date (anything ``pd.to_datetime`` parses).
        license: License names; a row is kept if ANY of them is contained in
            its 'License Name' value.

    Returns:
        The filtered DataFrame, reduced to the columns chosen by
        ``filter_cols``.
    """
    if not df.empty:
        df = df[df['Languages'].apply(lambda x: all(lang in x for lang in language_list))]

    if not df.empty:
        lower_bound, upper_bound = parameters[0], parameters[1]
        sizes = df['Parameter Size (B)']
        open_sizes = sizes[df['Open Weight'] == True]
        # With no open-weight rows there is no reference maximum; previously
        # this path crashed on an unassigned variable. Fall back to honouring
        # both requested bounds.
        largest_open = open_sizes.max() if not open_sizes.empty else None

        if largest_open is None or upper_bound < largest_open:
            df = df[(sizes >= lower_bound) & (sizes <= upper_bound)]
        elif upper_bound >= largest_open:
            # Upper bound reaches the largest open-weight model: treat it as
            # "no upper limit" and apply only the lower bound.
            df = df[sizes >= lower_bound]
        # If largest_open is NaN neither comparison holds and no parameter
        # filter is applied (same as before).

    if not df.empty:
        df = df[(df['Input $/1M'] >= input_price[0]) & (df['Input $/1M'] <= input_price[1])]

    if not df.empty:
        df = df[(df['Output $/1M'] >= output_price[0]) & (df['Output $/1M'] <= output_price[1])]

    if not df.empty:
        modality_columns = (
            ("Image", 'Multimodality Image'),
            ("Multi-Image", 'Multimodality Multiple Image'),
            ("Audio", 'Multimodality Audio'),
            ("Video", 'Multimodality Video'),
        )
        # Every requested modality must be supported (filters are ANDed).
        for label, column in modality_columns:
            if label in multimodal:
                df = df[df[column] == True]

    if not df.empty:
        df = df[(df['Context Size (k)'] >= context[0]) & (df['Context Size (k)'] <= context[1])]

    if not df.empty:
        wants_open = "Open" in open_weight
        wants_commercial = "Commercial" in open_weight
        if wants_open and not wants_commercial:
            df = df[df['Open Weight'] == True]
        elif wants_commercial and not wants_open:
            df = df[df['Open Weight'] == False]

    if not df.empty:
        df = df[df['License Name'].apply(lambda x: any(lic in x for lic in license))]

    # Normalise both bounds to UTC timestamps. Naive inputs are interpreted
    # as UTC, which matches the previous epoch-seconds conversion.
    start_ts = pd.to_datetime(start, utc=True)
    end_ts = pd.to_datetime(end, utc=True)

    if not df.empty:
        # Compare datetimes directly instead of casting to integer epoch
        # seconds: this is independent of the column's datetime resolution
        # and of the platform's int width, and it avoids writing a column
        # onto a filtered slice.
        # NOTE(review): 'Temp Date' is presumably a parseable copy of the
        # release date prepared by the caller; confirm.
        dates = pd.to_datetime(df['Temp Date'], utc=True)
        df = df[(dates >= start_ts) & (dates <= end_ts)]

    return filter_cols(df)