carbonnnnn committed on
Commit
925ac7f
·
1 Parent(s): 005a748

fetch prices

Browse files
app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ import asyncio # Ensure asyncio is imported
4
+
5
+ from src.pricing import get_pricing_df
6
+
7
+
8
# Fetch the pricing table once at import time; the result is the static
# content of the table rendered below.
text_leaderboard = get_pricing_df()

llm_calc_app = gr.Blocks()
with llm_calc_app:

    # Read-only table showing the fetched pricing data.
    leaderboard_table = gr.Dataframe(
        value=text_leaderboard,
        elem_id="text-leaderboard-table",
        interactive=False,
        visible=True,
        height=800,
    )
    llm_calc_app.load()

# Blocks.launch() is an ordinary synchronous call that returns a tuple, not an
# awaitable. Awaiting its result inside an asyncio event loop raised
# "TypeError: object tuple can't be used in 'await' expression" as soon as
# launch() returned, so the app is queued and launched directly instead.
llm_calc_app.queue()
llm_calc_app.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ beautifulsoup4==4.12.3
2
+ pandas==2.2.3
3
+ selenium==4.25.0
src/__pycache__/newprice.cpython-310.pyc ADDED
Binary file (1.41 kB). View file
 
src/__pycache__/pricing.cpython-310.pyc ADDED
Binary file (1.36 kB). View file
 
src/pricing.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from pyppeteer import launch
3
+ from bs4 import BeautifulSoup
4
+ import pandas as pd # Add this import for DataFrame functionality
5
+
6
async def main():
    """Scrape the pricing table from https://llm-price.com/ into a DataFrame.

    Launches a headless browser, waits for the element matching
    ``#tableBody`` to appear, and parses the first ``<table>`` of the
    rendered page.

    Returns:
        A ``pandas.DataFrame`` with the columns 'Model Name',
        '1 M Input Tokens', '1 M Output Tokens', 'Source' and
        'Updated Time', each holding the stripped cell text, or ``None``
        (after printing "Table not found.") when the page contains no table.
    """
    # Launch the browser in headless mode
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        await page.goto('https://llm-price.com/')
        await page.waitForSelector('#tableBody')
        content = await page.content()
    finally:
        # Close the browser on every path (success, missing table, or an
        # error while loading the page) so the browser process is not leaked.
        await browser.close()

    # Find the table in the HTML
    soup = BeautifulSoup(content, 'html.parser')
    table = soup.find('table')
    if not table:
        print("Table not found.")
        return None

    # Output column name -> index of the source <td>. Index 1 (provider) is
    # skipped on purpose: the website only shows .svg logos there, so the
    # source column (index 4) is used instead.
    column_index = {
        'Model Name': 0,
        '1 M Input Tokens': 2,
        '1 M Output Tokens': 3,
        'Source': 4,
        'Updated Time': 5,
    }
    required_cells = max(column_index.values()) + 1

    records = []
    for row in table.find_all('tr')[1:]:  # Skip the header row
        cells = row.find_all('td')
        # Rows with fewer cells than the highest index read (spacers,
        # sub-headers, empty rows) would raise IndexError; skip them.
        if len(cells) < required_cells:
            continue
        records.append(
            {name: cells[idx].text.strip() for name, idx in column_index.items()}
        )

    # Passing the column list keeps the expected columns even with no rows.
    return pd.DataFrame(records, columns=list(column_index))
57
+
58
+ # Run the main function
59
def get_pricing_df():
    """Run the async scraper ``main`` to completion and return its result.

    Synchronous entry point for callers that are not running inside an
    event loop.

    Returns:
        Whatever ``main()`` returns: the pricing DataFrame, or ``None`` when
        ``main()`` does not find a table.
    """
    # asyncio.get_event_loop() raises RuntimeError when the thread has no
    # current loop (e.g. after an earlier asyncio.run(), or on newer Python
    # versions); fall back to a fresh loop in that case.
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = None
    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    # The loop is deliberately left open, as before, so anything main() left
    # bound to it can still be cleaned up later.
    return loop.run_until_complete(main())