teven commited on
Commit
a9cc2b2
·
1 Parent(s): 47f5af7

plotting perplexity vs text length with text hover

Browse files
Files changed (1) hide show
  1. app.py +37 -21
app.py CHANGED
@@ -1,32 +1,48 @@
1
  import dash
2
- from dash.exceptions import PreventUpdate
3
  from dash import dcc, html
4
  from dash.dependencies import Input, Output
5
- import plotly.express as px
6
- import pandas as pd
7
 
8
  # Create dash app
9
  app = dash.Dash(__name__)
10
 
11
- # Set dog and cat images
12
- dogImage = "https://www.iconexperience.com/_img/v_collection_png/256x256/shadow/dog.png"
13
- catImage = "https://d2ph5fj80uercy.cloudfront.net/06/cat3602.jpg"
14
-
15
- # Generate dataframe
16
- df = pd.DataFrame(
17
- dict(
18
- x=[1, 2],
19
- y=[2, 4],
20
- images=[dogImage, catImage],
21
- )
22
- )
23
 
24
- # Create scatter plot with x and y coordinates
25
- fig = px.scatter(df, x="x", y="y", custom_data=["images"])
 
 
 
 
 
 
26
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  # Update layout and update traces
28
  fig.update_layout(clickmode='event+select')
29
- fig.update_traces(marker_size=20)
 
 
 
 
 
 
 
 
 
 
30
 
31
  # Create app layout to show dash graph
32
  app.layout = html.Div(
@@ -35,14 +51,14 @@ app.layout = html.Div(
35
  id="graph_interaction",
36
  figure=fig,
37
  ),
38
- html.Img(id='image', src='')
39
  ]
40
  )
41
 
42
 
43
  # html callback function to hover the data on specific coordinates
44
  @app.callback(
45
- Output('image', 'src'),
46
  Input('graph_interaction', 'hoverData'))
47
  def open_url(hoverData):
48
  if hoverData:
@@ -52,4 +68,4 @@ def open_url(hoverData):
52
 
53
 
54
  if __name__ == '__main__':
55
- app.run_server(port=7860, host="0.0.0.0", debug=True, )
 
1
  import dash
2
+ import plotly.express as px
3
  from dash import dcc, html
4
  from dash.dependencies import Input, Output
5
+ from dash.exceptions import PreventUpdate
6
+ from datasets import load_dataset
7
 
8
  # Create dash app
9
  app = dash.Dash(__name__)
10
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ def get_dataset(name, n_items=1000):
13
+ ola_path = f"ola13/small-{name}-dedup"
14
+ dataset = load_dataset(ola_path, split="train").shuffle().select(range(n_items)).to_pandas()
15
+ dataset["text_length"] = dataset.apply(lambda doc: len(doc["text"]), axis=1)
16
+
17
+ for column in dataset.columns:
18
+ if column not in ["text", "perplexity", "text_length"]:
19
+ dataset = dataset.drop(column, axis=1)
20
 
21
+ dataset = dataset.sort_values("perplexity")
22
+
23
+ max_perp = dataset["perplexity"].max()
24
+ return dataset, max_perp
25
+
26
+
27
+ # names = ["oscar", "the_pile", "c4", "roots_en"]
28
+ name = "oscar"
29
+ df, max_perplexity = get_dataset(name)
30
+
31
+ # Create scatter plot with x and y coordinates
32
+ fig = px.scatter(df, x="perplexity", y="text_length", custom_data=["text"])
33
  # Update layout and update traces
34
  fig.update_layout(clickmode='event+select')
35
+ fig.update_traces(marker_size=3)
36
+ fig.update_xaxes(title_text="Perplexity (log scale)", type="log")
37
+ fig.update_yaxes(title_text="Text Length (log scale)", type="log")
38
+
39
+ styles = {
40
+ 'textbox': {
41
+ 'border': 'thin lightgrey solid',
42
+ 'overflowX': 'scroll',
43
+ "whiteSpace": "pre-wrap;"
44
+ }
45
+ }
46
 
47
  # Create app layout to show dash graph
48
  app.layout = html.Div(
 
51
  id="graph_interaction",
52
  figure=fig,
53
  ),
54
+ html.Div(id='text', style=styles['textbox'])
55
  ]
56
  )
57
 
58
 
59
  # html callback function to hover the data on specific coordinates
60
  @app.callback(
61
+ Output('text', 'children'),
62
  Input('graph_interaction', 'hoverData'))
63
  def open_url(hoverData):
64
  if hoverData:
 
68
 
69
 
70
  if __name__ == '__main__':
71
+ app.run_server(port=7860, host="0.0.0.0", debug=True)