Spaces:
Sleeping
Sleeping
carolanderson
committed on
Commit
•
d40a9ae
1
Parent(s):
2f010ff
replace template with app
Browse files- .ipynb_checkpoints/Dockerfile-checkpoint +16 -0
- .ipynb_checkpoints/README-checkpoint.md +11 -0
- .ipynb_checkpoints/app-checkpoint.py +303 -0
- .ipynb_checkpoints/requirements-checkpoint.txt +5 -0
- README.md +1 -1
- app.py +291 -135
- requirements.txt +4 -5
.ipynb_checkpoints/Dockerfile-checkpoint
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.9
|
2 |
+
|
3 |
+
WORKDIR /code
|
4 |
+
|
5 |
+
COPY ./requirements.txt /code/requirements.txt
|
6 |
+
RUN python3 -m pip install --no-cache-dir --upgrade pip
|
7 |
+
RUN python3 -m pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
8 |
+
|
9 |
+
COPY . .
|
10 |
+
|
11 |
+
CMD ["panel", "serve", "/code/app.py", "--address", "0.0.0.0", "--port", "7860", "--allow-websocket-origin", "*"]
|
12 |
+
|
13 |
+
RUN mkdir /.cache
|
14 |
+
RUN chmod 777 /.cache
|
15 |
+
RUN mkdir .chroma
|
16 |
+
RUN chmod 777 .chroma
|
.ipynb_checkpoints/README-checkpoint.md
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: U.S. Government Use of AI
|
3 |
+
emoji: π
|
4 |
+
colorFrom: gray
|
5 |
+
colorTo: green
|
6 |
+
sdk: docker
|
7 |
+
pinned: false
|
8 |
+
duplicated_from: Panel-Org/panel-template
|
9 |
+
---
|
10 |
+
|
11 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
.ipynb_checkpoints/app-checkpoint.py
ADDED
@@ -0,0 +1,303 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from haystack import Document
|
2 |
+
from haystack.document_stores import InMemoryDocumentStore, ElasticsearchDocumentStore, FAISSDocumentStore
|
3 |
+
from haystack.nodes import BM25Retriever
|
4 |
+
from haystack.pipelines import DocumentSearchPipeline
|
5 |
+
import pandas as pd
|
6 |
+
import panel as pn
|
7 |
+
import param
|
8 |
+
|
9 |
+
pn.extension('tabulator')
|
10 |
+
pn.extension(sizing_mode="scale_both")
|
11 |
+
|
12 |
+
import hvplot.pandas
|
13 |
+
|
14 |
+
# load data
from pathlib import Path

# Prefer a copy of the TSV stored next to this script so the app can run on
# machines other than the original author's (e.g. inside the Docker image).
# Fall back to the original absolute path when no bundled copy is present.
# NOTE(review): the TSV itself must be added to the repository for the
# bundled path to resolve.
_app_dir = Path(__file__).resolve().parent if "__file__" in globals() else Path.cwd()
_bundled_infile = _app_dir / "Agency Inventory AI Usage - Sheet1.tsv"
if _bundled_infile.exists():
    infile = str(_bundled_infile)
else:
    infile = "/Users/carolanderson/Dropbox/repos/miscellany/webapps/Agency Inventory AI Usage - Sheet1.tsv"
df = pd.read_csv(infile, sep="\t", lineterminator='\n')

# Because rows are split on '\n' only, a file with '\r\n' line endings keeps a
# trailing '\r' on the last header AND on every value of the last column.
# Strip both (this is a no-op for a file that already uses plain '\n').
df.columns = df.columns.str.rstrip('\r')
_last_col = df.columns[-1]
if df[_last_col].dtype == object:
    df[_last_col] = df[_last_col].str.rstrip('\r')

# rearrange column order
col_list = ['Agency', 'Name of Inventory Item',
            'Primary Type of AI',
            'Purpose of AI', 'Length of Usage',
            'Does it directly impact the public?',
            'Vendor System',
            'Description of Inventory Item',
            'Other Notes']
df = df[col_list]

# remove trailing spaces from agency names (caused duplicate instance of "DOC");
# the .str accessor leaves missing values untouched instead of raising
df['Agency'] = df['Agency'].str.rstrip()

# columns not useful for filtering
no_filter_cols = ['Name of Inventory Item', 'Description of Inventory Item', "Other Notes"]

# columns to be used for filtering
filter_cols = [c for c in df.columns.unique() if c not in no_filter_cols]
|
39 |
+
|
40 |
+
# column selector for main plot
|
41 |
+
plot_column_selector = pn.widgets.Select(options=filter_cols, name="Plot category: ")
|
42 |
+
|
43 |
+
# agency selector for main plot
|
44 |
+
plot_agency_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Agency"].unique()),
|
45 |
+
value=["Select all"],
|
46 |
+
name="Optional - filter by agency")
|
47 |
+
|
48 |
+
# selectors below are all for interactive dataframe
|
49 |
+
agency_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Agency"].unique()),
|
50 |
+
value=["Select all"],
|
51 |
+
name="Agency")
|
52 |
+
type_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Primary Type of AI'].unique()),
|
53 |
+
value=["Select all"],
|
54 |
+
name='Primary Type of AI')
|
55 |
+
purpose_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Purpose of AI"].unique()),
|
56 |
+
value=["Select all"],
|
57 |
+
name="Purpose of AI")
|
58 |
+
length_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Length of Usage'].unique()),
|
59 |
+
value=["Select all"],
|
60 |
+
name="Length of Usage")
|
61 |
+
impact_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Does it directly impact the public?'].unique()),
|
62 |
+
value=["Select all"],
|
63 |
+
name='Does it directly impact the public?')
|
64 |
+
vendor_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Vendor System'].unique()),
|
65 |
+
value=["Select all"],
|
66 |
+
name='Vendor System')
|
67 |
+
|
68 |
+
row_filters = [agency_selector, type_selector, purpose_selector, length_selector, impact_selector,
|
69 |
+
vendor_selector]
|
70 |
+
|
71 |
+
|
72 |
+
def custom_plot(table, column_selector, agency_selector):
    """Build a horizontal bar chart of value counts for one column.

    Args:
        table: data to plot; it is indexed with ``table['Agency']`` and
            ``table[column_selector]``, so a pandas DataFrame is expected.
        column_selector: name of the column whose values are counted.
        agency_selector: collection of agency names to keep. When it
            contains ``"Select all"`` no agency filtering is applied.

    Returns:
        The object produced by ``hvplot.barh`` for the counts, sorted in
        ascending order of count.
    """
    keep_all_agencies = "Select all" in agency_selector
    if not keep_all_agencies:
        agency_mask = table['Agency'].isin(agency_selector)
        table = table[agency_mask]
    counts = table[column_selector].value_counts()
    ordered_counts = counts.sort_values(ascending=True)
    return ordered_counts.hvplot.barh(width=600, height=400, color="#336BCC")
|
77 |
+
|
78 |
+
|
79 |
+
def custom_table_filter(table,
                        agency_selector,
                        type_selector,
                        purpose_selector,
                        length_selector,
                        impact_selector,
                        vendor_selector):
    """Filter ``table`` by the values chosen in each selector.

    Each selector is a collection of allowed values for one column. A
    selector that contains ``"Select all"`` leaves its column unfiltered;
    otherwise only rows whose value is in the selector are kept. The
    filters are applied one after another, so they combine with AND.

    Args:
        table: DataFrame containing the six filterable columns.
        agency_selector: allowed values for ``"Agency"``.
        type_selector: allowed values for ``'Primary Type of AI'``.
        purpose_selector: allowed values for ``"Purpose of AI"``.
        length_selector: allowed values for ``'Length of Usage'``.
        impact_selector: allowed values for
            ``'Does it directly impact the public?'``.
        vendor_selector: allowed values for ``'Vendor System'``.

    Returns:
        The filtered DataFrame (the input object itself when every selector
        contains ``"Select all"``).
    """
    # One (column, chosen values) pair per filter, applied in this order.
    column_choices = (
        ("Agency", agency_selector),
        ('Primary Type of AI', type_selector),
        ("Purpose of AI", purpose_selector),
        ('Length of Usage', length_selector),
        ('Does it directly impact the public?', impact_selector),
        ('Vendor System', vendor_selector),
    )
    for column, chosen in column_choices:
        if "Select all" in chosen:
            continue
        table = table[table[column].isin(chosen)]
    return table
|
103 |
+
|
104 |
+
|
105 |
+
custom_table = pn.widgets.Tabulator(df, pagination="local", page_size=350, layout="fit_data",
|
106 |
+
width=800, height=550)
|
107 |
+
|
108 |
+
custom_table.add_filter(pn.bind(custom_table_filter,
|
109 |
+
agency_selector=agency_selector,
|
110 |
+
type_selector=type_selector,
|
111 |
+
purpose_selector=purpose_selector,
|
112 |
+
length_selector=length_selector,
|
113 |
+
impact_selector=impact_selector,
|
114 |
+
vendor_selector=vendor_selector))
|
115 |
+
|
116 |
+
|
117 |
+
interactive_plot = pn.bind(custom_plot, table=df, column_selector=plot_column_selector,
|
118 |
+
agency_selector=plot_agency_selector)
|
119 |
+
|
120 |
+
overview_stacked = pn.Column(
|
121 |
+
pn.pane.Markdown("""
|
122 |
+
Plot shows the total count of entries, aggregated by various categories.
|
123 |
+
Change the category with the dropdown menu.
|
124 |
+
The total number of records in the database is 337, but some fields have missing values.
|
125 |
+
In particular, 'Vendor System' and 'Primary Type of AI' were not always filled out."""),
|
126 |
+
pn.Column(pn.Row(plot_column_selector,
|
127 |
+
plot_agency_selector),
|
128 |
+
pn.Row(interactive_plot, width=500))
|
129 |
+
|
130 |
+
)
|
131 |
+
|
132 |
+
overview_card = pn.Card(overview_stacked, header="# Overview of the data")
|
133 |
+
|
134 |
+
filename, button = custom_table.download_menu(
|
135 |
+
text_kwargs={'name': 'Enter filename ending in .csv or .json', 'value': 'default.csv'},
|
136 |
+
button_kwargs={'name': 'Download table'}
|
137 |
+
)
|
138 |
+
|
139 |
+
download_card = pn.Card(pn.pane.Markdown("""
|
140 |
+
Download current table in .csv or .json format.
|
141 |
+
File format will be automatically selected based on the file extension.
|
142 |
+
"""),
|
143 |
+
filename, button, header="### Download")
|
144 |
+
|
145 |
+
table_card = pn.Card(
|
146 |
+
pn.Row(
|
147 |
+
pn.Column(
|
148 |
+
pn.pane.Markdown("""
|
149 |
+
### Filter with the menus below
|
150 |
+
"""),pn.WidgetBox(*row_filters),
|
151 |
+
styles=dict(background='#DDE6FF')
|
152 |
+
), pn.Column(pn.pane.Markdown("""
|
153 |
+
### Scroll horizontally and vertically to see all data
|
154 |
+
"""), custom_table)),
|
155 |
+
download_card,
|
156 |
+
header="# Explore the data"
|
157 |
+
)
|
158 |
+
|
159 |
+
# stacked bar plot of impact by agency (static plot)
|
160 |
+
impact_counts = df.groupby('Agency')['Does it directly impact the public?'].value_counts()
|
161 |
+
impact_counts = impact_counts.sort_index(level="Agency", ascending=False)
|
162 |
+
impact_count_df = pd.DataFrame(impact_counts).rename(columns={'Does it directly impact the public?' : "Count"})
|
163 |
+
impact_plot = impact_count_df.hvplot.barh(stacked=True, width=500, height=400, color=[ "#019C6D", "#336BCC", "#F41903",], legend="bottom_right")
|
164 |
+
|
165 |
+
impact_card = pn.Card(
|
166 |
+
pn.Column(
|
167 |
+
pn.pane.Markdown("""
|
168 |
+
Number of systems with no, indirect, or direct impact on the public.
|
169 |
+
These judgements were made by Anna Blue and are unique to her report."""),
|
170 |
+
impact_plot), header="# Impact on the public, by agency")
|
171 |
+
|
172 |
+
# keyword search
|
173 |
+
class TableIndices(param.Parameterized):
    """Pair of parallel lists describing where search hits were found."""

    # Row identifiers of the hits, one entry per hit.
    row_indices = param.List()
    # Column headers of the hits, aligned with ``row_indices``.
    col_indices = param.List()

    def __call__(self):
        """Return ``(row_indices, col_indices)`` as a tuple."""
        rows, cols = self.row_indices, self.col_indices
        return (rows, cols)
|
179 |
+
|
180 |
+
|
181 |
+
def run_search(text, pipeline, top_k=10, min_score=0.5):
    """Run a keyword query and collect the table positions of the hits.

    Args:
        text: query string typed by the user. ``None``, an empty string or
            a whitespace-only string means "no search".
        pipeline: object with a ``run(query=..., params=...)`` method whose
            result has a ``'documents'`` list; each document exposes
            ``score`` and a ``meta`` dict with ``'index'`` and
            ``'column_header'`` keys.
        top_k: maximum number of documents requested from the retriever.
        min_score: only documents scoring strictly above this are kept.

    Returns:
        ``None`` when there is no query, otherwise a ``TableIndices`` whose
        ``row_indices`` / ``col_indices`` list the row and column of each
        retained hit, in the order the pipeline returned them.
    """
    # Blank input would otherwise be sent to the retriever as a real query.
    if text is None or not text.strip():
        return None
    res = pipeline.run(query=text, params={"Retriever": {"top_k": top_k}})
    # A missing score cannot be compared with a number; treat it as "not relevant".
    relevant_results = [
        doc for doc in res['documents']
        if doc.score is not None and doc.score > min_score
    ]
    return TableIndices(
        row_indices=[doc.meta['index'] for doc in relevant_results],
        col_indices=[doc.meta['column_header'] for doc in relevant_results],
    )
|
190 |
+
|
191 |
+
|
192 |
+
def produce_table(df, table_indices):
    """Build a Tabulator widget showing the rows that matched a search.

    Args:
        df: the full DataFrame that was indexed for searching.
        table_indices: object with a ``row_indices`` list of row labels of
            ``df`` (and a parallel ``col_indices`` list), or ``None`` when
            no search was run.

    Returns:
        ``None`` when ``table_indices`` is ``None``; otherwise a
        ``pn.widgets.Tabulator`` over the matching rows, each row shown
        once, in the order the hits were reported.
    """
    if table_indices is None:
        return None

    # A row can be hit in several columns; keep the first occurrence of each
    # row label. De-duplicating by label (rather than by cell content) means
    # two distinct records that happen to be identical are both kept.
    row_labels = list(dict.fromkeys(table_indices.row_indices))

    # The stored values are index labels of df, so look them up by label.
    result_df = df.loc[row_labels, :]

    # NOTE: per-cell highlighting of the matching column is intentionally not
    # applied. Styling the Tabulator did not refresh between consecutive
    # searches (it re-used the previous colour scheme unless an empty search
    # was run in between); possibly related to
    # https://github.com/holoviz/panel/issues/3363
    return pn.widgets.Tabulator(result_df, pagination="local", page_size=350,
                                layout="fit_data", width=800, height=300)
|
213 |
+
|
214 |
+
|
215 |
+
def make_search_pane(result_tab):
    """Wrap a search-result table with a heading and download controls.

    Args:
        result_tab: table widget providing ``download_menu``; a falsy value
            (e.g. ``None``) means there is nothing to show.

    Returns:
        ``None`` when ``result_tab`` is falsy, otherwise a ``pn.Column``
        holding a heading, the table and a download card.
    """
    if not result_tab:
        return None

    # download_menu hands back the filename text box and the download button.
    download_widgets = result_tab.download_menu(
        text_kwargs={'name': 'Enter filename ending in .csv or .json', 'value': 'default.csv'},
        button_kwargs={'name': 'Download search results'})
    filename_2, button_2 = download_widgets

    download_help = pn.pane.Markdown("""
    Download search results in .csv or .json format.
    File format will be automatically selected based on the file extension.""")
    search_download_card = pn.Card(download_help, filename_2, button_2, header="### Download")

    scroll_hint = pn.pane.Markdown("""
    ### Scroll horizontally and vertically (if needed) to see everything.
    """)
    return pn.Column(scroll_hint, result_tab, search_download_card)
|
229 |
+
|
230 |
+
# which columns to search
|
231 |
+
col_list = ['Name of Inventory Item',
|
232 |
+
'Primary Type of AI',
|
233 |
+
'Purpose of AI',
|
234 |
+
'Description of Inventory Item',
|
235 |
+
'Other Notes']
|
236 |
+
|
237 |
+
# create document store, where each string from any of the relevant columns is a doc
|
238 |
+
# save the row index as metadata
|
239 |
+
# One Document per non-empty cell of the searched columns. The row label and
# the column header are stored as metadata so a hit can be traced back to
# its position in the table.
docs = []
for col in col_list:
    # Series.items() yields (row label, cell value) pairs, so the labels and
    # the values cannot get out of step.
    for i, val in df[col].items():
        # Missing cells carry no searchable text; skip them rather than
        # indexing a placeholder value.
        if pd.isna(val):
            continue
        content = str(val)
        if not content.strip():
            continue
        docs.append(Document.from_dict({'content': content,
                                        'meta': {"index": i, "column_header": col}}))
|
249 |
+
|
250 |
+
|
251 |
+
document_store = InMemoryDocumentStore(use_bm25=True)
|
252 |
+
document_store.write_documents(docs)
|
253 |
+
retriever = BM25Retriever(document_store=document_store)
|
254 |
+
pipeline = DocumentSearchPipeline(retriever)
|
255 |
+
text_input = pn.widgets.TextInput(name='Search', placeholder='Enter text here...')
|
256 |
+
|
257 |
+
result_indices = pn.bind(run_search, text=text_input, pipeline=pipeline)
|
258 |
+
result_table = pn.bind(produce_table, df=df, table_indices=result_indices)
|
259 |
+
result_pane = pn.bind(make_search_pane, result_tab=result_table)
|
260 |
+
|
261 |
+
search_card = pn.Card(
|
262 |
+
pn.Column(
|
263 |
+
pn.Row(
|
264 |
+
text_input,
|
265 |
+
pn.pane.Markdown("""
|
266 |
+
This will search text in the following columns:
|
267 |
+
* Name of Inventory Item
|
268 |
+
* Primary Type of AI
|
269 |
+
* Purpose of AI
|
270 |
+
* Description of Inventory Item
|
271 |
+
* Other Notes
|
272 |
+
|
273 |
+
This is a keyword search based on the BM25 algorithm as implemented in the Haystack python library.
|
274 |
+
""")),
|
275 |
+
pn.Row(result_pane),
|
276 |
+
),
|
277 |
+
header="# Search the text"
|
278 |
+
)
|
279 |
+
|
280 |
+
main_text = """
|
281 |
+
The data visualized here come from a report by Anna Blue, a Social Impact Fellow
|
282 |
+
at the Responsible AI Institute. The report was released in May 2023. Some agencies have
|
283 |
+
released updated inventories since then, which are not reflected here.
|
284 |
+
|
285 |
+
Anna's report consolidated data released by individual government agencies in compliance with
|
286 |
+
Executive Order 13960, which requires federal agencies to produce an annual inventory of their AI usage.
|
287 |
+
See her [blog post](https://www.responsible.ai/post/federal-government-ai-use-cases) for additional details,
|
288 |
+
including links to the original data sources.
|
289 |
+
"""
|
290 |
+
|
291 |
+
|
292 |
+
|
293 |
+
template = pn.template.FastListTemplate(
|
294 |
+
title='U.S. Government Use of AI',
|
295 |
+
main=[pn.pane.Markdown(main_text),
|
296 |
+
pn.Row(overview_card,impact_card),
|
297 |
+
pn.Row(table_card),
|
298 |
+
pn.Row(search_card)],
|
299 |
+
accent_base_color="#FFDAC2",
|
300 |
+
header_background="#0037A2")
|
301 |
+
|
302 |
+
template.servable()
|
303 |
+
|
.ipynb_checkpoints/requirements-checkpoint.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
hvplot
|
2 |
+
farm-haystack[inference]
|
3 |
+
pandas
|
4 |
+
panel
|
5 |
+
param
|
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
emoji: π
|
4 |
colorFrom: gray
|
5 |
colorTo: green
|
|
|
1 |
---
|
2 |
+
title: U.S. Government Use of AI
|
3 |
emoji: π
|
4 |
colorFrom: gray
|
5 |
colorTo: green
|
app.py
CHANGED
@@ -1,147 +1,303 @@
|
|
1 |
-
import
|
2 |
-
import
|
3 |
-
from
|
4 |
-
|
5 |
-
import
|
6 |
import panel as pn
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
pn.extension(
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
"""
|
61 |
-
|
62 |
-
|
63 |
"""
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
-
|
79 |
-
|
80 |
|
81 |
-
|
82 |
-
|
|
|
|
|
83 |
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
margin=(0, 10),
|
93 |
-
design=pn.theme.Material,
|
94 |
-
)
|
95 |
-
results.append(pn.Column(row_label, row_bar))
|
96 |
-
yield results
|
97 |
-
finally:
|
98 |
-
main.disabled = False
|
99 |
-
|
100 |
-
|
101 |
-
# create widgets
|
102 |
-
randomize_url = pn.widgets.Button(name="Randomize URL", align="end")
|
103 |
-
|
104 |
-
image_url = pn.widgets.TextInput(
|
105 |
-
name="Image URL to classify",
|
106 |
-
value=pn.bind(random_url, randomize_url),
|
107 |
-
)
|
108 |
-
class_names = pn.widgets.TextInput(
|
109 |
-
name="Comma separated class names",
|
110 |
-
placeholder="Enter possible class names, e.g. cat, dog",
|
111 |
-
value="cat, dog, parrot",
|
112 |
-
)
|
113 |
|
114 |
-
input_widgets = pn.Column(
|
115 |
-
"##### π Click randomize or paste a URL to start classifying!",
|
116 |
-
pn.Row(image_url, randomize_url),
|
117 |
-
class_names,
|
118 |
-
)
|
119 |
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
|
|
|
|
|
|
|
|
139 |
)
|
140 |
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from haystack import Document
|
2 |
+
from haystack.document_stores import InMemoryDocumentStore, ElasticsearchDocumentStore, FAISSDocumentStore
|
3 |
+
from haystack.nodes import BM25Retriever
|
4 |
+
from haystack.pipelines import DocumentSearchPipeline
|
5 |
+
import pandas as pd
|
6 |
import panel as pn
|
7 |
+
import param
|
8 |
+
|
9 |
+
pn.extension('tabulator')
|
10 |
+
pn.extension(sizing_mode="scale_both")
|
11 |
+
|
12 |
+
import hvplot.pandas
|
13 |
+
|
14 |
+
# load data
from pathlib import Path

# Prefer a copy of the TSV stored next to this script so the app can run on
# machines other than the original author's (e.g. inside the Docker image).
# Fall back to the original absolute path when no bundled copy is present.
# NOTE(review): the TSV itself must be added to the repository for the
# bundled path to resolve.
_app_dir = Path(__file__).resolve().parent if "__file__" in globals() else Path.cwd()
_bundled_infile = _app_dir / "Agency Inventory AI Usage - Sheet1.tsv"
if _bundled_infile.exists():
    infile = str(_bundled_infile)
else:
    infile = "/Users/carolanderson/Dropbox/repos/miscellany/webapps/Agency Inventory AI Usage - Sheet1.tsv"
df = pd.read_csv(infile, sep="\t", lineterminator='\n')

# Because rows are split on '\n' only, a file with '\r\n' line endings keeps a
# trailing '\r' on the last header AND on every value of the last column.
# Strip both (this is a no-op for a file that already uses plain '\n').
df.columns = df.columns.str.rstrip('\r')
_last_col = df.columns[-1]
if df[_last_col].dtype == object:
    df[_last_col] = df[_last_col].str.rstrip('\r')

# rearrange column order
col_list = ['Agency', 'Name of Inventory Item',
            'Primary Type of AI',
            'Purpose of AI', 'Length of Usage',
            'Does it directly impact the public?',
            'Vendor System',
            'Description of Inventory Item',
            'Other Notes']
df = df[col_list]

# remove trailing spaces from agency names (caused duplicate instance of "DOC");
# the .str accessor leaves missing values untouched instead of raising
df['Agency'] = df['Agency'].str.rstrip()

# columns not useful for filtering
no_filter_cols = ['Name of Inventory Item', 'Description of Inventory Item', "Other Notes"]

# columns to be used for filtering
filter_cols = [c for c in df.columns.unique() if c not in no_filter_cols]
|
39 |
+
|
40 |
+
# column selector for main plot
|
41 |
+
plot_column_selector = pn.widgets.Select(options=filter_cols, name="Plot category: ")
|
42 |
+
|
43 |
+
# agency selector for main plot
|
44 |
+
plot_agency_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Agency"].unique()),
|
45 |
+
value=["Select all"],
|
46 |
+
name="Optional - filter by agency")
|
47 |
+
|
48 |
+
# selectors below are all for interactive dataframe
|
49 |
+
agency_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Agency"].unique()),
|
50 |
+
value=["Select all"],
|
51 |
+
name="Agency")
|
52 |
+
type_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Primary Type of AI'].unique()),
|
53 |
+
value=["Select all"],
|
54 |
+
name='Primary Type of AI')
|
55 |
+
purpose_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Purpose of AI"].unique()),
|
56 |
+
value=["Select all"],
|
57 |
+
name="Purpose of AI")
|
58 |
+
length_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Length of Usage'].unique()),
|
59 |
+
value=["Select all"],
|
60 |
+
name="Length of Usage")
|
61 |
+
impact_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Does it directly impact the public?'].unique()),
|
62 |
+
value=["Select all"],
|
63 |
+
name='Does it directly impact the public?')
|
64 |
+
vendor_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Vendor System'].unique()),
|
65 |
+
value=["Select all"],
|
66 |
+
name='Vendor System')
|
67 |
+
|
68 |
+
row_filters = [agency_selector, type_selector, purpose_selector, length_selector, impact_selector,
|
69 |
+
vendor_selector]
|
70 |
+
|
71 |
+
|
72 |
+
def custom_plot(table, column_selector, agency_selector):
    """Build a horizontal bar chart of value counts for one column.

    Args:
        table: data to plot; it is indexed with ``table['Agency']`` and
            ``table[column_selector]``, so a pandas DataFrame is expected.
        column_selector: name of the column whose values are counted.
        agency_selector: collection of agency names to keep. When it
            contains ``"Select all"`` no agency filtering is applied.

    Returns:
        The object produced by ``hvplot.barh`` for the counts, sorted in
        ascending order of count.
    """
    keep_all_agencies = "Select all" in agency_selector
    if not keep_all_agencies:
        agency_mask = table['Agency'].isin(agency_selector)
        table = table[agency_mask]
    counts = table[column_selector].value_counts()
    ordered_counts = counts.sort_values(ascending=True)
    return ordered_counts.hvplot.barh(width=600, height=400, color="#336BCC")
|
77 |
+
|
78 |
+
|
79 |
+
def custom_table_filter(table,
                        agency_selector,
                        type_selector,
                        purpose_selector,
                        length_selector,
                        impact_selector,
                        vendor_selector):
    """Filter ``table`` by the values chosen in each selector.

    Each selector is a collection of allowed values for one column. A
    selector that contains ``"Select all"`` leaves its column unfiltered;
    otherwise only rows whose value is in the selector are kept. The
    filters are applied one after another, so they combine with AND.

    Args:
        table: DataFrame containing the six filterable columns.
        agency_selector: allowed values for ``"Agency"``.
        type_selector: allowed values for ``'Primary Type of AI'``.
        purpose_selector: allowed values for ``"Purpose of AI"``.
        length_selector: allowed values for ``'Length of Usage'``.
        impact_selector: allowed values for
            ``'Does it directly impact the public?'``.
        vendor_selector: allowed values for ``'Vendor System'``.

    Returns:
        The filtered DataFrame (the input object itself when every selector
        contains ``"Select all"``).
    """
    # One (column, chosen values) pair per filter, applied in this order.
    column_choices = (
        ("Agency", agency_selector),
        ('Primary Type of AI', type_selector),
        ("Purpose of AI", purpose_selector),
        ('Length of Usage', length_selector),
        ('Does it directly impact the public?', impact_selector),
        ('Vendor System', vendor_selector),
    )
    for column, chosen in column_choices:
        if "Select all" in chosen:
            continue
        table = table[table[column].isin(chosen)]
    return table
|
103 |
+
|
104 |
+
|
105 |
+
custom_table = pn.widgets.Tabulator(df, pagination="local", page_size=350, layout="fit_data",
|
106 |
+
width=800, height=550)
|
107 |
+
|
108 |
+
custom_table.add_filter(pn.bind(custom_table_filter,
|
109 |
+
agency_selector=agency_selector,
|
110 |
+
type_selector=type_selector,
|
111 |
+
purpose_selector=purpose_selector,
|
112 |
+
length_selector=length_selector,
|
113 |
+
impact_selector=impact_selector,
|
114 |
+
vendor_selector=vendor_selector))
|
115 |
+
|
116 |
+
|
117 |
+
interactive_plot = pn.bind(custom_plot, table=df, column_selector=plot_column_selector,
|
118 |
+
agency_selector=plot_agency_selector)
|
119 |
+
|
120 |
+
overview_stacked = pn.Column(
|
121 |
+
pn.pane.Markdown("""
|
122 |
+
Plot shows the total count of entries, aggregated by various categories.
|
123 |
+
Change the category with the dropdown menu.
|
124 |
+
The total number of records in the database is 337, but some fields have missing values.
|
125 |
+
In particular, 'Vendor System' and 'Primary Type of AI' were not always filled out."""),
|
126 |
+
pn.Column(pn.Row(plot_column_selector,
|
127 |
+
plot_agency_selector),
|
128 |
+
pn.Row(interactive_plot, width=500))
|
129 |
+
|
130 |
+
)
|
131 |
+
|
132 |
+
overview_card = pn.Card(overview_stacked, header="# Overview of the data")
|
133 |
+
|
134 |
+
filename, button = custom_table.download_menu(
|
135 |
+
text_kwargs={'name': 'Enter filename ending in .csv or .json', 'value': 'default.csv'},
|
136 |
+
button_kwargs={'name': 'Download table'}
|
137 |
+
)
|
138 |
+
|
139 |
+
download_card = pn.Card(pn.pane.Markdown("""
|
140 |
+
Download current table in .csv or .json format.
|
141 |
+
File format will be automatically selected based on the file extension.
|
142 |
+
"""),
|
143 |
+
filename, button, header="### Download")
|
144 |
+
|
145 |
+
table_card = pn.Card(
|
146 |
+
pn.Row(
|
147 |
+
pn.Column(
|
148 |
+
pn.pane.Markdown("""
|
149 |
+
### Filter with the menus below
|
150 |
+
"""),pn.WidgetBox(*row_filters),
|
151 |
+
styles=dict(background='#DDE6FF')
|
152 |
+
), pn.Column(pn.pane.Markdown("""
|
153 |
+
### Scroll horizontally and vertically to see all data
|
154 |
+
"""), custom_table)),
|
155 |
+
download_card,
|
156 |
+
header="# Explore the data"
|
157 |
+
)
|
158 |
+
|
159 |
+
# stacked bar plot of impact by agency (static plot)
|
160 |
+
impact_counts = df.groupby('Agency')['Does it directly impact the public?'].value_counts()
|
161 |
+
impact_counts = impact_counts.sort_index(level="Agency", ascending=False)
|
162 |
+
impact_count_df = pd.DataFrame(impact_counts).rename(columns={'Does it directly impact the public?' : "Count"})
|
163 |
+
impact_plot = impact_count_df.hvplot.barh(stacked=True, width=500, height=400, color=[ "#019C6D", "#336BCC", "#F41903",], legend="bottom_right")
|
164 |
+
|
165 |
+
impact_card = pn.Card(
|
166 |
+
pn.Column(
|
167 |
+
pn.pane.Markdown("""
|
168 |
+
Number of systems with no, indirect, or direct impact on the public.
|
169 |
+
These judgements were made by Anna Blue and are unique to her report."""),
|
170 |
+
impact_plot), header="# Impact on the public, by agency")
|
171 |
+
|
172 |
+
# keyword search
|
173 |
+
class TableIndices(param.Parameterized):
    """Pair of parallel lists describing where search hits were found."""

    # Row identifiers of the hits, one entry per hit.
    row_indices = param.List()
    # Column headers of the hits, aligned with ``row_indices``.
    col_indices = param.List()

    def __call__(self):
        """Return ``(row_indices, col_indices)`` as a tuple."""
        rows, cols = self.row_indices, self.col_indices
        return (rows, cols)
|
179 |
|
180 |
+
|
181 |
+
def run_search(text, pipeline, top_k=10, min_score=0.5):
    """Run a keyword query and collect the table positions of the hits.

    Args:
        text: query string typed by the user. ``None``, an empty string or
            a whitespace-only string means "no search".
        pipeline: object with a ``run(query=..., params=...)`` method whose
            result has a ``'documents'`` list; each document exposes
            ``score`` and a ``meta`` dict with ``'index'`` and
            ``'column_header'`` keys.
        top_k: maximum number of documents requested from the retriever.
        min_score: only documents scoring strictly above this are kept.

    Returns:
        ``None`` when there is no query, otherwise a ``TableIndices`` whose
        ``row_indices`` / ``col_indices`` list the row and column of each
        retained hit, in the order the pipeline returned them.
    """
    # Blank input would otherwise be sent to the retriever as a real query.
    if text is None or not text.strip():
        return None
    res = pipeline.run(query=text, params={"Retriever": {"top_k": top_k}})
    # A missing score cannot be compared with a number; treat it as "not relevant".
    relevant_results = [
        doc for doc in res['documents']
        if doc.score is not None and doc.score > min_score
    ]
    return TableIndices(
        row_indices=[doc.meta['index'] for doc in relevant_results],
        col_indices=[doc.meta['column_header'] for doc in relevant_results],
    )
|
190 |
+
|
191 |
+
|
192 |
+
def produce_table(df, table_indices):
    """Build a Tabulator widget showing the rows that matched a search.

    Args:
        df: the full DataFrame that was indexed for searching.
        table_indices: object with a ``row_indices`` list of row labels of
            ``df`` (and a parallel ``col_indices`` list), or ``None`` when
            no search was run.

    Returns:
        ``None`` when ``table_indices`` is ``None``; otherwise a
        ``pn.widgets.Tabulator`` over the matching rows, each row shown
        once, in the order the hits were reported.
    """
    if table_indices is None:
        return None

    # A row can be hit in several columns; keep the first occurrence of each
    # row label. De-duplicating by label (rather than by cell content) means
    # two distinct records that happen to be identical are both kept.
    row_labels = list(dict.fromkeys(table_indices.row_indices))

    # The stored values are index labels of df, so look them up by label.
    result_df = df.loc[row_labels, :]

    # NOTE: per-cell highlighting of the matching column is intentionally not
    # applied. Styling the Tabulator did not refresh between consecutive
    # searches (it re-used the previous colour scheme unless an empty search
    # was run in between); possibly related to
    # https://github.com/holoviz/panel/issues/3363
    return pn.widgets.Tabulator(result_df, pagination="local", page_size=350,
                                layout="fit_data", width=800, height=300)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
213 |
|
|
|
|
|
|
|
|
|
|
|
214 |
|
215 |
+
def make_search_pane(result_tab):
    """Wrap a search-result table with a heading and download controls.

    Args:
        result_tab: table widget providing ``download_menu``; a falsy value
            (e.g. ``None``) means there is nothing to show.

    Returns:
        ``None`` when ``result_tab`` is falsy, otherwise a ``pn.Column``
        holding a heading, the table and a download card.
    """
    if not result_tab:
        return None

    # download_menu hands back the filename text box and the download button.
    download_widgets = result_tab.download_menu(
        text_kwargs={'name': 'Enter filename ending in .csv or .json', 'value': 'default.csv'},
        button_kwargs={'name': 'Download search results'})
    filename_2, button_2 = download_widgets

    download_help = pn.pane.Markdown("""
    Download search results in .csv or .json format.
    File format will be automatically selected based on the file extension.""")
    search_download_card = pn.Card(download_help, filename_2, button_2, header="### Download")

    scroll_hint = pn.pane.Markdown("""
    ### Scroll horizontally and vertically (if needed) to see everything.
    """)
    return pn.Column(scroll_hint, result_tab, search_download_card)
|
229 |
+
|
230 |
+
# which columns to search
|
231 |
+
col_list = ['Name of Inventory Item',
|
232 |
+
'Primary Type of AI',
|
233 |
+
'Purpose of AI',
|
234 |
+
'Description of Inventory Item',
|
235 |
+
'Other Notes']
|
236 |
+
|
237 |
+
# create document store, where each string from any of the relevant columns is a doc
|
238 |
+
# save the row index as metadata
|
239 |
+
# One Document per non-empty cell of the searched columns. The row label and
# the column header are stored as metadata so a hit can be traced back to
# its position in the table.
docs = []
for col in col_list:
    # Series.items() yields (row label, cell value) pairs, so the labels and
    # the values cannot get out of step.
    for i, val in df[col].items():
        # Missing cells carry no searchable text; skip them rather than
        # indexing a placeholder value.
        if pd.isna(val):
            continue
        content = str(val)
        if not content.strip():
            continue
        docs.append(Document.from_dict({'content': content,
                                        'meta': {"index": i, "column_header": col}}))
|
249 |
+
|
250 |
+
|
251 |
+
document_store = InMemoryDocumentStore(use_bm25=True)
|
252 |
+
document_store.write_documents(docs)
|
253 |
+
retriever = BM25Retriever(document_store=document_store)
|
254 |
+
pipeline = DocumentSearchPipeline(retriever)
|
255 |
+
text_input = pn.widgets.TextInput(name='Search', placeholder='Enter text here...')
|
256 |
+
|
257 |
+
result_indices = pn.bind(run_search, text=text_input, pipeline=pipeline)
|
258 |
+
result_table = pn.bind(produce_table, df=df, table_indices=result_indices)
|
259 |
+
result_pane = pn.bind(make_search_pane, result_tab=result_table)
|
260 |
|
261 |
+
search_card = pn.Card(
|
262 |
+
pn.Column(
|
263 |
+
pn.Row(
|
264 |
+
text_input,
|
265 |
+
pn.pane.Markdown("""
|
266 |
+
This will search text in the following columns:
|
267 |
+
* Name of Inventory Item
|
268 |
+
* Primary Type of AI
|
269 |
+
* Purpose of AI
|
270 |
+
* Description of Inventory Item
|
271 |
+
* Other Notes
|
272 |
+
|
273 |
+
This is a keyword search based on the BM25 algorithm as implemented in the Haystack python library.
|
274 |
+
""")),
|
275 |
+
pn.Row(result_pane),
|
276 |
+
),
|
277 |
+
header="# Search the text"
|
278 |
)
|
279 |
|
280 |
+
main_text = """
|
281 |
+
The data visualized here come from a report by Anna Blue, a Social Impact Fellow
|
282 |
+
at the Responsible AI Institute. The report was released in May 2023. Some agencies have
|
283 |
+
released updated inventories since then, which are not reflected here.
|
284 |
+
|
285 |
+
Anna's report consolidated data released by individual government agencies in compliance with
|
286 |
+
Executive Order 13960, which requires federal agencies to produce an annual inventory of their AI usage.
|
287 |
+
See her [blog post](https://www.responsible.ai/post/federal-government-ai-use-cases) for additional details,
|
288 |
+
including links to the original data sources.
|
289 |
+
"""
|
290 |
+
|
291 |
+
|
292 |
+
|
293 |
+
template = pn.template.FastListTemplate(
|
294 |
+
title='U.S. Government Use of AI',
|
295 |
+
main=[pn.pane.Markdown(main_text),
|
296 |
+
pn.Row(overview_card,impact_card),
|
297 |
+
pn.Row(table_card),
|
298 |
+
pn.Row(search_card)],
|
299 |
+
accent_base_color="#FFDAC2",
|
300 |
+
header_background="#0037A2")
|
301 |
+
|
302 |
+
template.servable()
|
303 |
+
|
requirements.txt
CHANGED
@@ -1,6 +1,5 @@
|
|
|
|
|
|
|
|
1 |
panel
|
2 |
-
|
3 |
-
transformers
|
4 |
-
numpy
|
5 |
-
torch
|
6 |
-
aiohttp
|
|
|
1 |
+
hvplot
|
2 |
+
farm-haystack[inference]
|
3 |
+
pandas
|
4 |
panel
|
5 |
+
param
|
|
|
|
|
|
|
|