Spaces:

carolanderson
/

us_govt_ai

Running

App Files Files Community

carolanderson commited on Aug 11, 2023

Commit

d40a9ae

1 Parent(s): 2f010ff

replace template with app

Browse files

Files changed (7) hide show

.ipynb_checkpoints/Dockerfile-checkpoint +16 -0
.ipynb_checkpoints/README-checkpoint.md +11 -0
.ipynb_checkpoints/app-checkpoint.py +303 -0
.ipynb_checkpoints/requirements-checkpoint.txt +5 -0
README.md +1 -1
app.py +291 -135
requirements.txt +4 -5

.ipynb_checkpoints/Dockerfile-checkpoint ADDED Viewed

	@@ -0,0 +1,16 @@

+FROM python:3.9
+WORKDIR /code
+COPY ./requirements.txt /code/requirements.txt
+RUN python3 -m pip install --no-cache-dir --upgrade pip
+RUN python3 -m pip install --no-cache-dir --upgrade -r /code/requirements.txt
+COPY . .
+CMD ["panel", "serve", "/code/app.py", "--address", "0.0.0.0", "--port", "7860",  "--allow-websocket-origin", "*"]
+RUN mkdir /.cache
+RUN chmod 777 /.cache
+RUN mkdir .chroma
+RUN chmod 777 .chroma

.ipynb_checkpoints/README-checkpoint.md ADDED Viewed

	@@ -0,0 +1,11 @@

+---
+title: U.S. Government Use of AI
+emoji: 📈
+colorFrom: gray
+colorTo: green
+sdk: docker
+pinned: false
+duplicated_from: Panel-Org/panel-template
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

.ipynb_checkpoints/app-checkpoint.py ADDED Viewed

	@@ -0,0 +1,303 @@

+from haystack import Document
+from haystack.document_stores import InMemoryDocumentStore, ElasticsearchDocumentStore, FAISSDocumentStore
+from haystack.nodes import BM25Retriever
+from haystack.pipelines import DocumentSearchPipeline
+import pandas as pd
+import panel as pn
+import param
+pn.extension('tabulator')
+pn.extension(sizing_mode="scale_both")
+import hvplot.pandas
+# load data
+infile = "/Users/carolanderson/Dropbox/repos/miscellany/webapps/Agency Inventory AI Usage - Sheet1.tsv"
+df = pd.read_csv(infile, sep="\t", lineterminator='\n')
+# rearrange column order
+col_list = ['Agency', 'Name of Inventory Item',
+     'Primary Type of AI',
+       'Purpose of AI', 'Length of Usage',
+       'Does it directly impact the public?',
+        'Vendor  System',
+        'Description of Inventory Item',
+       'Other Notes\r']
+df = df[col_list]
+# remove trailing \r from 'Other Notes' header
+df = df.rename(columns = {'Other Notes\r' : 'Other Notes'})
+# remove trailing spaces from agency names (caused duplicate instance of "DOC")
+df['Agency'] = df['Agency'].apply(lambda x : x.rstrip())
+# columns not useful for filtering
+no_filter_cols = ['Name of Inventory Item', 'Description of Inventory Item', "Other Notes"]
+# columns to be used for filtering
+filter_cols = [c for c in df.columns.unique() if c not in no_filter_cols]
+# column selector for main plot
+plot_column_selector = pn.widgets.Select(options=filter_cols, name="Plot category: ")
+# agency selector for main plot
+plot_agency_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Agency"].unique()),
+                                         value=["Select all"],
+                                         name="Optional - filter by agency")
+# selectors below are all for interactive dataframe
+agency_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Agency"].unique()),
+                                         value=["Select all"],
+                                         name="Agency")
+type_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Primary Type of AI'].unique()),
+                                         value=["Select all"],
+                                         name='Primary Type of AI')
+purpose_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Purpose of AI"].unique()),
+                                         value=["Select all"],
+                                         name="Purpose of AI")
+length_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Length of Usage'].unique()),
+                                         value=["Select all"],
+                                         name="Length of Usage")
+impact_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Does it directly impact the public?'].unique()),
+                                         value=["Select all"],
+                                         name='Does it directly impact the public?')
+vendor_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Vendor  System'].unique()),
+                                         value=["Select all"],
+                                         name='Vendor  System')
+row_filters = [agency_selector, type_selector, purpose_selector, length_selector, impact_selector,
+               vendor_selector]
+def custom_plot(table, column_selector, agency_selector):
+    if "Select all" not in agency_selector:
+        table = table[table['Agency'].isin(agency_selector)]
+    table = table[column_selector].value_counts().sort_values(ascending=True)
+    return table.hvplot.barh(width=600, height=400, color="#336BCC")
+def custom_table_filter(table,
+                        agency_selector,
+                        type_selector,
+                        purpose_selector,
+                        length_selector,
+                        impact_selector,
+                        vendor_selector):
+    """
+    This repetitive approach was the only way I could get things working with a
+    'Select all' menu option.
+    """
+    if "Select all" not in agency_selector:
+        table = table[table["Agency"].isin(agency_selector)]
+    if "Select all" not in type_selector:
+        table = table[table['Primary Type of AI'].isin(type_selector)]
+    if "Select all" not in purpose_selector:
+        table = table[table["Purpose of AI"].isin(purpose_selector)]
+    if "Select all" not in length_selector:
+        table = table[table['Length of Usage'].isin(length_selector)]
+    if "Select all" not in impact_selector:
+        table = table[table['Does it directly impact the public?'].isin(impact_selector)]
+    if "Select all" not in vendor_selector:
+        table = table[table['Vendor  System'].isin(vendor_selector)]
+    return table
+custom_table = pn.widgets.Tabulator(df, pagination="local", page_size=350, layout="fit_data",
+                                   width=800, height=550)
+custom_table.add_filter(pn.bind(custom_table_filter,
+                                agency_selector=agency_selector,
+                                type_selector=type_selector,
+                                purpose_selector=purpose_selector,
+                                length_selector=length_selector,
+                                impact_selector=impact_selector,
+                                vendor_selector=vendor_selector))
+interactive_plot = pn.bind(custom_plot, table=df, column_selector=plot_column_selector,
+                          agency_selector=plot_agency_selector)
+overview_stacked = pn.Column(
+    pn.pane.Markdown("""
+    Plot shows the total count of entries, aggregated by various categories.
+    Change the category with the dropdown menu.
+    The total number of records in the database is 337, but some fields have missing values.
+    In particular, 'Vendor System' and 'Primary Type of AI' were not always filled out."""),
+     pn.Column(pn.Row(plot_column_selector,
+             plot_agency_selector),
+    pn.Row(interactive_plot, width=500))
+)
+overview_card = pn.Card(overview_stacked, header="# Overview of the data")
+filename, button = custom_table.download_menu(
+    text_kwargs={'name': 'Enter filename ending in .csv or .json', 'value': 'default.csv'},
+    button_kwargs={'name': 'Download table'}
+)
+download_card = pn.Card(pn.pane.Markdown("""
+Download current table in .csv or .json format.
+File format will be automatically selected based on the file extension.
+             """),
+                 filename, button, header="### Download")
+table_card = pn.Card(
+             pn.Row(
+                 pn.Column(
+                     pn.pane.Markdown("""
+                     ### Filter with the menus below
+                     """),pn.WidgetBox(*row_filters),
+                     styles=dict(background='#DDE6FF')
+                 ), pn.Column(pn.pane.Markdown("""
+             ### Scroll horizontally and vertically to see all data
+             """), custom_table)),
+            download_card,
+    header="# Explore the data"
+)
+# stacked bar plot of impact by agency (static plot)
+impact_counts = df.groupby('Agency')['Does it directly impact the public?'].value_counts()
+impact_counts = impact_counts.sort_index(level="Agency", ascending=False)
+impact_count_df = pd.DataFrame(impact_counts).rename(columns={'Does it directly impact the public?' : "Count"})
+impact_plot = impact_count_df.hvplot.barh(stacked=True, width=500, height=400, color=[ "#019C6D", "#336BCC", "#F41903",], legend="bottom_right")
+impact_card = pn.Card(
+    pn.Column(
+    pn.pane.Markdown("""
+    Number of systems with no, indirect, or direct impact on the public.
+    These judgements were made by Anna Blue and are unique to her report."""),
+    impact_plot), header="# Impact on the public, by agency")
+# keyword search
+class TableIndices(param.Parameterized):
+    row_indices = param.List()
+    col_indices = param.List()
+    def __call__(self):
+        return (self.row_indices, self.col_indices)
+def run_search(text, pipeline):
+    if text == "":
+        return None
+    res = pipeline.run(query=text, params={"Retriever": {"top_k": 10}})
+    relevant_results = [r for r in res['documents'] if r.score > 0.5]
+    result_rows = [doc.meta['index'] for doc in relevant_results]
+    result_cols = [doc.meta['column_header'] for doc in relevant_results]
+    table_indices = TableIndices(row_indices=result_rows, col_indices=result_cols)
+    return table_indices
+def produce_table(df, table_indices):
+    if not table_indices:
+        return None
+    result_df = df.iloc[table_indices.row_indices, :]
+    result_df = result_df.drop_duplicates()
+    color_df = result_df.copy()
+    color_df.loc[:,:] = ''
+    for row, col in zip(table_indices.row_indices, table_indices.col_indices):
+        color_df.loc[row, col] = 'background-color: yellow'
+    result_tab = pn.widgets.Tabulator(result_df,pagination="local", page_size=350, layout="fit_data",
+                                   width=800, height=300)
+    # cell coloration is working, but does not update properly unless empty search is run in between;
+    # otherwise it re-uses the most recent color scheme; maybe related to https://github.com/holoviz/panel/issues/3363
+    # result_tab.style.apply(lambda x: color_df, axis=None)
+    # giving up for now
+    return result_tab
+def make_search_pane(result_tab):
+    if not result_tab:
+        return None
+    filename_2, button_2 = result_tab.download_menu(
+    text_kwargs={'name': 'Enter filename ending in .csv or .json', 'value': 'default.csv'},
+    button_kwargs={'name': 'Download search results'})
+    search_download_card = pn.Card(pn.pane.Markdown("""
+    Download search results in .csv or .json format.
+    File format will be automatically selected based on the file extension."""),
+                            filename_2, button_2, header="### Download")
+    search_result = pn.Column(pn.pane.Markdown("""
+    ### Scroll horizontally and vertically (if needed) to see everything.
+    """), result_tab, search_download_card)
+    return search_result
+# which columns to search
+col_list = ['Name of Inventory Item',
+     'Primary Type of AI',
+       'Purpose of AI',
+        'Description of Inventory Item',
+       'Other Notes']
+# create document store, where each string from any of the relevant columns is a doc
+# save the row index as metadata
+docs = []
+indices = list(df.index.values)
+for col in col_list:
+    values = df[col].tolist()
+    assert len(indices) == len(values)
+    for i, val in zip(indices, values):
+        dictionary = {'content' : val,
+                     'meta' : {"index": i, "column_header" : col}
+                     }
+        docs.append(Document.from_dict(dictionary))
+document_store = InMemoryDocumentStore(use_bm25=True)
+document_store.write_documents(docs)
+retriever = BM25Retriever(document_store=document_store)
+pipeline = DocumentSearchPipeline(retriever)
+text_input = pn.widgets.TextInput(name='Search', placeholder='Enter text here...')
+result_indices = pn.bind(run_search, text=text_input, pipeline=pipeline)
+result_table = pn.bind(produce_table, df=df, table_indices=result_indices)
+result_pane = pn.bind(make_search_pane, result_tab=result_table)
+search_card = pn.Card(
+    pn.Column(
+        pn.Row(
+            text_input,
+            pn.pane.Markdown("""
+            This will search text in the following columns:
+            * Name of Inventory Item
+            * Primary Type of AI
+            * Purpose of AI
+            * Description of Inventory Item
+            * Other Notes
+            This is a keyword search based on the BM25 algorithm as implemented in the Haystack python library.
+            """)),
+        pn.Row(result_pane),
+     ),
+    header="# Search the text"
+)
+main_text = """
+The data visualized here come from a report by Anna Blue, a Social Impact Fellow
+at the Responsible AI Institute. The report was released in May 2023. Some agencies have
+released updated inventories since then, which are not reflected here.
+Anna's report consolidated data released by individual government agencies in compliance with
+Executive Order 13960, which requires federal agencies to produce an annual inventory of their AI usage.
+See her [blog post](https://www.responsible.ai/post/federal-government-ai-use-cases) for additional details,
+ including links to the original data sources.
+"""
+template = pn.template.FastListTemplate(
+    title='U.S. Government Use of AI',
+    main=[pn.pane.Markdown(main_text),
+        pn.Row(overview_card,impact_card),
+        pn.Row(table_card),
+        pn.Row(search_card)],
+    accent_base_color="#FFDAC2",
+    header_background="#0037A2")
+template.servable()

.ipynb_checkpoints/requirements-checkpoint.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+hvplot
+farm-haystack[inference]
+pandas
+panel
+param

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Panel Template
 emoji: 📈
 colorFrom: gray
 colorTo: green

 ---
+title: U.S. Government Use of AI
 emoji: 📈
 colorFrom: gray
 colorTo: green

app.py CHANGED Viewed

@@ -1,147 +1,303 @@
-import io
-import random
-from typing import List, Tuple
-import aiohttp
 import panel as pn
-from PIL import Image
-from transformers import CLIPModel, CLIPProcessor
-pn.extension(design="bootstrap", sizing_mode="stretch_width")
-ICON_URLS = {
-    "brand-github": "https://github.com/holoviz/panel",
-    "brand-twitter": "https://twitter.com/Panel_Org",
-    "brand-linkedin": "https://www.linkedin.com/company/panel-org",
-    "message-circle": "https://discourse.holoviz.org/",
-    "brand-discord": "https://discord.gg/AXRHnJU6sP",
-}
-async def random_url(_):
-    pet = random.choice(["cat", "dog"])
-    api_url = f"https://api.the{pet}api.com/v1/images/search"
-    async with aiohttp.ClientSession() as session:
-        async with session.get(api_url) as resp:
-            return (await resp.json())[0]["url"]
-@pn.cache
-def load_processor_model(
-    processor_name: str, model_name: str
-) -> Tuple[CLIPProcessor, CLIPModel]:
-    processor = CLIPProcessor.from_pretrained(processor_name)
-    model = CLIPModel.from_pretrained(model_name)
-    return processor, model
-async def open_image_url(image_url: str) -> Image:
-    async with aiohttp.ClientSession() as session:
-        async with session.get(image_url) as resp:
-            return Image.open(io.BytesIO(await resp.read()))
-def get_similarity_scores(class_items: List[str], image: Image) -> List[float]:
-    processor, model = load_processor_model(
-        "openai/clip-vit-base-patch32", "openai/clip-vit-base-patch32"
-    )
-    inputs = processor(
-        text=class_items,
-        images=[image],
-        return_tensors="pt",  # pytorch tensors
-    )
-    outputs = model(**inputs)
-    logits_per_image = outputs.logits_per_image
-    class_likelihoods = logits_per_image.softmax(dim=1).detach().numpy()
-    return class_likelihoods[0]
-async def process_inputs(class_names: List[str], image_url: str):
     """
-    High level function that takes in the user inputs and returns the
-    classification results as panel objects.
     """
-    try:
-        main.disabled = True
-        if not image_url:
-            yield "##### ⚠️ Provide an image URL"
-            return
-        yield "##### ⚙ Fetching image and running model..."
-        try:
-            pil_img = await open_image_url(image_url)
-            img = pn.pane.Image(pil_img, height=400, align="center")
-        except Exception as e:
-            yield f"##### 😔 Something went wrong, please try a different URL!"
-            return
-        class_items = class_names.split(",")
-        class_likelihoods = get_similarity_scores(class_items, pil_img)
-        # build the results column
-        results = pn.Column("##### 🎉 Here are the results!", img)
-        for class_item, class_likelihood in zip(class_items, class_likelihoods):
-            row_label = pn.widgets.StaticText(
-                name=class_item.strip(), value=f"{class_likelihood:.2%}", align="center"
-            )
-            row_bar = pn.indicators.Progress(
-                value=int(class_likelihood * 100),
-                sizing_mode="stretch_width",
-                bar_color="secondary",
-                margin=(0, 10),
-                design=pn.theme.Material,
-            )
-            results.append(pn.Column(row_label, row_bar))
-        yield results
-    finally:
-        main.disabled = False
-# create widgets
-randomize_url = pn.widgets.Button(name="Randomize URL", align="end")
-image_url = pn.widgets.TextInput(
-    name="Image URL to classify",
-    value=pn.bind(random_url, randomize_url),
-)
-class_names = pn.widgets.TextInput(
-    name="Comma separated class names",
-    placeholder="Enter possible class names, e.g. cat, dog",
-    value="cat, dog, parrot",
-)
-input_widgets = pn.Column(
-    "##### 😊 Click randomize or paste a URL to start classifying!",
-    pn.Row(image_url, randomize_url),
-    class_names,
-)
-# add interactivity
-interactive_result = pn.panel(
-    pn.bind(process_inputs, image_url=image_url, class_names=class_names),
-    height=600,
-)
-# add footer
-footer_row = pn.Row(pn.Spacer(), align="center")
-for icon, url in ICON_URLS.items():
-    href_button = pn.widgets.Button(icon=icon, width=35, height=35)
-    href_button.js_on_click(code=f"window.open('{url}')")
-    footer_row.append(href_button)
-footer_row.append(pn.Spacer())
-# create dashboard
-main = pn.WidgetBox(
-    input_widgets,
-    interactive_result,
-    footer_row,
 )
-title = "Panel Demo - Image Classification"
-pn.template.BootstrapTemplate(
-    title=title,
-    main=main,
-    main_max_width="min(50%, 698px)",
-    header_background="#F08080",
-).servable(title=title)

+from haystack import Document
+from haystack.document_stores import InMemoryDocumentStore, ElasticsearchDocumentStore, FAISSDocumentStore
+from haystack.nodes import BM25Retriever
+from haystack.pipelines import DocumentSearchPipeline
+import pandas as pd
 import panel as pn
+import param
+pn.extension('tabulator')
+pn.extension(sizing_mode="scale_both")
+import hvplot.pandas
+# load data
+infile = "/Users/carolanderson/Dropbox/repos/miscellany/webapps/Agency Inventory AI Usage - Sheet1.tsv"
+df = pd.read_csv(infile, sep="\t", lineterminator='\n')
+# rearrange column order
+col_list = ['Agency', 'Name of Inventory Item',
+     'Primary Type of AI',
+       'Purpose of AI', 'Length of Usage',
+       'Does it directly impact the public?',
+        'Vendor  System',
+        'Description of Inventory Item',
+       'Other Notes\r']
+df = df[col_list]
+# remove trailing \r from 'Other Notes' header
+df = df.rename(columns = {'Other Notes\r' : 'Other Notes'})
+# remove trailing spaces from agency names (caused duplicate instance of "DOC")
+df['Agency'] = df['Agency'].apply(lambda x : x.rstrip())
+# columns not useful for filtering
+no_filter_cols = ['Name of Inventory Item', 'Description of Inventory Item', "Other Notes"]
+# columns to be used for filtering
+filter_cols = [c for c in df.columns.unique() if c not in no_filter_cols]
+# column selector for main plot
+plot_column_selector = pn.widgets.Select(options=filter_cols, name="Plot category: ")
+# agency selector for main plot
+plot_agency_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Agency"].unique()),
+                                         value=["Select all"],
+                                         name="Optional - filter by agency")
+# selectors below are all for interactive dataframe
+agency_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Agency"].unique()),
+                                         value=["Select all"],
+                                         name="Agency")
+type_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Primary Type of AI'].unique()),
+                                         value=["Select all"],
+                                         name='Primary Type of AI')
+purpose_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Purpose of AI"].unique()),
+                                         value=["Select all"],
+                                         name="Purpose of AI")
+length_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Length of Usage'].unique()),
+                                         value=["Select all"],
+                                         name="Length of Usage")
+impact_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Does it directly impact the public?'].unique()),
+                                         value=["Select all"],
+                                         name='Does it directly impact the public?')
+vendor_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Vendor  System'].unique()),
+                                         value=["Select all"],
+                                         name='Vendor  System')
+row_filters = [agency_selector, type_selector, purpose_selector, length_selector, impact_selector,
+               vendor_selector]
+def custom_plot(table, column_selector, agency_selector):
+    if "Select all" not in agency_selector:
+        table = table[table['Agency'].isin(agency_selector)]
+    table = table[column_selector].value_counts().sort_values(ascending=True)
+    return table.hvplot.barh(width=600, height=400, color="#336BCC")
+def custom_table_filter(table,
+                        agency_selector,
+                        type_selector,
+                        purpose_selector,
+                        length_selector,
+                        impact_selector,
+                        vendor_selector):
     """
+    This repetitive approach was the only way I could get things working with a
+    'Select all' menu option.
     """
+    if "Select all" not in agency_selector:
+        table = table[table["Agency"].isin(agency_selector)]
+    if "Select all" not in type_selector:
+        table = table[table['Primary Type of AI'].isin(type_selector)]
+    if "Select all" not in purpose_selector:
+        table = table[table["Purpose of AI"].isin(purpose_selector)]
+    if "Select all" not in length_selector:
+        table = table[table['Length of Usage'].isin(length_selector)]
+    if "Select all" not in impact_selector:
+        table = table[table['Does it directly impact the public?'].isin(impact_selector)]
+    if "Select all" not in vendor_selector:
+        table = table[table['Vendor  System'].isin(vendor_selector)]
+    return table
+custom_table = pn.widgets.Tabulator(df, pagination="local", page_size=350, layout="fit_data",
+                                   width=800, height=550)
+custom_table.add_filter(pn.bind(custom_table_filter,
+                                agency_selector=agency_selector,
+                                type_selector=type_selector,
+                                purpose_selector=purpose_selector,
+                                length_selector=length_selector,
+                                impact_selector=impact_selector,
+                                vendor_selector=vendor_selector))
+interactive_plot = pn.bind(custom_plot, table=df, column_selector=plot_column_selector,
+                          agency_selector=plot_agency_selector)
+overview_stacked = pn.Column(
+    pn.pane.Markdown("""
+    Plot shows the total count of entries, aggregated by various categories.
+    Change the category with the dropdown menu.
+    The total number of records in the database is 337, but some fields have missing values.
+    In particular, 'Vendor System' and 'Primary Type of AI' were not always filled out."""),
+     pn.Column(pn.Row(plot_column_selector,
+             plot_agency_selector),
+    pn.Row(interactive_plot, width=500))
+)
+overview_card = pn.Card(overview_stacked, header="# Overview of the data")
+filename, button = custom_table.download_menu(
+    text_kwargs={'name': 'Enter filename ending in .csv or .json', 'value': 'default.csv'},
+    button_kwargs={'name': 'Download table'}
+)
+download_card = pn.Card(pn.pane.Markdown("""
+Download current table in .csv or .json format.
+File format will be automatically selected based on the file extension.
+             """),
+                 filename, button, header="### Download")
+table_card = pn.Card(
+             pn.Row(
+                 pn.Column(
+                     pn.pane.Markdown("""
+                     ### Filter with the menus below
+                     """),pn.WidgetBox(*row_filters),
+                     styles=dict(background='#DDE6FF')
+                 ), pn.Column(pn.pane.Markdown("""
+             ### Scroll horizontally and vertically to see all data
+             """), custom_table)),
+            download_card,
+    header="# Explore the data"
+)
+# stacked bar plot of impact by agency (static plot)
+impact_counts = df.groupby('Agency')['Does it directly impact the public?'].value_counts()
+impact_counts = impact_counts.sort_index(level="Agency", ascending=False)
+impact_count_df = pd.DataFrame(impact_counts).rename(columns={'Does it directly impact the public?' : "Count"})
+impact_plot = impact_count_df.hvplot.barh(stacked=True, width=500, height=400, color=[ "#019C6D", "#336BCC", "#F41903",], legend="bottom_right")
+impact_card = pn.Card(
+    pn.Column(
+    pn.pane.Markdown("""
+    Number of systems with no, indirect, or direct impact on the public.
+    These judgements were made by Anna Blue and are unique to her report."""),
+    impact_plot), header="# Impact on the public, by agency")
+# keyword search
+class TableIndices(param.Parameterized):
+    row_indices = param.List()
+    col_indices = param.List()
+    def __call__(self):
+        return (self.row_indices, self.col_indices)
+def run_search(text, pipeline):
+    if text == "":
+        return None
+    res = pipeline.run(query=text, params={"Retriever": {"top_k": 10}})
+    relevant_results = [r for r in res['documents'] if r.score > 0.5]
+    result_rows = [doc.meta['index'] for doc in relevant_results]
+    result_cols = [doc.meta['column_header'] for doc in relevant_results]
+    table_indices = TableIndices(row_indices=result_rows, col_indices=result_cols)
+    return table_indices
+def produce_table(df, table_indices):
+    if not table_indices:
+        return None
+    result_df = df.iloc[table_indices.row_indices, :]
+    result_df = result_df.drop_duplicates()
+    color_df = result_df.copy()
+    color_df.loc[:,:] = ''
+    for row, col in zip(table_indices.row_indices, table_indices.col_indices):
+        color_df.loc[row, col] = 'background-color: yellow'
+    result_tab = pn.widgets.Tabulator(result_df,pagination="local", page_size=350, layout="fit_data",
+                                   width=800, height=300)
+    # cell coloration is working, but does not update properly unless empty search is run in between;
+    # otherwise it re-uses the most recent color scheme; maybe related to https://github.com/holoviz/panel/issues/3363
+    # result_tab.style.apply(lambda x: color_df, axis=None)
+    # giving up for now
+    return result_tab
+def make_search_pane(result_tab):
+    if not result_tab:
+        return None
+    filename_2, button_2 = result_tab.download_menu(
+    text_kwargs={'name': 'Enter filename ending in .csv or .json', 'value': 'default.csv'},
+    button_kwargs={'name': 'Download search results'})
+    search_download_card = pn.Card(pn.pane.Markdown("""
+    Download search results in .csv or .json format.
+    File format will be automatically selected based on the file extension."""),
+                            filename_2, button_2, header="### Download")
+    search_result = pn.Column(pn.pane.Markdown("""
+    ### Scroll horizontally and vertically (if needed) to see everything.
+    """), result_tab, search_download_card)
+    return search_result
+# which columns to search
+col_list = ['Name of Inventory Item',
+     'Primary Type of AI',
+       'Purpose of AI',
+        'Description of Inventory Item',
+       'Other Notes']
+# create document store, where each string from any of the relevant columns is a doc
+# save the row index as metadata
+docs = []
+indices = list(df.index.values)
+for col in col_list:
+    values = df[col].tolist()
+    assert len(indices) == len(values)
+    for i, val in zip(indices, values):
+        dictionary = {'content' : val,
+                     'meta' : {"index": i, "column_header" : col}
+                     }
+        docs.append(Document.from_dict(dictionary))
+document_store = InMemoryDocumentStore(use_bm25=True)
+document_store.write_documents(docs)
+retriever = BM25Retriever(document_store=document_store)
+pipeline = DocumentSearchPipeline(retriever)
+text_input = pn.widgets.TextInput(name='Search', placeholder='Enter text here...')
+result_indices = pn.bind(run_search, text=text_input, pipeline=pipeline)
+result_table = pn.bind(produce_table, df=df, table_indices=result_indices)
+result_pane = pn.bind(make_search_pane, result_tab=result_table)
+search_card = pn.Card(
+    pn.Column(
+        pn.Row(
+            text_input,
+            pn.pane.Markdown("""
+            This will search text in the following columns:
+            * Name of Inventory Item
+            * Primary Type of AI
+            * Purpose of AI
+            * Description of Inventory Item
+            * Other Notes
+            This is a keyword search based on the BM25 algorithm as implemented in the Haystack python library.
+            """)),
+        pn.Row(result_pane),
+     ),
+    header="# Search the text"
 )
+main_text = """
+The data visualized here come from a report by Anna Blue, a Social Impact Fellow
+at the Responsible AI Institute. The report was released in May 2023. Some agencies have
+released updated inventories since then, which are not reflected here.
+Anna's report consolidated data released by individual government agencies in compliance with
+Executive Order 13960, which requires federal agencies to produce an annual inventory of their AI usage.
+See her [blog post](https://www.responsible.ai/post/federal-government-ai-use-cases) for additional details,
+ including links to the original data sources.
+"""
+template = pn.template.FastListTemplate(
+    title='U.S. Government Use of AI',
+    main=[pn.pane.Markdown(main_text),
+        pn.Row(overview_card,impact_card),
+        pn.Row(table_card),
+        pn.Row(search_card)],
+    accent_base_color="#FFDAC2",
+    header_background="#0037A2")
+template.servable()

requirements.txt CHANGED Viewed

@@ -1,6 +1,5 @@
 panel
-jupyter
-transformers
-numpy
-torch
-aiohttp

+hvplot
+farm-haystack[inference]
+pandas
 panel
+param