import datetime
import os

import leafmap.foliumap as leafmap
import pandas as pd
import plotly.express as px
import scholarpy
import streamlit as st

# Evaluated once at import time; bounds and default of the year slider use it.
current_year = datetime.datetime.now().year


def _get_dsl():
    """Return the ``scholarpy.Dsl`` client of the current session.

    The client is created and stored under ``st.session_state["dsl"]`` the
    first time it is requested in a session, and reused afterwards.
    """
    if "dsl" not in st.session_state:
        st.session_state["dsl"] = scholarpy.Dsl()
    return st.session_state["dsl"]


# Preserve the original import-time initialisation for the importing session.
_get_dsl()


def app():
    """Render the "Search Publications" page.

    Lays out the search controls (keyword, exact-match flag, scope, year
    range, result limit). When a keyword is entered, it queries the client
    via ``search_pubs_by_keyword`` and shows:

    * the result table, optionally filtered to a single journal, with a
      download button;
    * a per-journal publication count table with a download button;
    * a box plot of ``country_count`` per ``year``.
    """
    st.title("Search Publications")

    # Session state is per browser session, while module-level code only runs
    # when the module is first imported. Resolve the client lazily so sessions
    # that did not trigger the import do not fail with a KeyError.
    dsl = _get_dsl()

    column_widths = [1, 0.7, 1, 1, 1]
    row1_col1, row1_col2, row1_col3, row1_col4, row1_col5 = st.columns(
        column_widths
    )
    row2_col1, row2_col2, row2_col3, row2_col4, row2_col5 = st.columns(
        column_widths
    )

    with row1_col1:
        keywords = st.text_input("Enter a keyword to search for")

    with row1_col2:
        exact_match = st.checkbox("Exact match", True)

    with row1_col3:
        scope = st.selectbox(
            "Select a search scope",
            [
                "authors",
                "concepts",
                "full_data",
                "full_data_exact",
                "title_abstract_only",
                "title_only",
            ],
            index=5,  # default to "title_only"
        )

    with row1_col4:
        years = st.slider(
            "Select the start and end year:",
            1950,
            current_year + 6,
            (1980, current_year),
        )

    with row1_col5:
        limit = st.slider(
            "Select the number of publications to return", 1, 1000, 100
        )

    # Nothing to search for until the user types a keyword.
    if not keywords:
        return

    result = dsl.search_pubs_by_keyword(
        keywords,
        exact_match,
        scope,
        start_year=years[0],
        end_year=years[1],
        limit=limit,
    )

    df = scholarpy.json_to_df(result)

    # Attach the array of unique affiliation countries of each publication.
    affiliations = result.as_dataframe_authors_affiliations()
    country_df = affiliations.groupby(["pub_id"])["aff_country"].unique()
    df = df.merge(country_df, left_on="id", right_on="pub_id")

    # Count only truthy country entries, so empty strings are not counted.
    df["country_count"] = [c[c.astype(bool)].size for c in df["aff_country"]]

    # Computed before any journal filter so the summary covers all results.
    journal_counts = df["journal.title"].value_counts()

    # Never report more publications than the search matched in total.
    limit = min(limit, result.count_total)
    markdown = f""" Returned Publications: {limit} (total = {result.count_total}) """

    with row2_col1:
        st.markdown(markdown)

    with row2_col2:
        filter_by_journal = st.checkbox("Filter by journal")

    if filter_by_journal:
        df["journal.title"] = df["journal.title"].astype(str)
        journals = df["journal.title"].unique()
        journals.sort()
        with row2_col3:
            journal = st.selectbox("Select a journal", journals)
            df = df[df["journal.title"] == journal]

    # Empty writes into the remaining columns of the second row.
    with row2_col4:
        st.write("")
    with row2_col5:
        st.write("")

    if df is not None:
        st.dataframe(df)
        leafmap.st_download_button("Download data", df, csv_sep="\t")

        summary = pd.DataFrame(
            {"Journal": journal_counts.index, "Count": journal_counts}
        ).reset_index(drop=True)
        markdown = f""" - Total number of journals: **{len(summary)}** """

        row3_col1, row3_col2 = st.columns([1, 1])

        with row3_col1:
            st.markdown(markdown)
            st.dataframe(summary)
            leafmap.st_download_button("Download data", summary, csv_sep="\t")

        with row3_col2:
            fig = px.box(
                df, x="year", y="country_count", title="Country Counts"
            )
            st.plotly_chart(fig)