# (Hosting-page status banner "Spaces: Sleeping" was captured with this file; it is not part of the program.)
# Copyright 2023 by Jan Philip Wahle, https://jpwahle.com/ | |
# All rights reserved. | |
import os

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib.figure import Figure
from scipy.stats import gaussian_kde
dirname = os.path.dirname(__file__)

# Load the field-diversity CSV (shipped next to this module) into a DataFrame.
papers_df = pd.read_csv(
    os.path.join(dirname, "data/nlp_papers_field_diversity.csv")
)

# Mean of the "incoming_diversity" column over all rows of the CSV.
mean_cfdi = papers_df["incoming_diversity"].mean()

# Citation-age values, one float per line of the text file.
mean_citation_ages = []
with open(
    os.path.join(dirname, "data/nlp_papers_citation_age.txt"),
    "r",
    encoding="utf-8",
) as filehandle:
    for line in filehandle:
        # strip() rather than line[:-1]: slicing removes a real digit from the
        # final line when the file does not end with a newline.
        value = line.strip()
        # Skip blank lines (e.g. a trailing empty line) instead of crashing
        # in float("").
        if value:
            mean_citation_ages.append(float(value))
def generate_cfdi_plot(input_cfdi, compute_type="paper"):
    """Plot where a CFDI value falls within the corpus CFDI distribution.

    A Gaussian kernel density estimate of the strictly positive
    ``incoming_diversity`` values in ``papers_df`` is drawn as a filled curve.
    The mean of those values and ``input_cfdi`` are each marked with a dashed
    vertical line and a text label.

    Args:
        input_cfdi: CFDI value to highlight on the curve.
        compute_type: Noun used in the highlight label ("This <compute_type>").

    Returns:
        The ``matplotlib.figure.Figure`` holding the plot. The figure is not
        registered with pyplot, so the caller does not need to close it.
    """
    sns.set(font_scale=1.3, style="whitegrid")

    # Only strictly positive diversity values enter the density estimate.
    positive = papers_df["incoming_diversity"] > 0
    data = papers_df.loc[positive, "incoming_diversity"]

    # Estimate the density with scipy and sample it on a regular grid that
    # spans the observed range.
    kde = gaussian_kde(data)
    x_vals = np.linspace(data.min(), data.max(), 1000)
    y_vals = kde.evaluate(x_vals)

    # Build the figure directly instead of through plt.subplots(): pyplot keeps
    # a reference to every figure it creates until plt.close() is called, so a
    # long-running process would otherwise accumulate one figure per call.
    fig = Figure()
    ax = fig.subplots()

    ax.fill_between(x_vals, y_vals, color="skyblue", alpha=0.3)
    ax.plot(x_vals, y_vals, color="skyblue", linewidth=2, label="Distribution")

    # Height of the sampled curve at the input value. np.interp clamps, so an
    # input outside the sampled range takes the height of the nearest endpoint.
    interpolated_y_cfdi = np.interp(input_cfdi, x_vals, y_vals)
    ax.scatter(
        input_cfdi,
        interpolated_y_cfdi,
        c="r",
        marker="*",
        linewidths=2,
        zorder=2,
        s=32,
    )
    ax.vlines(
        input_cfdi,
        0,
        interpolated_y_cfdi,
        color="tomato",
        ls="--",
        lw=1.5,
    )

    # Small offset (in data units) that keeps labels off their marker lines.
    epsilon = 0.005

    # Mark the mean of the plotted values with a grey dashed line up to the curve.
    mean_val = np.mean(data)
    interpolated_y_mean = np.interp(mean_val, x_vals, y_vals)
    ax.vlines(mean_val, 0, interpolated_y_mean, color="grey", ls="--", lw=1.5)
    ax.text(
        mean_val + epsilon,
        interpolated_y_mean + epsilon,
        "Avg.",
        {"color": "grey", "fontsize": 13},
        ha="left",
    )
    ax.text(
        input_cfdi + epsilon,
        interpolated_y_cfdi + epsilon,
        f"This {compute_type}",
        {"color": "#DC143C", "fontsize": 13},
        ha="left",
    )

    ax.set_xlabel("Citation Field Diversity Index (CFDI)", fontsize=15)
    ax.set_ylabel("Density", fontsize=15)

    # The figure is not pyplot's "current figure", so despine must be given
    # the axes explicitly.
    sns.despine(ax=ax, left=True, bottom=True, right=True, top=True)
    return fig
def generate_maoc_plot(input_maoc, compute_type="paper"):
    """Plot where a mean-age-of-citation value falls within the corpus distribution.

    A Gaussian kernel density estimate of the values in ``mean_citation_ages``
    is drawn as a filled curve. The mean of those values and ``input_maoc``
    are each marked with a dashed vertical line and a text label.

    Args:
        input_maoc: mAoC value to highlight on the curve.
        compute_type: Noun used in the highlight label ("This <compute_type>").

    Returns:
        The ``matplotlib.figure.Figure`` holding the plot. The figure is not
        registered with pyplot, so the caller does not need to close it.
    """
    sns.set(font_scale=1.3, style="whitegrid")

    data = pd.Series(mean_citation_ages, dtype=float)

    # Estimate the density with scipy and sample it on a regular grid that
    # spans the observed range.
    kde = gaussian_kde(data)
    x_vals = np.linspace(data.min(), data.max(), 1000)
    y_vals = kde.evaluate(x_vals)

    # Build the figure directly instead of through plt.subplots(): pyplot keeps
    # a reference to every figure it creates until plt.close() is called, so a
    # long-running process would otherwise accumulate one figure per call.
    fig = Figure()
    ax = fig.subplots()

    ax.fill_between(x_vals, y_vals, color="skyblue", alpha=0.3)
    ax.plot(x_vals, y_vals, color="skyblue", linewidth=2, label="Distribution")

    # Height of the sampled curve at the input value. np.interp clamps, so an
    # input outside the sampled range takes the height of the nearest endpoint.
    interpolated_y_maoc = np.interp(input_maoc, x_vals, y_vals)
    ax.scatter(
        input_maoc,
        interpolated_y_maoc,
        c="r",
        marker="*",
        linewidths=2,
        zorder=2,
        s=32,
    )
    ax.vlines(
        input_maoc,
        0,
        interpolated_y_maoc,
        color="tomato",
        ls="--",
        lw=1.5,
    )

    # Small offset (in data units) that keeps labels off their marker lines.
    epsilon = 0.005

    # Mark the mean of the plotted values with a grey dashed line up to the curve.
    mean_val = np.mean(data)
    interpolated_y_mean = np.interp(mean_val, x_vals, y_vals)
    ax.vlines(mean_val, 0, interpolated_y_mean, color="grey", ls="--", lw=1.5)
    ax.text(
        mean_val + epsilon,
        interpolated_y_mean + epsilon,
        "Avg.",
        {"color": "grey", "fontsize": 13},
        ha="left",
    )
    ax.text(
        input_maoc + epsilon,
        interpolated_y_maoc + epsilon,
        f"This {compute_type}",
        {"color": "#DC143C", "fontsize": 13},
        ha="left",
    )

    ax.set_xlabel("Mean Age of Citation (mAoC)", fontsize=15)
    ax.set_ylabel("Density", fontsize=15)

    # The figure is not pyplot's "current figure", so despine must be given
    # the axes explicitly.
    sns.despine(ax=ax, left=True, bottom=True, right=True, top=True)
    return fig