Spaces:
Sleeping
Sleeping
File size: 3,420 Bytes
0fba077 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
# Copyright 2023 by Jan Philip Wahle, https://jpwahle.com/
# All rights reserved.
import os
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from scipy.stats import gaussian_kde
dirname = os.path.dirname(__file__)

# Load the per-paper field-diversity table into a pandas DataFrame.
papers_df = pd.read_csv(
    os.path.join(dirname, "data/nlp_papers_field_diversity.csv")
)

# Mean CFDI, taken from the "incoming_diversity" column.
mean_cfdi = papers_df["incoming_diversity"].mean()

# Mean citation ages, read as one float per line of the text file.
# float() already ignores surrounding whitespace, so the whole line is parsed
# instead of slicing off its last character: a final line without a trailing
# newline keeps its last digit. Blank lines are skipped rather than raising
# ValueError.
with open(
    os.path.join(dirname, "data/nlp_papers_citation_age.txt"),
    "r",
    encoding="utf-8",
) as filehandle:
    mean_citation_ages = [float(line) for line in filehandle if line.strip()]
def generate_cfdi_plot(input_cfdi):
    """
    Plot the CFDI distribution and mark where ``input_cfdi`` falls on it.

    A Gaussian kernel density estimate is fitted to the strictly positive
    ``incoming_diversity`` values of the module-level ``papers_df`` and drawn
    as a filled curve. The given value is highlighted with a red star at its
    estimated density and a dashed vertical line down to zero.

    Args:
        input_cfdi: CFDI value to highlight on the distribution.

    Returns:
        The matplotlib figure holding the plot.
    """
    # NOTE: sns.set applies the theme globally, not only to this figure.
    sns.set(font_scale=1.3, style="whitegrid")

    diversity = papers_df["incoming_diversity"]
    data = diversity[diversity > 0]

    # Fit the KDE once and sample it on a regular grid for the curve.
    kde = gaussian_kde(data)
    x_vals = np.linspace(data.min(), data.max(), 1000)
    y_vals = kde.evaluate(x_vals)

    fig, ax = plt.subplots()  # create a new figure and axis
    ax.fill_between(x_vals, y_vals, color="skyblue", alpha=0.3)
    ax.plot(x_vals, y_vals, color="skyblue", linewidth=2, label="Distribution")

    # Evaluate the KDE at the marker position itself. Interpolating on the
    # sampled grid would clamp to the edge density whenever the input lies
    # outside the observed range, placing the marker at a misleading height.
    marker_density = kde.evaluate(np.atleast_1d(input_cfdi))

    ax.scatter(
        input_cfdi,
        marker_density,
        c="r",
        marker="*",
        linewidths=1,
        zorder=2,
    )
    ax.vlines(input_cfdi, 0, marker_density, color="tomato", ls="--", lw=1.5)

    ax.set_xlabel("Citation Field Diversity Index (CFDI)", fontsize=15)
    ax.set_ylabel("Density", fontsize=15)
    sns.despine(left=True, bottom=True, right=True, top=True)
    return fig
def generate_maoc_plot(input_maoc):
    """
    Plot the mAoC distribution and mark where ``input_maoc`` falls on it.

    A Gaussian kernel density estimate is fitted to the module-level
    ``mean_citation_ages`` values and drawn as a filled curve. The given value
    is highlighted with a red star at its estimated density and a dashed
    vertical line down to zero.

    Args:
        input_maoc: Mean age of citation value to highlight on the
            distribution.

    Returns:
        The matplotlib figure holding the plot.
    """
    # NOTE: sns.set applies the theme globally, not only to this figure.
    sns.set(font_scale=1.3, style="whitegrid")

    data = np.asarray(mean_citation_ages, dtype=float)

    # Fit the KDE once and sample it on a regular grid for the curve.
    kde = gaussian_kde(data)
    x_vals = np.linspace(data.min(), data.max(), 1000)
    y_vals = kde.evaluate(x_vals)

    fig, ax = plt.subplots()  # create a new figure and axis
    ax.fill_between(x_vals, y_vals, color="skyblue", alpha=0.3)
    ax.plot(x_vals, y_vals, color="skyblue", linewidth=2, label="Distribution")

    # Evaluate the KDE at the marker position itself. Interpolating on the
    # sampled grid would clamp to the edge density whenever the input lies
    # outside the observed range, placing the marker at a misleading height.
    marker_density = kde.evaluate(np.atleast_1d(input_maoc))

    ax.scatter(
        input_maoc,
        marker_density,
        c="r",
        marker="*",
        linewidths=1,
        zorder=2,
    )
    ax.vlines(input_maoc, 0, marker_density, color="tomato", ls="--", lw=1.5)

    ax.set_xlabel("Mean Age of Citation (mAoC)", fontsize=15)
    ax.set_ylabel("Density", fontsize=15)
    sns.despine(left=True, bottom=True, right=True, top=True)
    return fig
|