Spaces:

chrisfinlayson
/

foundry-pdf-knowledge-graph

Sleeping

Chris Finlayson committed on Dec 8, 2023

Commit

bef0c15

•

1 Parent(s): 2f9755f

Update to deps

Files changed (3) hide show

app.py CHANGED Viewed

@@ -92,15 +92,17 @@ def get_relation(sent):  # Define a function to get the relation from a sentence
 def execute_process(file, edge):  # Define a function to execute the process
     candidate_sentences = read_pdf(file)  # Read the PDF file
-    entity_pairs = []  # Initialize an empty list for the entity pairs
-    for i in tqdm(candidate_sentences["sentence"]):  # For each sentence in the DataFrame
-        entity_pairs.append(get_entities(i))  # Append the entities to the list
-    relations = [get_relation(i) for i in tqdm(candidate_sentences['sentence'])]  # Get the relations for each sentence
-    source = [i[0] for i in entity_pairs]  # Extract the subjects
-    target = [i[1] for i in entity_pairs]  # Extract the objects
-    kg_df = pd.DataFrame({'source':source, 'target':target, 'edge':relations})  # Create a DataFrame of the sources, targets, and edges
     unique_edges = kg_df['edge'].unique() if kg_df['edge'].nunique() != 0 else None  # Get the unique edges
     edge_counts = kg_df['edge'].value_counts()  # Get the counts of the edges
     unique_edges_df = pd.DataFrame({'edge': edge_counts.index, 'count': edge_counts.values})  # Create a DataFrame of the unique edges and their counts

 def execute_process(file, edge):  # Define a function to execute the process
     candidate_sentences = read_pdf(file)  # Read the PDF file
+    if 'kg_df' not in globals() or 'file' not in globals() or file != globals()['file']:  # Only execute if kg_df is not defined or if the file is not consistent with the persisted global
+        entity_pairs = []  # Initialize an empty list for the entity pairs
+        for i in tqdm(candidate_sentences["sentence"]):  # For each sentence in the DataFrame
+            entity_pairs.append(get_entities(i))  # Append the entities to the list
+        relations = [get_relation(i) for i in tqdm(candidate_sentences['sentence'])]  # Get the relations for each sentence
+        source = [i[0] for i in entity_pairs]  # Extract the subjects
+        target = [i[1] for i in entity_pairs]  # Extract the objects
+        globals()['kg_df'] = pd.DataFrame({'source':source, 'target':target, 'edge':relations})  # Create a DataFrame of the sources, targets, and edges
+        globals()['file'] = file  # Persist the file into a global variable
     unique_edges = kg_df['edge'].unique() if kg_df['edge'].nunique() != 0 else None  # Get the unique edges
     edge_counts = kg_df['edge'].value_counts()  # Get the counts of the edges
     unique_edges_df = pd.DataFrame({'edge': edge_counts.index, 'count': edge_counts.values})  # Create a DataFrame of the unique edges and their counts

graph.png CHANGED Viewed

requirements.txt CHANGED Viewed

@@ -1,5 +1,11 @@
-gradio
-PyMuPDF
-transformers
-plotly
-spacy

+gradio==1.7.7
+PyMuPDF==1.18.14
+transformers==4.6.1
+plotly==4.14.3
+spacy==3.0.6
+beautifulsoup4==4.9.3
+pandas==1.2.4
+requests==2.25.1
+networkx==2.5.1
+matplotlib==3.4.2
+tqdm==4.61.1