Chris Finlayson committed on
Commit
bef0c15
1 Parent(s): 2f9755f

Update to deps

Browse files
Files changed (3) hide show
  1. app.py +11 -9
  2. graph.png +0 -0
  3. requirements.txt +11 -5
app.py CHANGED
@@ -92,15 +92,17 @@ def get_relation(sent): # Define a function to get the relation from a sentence
92
  def execute_process(file, edge): # Define a function to execute the process
93
  candidate_sentences = read_pdf(file) # Read the PDF file
94
 
95
- entity_pairs = [] # Initialize an empty list for the entity pairs
96
- for i in tqdm(candidate_sentences["sentence"]): # For each sentence in the DataFrame
97
- entity_pairs.append(get_entities(i)) # Append the entities to the list
98
- relations = [get_relation(i) for i in tqdm(candidate_sentences['sentence'])] # Get the relations for each sentence
99
-
100
- source = [i[0] for i in entity_pairs] # Extract the subjects
101
- target = [i[1] for i in entity_pairs] # Extract the objects
102
- kg_df = pd.DataFrame({'source':source, 'target':target, 'edge':relations}) # Create a DataFrame of the sources, targets, and edges
103
-
 
 
104
  unique_edges = kg_df['edge'].unique() if kg_df['edge'].nunique() != 0 else None # Get the unique edges
105
  edge_counts = kg_df['edge'].value_counts() # Get the counts of the edges
106
  unique_edges_df = pd.DataFrame({'edge': edge_counts.index, 'count': edge_counts.values}) # Create a DataFrame of the unique edges and their counts
 
92
  def execute_process(file, edge): # Define a function to execute the process
93
  candidate_sentences = read_pdf(file) # Read the PDF file
94
 
95
+ if 'kg_df' not in globals() or 'file' not in globals() or file != globals()['file']: # Only execute if kg_df is not defined or if the file is not consistent with the persisted global
96
+ entity_pairs = [] # Initialize an empty list for the entity pairs
97
+ for i in tqdm(candidate_sentences["sentence"]): # For each sentence in the DataFrame
98
+ entity_pairs.append(get_entities(i)) # Append the entities to the list
99
+ relations = [get_relation(i) for i in tqdm(candidate_sentences['sentence'])] # Get the relations for each sentence
100
+
101
+ source = [i[0] for i in entity_pairs] # Extract the subjects
102
+ target = [i[1] for i in entity_pairs] # Extract the objects
103
+ globals()['kg_df'] = pd.DataFrame({'source':source, 'target':target, 'edge':relations}) # Create a DataFrame of the sources, targets, and edges
104
+ globals()['file'] = file # Persist the file into a global variable
105
+
106
  unique_edges = kg_df['edge'].unique() if kg_df['edge'].nunique() != 0 else None # Get the unique edges
107
  edge_counts = kg_df['edge'].value_counts() # Get the counts of the edges
108
  unique_edges_df = pd.DataFrame({'edge': edge_counts.index, 'count': edge_counts.values}) # Create a DataFrame of the unique edges and their counts
graph.png CHANGED
requirements.txt CHANGED
@@ -1,5 +1,11 @@
1
- gradio
2
- PyMuPDF
3
- transformers
4
- plotly
5
- spacy
 
 
 
 
 
 
 
1
+ gradio==1.7.7
2
+ PyMuPDF==1.18.14
3
+ transformers==4.6.1
4
+ plotly==4.14.3
5
+ spacy==3.0.6
6
+ beautifulsoup4==4.9.3
7
+ pandas==1.2.4
8
+ requests==2.25.1
9
+ networkx==2.5.1
10
+ matplotlib==3.4.2
11
+ tqdm==4.61.1