Didier Guillevic committed on
Commit
8507fc0
·
1 Parent(s): 9a919aa

Attempting to give more information on the table columns

Browse files
Files changed (2) hide show
  1. app.py +9 -4
  2. icij_utils.py +18 -16
app.py CHANGED
@@ -18,6 +18,7 @@ Generation:
18
 
19
  import gradio as gr
20
  import icij_utils
 
21
  import smolagents
22
  import os
23
  import pathlib
@@ -65,8 +66,8 @@ for table, doc in metadata.TABLE_DOCS.items():
65
  if table in schema:
66
  for col_name, col_type in schema[table].items():
67
  col_doc = metadata.COLUMN_DOCS.get(table, {}).get(col_name, "No documentation available")
68
- #tool_description += f" - {col_name}: {col_type}: {col_doc}\n"
69
- tool_description += f" - {col_name}: {col_type}\n"
70
 
71
  # Add source documentation
72
  #tool_description += "\n\nSource IDs:\n"
@@ -150,8 +151,12 @@ with gr.Blocks() as demo:
150
  with gr.Accordion("Sample questions", open=False):
151
  gr.Examples(
152
  [
153
- ["",],
154
- ["",],
 
 
 
 
155
  ],
156
  inputs=[question,],
157
  outputs=[response,],
 
18
 
19
  import gradio as gr
20
  import icij_utils
21
+ import sqlalchemy
22
  import smolagents
23
  import os
24
  import pathlib
 
66
  if table in schema:
67
  for col_name, col_type in schema[table].items():
68
  col_doc = metadata.COLUMN_DOCS.get(table, {}).get(col_name, "No documentation available")
69
+ tool_description += f" - {col_name}: {col_type}: {col_doc}\n"
70
+ #tool_description += f" - {col_name}: {col_type}\n"
71
 
72
  # Add source documentation
73
  #tool_description += "\n\nSource IDs:\n"
 
151
  with gr.Accordion("Sample questions", open=False):
152
  gr.Examples(
153
  [
154
+ [
155
+ (
156
+ "Can you list the entities with an address in Canada? "
157
+ "Please give the name of the entity an its address."
158
+ ),
159
+ ],
160
  ],
161
  inputs=[question,],
162
  outputs=[response,],
icij_utils.py CHANGED
@@ -229,7 +229,7 @@ class ICIJDatabaseMetadata:
229
  'name': "Legal name of the offshore entity",
230
  'original_name': "Name in original language/character set",
231
  'former_name': "Previous names of the entity",
232
- 'jurisdiction': "Country/region where the entity is registered",
233
  'jurisdiction_description': "Detailed description of the jurisdiction",
234
  'company_type': "Legal structure of the entity (e.g., corporation, trust)",
235
  'address': "Primary registered address",
@@ -240,29 +240,31 @@ class ICIJDatabaseMetadata:
240
  'dorm_date': "Date when entity became dormant",
241
  'status': "Current status of the entity",
242
  'service_provider': "Firm that provided offshore services",
243
- 'source_id': "Identifier for the leak source"
 
 
244
  },
245
 
246
  'others': {
247
  'name': "Name of the miscellaneous party or item",
248
  'type': "Type of the other party (e.g., vessel, legal case)",
249
  'incorporation_date': "Date of incorporation or creation if applicable",
250
- 'jurisdiction': "Jurisdiction associated with the party",
 
251
  'countries': "Countries associated with the party",
252
  'status': "Current status",
253
  'internal_id': "Unique identifier within the leak data",
254
  'address': "Associated address if available",
255
- 'source_id': "Identifier for the leak source",
256
  'valid_until': "Date until which the information is valid"
257
  },
258
 
259
  'officers': {
260
  'name': "Name of the individual or organization",
261
- 'country_codes': "Countries connected to the officer",
262
- 'source_id': "Identifier for the leak source",
 
263
  'valid_until': "Date until which the information is valid",
264
- 'status': "Current status of the officer",
265
- 'internal_id': "Unique identifier within the leak data"
266
  },
267
 
268
  'intermediaries': {
@@ -270,28 +272,28 @@ class ICIJDatabaseMetadata:
270
  'internal_id': "Unique identifier within the leak data",
271
  'address': "Business address",
272
  'status': "Current status",
273
- 'country_codes': "Countries where intermediary operates",
274
- 'source_id': "Identifier for the leak source"
 
275
  },
276
 
277
  'addresses': {
278
  'address': "Full address text",
279
  'name': "Name associated with address",
280
- 'country_codes': "Country codes for the address",
281
  'countries': "Full country names",
282
- 'source_id': "Identifier for the leak source",
283
  'valid_until': "Date until which address is valid",
284
- 'internal_id': "Unique identifier within the leak data"
285
  },
286
 
287
  'relationships': {
288
- 'from_id': "Internal ID of the source node",
289
- 'to_id': "Internal ID of the target node",
290
  'rel_type': "Type of relationship (e.g., shareholder, director)",
291
  'link': "Additional details about the relationship",
292
  'start_date': "When the relationship began",
293
  'end_date': "When the relationship ended",
294
- 'source_id': "Identifier for the leak source",
295
  'status': "Current status of the relationship"
296
  }
297
  }
 
229
  'name': "Legal name of the offshore entity",
230
  'original_name': "Name in original language/character set",
231
  'former_name': "Previous names of the entity",
232
+ #'jurisdiction': "Country/region where the entity is registered",
233
  'jurisdiction_description': "Detailed description of the jurisdiction",
234
  'company_type': "Legal structure of the entity (e.g., corporation, trust)",
235
  'address': "Primary registered address",
 
240
  'dorm_date': "Date when entity became dormant",
241
  'status': "Current status of the entity",
242
  'service_provider': "Firm that provided offshore services",
243
+ 'country_codes': '3 letter abbreviations of country names',
244
+ 'countries': 'name of country',
245
+ 'sourceID': "Identifier for the leak source"
246
  },
247
 
248
  'others': {
249
  'name': "Name of the miscellaneous party or item",
250
  'type': "Type of the other party (e.g., vessel, legal case)",
251
  'incorporation_date': "Date of incorporation or creation if applicable",
252
+ 'jurisdiction': "2 letter code of the Jurisdiction associated with the party",
253
+ 'jurisdiction-description': 'full name of the jurisdiction',
254
  'countries': "Countries associated with the party",
255
  'status': "Current status",
256
  'internal_id': "Unique identifier within the leak data",
257
  'address': "Associated address if available",
258
+ 'sourceID': "Identifier for the leak source",
259
  'valid_until': "Date until which the information is valid"
260
  },
261
 
262
  'officers': {
263
  'name': "Name of the individual or organization",
264
+ 'countries': 'full name of the country connected to the officer',
265
+ 'country_codes': "3 letter code of the countries connected to the officer",
266
+ 'sourceID': "Identifier for the leak source",
267
  'valid_until': "Date until which the information is valid",
 
 
268
  },
269
 
270
  'intermediaries': {
 
272
  'internal_id': "Unique identifier within the leak data",
273
  'address': "Business address",
274
  'status': "Current status",
275
+ 'countries': "Countries where intermediary operates",
276
+ 'country_codes': "3 letter abbreviations of the countries where intermediary operates",
277
+ 'sourceID': "Identifier for the leak source"
278
  },
279
 
280
  'addresses': {
281
  'address': "Full address text",
282
  'name': "Name associated with address",
283
+ 'country_codes': "3 letter country codes for the address",
284
  'countries': "Full country names",
285
+ 'sourceID': "Identifier for the leak source",
286
  'valid_until': "Date until which address is valid",
 
287
  },
288
 
289
  'relationships': {
290
+ 'node_id_start': "Internal ID of the source node",
291
+ 'node_id_end': "Internal ID of the target node",
292
  'rel_type': "Type of relationship (e.g., shareholder, director)",
293
  'link': "Additional details about the relationship",
294
  'start_date': "When the relationship began",
295
  'end_date': "When the relationship ended",
296
+ 'sourceID': "Identifier for the leak source",
297
  'status': "Current status of the relationship"
298
  }
299
  }