Spaces:
Running
Running
Didier Guillevic
committed on
Commit
·
8507fc0
1
Parent(s):
9a919aa
Attempting to give more information on the table columns
Browse files
- app.py +9 -4
- icij_utils.py +18 -16
app.py
CHANGED
@@ -18,6 +18,7 @@ Generation:
|
|
18 |
|
19 |
import gradio as gr
|
20 |
import icij_utils
|
|
|
21 |
import smolagents
|
22 |
import os
|
23 |
import pathlib
|
@@ -65,8 +66,8 @@ for table, doc in metadata.TABLE_DOCS.items():
|
|
65 |
if table in schema:
|
66 |
for col_name, col_type in schema[table].items():
|
67 |
col_doc = metadata.COLUMN_DOCS.get(table, {}).get(col_name, "No documentation available")
|
68 |
-
|
69 |
-
tool_description += f" - {col_name}: {col_type}\n"
|
70 |
|
71 |
# Add source documentation
|
72 |
#tool_description += "\n\nSource IDs:\n"
|
@@ -150,8 +151,12 @@ with gr.Blocks() as demo:
|
|
150 |
with gr.Accordion("Sample questions", open=False):
|
151 |
gr.Examples(
|
152 |
[
|
153 |
-
[
|
154 |
-
|
|
|
|
|
|
|
|
|
155 |
],
|
156 |
inputs=[question,],
|
157 |
outputs=[response,],
|
|
|
18 |
|
19 |
import gradio as gr
|
20 |
import icij_utils
|
21 |
+
import sqlalchemy
|
22 |
import smolagents
|
23 |
import os
|
24 |
import pathlib
|
|
|
66 |
if table in schema:
|
67 |
for col_name, col_type in schema[table].items():
|
68 |
col_doc = metadata.COLUMN_DOCS.get(table, {}).get(col_name, "No documentation available")
|
69 |
+
tool_description += f" - {col_name}: {col_type}: {col_doc}\n"
|
70 |
+
#tool_description += f" - {col_name}: {col_type}\n"
|
71 |
|
72 |
# Add source documentation
|
73 |
#tool_description += "\n\nSource IDs:\n"
|
|
|
151 |
with gr.Accordion("Sample questions", open=False):
|
152 |
gr.Examples(
|
153 |
[
|
154 |
+
[
|
155 |
+
(
|
156 |
+
"Can you list the entities with an address in Canada? "
|
157 |
+
"Please give the name of the entity an its address."
|
158 |
+
),
|
159 |
+
],
|
160 |
],
|
161 |
inputs=[question,],
|
162 |
outputs=[response,],
|
icij_utils.py
CHANGED
@@ -229,7 +229,7 @@ class ICIJDatabaseMetadata:
|
|
229 |
'name': "Legal name of the offshore entity",
|
230 |
'original_name': "Name in original language/character set",
|
231 |
'former_name': "Previous names of the entity",
|
232 |
-
'jurisdiction': "Country/region where the entity is registered",
|
233 |
'jurisdiction_description': "Detailed description of the jurisdiction",
|
234 |
'company_type': "Legal structure of the entity (e.g., corporation, trust)",
|
235 |
'address': "Primary registered address",
|
@@ -240,29 +240,31 @@ class ICIJDatabaseMetadata:
|
|
240 |
'dorm_date': "Date when entity became dormant",
|
241 |
'status': "Current status of the entity",
|
242 |
'service_provider': "Firm that provided offshore services",
|
243 |
-
'
|
|
|
|
|
244 |
},
|
245 |
|
246 |
'others': {
|
247 |
'name': "Name of the miscellaneous party or item",
|
248 |
'type': "Type of the other party (e.g., vessel, legal case)",
|
249 |
'incorporation_date': "Date of incorporation or creation if applicable",
|
250 |
-
'jurisdiction': "Jurisdiction associated with the party",
|
|
|
251 |
'countries': "Countries associated with the party",
|
252 |
'status': "Current status",
|
253 |
'internal_id': "Unique identifier within the leak data",
|
254 |
'address': "Associated address if available",
|
255 |
-
'
|
256 |
'valid_until': "Date until which the information is valid"
|
257 |
},
|
258 |
|
259 |
'officers': {
|
260 |
'name': "Name of the individual or organization",
|
261 |
-
'
|
262 |
-
'
|
|
|
263 |
'valid_until': "Date until which the information is valid",
|
264 |
-
'status': "Current status of the officer",
|
265 |
-
'internal_id': "Unique identifier within the leak data"
|
266 |
},
|
267 |
|
268 |
'intermediaries': {
|
@@ -270,28 +272,28 @@ class ICIJDatabaseMetadata:
|
|
270 |
'internal_id': "Unique identifier within the leak data",
|
271 |
'address': "Business address",
|
272 |
'status': "Current status",
|
273 |
-
'
|
274 |
-
'
|
|
|
275 |
},
|
276 |
|
277 |
'addresses': {
|
278 |
'address': "Full address text",
|
279 |
'name': "Name associated with address",
|
280 |
-
'country_codes': "
|
281 |
'countries': "Full country names",
|
282 |
-
'
|
283 |
'valid_until': "Date until which address is valid",
|
284 |
-
'internal_id': "Unique identifier within the leak data"
|
285 |
},
|
286 |
|
287 |
'relationships': {
|
288 |
-
'
|
289 |
-
'
|
290 |
'rel_type': "Type of relationship (e.g., shareholder, director)",
|
291 |
'link': "Additional details about the relationship",
|
292 |
'start_date': "When the relationship began",
|
293 |
'end_date': "When the relationship ended",
|
294 |
-
'
|
295 |
'status': "Current status of the relationship"
|
296 |
}
|
297 |
}
|
|
|
229 |
'name': "Legal name of the offshore entity",
|
230 |
'original_name': "Name in original language/character set",
|
231 |
'former_name': "Previous names of the entity",
|
232 |
+
#'jurisdiction': "Country/region where the entity is registered",
|
233 |
'jurisdiction_description': "Detailed description of the jurisdiction",
|
234 |
'company_type': "Legal structure of the entity (e.g., corporation, trust)",
|
235 |
'address': "Primary registered address",
|
|
|
240 |
'dorm_date': "Date when entity became dormant",
|
241 |
'status': "Current status of the entity",
|
242 |
'service_provider': "Firm that provided offshore services",
|
243 |
+
'country_codes': '3 letter abbreviations of country names',
|
244 |
+
'countries': 'name of country',
|
245 |
+
'sourceID': "Identifier for the leak source"
|
246 |
},
|
247 |
|
248 |
'others': {
|
249 |
'name': "Name of the miscellaneous party or item",
|
250 |
'type': "Type of the other party (e.g., vessel, legal case)",
|
251 |
'incorporation_date': "Date of incorporation or creation if applicable",
|
252 |
+
'jurisdiction': "2 letter code of the Jurisdiction associated with the party",
|
253 |
+
'jurisdiction-description': 'full name of the jurisdiction',
|
254 |
'countries': "Countries associated with the party",
|
255 |
'status': "Current status",
|
256 |
'internal_id': "Unique identifier within the leak data",
|
257 |
'address': "Associated address if available",
|
258 |
+
'sourceID': "Identifier for the leak source",
|
259 |
'valid_until': "Date until which the information is valid"
|
260 |
},
|
261 |
|
262 |
'officers': {
|
263 |
'name': "Name of the individual or organization",
|
264 |
+
'countries': 'full name of the country connected to the officer',
|
265 |
+
'country_codes': "3 letter code of the countries connected to the officer",
|
266 |
+
'sourceID': "Identifier for the leak source",
|
267 |
'valid_until': "Date until which the information is valid",
|
|
|
|
|
268 |
},
|
269 |
|
270 |
'intermediaries': {
|
|
|
272 |
'internal_id': "Unique identifier within the leak data",
|
273 |
'address': "Business address",
|
274 |
'status': "Current status",
|
275 |
+
'countries': "Countries where intermediary operates",
|
276 |
+
'country_codes': "3 letter abbreviations of the countries where intermediary operates",
|
277 |
+
'sourceID': "Identifier for the leak source"
|
278 |
},
|
279 |
|
280 |
'addresses': {
|
281 |
'address': "Full address text",
|
282 |
'name': "Name associated with address",
|
283 |
+
'country_codes': "3 letter country codes for the address",
|
284 |
'countries': "Full country names",
|
285 |
+
'sourceID': "Identifier for the leak source",
|
286 |
'valid_until': "Date until which address is valid",
|
|
|
287 |
},
|
288 |
|
289 |
'relationships': {
|
290 |
+
'node_id_start': "Internal ID of the source node",
|
291 |
+
'node_id_end': "Internal ID of the target node",
|
292 |
'rel_type': "Type of relationship (e.g., shareholder, director)",
|
293 |
'link': "Additional details about the relationship",
|
294 |
'start_date': "When the relationship began",
|
295 |
'end_date': "When the relationship ended",
|
296 |
+
'sourceID': "Identifier for the leak source",
|
297 |
'status': "Current status of the relationship"
|
298 |
}
|
299 |
}
|