giho905e committed on
Commit
84e78bb
·
1 Parent(s): 7564b4e

Upload 30 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/Input/gadm41_DEU_4.json filter=lfs diff=lfs merge=lfs -text
ReadMe.md ADDED
@@ -0,0 +1 @@
 
 
1
+
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # app.py: entry point that launches the Gradio app on Hugging Face
3
+ #######################################################################################################
4
+ # IMPORT
5
+ #######################################################################################################
6
+
7
+ import pandas as pd
8
+ import geopandas as gpd
9
+ import os
10
+ from configparser import ConfigParser
11
+ import gradio as gr
12
+
13
+ # modules
14
+ from modules.geojson_github_loader import download_github_geojson
15
+ from modules.geojson_processor import geojson_processor_to_csv
16
+ from modules.language_model import TAPAS
17
+
18
+
19
+ #######################################################################################################
20
+ # CONFIG
21
+ #######################################################################################################
22
+ # Prints disabled!!
23
+ #print('\nCurrent Working Directory (CWD):\n' + os.getcwd())
24
+
25
+ config_object = ConfigParser()
26
+ if 'config.ini' in os.listdir():
27
+ config_object.read('config.ini')
28
+ #print('Setting have been imported from the config file.')
29
+ else:
30
+ print('No config file in the CWD')
31
+ quit()
32
+
33
+ # changing CWD and input output folders
34
+ os.chdir(format(config_object['CONFIG']['CWD']))
35
+
36
+ DATA = os.getcwd() + '\\' + format(config_object['CONFIG']['Input'])
37
+ OUT = os.getcwd() + '\\' + format(config_object['CONFIG']['Output'])
38
+ TEMP = os.getcwd() + '\\' + format(config_object['CONFIG']['Temp'])
39
+
40
+
41
+ #######################################################################################################
42
+ # Load and prepare Data
43
+ #######################################################################################################
44
+
45
+ # load github data
46
+ # attributes
47
+ github_user = "Giedeon25"
48
+ github_repo = "GID-Project"
49
+ file_path_github = "main/data/Input/gadm41_DEU_1.json"
50
+ token = os.environ.get("GITHUB_TOKEN", "")  # read the token from the environment; never commit credentials
51
+
52
+ local_file_path = DATA + '\\' + 'gadm41_DEU_1.json'
53
+
54
+ output_file = TEMP + '\\' + 'gadm41_DEU_1'
55
+
56
+ # load locally
57
+ geojson_data = gpd.read_file(local_file_path)
58
+
59
+
60
+ #######################################################################################################
61
+ # LLM
62
+ #######################################################################################################
63
+
64
+ # attributes
65
+ question = 'what is the geometry of Saxony?'
66
+ table_main = pd.read_csv(TEMP + '\\' + 'gadm41_DEU_1_main').astype(str)
67
+ table_geom = pd.read_csv(TEMP + '\\' + 'gadm41_DEU_1_geom')
68
+
69
+ # function
70
+ TAPAS(question, table_main)
71
+ ##################################################################################
72
+ # Function that enables testing
73
+ ##################################################################################
74
+ def AskAI(ques, lv, table_main = table_main):
75
+ level = int(lv) # Currently placeholder
76
+ question = str(ques)
77
+ ans = TAPAS(question = question, table_main= table_main)
78
+ return(ans)
79
+
80
+ def AskAI_easy(ques):
81
+ Tmain = pd.read_csv(TEMP + '\\' + 'gadm41_DEU_1_main').astype(str)
82
+ blub = str(AskAI(ques,1,Tmain))
83
+ return(blub)
84
+ #######################################################################################
85
+ # Gradio Interface
86
+ ###############################################################################
87
+ desc = 'Example: What is the geometry of Saxony?'
88
+ iface = gr.Interface(fn=AskAI_easy, inputs=['text'], outputs='text', description= desc)
89
+ iface.launch()
config.ini ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [CONFIG]
2
+ CWD = B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project
3
+ Input = data/Input
4
+ Output = data/Output
5
+ Temp = data/Temp
data/Input/gadm41_DEU_0.json ADDED
The diff for this file is too large to render. See raw diff
 
data/Input/gadm41_DEU_1.json ADDED
The diff for this file is too large to render. See raw diff
 
data/Input/gadm41_DEU_2.json ADDED
The diff for this file is too large to render. See raw diff
 
data/Input/gadm41_DEU_3.json ADDED
The diff for this file is too large to render. See raw diff
 
data/Input/gadm41_DEU_4.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29d963239303a0a2513352e073cb3ea3cf3d4064aae9d0848265fce8b641b906
3
+ size 15240303
data/Output/1.md ADDED
File without changes
data/Temp/gadm41_DEU_1_geom ADDED
The diff for this file is too large to render. See raw diff
 
data/Temp/gadm41_DEU_1_main ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GID_1,GID_0,COUNTRY,NAME_1,VARNAME_1,NL_NAME_1,TYPE_1,ENGTYPE_1,CC_1,HASC_1,ISO_1,geometry,bbox,centroid,neighbors
2
+ DEU.1_1,DEU,Germany,Baden-Württemberg,NA,NA,Land,State,08,DE.BW,NA,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;0;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;0;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;0;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;0;3
3
+ DEU.2_1,DEU,Germany,Bayern,Bavaria,NA,Freistaat,FreeState,09,DE.BY,DE-BY,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;1;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;1;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;1;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;1;3
4
+ DEU.3_1,DEU,Germany,Berlin,NA,NA,Land,State,11,DE.BE,DE-BE,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;2;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;2;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;2;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;2;3
5
+ DEU.4_1,DEU,Germany,Brandenburg,NA,NA,Land,State,12,DE.BR,DE-BB,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;3;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;3;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;3;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;3;3
6
+ DEU.5_1,DEU,Germany,Bremen,NA,NA,FreieHansestadt,State,04,DE.HB,DE-HB,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;4;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;4;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;4;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;4;3
7
+ DEU.6_1,DEU,Germany,Hamburg,NA,NA,FreieundHansestadt,State,02,DE.HH,DE-HH,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;5;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;5;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;5;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;5;3
8
+ DEU.7_1,DEU,Germany,Hessen,Hesse,NA,Land,State,06,DE.HE,DE-HE,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;6;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;6;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;6;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;6;3
9
+ DEU.8_1,DEU,Germany,Mecklenburg-Vorpommern,Mecklenburg-WestPomerania,NA,Land,State,13,DE.MV,DE-MV,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;7;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;7;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;7;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;7;3
10
+ DEU.9_1,DEU,Germany,Niedersachsen,LowerSaxony,NA,Land,State,03,DE.NI,DE-NI,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;8;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;8;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;8;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;8;3
11
+ DEU.10_1,DEU,Germany,Nordrhein-Westfalen,NorthRhine-Westphalia,NA,Land,State,05,DE.NW,DE-NW,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;9;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;9;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;9;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;9;3
12
+ DEU.11_1,DEU,Germany,Rheinland-Pfalz,Rhineland-Palatinate,NA,Land,State,07,DE.RP,DE-RP,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;10;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;10;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;10;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;10;3
13
+ DEU.12_1,DEU,Germany,Saarland,NA,NA,Land,State,10,DE.SL,DE-SL,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;11;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;11;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;11;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;11;3
14
+ DEU.14_1,DEU,Germany,Sachsen,Saxony,NA,Freistaat,State,14,DE.SN,DE-SN,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;12;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;12;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;12;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;12;3
15
+ DEU.13_1,DEU,Germany,Sachsen-Anhalt,Saxony-Anhalt,NA,Land,State,15,DE.ST,DE-ST,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;13;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;13;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;13;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;13;3
16
+ DEU.15_1,DEU,Germany,Schleswig-Holstein,NA,NA,Land,State,01,DE.SH,DE-SH,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;14;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;14;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;14;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;14;3
17
+ DEU.16_1,DEU,Germany,Thüringen,Thuringia,NA,Freistaat,State,16,DE.TH,NA,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;15;0,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;15;1,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;15;2,B:\Studium_MA\GIT_Semester02\GITXX_GeoInformationsDienste_github\HuggingFace\GID-Project\data/Temp\gadm41_DEU_1_geom;15;3
doc/AddditionalInfo ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GID-Project
2
+
3
+ ## Useful Links
4
+ Link for the Google Docs Draft https://docs.google.com/document/d/1OVhDRL7Myb2vIdw5bqCs1LjLkiYP2ykXYvt_nIBa8oI/edit
5
+
6
+ - Maybe use spaCy --> detect different locations in the question --> create sub-questions
7
+ - Agenten für das Suchen in mehreren Tabellen
8
+
9
+ Beispiel-Notebook für gradio: https://colab.research.google.com/drive/1uViopfIDRpAI5G28y0guzFgb6SWjjsT0?usp=sharing
10
+
11
+ Short Youtube intro about the deployment of an app on Hugging Face: https://www.youtube.com/watch?v=3bSVKNKb_PY
doc/Product_Requirements_Document.md ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GID Project
2
+
3
+ ## Product Requirement Document
4
+
5
+ ### 1. Introduction
6
+
7
+ The purpose of this document is to outline the requirements for a project aimed at using a Large Language Model (LLM) and tabular data to answer questions regarding the geometry of Germany, its states, and cities. The project's goal is to create a user-friendly, educational tool that provides information about the geographical aspects of Germany.
8
+
9
+ ### 2. Project Overview
10
+
11
+ #### 2.1. Project Name
12
+
13
+ The project's working title is "wedontknowyet".
14
+
15
+ #### 2.2. Project Description
16
+
17
+ wedontknowyet is an educational software application that leverages a Large Language Model (LLM) to provide detailed information about the geography of Germany, its states, and cities. Users can ask questions related to Germany's geographical features, such as the size, location, and topography of states and cities, and receive informative responses.
18
+
19
+ ### 3. Key Features
20
+
21
+ The software project should include the following key features:
22
+
23
+ #### 3.1. User Interface
24
+
25
+ - A user-friendly interface that allows users to input questions or queries regarding Germany's geography.
26
+ - The option to input questions in natural language, ensuring a seamless user experience.
27
+
28
+ #### 3.2. Data Integration
29
+
30
+ - Integration with a comprehensive dataset containing information on Germany's states, cities, and geographical features.
31
+ - The ability to update and maintain the dataset to ensure accuracy.
32
+
33
+ #### 3.3. Question-Answer Functionality
34
+
35
+ - The software should be able to process and interpret user queries and questions.
36
+ - The LLM should provide accurate and informative responses based on the data from the integrated dataset.
37
+
38
+ #### 3.4. Geographical Data
39
+
40
+ - Detailed information on the states and cities of Germany, including size, population, topography, and any other relevant geographical details.
41
+ - Interactive maps that can display the locations of cities and states within Germany.
42
+
43
+ #### 3.5. User Assistance
44
+
45
+ - An option to provide explanations and context when a user requests further details about a specific topic.
46
+ - The ability to offer links or references to external sources for more in-depth information.
47
+
48
+ ### 4. Functional Requirements
49
+
50
+ #### 4.1. Language Model Integration
51
+
52
+ - Integrate a language model to understand and process natural language queries.
53
+
54
+ #### 4.2. Data Integration
55
+
56
+ - Develop a data integration system that retrieves and maintains data related to Germany's states and cities.
57
+ - Ensure regular updates to the dataset to keep the information current.
58
+
59
+ #### 4.3. User Interaction
60
+
61
+ - Design a user interface that accepts natural language input from users.
62
+ - Implement a user-friendly system for submitting questions and queries.
63
+
64
+ #### 4.4. Question-Answer Functionality
65
+
66
+ - Develop an algorithm for processing and understanding user queries.
67
+ - Implement a system for generating informative responses using the integrated data and the language model.
68
+
69
+ #### 4.5. Data Presentation
70
+
71
+ - Create interactive and informative visual representations of Germany's geography using maps and other visual aids.
72
+
73
+ ### 5. Non-Functional Requirements
74
+
75
+ #### 5.1. Performance
76
+
77
+ - The system should respond to user queries promptly, with minimal latency.
78
+ - It should be able to handle multiple user requests simultaneously.
79
+
80
+ #### 5.2. Security
81
+
82
+ - Implement security measures to protect user data and the integrity of the integrated dataset.
83
+
84
+ #### 5.3. Accessibility
85
+
86
+ - Ensure that the application is accessible to individuals with disabilities.
87
+
88
+ #### 5.4. Scalability
89
+
90
+ - Design the system in a way that allows for scalability to accommodate potential future enhancements.
91
+
92
+ ### 6. Milestones
93
+
94
+ - **Milestone 1 (Month 1):** Data integration and initial UI design.
95
+ - **Milestone 2 (Month 2):** Language model integration and basic question-answering functionality.
96
+ - **Milestone 3 (Month 3):** User interface refinement and data presentation.
97
+ - **Milestone 4 (Month 4):** Performance optimization and security implementation.
98
+ - **Milestone 5 (Month 5):** Testing, user feedback, and final refinements.
99
+ - **Milestone 6 (Month 6):** Launch and ongoing maintenance.
100
+
101
+ ### 7. Budget and Resources
102
+
103
+ The project will require access to the necessary hardware, software, and cloud services for hosting and running the application. Funding should be allocated for data acquisition and maintenance. Human resources will include software developers, data scientists, and UX/UI designers.
104
+
105
+ ### 8. Conclusion
106
+
107
+ The "wedontknowyet" software project aims to provide a valuable educational resource for users interested in the geography of Germany, its states, and cities. By integrating a Language Model with tabular data, the software will enable users to obtain accurate and informative answers to their questions in a user-friendly manner. This project will enhance the understanding of Germany's geography and serve as a useful tool for students and enthusiasts.
doc/QuestionsTasks.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Level 0: Basic requirements
2
+
3
+ - What is the geometry of X? --> (Simple) extract from 1-2 interconnected tables ==> most basic requirement!! (output: link to 1_NUTS)
4
+ - What is the country code of X? ... --> Questions which could be answered by extracting values from the data table
5
+ - What is the bounding box of X? --> BB simplifies many subsequent tasks; recommendation to add as a value to all objects!
6
+
7
+
8
+ # Level 1: Table Interconnection, Multi-Output & Searching
9
+
10
+ - In what NUTS+1 lies X? --> X is given but the level containing X is wanted as an output (Example: In which federal state is the city Dresden => Saxony)
11
+ - Which NUTSX regions start with [letter], ...? --> Query more like searching, requiring multiple outputs (Example, which NUTS0 region starts with I => Italy)
12
+ - Call all NUTS-1 from NUTS where PROPERTY matches. --> Combines the prior 2 questions to query based on parent NUTS (Example: all federal states from Germany that start with the letter B)
13
+
14
+
15
+ # Level 2: Simple Spatial Connection & Manipulation
16
+
17
+ - What borders X? --> Estimation based on BB, check for correct borders using predetermined NUTS (Example: What borders Sachsen => Brandenburg (Berlin), Thuringia, Bavaria, Sachsen-Anhalt)
18
+ - What NUTS is between X and Y? --> Use BB for simplification/preselection (What federal state is between Hessen and Saxony => Thuringia, Bavaria, Lower-Saxony)
19
+ - Reproject X into PROJECTION (Example: Reproject the geometry of Saxony into UTM)
20
+
21
+
22
+
23
+ # Level 3: Spatial calculation // User specification
24
+ ***(This demands a higher level of user specification)***
25
+ - What is CARDINAL from X? --> Need to extract the BB and check a multitude of NUTS for relation to X (Example: What is north of Saxony? => Brandenburg, Berlin, Mecklenburg)
26
+ -> Define what distance counts as "CARDINAL"; Combination of different NUTS-levels?
27
+ - Simplify geometry of X --> Load geometry of X
28
+ - Degree of simplification? (Example question: simplify the geometry of Saxony, similar to [GeoJSON Utilities](http://opendatalab.de/projects/geojson-utilities/)?)
29
+
30
+ ---
doc/requirements.txt ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles @ file:///home/conda/feedstock_root/build_artifacts/aiofiles_1698945915105/work
2
+ altair @ file:///home/conda/feedstock_root/build_artifacts/altair_1696364485230/work
3
+ annotated-types @ file:///home/conda/feedstock_root/build_artifacts/annotated-types_1696634205638/work
4
+ anyio @ file:///home/conda/feedstock_root/build_artifacts/anyio_1688651106312/work/dist
5
+ appnope==0.1.3
6
+ archspec @ file:///croot/archspec_1697725767277/work
7
+ asttokens==2.4.1
8
+ attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1683424013410/work
9
+ boltons @ file:///Users/cbousseau/work/recipes/ci_py311/boltons_1677965141748/work
10
+ Brotli @ file:///Users/cbousseau/work/recipes/ci_py311/brotli-split_1677936346777/work
11
+ certifi==2023.7.22
12
+ cffi @ file:///Users/cbousseau/work/recipes/ci_py311/cffi_1677903595907/work
13
+ charset-normalizer @ file:///tmp/build/80754af9/charset-normalizer_1630003229654/work
14
+ click @ file:///home/conda/feedstock_root/build_artifacts/click_1692311806742/work
15
+ colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1666700638685/work
16
+ comm==0.2.0
17
+ conda @ file:///Users/runner/miniforge3/conda-bld/conda_1699392642856/work
18
+ conda-content-trust @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_5324skqvu9/croot/conda-content-trust_1693490622873/work
19
+ conda-libmamba-solver @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_4egpn3sd7i/croot/conda-libmamba-solver_1698961807382/work/src
20
+ conda-package-handling @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_fc4cx8vjhj/croot/conda-package-handling_1690999937094/work
21
+ conda_package_streaming @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_aecpaup22q/croot/conda-package-streaming_1690987978274/work
22
+ contourpy==1.2.0
23
+ cryptography @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_905z2r5rpq/croot/cryptography_1694211573866/work
24
+ cycler @ file:///home/conda/feedstock_root/build_artifacts/cycler_1696677705766/work
25
+ decorator==5.1.1
26
+ exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1692026125334/work
27
+ executing==2.0.1
28
+ fastapi @ file:///home/conda/feedstock_root/build_artifacts/fastapi_1698674661010/work
29
+ ffmpy @ file:///home/conda/feedstock_root/build_artifacts/ffmpy_1659474992694/work
30
+ filelock @ file:///home/conda/feedstock_root/build_artifacts/filelock_1698714947081/work
31
+ fonttools @ file:///Users/runner/miniforge3/conda-bld/fonttools_1700143150339/work
32
+ fsspec @ file:///home/conda/feedstock_root/build_artifacts/fsspec_1697919321618/work
33
+ gradio @ file:///home/conda/feedstock_root/build_artifacts/gradio_1699955356441/work
34
+ gradio_client @ file:///home/conda/feedstock_root/build_artifacts/gradio-client_1698767845356/work
35
+ h11 @ file:///home/conda/feedstock_root/build_artifacts/h11_1664132893548/work
36
+ h2 @ file:///home/conda/feedstock_root/build_artifacts/h2_1634280454336/work
37
+ hpack==4.0.0
38
+ httpcore @ file:///home/conda/feedstock_root/build_artifacts/httpcore_1699629103338/work
39
+ httpx @ file:///home/conda/feedstock_root/build_artifacts/httpx_1699030327261/work
40
+ huggingface-hub @ file:///home/conda/feedstock_root/build_artifacts/huggingface_hub_1700152335477/work
41
+ hyperframe @ file:///home/conda/feedstock_root/build_artifacts/hyperframe_1619110129307/work
42
+ idna @ file:///Users/cbousseau/work/recipes/ci_py311/idna_1677906072337/work
43
+ importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1688754491823/work
44
+ importlib-resources @ file:///home/conda/feedstock_root/build_artifacts/importlib_resources_1699364556997/work
45
+ ipympl==0.9.3
46
+ ipython==8.17.2
47
+ ipython-genutils==0.2.0
48
+ ipywidgets==8.1.1
49
+ jedi==0.19.1
50
+ Jinja2 @ file:///home/conda/feedstock_root/build_artifacts/jinja2_1654302431367/work
51
+ joblib==1.3.2
52
+ jsonpatch @ file:///tmp/build/80754af9/jsonpatch_1615747632069/work
53
+ jsonpointer==2.1
54
+ jsonschema @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-meta_1700159890288/work
55
+ jsonschema-specifications @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-specifications_1700059145511/work
56
+ jupyterlab-widgets==3.0.9
57
+ kiwisolver==1.4.5
58
+ laspy @ file:///Users/runner/miniforge3/conda-bld/laspy_1699607310769/work
59
+ lazrs @ file:///Users/runner/miniforge3/conda-bld/lazrs-python_1698372679597/work
60
+ libmambapy @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_59l2npsw_8/croot/mamba-split_1698782625405/work/libmambapy
61
+ markdown-it-py @ file:///home/conda/feedstock_root/build_artifacts/markdown-it-py_1686175045316/work
62
+ MarkupSafe @ file:///Users/runner/miniforge3/conda-bld/markupsafe_1695367660391/work
63
+ matplotlib @ file:///Users/runner/miniforge3/conda-bld/matplotlib-suite_1678135673869/work
64
+ matplotlib-inline==0.1.6
65
+ mdurl @ file:///home/conda/feedstock_root/build_artifacts/mdurl_1639515908913/work
66
+ munkres==1.1.4
67
+ numpy @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_362zs5g963/croot/numpy_and_numpy_base_1695830450707/work/dist/numpy-1.26.0-cp311-cp311-macosx_11_0_arm64.whl#sha256=35f9bcbdc8071f8981937b450fba07496cbb1e0d2a724e1d0619e6e714b42590
68
+ orjson @ file:///Users/runner/miniforge3/conda-bld/orjson_1698619146121/work/target/wheels/orjson-3.9.10-cp311-cp311-macosx_11_0_arm64.whl#sha256=655d5a2e944e3ebde9e30989514201f288ed152b82cc50d789ec03998bac369f
69
+ packaging @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_6dm6d4jd_t/croot/packaging_1693575176524/work
70
+ pandas==2.0.0
71
+ parso==0.8.3
72
+ pexpect==4.8.0
73
+ Pillow @ file:///Users/runner/miniforge3/conda-bld/pillow_1684654235906/work
74
+ pkgutil_resolve_name @ file:///home/conda/feedstock_root/build_artifacts/pkgutil-resolve-name_1694617248815/work
75
+ pluggy @ file:///Users/cbousseau/work/recipes/ci_py311/pluggy_1677906980825/work
76
+ prompt-toolkit==3.0.41
77
+ ptyprocess==0.7.0
78
+ pure-eval==0.2.2
79
+ pycosat @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_3eg8vdcs6z/croot/pycosat_1696536519213/work
80
+ pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work
81
+ pydantic @ file:///home/conda/feedstock_root/build_artifacts/pydantic_1700171233545/work
82
+ pydantic_core @ file:///Users/runner/miniforge3/conda-bld/pydantic-core_1700010143722/work
83
+ pydub @ file:///home/conda/feedstock_root/build_artifacts/pydub_1615612442567/work
84
+ Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1691408637400/work
85
+ pyOpenSSL @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_b8whqav6qm/croot/pyopenssl_1690223428943/work
86
+ pyparsing @ file:///home/conda/feedstock_root/build_artifacts/pyparsing_1690737849915/work
87
+ PySocks @ file:///Users/cbousseau/work/recipes/ci_py311/pysocks_1677906386870/work
88
+ python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work
89
+ python-multipart @ file:///home/conda/feedstock_root/build_artifacts/python-multipart_1679167423335/work
90
+ pytz @ file:///home/conda/feedstock_root/build_artifacts/pytz_1693930252784/work
91
+ PyYAML @ file:///Users/runner/miniforge3/conda-bld/pyyaml_1695373486380/work
92
+ referencing @ file:///home/conda/feedstock_root/build_artifacts/referencing_1700053204647/work
93
+ requests @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_54zi68h2nb/croot/requests_1690400233316/work
94
+ rich @ file:///home/conda/feedstock_root/build_artifacts/rich-split_1700160075651/work/dist
95
+ rpds-py @ file:///Users/runner/miniforge3/conda-bld/rpds-py_1700156534986/work
96
+ ruamel.yaml @ file:///Users/cbousseau/work/recipes/ci_py311/ruamel.yaml_1677934845850/work
97
+ scikit-learn==1.3.2
98
+ scipy==1.11.3
99
+ semantic-version @ file:///home/conda/feedstock_root/build_artifacts/semantic_version_1653579368137/work
100
+ shellingham @ file:///home/conda/feedstock_root/build_artifacts/shellingham_1698144360966/work
101
+ six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work
102
+ sniffio @ file:///home/conda/feedstock_root/build_artifacts/sniffio_1662051266223/work
103
+ stack-data==0.6.3
104
+ starlette @ file:///home/conda/feedstock_root/build_artifacts/starlette-recipe_1684245096404/work
105
+ threadpoolctl==3.2.0
106
+ tomlkit @ file:///home/conda/feedstock_root/build_artifacts/tomlkit_1690458286251/work
107
+ toolz @ file:///home/conda/feedstock_root/build_artifacts/toolz_1657485559105/work
108
+ tqdm @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_ac7zic_tin/croot/tqdm_1679561870178/work
109
+ traitlets==5.13.0
110
+ truststore @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_42mm7e6j06/croot/truststore_1695244298716/work
111
+ typer @ file:///home/conda/feedstock_root/build_artifacts/typer_1683029246636/work
112
+ typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1695040754690/work
113
+ tzdata @ file:///home/conda/feedstock_root/build_artifacts/python-tzdata_1680081134351/work
114
+ urllib3==1.26.6
115
+ uvicorn @ file:///Users/runner/miniforge3/conda-bld/uvicorn-split_1699219080682/work
116
+ wcwidth==0.2.10
117
+ websockets @ file:///Users/runner/miniforge3/conda-bld/websockets_1695410063212/work
118
+ widgetsnbextension==4.0.9
119
+ zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1695255097490/work
120
+ zstandard @ file:///Users/cbousseau/work/recipes/ci_py311_2/zstandard_1678996192313/work
main.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #######################################################################################################
2
+ # IMPORT
3
+ #######################################################################################################
4
+
5
+ import pandas as pd
6
+ import geopandas as gpd
7
+ import os
8
+ from configparser import ConfigParser
9
+
10
+ # modules
11
+ from modules.geojson_github_loader import download_github_geojson
12
+ from modules.geojson_processor import geojson_processor_to_csv
13
+ from modules.language_model import TAPAS
14
+
15
+ #######################################################################################################
16
+ # CONFIG
17
+ #######################################################################################################
18
+
19
# Entry script: read config.ini, prepare the GADM level-1 GeoJSON as CSV tables
# and ask the TAPAS model one question about the resulting table.

print('\nCurrent Working Directory (CWD):\n' + os.getcwd())

config_object = ConfigParser()
if not os.path.isfile('config.ini'):
    print('No config file in the CWD')
    # `quit()` is only injected by the `site` module; SystemExit always works.
    raise SystemExit(1)

config_object.read('config.ini')
print('Setting have been imported from the config file.')

# changing CWD and input output folders
config = config_object['CONFIG']
os.chdir(config['CWD'])

# os.path.join instead of a literal '\\' so the paths are also valid on
# Linux/macOS (e.g. when the project runs on a hosted service).
base_dir = os.getcwd()
DATA = os.path.join(base_dir, config['Input'])
OUT = os.path.join(base_dir, config['Output'])
TEMP = os.path.join(base_dir, config['Temp'])


#######################################################################################################
# Load and prepare Data
#######################################################################################################

# load github data
# attributes
github_user = "Giedeon25"
github_repo = "GID-Project"
file_path_github = "main/data/Input/gadm41_DEU_1.json"
# SECURITY: the personal access token used to be hard-coded here. A token that
# was committed must be revoked; it is now read from the environment instead.
token = os.environ.get("GITHUB_TOKEN")

local_file_path = os.path.join(DATA, 'gadm41_DEU_1.json')

output_file = os.path.join(TEMP, 'gadm41_DEU_1')

# load from GitHub
# NOTE: the downloaded frame is replaced by the local file right below, so the
# download is skipped when no token is configured.
if token:
    geojson_data = download_github_geojson(github_user, github_repo, file_path_github, token)
else:
    print('GITHUB_TOKEN is not set; skipping the GitHub download.')

# load locally
geojson_data = gpd.read_file(local_file_path)
print(geojson_data.head())

# convert and save data
# function
geojson_processor_to_csv(geojson_data, output_file)


#######################################################################################################
# LLM
#######################################################################################################

# attributes
question = 'what is the geometry of Saxony?'
# These are the two files written by geojson_processor_to_csv above.
table_main = pd.read_csv(output_file + '_main').astype(str)
table_geom = pd.read_csv(output_file + '_geom')

# function
TAPAS(question, table_main)
main/GeoJSON_Bundesländer.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Imports --------------------------------------------------------------------------------------------------
2
+ # https://huggingface.co/tasks/table-question-answering
3
+
4
+ from transformers import pipeline
5
+ import pandas as pd
6
+ import time
7
+ import math
8
+ import os
9
+
10
+ # Small snippet to retrieve coordinates from a geojson file
11
+ import requests
12
+ import json
13
+
14
+
15
# Set the working directory (machine-specific path, must be adapted)
os.chdir('C:/Users/Jens_/Documents/Unterlagen/Studium Dresden/2. Semester/GIT06/GID-Project')


# GADM Germany sample polygons ------------------------------------------------------------------------
# Preparation
# Load the data
with open('Daten/GeoJSON/gadm41_DEU_1.json', 'r', encoding='utf-8') as json_datei:
    daten = json.load(json_datei)
# The with-statement has already closed the file here. The former
# os.close(<path>) call was wrong: os.close() expects an integer file
# descriptor and raised TypeError when given a path string.
table = pd.DataFrame.from_dict(daten["features"])


# Bring the data into a suitable format (pandas DataFrame):
# expand the nested "properties" dicts into regular columns
prop = pd.DataFrame(list(table['properties']))
del table['properties']
table = pd.concat([table, prop], axis=1)

# Keep the coordinates aside and replace them in the table by their row
# index, so the model does not have to process them (computation time)
geom = table["geometry"]
table["geometry"] = [str(i) for i in range(table.shape[0])]
print(table)


# Model
tqa = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq")


# Question
question = 'give me the geometry of england'


# Computation
t = round(time.time())  # measure the computation time
cell = tqa(table=table, query=question)['cells'][0]

try:
    # A geometry cell only contains the row index written above.
    i = int(cell)
    answer = geom[i]
except (ValueError, KeyError):
    # The model picked a cell from another column (no usable index).
    # The old code built an InterruptedError here without raising it.
    print('Falsche Spalte (Es wurde kein Index ausgegeben)')
    i = None
    answer = 'answer: ' + str(cell)

t = round(time.time()) - t
# Split the elapsed seconds into hours, minutes and seconds. The old
# expression divided by 360 and never reduced minutes/seconds modulo 60.
hours, remainder = divmod(t, 3600)
minutes, seconds = divmod(remainder, 60)
t = str(hours) + "::" + str(minutes) + "::" + str(seconds)


# Verification
print(answer)
if i is not None:
    # Only a numeric answer identifies a table row that can be shown.
    print(table.iloc[i])
print(t)  # ~2 sec


# Target of the question, places mentioned in the question

# How can several federal states (named in the question) be returned?
# How can different "operations" (between, all, etc.) be recognised in the question?
modules/__init__.py ADDED
File without changes
modules/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (217 Bytes). View file
 
modules/__pycache__/geojson_github_loader.cpython-311.pyc ADDED
Binary file (1.58 kB). View file
 
modules/__pycache__/geojson_processor.cpython-311.pyc ADDED
Binary file (3.65 kB). View file
 
modules/__pycache__/language_model.cpython-311.pyc ADDED
Binary file (1.43 kB). View file
 
modules/find_neighbors.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import shapely
3
+
4
def get_neighbors(geometries):
    """
    Find the neighbors of every geometry.

    Two geometries count as neighbors when they intersect.

    Args:
        geometries (list | pandas.Series): shapely Polygons/MultiPolygons, or
            WKT strings describing such geometries. Both kinds may be mixed.
    Returns:
        list[list[int]]: one list per input geometry holding the positions
            (0-based, ascending) of all other geometries intersecting it.
            An empty input gives an empty list.
    Raises:
        ValueError: if `geometries` is not a list/Series or an element is
            neither a (Multi)Polygon nor a WKT string of one.
    """
    message = "geometries must be a list with shapely.MultiPolygons/shapely.Polygon or strings of MultiPolygons/Polygons"

    # Check the container first; iterating an arbitrary object could fail
    # with an unrelated TypeError before the intended ValueError.
    if not isinstance(geometries, (list, pd.Series)):
        raise ValueError(message)

    # Work on a positional copy: plain lists have no `.shape`, and a Series
    # with a custom index cannot be addressed with 0..n-1 labels.
    shapes = []
    for geom in list(geometries):
        if isinstance(geom, str):
            # Parse each WKT string on its own (the old code handed the
            # whole list to the single-string parser).
            geom = shapely.from_wkt(geom)
        if not isinstance(geom, (shapely.MultiPolygon, shapely.Polygon)):
            raise ValueError(message)
        shapes.append(geom)

    out_col = [[] for _ in shapes]

    # intersects() is symmetric, so every unordered pair is tested once and
    # recorded for both partners; the lists stay in ascending order.
    for r, geom in enumerate(shapes):
        for i in range(r + 1, len(shapes)):
            if geom.intersects(shapes[i]):
                out_col[r].append(i)
                out_col[i].append(r)

    return out_col
modules/geojson_github_loader.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import requests
3
+ import geopandas as gpd
4
+
5
def download_github_geojson(github_user, repository, file_path, token):
    """
    Load GeoJSON data from a GitHub repository.

    Args:
        github_user (str): GitHub username.
        repository (str): GitHub repository name.
        file_path (str): Path of the GeoJSON file in the repository,
            including the branch (it is appended to the raw-content URL).
        token (str | None): Personal access token sent in the Authorization
            header. When empty/None no Authorization header is sent.

    Returns:
        geopandas.GeoDataFrame | None: The loaded GeoJSON data, or None when
            the request failed or did not return status 200.
    """
    import io

    # headers with personal access token (omitted when there is no token, so
    # that the literal text "token None" is never sent)
    headers = {"Authorization": f"token {token}"} if token else {}

    # Create a URL to the raw GeoJSON file in the repository
    raw_url = f"https://raw.githubusercontent.com/{github_user}/{repository}/{file_path}"

    print(f"Debug: raw_url = {raw_url}")  # Debugging line

    # Make a GET request to the URL. Without a timeout requests waits
    # indefinitely when the server does not answer.
    try:
        response = requests.get(raw_url, headers=headers, timeout=30)
    except requests.RequestException as error:
        print(f"Failed to retrieve GeoJSON data. Request error: {error}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve GeoJSON data. Status code: {response.status_code}")
        return None

    # Parse the GeoJSON data from an in-memory file object; read_file expects
    # a path/URL or a file-like object, not the document text itself.
    geojson_data = gpd.read_file(io.BytesIO(response.content))
    print("File loaded succesfully.")
    print(geojson_data.head())
    return geojson_data
modules/geojson_processor.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import geopandas as gpd
3
+
4
def geojson_processor_to_csv(geojson_data, output_file):
    """
    Process GeoJSON data and save it as two CSV files.

    The geometry-related values (geometry, bbox, centroid, neighbors) are
    written to `<output_file>_geom`. In the main table these four columns are
    replaced by link strings of the form `<output_file>_geom;<row>;<column>`
    and the table is written to `<output_file>_main`.

    Args:
        geojson_data (geopandas.GeoDataFrame): Parsed GeoJSON features with a
            "geometry" column. NOTE: it is modified in place (the four link
            columns are written into it).
        output_file (str): Path prefix of the output CSV files.
    Returns:
        str: the fixed status text 'Processing done and saved'.
    """

    # split geometries from the DataFrame to reduce processing time in later steps
    geom_df = gpd.GeoDataFrame(geojson_data["geometry"], crs="EPSG:4326")

    # get geometry bounds
    df_bounds = geom_df['geometry'].bounds

    # create bbox column from bounds
    geom_df['bbox'] = list(zip(df_bounds['minx'], df_bounds['miny'], df_bounds['maxx'], df_bounds['maxy']))
    print('bboxes added to df.')

    # calculate geometry centroids
    geom_df['centroid'] = geom_df['geometry'].centroid
    print('centroids added to df.')

    # find neighbors
    geom_df['neighbors'] = None

    # geom_df has no 'ID' column (it only holds the geometry-derived columns),
    # so the old lookup other_row['ID'] raised KeyError. Neighbors are now
    # identified by their row position, which is the same numbering the link
    # strings below use.
    shapes = list(geom_df['geometry'])
    for position, (index, shape) in enumerate(zip(geom_df.index, shapes)):
        geom_df.at[index, 'neighbors'] = [
            other_position
            for other_position, other_shape in enumerate(shapes)
            if other_position != position and shape.touches(other_shape)
        ]

    # save df as csv
    geom_file = output_file + '_geom'
    geom_df.to_csv(geom_file, index=False)
    print('geometry file saved')

    # Replace the heavy columns by links "<geom file>;<row>;<column>"; the
    # column number is the position of the column inside the geometry file.
    row_count = geojson_data.shape[0]
    for column_number, column in enumerate(("geometry", "bbox", "centroid", "neighbors")):
        geojson_data[column] = [
            geom_file + ';' + str(i) + ";" + str(column_number) for i in range(row_count)
        ]

    # save df as csv
    geojson_data.to_csv(output_file + '_main', index=False)
    print('main file saved.')

    return 'Processing done and saved'
55
+
56
+
modules/getCountrycode.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import geopandas as gpd
2
+
3
# Small sample table that mirrors the column layout of the real data
# (replace it with your own data).
data = dict(
    GID_1=['DEU.1_1', 'DEU.2_1'],
    GID_0=['DEU', 'DEU'],
    COUNTRY=['Germany', 'Germany'],
    NAME_1=['Baden-Würtenberg', 'Bayern'],
    VARNAME_1=['NA', 'Bavaria'],
    NL_NAME_1=['NA', 'NA'],
    TYPE_1=['Land', 'Freistaat'],
    ENGTYPE_1=['State', 'Freestate'],
    CC_1=['08', '09'],
    # Extra code for subnational units
    # (https://de.wikipedia.org/wiki/Hierarchical_administrative_subdivision_codes)
    HASC_1=['DE.BW', 'DE.BY'],
    # International standard --> checked first (https://de.wikipedia.org/wiki/ISO_3166)
    ISO_1=['NA', 'DE-BY'],
    geometry=[0, 1],
)
gdf = gpd.GeoDataFrame(data)
18
+
19
+ # function to generate output
20
+ # Land should be a line from the geojson-table
21
+ # Currently only works for NUTS-1 areas!!!
22
+
23
def getCountrycode(land):
    """
    Return the region code of one table row.

    The ISO code ('ISO_1') is preferred; the HASC code ('HASC_1') is used as
    fallback. The text 'NA' and empty/non-text values count as "no code".

    Args:
        land: one row of the table (anything indexable by column name).
    Returns:
        str | bool: the code, or False when neither column holds one.
    """
    # Both columns are checked the same way. Previously the fallback only
    # tested truthiness, so the placeholder 'NA' was returned as a code.
    for column in ('ISO_1', 'HASC_1'):
        code = land[column]
        if isinstance(code, str) and code not in ('', 'NA'):
            return code
    return False
30
+
modules/gradio_testing.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import
2
+ import geopandas as gpd
3
+
4
# Small sample table that mirrors the column layout of the real data
# (replace it with your own data).
data = dict(
    GID_1=['DEU.1_1', 'DEU.2_1'],
    GID_0=['DEU', 'DEU'],
    COUNTRY=['Germany', 'Germany'],
    NAME_1=['Baden-Würtenberg', 'Bayern'],
    VARNAME_1=['NA', 'Bavaria'],
    NL_NAME_1=['NA', 'NA'],
    TYPE_1=['Land', 'Freistaat'],
    ENGTYPE_1=['State', 'Freestate'],
    CC_1=['08', '09'],
    # Extra code for subnational units
    # (https://de.wikipedia.org/wiki/Hierarchical_administrative_subdivision_codes)
    HASC_1=['DE.BW', 'DE.BY'],
    # International standard --> checked first (https://de.wikipedia.org/wiki/ISO_3166)
    ISO_1=['NA', 'DE-BY'],
    geometry=[0, 1],
)
gdf = gpd.GeoDataFrame(data)
19
+
20
+
21
def getLand(landnr):
    """
    Return one row of the built-in sample table.

    Args:
        landnr: row position; converted with int() before use.
    Returns:
        The row at that position of a GeoDataFrame built from the sample data.
    """
    row_position = int(landnr)

    # Test data (same layout as the real table)
    sample = dict(
        GID_1=['DEU.1_1', 'DEU.2_1'],
        GID_0=['DEU', 'DEU'],
        COUNTRY=['Germany', 'Germany'],
        NAME_1=['Baden-Würtenberg', 'Bayern'],
        VARNAME_1=['NA', 'Bavaria'],
        NL_NAME_1=['NA', 'NA'],
        TYPE_1=['Land', 'Freistaat'],
        ENGTYPE_1=['State', 'Freestate'],
        CC_1=['08', '09'],
        # Extra code for subnational units
        # (https://de.wikipedia.org/wiki/Hierarchical_administrative_subdivision_codes)
        HASC_1=['DE.BW', 'DE.BY'],
        # International standard --> checked first (https://de.wikipedia.org/wiki/ISO_3166)
        ISO_1=['NA', 'DE-BY'],
        geometry=[0, 1],
    )
    return gpd.GeoDataFrame(sample).iloc[row_position]
40
+
41
+ # function to generate output
42
+ # Land should be a line from the geojson-table
43
+ # Currently only works for NUTS-1 areas!!!
44
+
45
def getCountrycode(land, level = 1):
    """
    Return the region code of one table row as text.

    The ISO code ('ISO_<level>') is preferred; the HASC code
    ('HASC_<level>') is used as fallback. The text 'NA' and empty/non-text
    values count as "no code".

    Args:
        land: one row of the table (anything indexable by column name).
        level: administrative level used to build the column names.
    Returns:
        str | bool: the code, or False when neither column holds one.
    """
    # Both columns are checked the same way. Previously the fallback only
    # tested truthiness, so the placeholder 'NA' was returned as a code.
    for prefix in ('ISO_', 'HASC_'):
        code = land[prefix + str(level)]
        if isinstance(code, str) and code not in ('', 'NA'):
            return str(code)
    return False
55
+
56
+
57
def grad_Country(landnr):
    """
    Gradio callback: look up the sample row `landnr` and return its code.

    Args:
        landnr: row position forwarded to getLand().
    Returns:
        str: text form of whatever getCountrycode() returns for that row.
    """
    return str(getCountrycode(getLand(landnr)))
61
+
62
+
63
+ #'''
64
+ import gradio as gr
65
+
66
def greet(name):
    """Return the greeting text "Hello <name>!!" for the given name."""
    return "".join(("Hello ", name, "!!"))
68
+
69
# Gradio interface: a number input is passed to grad_Country and the
# returned text is shown as output.
iface = gr.Interface(
    fn=grad_Country,
    inputs="number",
    outputs="text",
)

# Start the web interface only when this file is run as a script.
if __name__ == '__main__':
    iface.launch()
74
+ #'''
modules/language_model.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ import pandas as pd
3
+
4
# Cached TAPAS pipeline; created on first use by _get_tqa_pipeline().
_TQA_PIPELINE = None


def _get_tqa_pipeline():
    """Create the TAPAS table-question-answering pipeline once and reuse it."""
    global _TQA_PIPELINE
    if _TQA_PIPELINE is None:
        _TQA_PIPELINE = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq")
    return _TQA_PIPELINE


def TAPAS(question, table_main):
    """
    Answer a question about the main table with the TAPAS model.

    Args:
        question (str): the question.
        table_main (df): main table. Cells of the form "<path>;<row>;<column>"
            are treated as links into another CSV file.
    Returns:
        answer: the linked value read from `<path>` when the selected cell is
            such a link, otherwise the selected cell as str. An empty string
            is returned when the model selects no cell.
    """

    # Building the pipeline loads the model, so it is done only once instead
    # of on every call.
    tqa = _get_tqa_pipeline()

    # use the tqa pipeline to perform table-based question answering.
    cells = tqa(table=table_main, query=question)['cells']
    if not cells:
        return ''
    cell = cells[0]

    # Check if the output is the link to the TEMP DB:
    # needed because the entries for geometry, ... are stored in a second file
    parts = cell.split(";")
    if len(parts) >= 3:
        try:
            r = int(parts[1])
            c = int(parts[2])
        except ValueError:
            # An ordinary answer that merely contains ';' is not a link.
            return str(cell)
        answer_table = pd.read_csv(parts[0])
        return answer_table.iloc[r, c]

    return str(cell)
modules/neighbors.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import geopandas as gpd
2
+ from shapely.geometry import Polygon
3
+
4
# Sample GeoDataFrame: three squares side by side (replace with your own data)
data = {
    'ID': [1, 2, 3],
    'geometry': [
        Polygon([(0, 0), (0, 2), (2, 2), (2, 0)]),
        Polygon([(2, 0), (2, 2), (4, 2), (4, 0)]),
        Polygon([(4, 0), (4, 2), (6, 2), (6, 0)]),
    ],
}
gdf = gpd.GeoDataFrame(data, crs="EPSG:4326")

# New column that will hold the IDs of the touching geometries
gdf['neighbors'] = None

# For every row collect the IDs of all other rows whose geometry touches it
for index, row in gdf.iterrows():
    neighbors = [
        other_row['ID']
        for other_index, other_row in gdf.iterrows()
        if index != other_index and row['geometry'].touches(other_row['geometry'])
    ]
    gdf.at[index, 'neighbors'] = neighbors

# Show the IDs together with their neighbors
print(gdf[['ID', 'neighbors']])
24
+
25
+
26
+
27
def find_neighbors(geom_df):
    """
    Add a 'neighbors' column to `geom_df` and return the same frame.

    Each cell of the new column is the result of find_single_neighbors()
    for that row.
    """
    def neighbors_of(row):
        return find_single_neighbors(row, geom_df)

    geom_df['neighbors'] = geom_df.apply(neighbors_of, axis=1)
    return geom_df
30
+
31
def find_single_neighbors(row, geom_df):
    """
    Collect the 'ID' values of all rows in `geom_df` that touch `row`.

    Args:
        row: one row of the frame; `row.name` is its index label and
            `row['geometry']` its geometry.
        geom_df: frame with 'ID' and 'geometry' columns.
    Returns:
        list: IDs of the rows (other than `row` itself) whose geometry
            touches the geometry of `row`, in frame order.
    """
    return [
        candidate['ID']
        for label, candidate in geom_df.iterrows()
        if row.name != label and row['geometry'].touches(candidate['geometry'])
    ]
37
+
38
# Example usage:
# Replace 'your_data.geojson' with the path to your GeoJSON file or any other supported format
# Make sure the GeoDataFrame has a 'geometry' column (and an 'ID' column,
# which find_single_neighbors reads).
# The example runs only when this file is executed as a script: the
# placeholder file does not exist, so reading it unconditionally made every
# import of this module fail.
if __name__ == '__main__':
    your_gdf = gpd.read_file('your_data.geojson')

    # Call the function to find neighbors
    result_gdf = find_neighbors(your_gdf)

    # Print the resulting GeoDataFrame
    print(result_gdf)
requirements.txt ADDED
Binary file (9.62 kB). View file