diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..45c4b1fb016007531581c3bae81f47d1ae27c1d0
--- /dev/null
+++ b/app.py
@@ -0,0 +1,153 @@
+import os
+
+import streamlit as st
+import streamlit.components.v1 as components
+from datasets import load_dataset
+
+
+# Page-level settings; Streamlit expects this to be the first Streamlit
+# command issued by the script.
+st.set_page_config(page_title="Gaia Search", layout="wide")
+
+# Force the light theme by (re)writing a project-local Streamlit config file
+# under the current working directory. This runs on every execution of the
+# script, so the file is overwritten unconditionally.
+_config_dir = os.path.join(os.getcwd(), ".streamlit")
+os.makedirs(_config_dir, exist_ok=True)
+# Explicit encoding so the written bytes do not depend on the platform's
+# locale default.
+with open(os.path.join(_config_dir, "config.toml"), "w", encoding="utf-8") as file:
+    file.write('[theme]\nbase="light"')
+
+
+# Sidebar header: app title and tagline. `unsafe_allow_html=True` lets raw
+# HTML in the literal through to the page instead of being escaped.
+# NOTE(review): the tagline mentions LAION image-caption corpora, but the
+# dataset loaded further down is "imdb" -- confirm which one is intended.
+st.sidebar.markdown(
+ """
+
+
Gaia Search 🌖🌏
+ A search engine for the LAION large scale image caption corpora
+""",
+ unsafe_allow_html=True,
+)
+
+# Sidebar links row ("GitHub | Project Report").
+st.sidebar.markdown(
+ """
+
+
+GitHub | Project Report
+
+
+
+
+
+
+""",
+ unsafe_allow_html=True,
+)
+
+# Free-text query box; starts empty and its value is passed to `search`
+# when the "Search" button is clicked.
+query = st.sidebar.text_input(label="Search query", value="")
+
+# Footer markup rendered in the sidebar after the query box.
+footer = """
+
+"""
+st.sidebar.markdown(footer, unsafe_allow_html=True)
+
+
+# NOTE(review): `LuceneSearcher` is not imported anywhere in the visible part
+# of this file, so this line raises NameError as written. Presumably it is
+# pyserini's `pyserini.search.lucene.LuceneSearcher` -- confirm and add the
+# import.
+# The "index" argument is presumably the path of the Lucene index directory
+# that sits next to this script -- confirm.
+searcher = LuceneSearcher("index")
+# Loads the "train" split of the "imdb" dataset; `search` picks rows from it
+# by converting each hit's docid to an integer row index.
+ds = load_dataset("imdb", split="train")
+
+
+def search(query):
+    """Return the top search hits for ``query`` as an HTML fragment.
+
+    Looks up at most 10 hits in the module-level ``searcher``, maps each
+    hit's ``docid`` to a row of the module-level dataset ``ds``, and joins
+    the rows' text with ``<br>`` separators.
+
+    Args:
+        query: Free-text query string typed by the user.
+
+    Returns:
+        A ``str`` of HTML-escaped document texts separated by ``<br>``;
+        the empty string when the query is blank or nothing matches.
+    """
+    import html
+
+    # A blank query cannot match anything useful; skip the index lookup.
+    if not query or not query.strip():
+        return ""
+
+    hits = searcher.search(query, k=10)
+    # docids are assumed to be row indices into `ds` (as in the original
+    # code) -- TODO confirm against how the index was built.
+    doc_ids = [int(hit.docid) for hit in hits]
+    if not doc_ids:
+        return ""
+
+    rows = ds.select(doc_ids)
+    # A `Dataset` cannot be concatenated with a `str`, so the markup is built
+    # row by row. Escaping keeps document text from being interpreted as HTML
+    # by the caller, which embeds this value in a page.
+    # Assumes each row has a "text" column, as the imdb dataset does -- confirm.
+    return "<br>".join(html.escape(str(row["text"])) for row in rows)
+
+
+# The body below runs only when the sidebar "Search" button reports a click.
+if st.sidebar.button("Search"):
+ results = search(query)
+ # Wrap the search output in the results markup; `results` is interpolated
+ # as-is (no escaping is applied at this point).
+ rendered_results = f"""
+
+
+ {results}
+
+ """
+ # Two static markup blocks written to the main page ahead of the heading.
+ st.markdown(
+ """
+
+ """,
+ unsafe_allow_html=True,
+ )
+ st.markdown(
+ """
+
+ """,
+ unsafe_allow_html=True,
+ )
+ # Main-page heading. NOTE(review): this literal is an f-string but has no
+ # visible placeholders -- confirm whether the `f` prefix is needed.
+ st.markdown(
+ f"""
+
+ Gaia Search 🌖🌏
+
+
+
+
+
+
+
+ """,
+ unsafe_allow_html=True,
+ )
+ # Render a static prefix followed by the results markup through the HTML
+ # component, with height=800 and scrolling enabled.
+ components.html(
+ """
+
+
+ """
+ + rendered_results,
+ height=800,
+ scrolling=True,
+ )
diff --git a/index/.gitkeep b/index/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/index/_c.fdm b/index/_c.fdm
new file mode 100644
index 0000000000000000000000000000000000000000..10b8ac6adbb0b3e4985c39826ef97cd0e54ed4ea
Binary files /dev/null and b/index/_c.fdm differ
diff --git a/index/_c.fdt b/index/_c.fdt
new file mode 100644
index 0000000000000000000000000000000000000000..e49bfc54ed689018520b8a95488144cc37c809d1
Binary files /dev/null and b/index/_c.fdt differ
diff --git a/index/_c.fdx b/index/_c.fdx
new file mode 100644
index 0000000000000000000000000000000000000000..6a5117cf644f557f657af78c40ac895bf372a1d5
Binary files /dev/null and b/index/_c.fdx differ
diff --git a/index/_c.fnm b/index/_c.fnm
new file mode 100644
index 0000000000000000000000000000000000000000..56d4d988faa2d929462e8d90bdf22efe14726d96
Binary files /dev/null and b/index/_c.fnm differ
diff --git a/index/_c.nvd b/index/_c.nvd
new file mode 100644
index 0000000000000000000000000000000000000000..3e65f3f2687952a41dea0f7f82dc9afdfb93bb7d
Binary files /dev/null and b/index/_c.nvd differ
diff --git a/index/_c.nvm b/index/_c.nvm
new file mode 100644
index 0000000000000000000000000000000000000000..978fae64be3faaf75727138fd7af3f8eacfc4533
Binary files /dev/null and b/index/_c.nvm differ
diff --git a/index/_c.si b/index/_c.si
new file mode 100644
index 0000000000000000000000000000000000000000..3b0036eb79312f07d58280d5099f477ff86071c3
Binary files /dev/null and b/index/_c.si differ
diff --git a/index/_c_Lucene90_0.doc b/index/_c_Lucene90_0.doc
new file mode 100644
index 0000000000000000000000000000000000000000..5415752e4ac1b1f430fb128662810693f903830d
Binary files /dev/null and b/index/_c_Lucene90_0.doc differ
diff --git a/index/_c_Lucene90_0.dvd b/index/_c_Lucene90_0.dvd
new file mode 100644
index 0000000000000000000000000000000000000000..93ac0aad32dfb61863a7ba8bc42ced4b3a135000
Binary files /dev/null and b/index/_c_Lucene90_0.dvd differ
diff --git a/index/_c_Lucene90_0.dvm b/index/_c_Lucene90_0.dvm
new file mode 100644
index 0000000000000000000000000000000000000000..2acc288ba50759bcb48e219b5d66cb810d75fbb1
Binary files /dev/null and b/index/_c_Lucene90_0.dvm differ
diff --git a/index/_c_Lucene90_0.pos b/index/_c_Lucene90_0.pos
new file mode 100644
index 0000000000000000000000000000000000000000..40685acedc3745d6cc7d726c773abc557da5a69a
--- /dev/null
+++ b/index/_c_Lucene90_0.pos
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4ac8c1c910a978617aa54300a6c2421ab1295ad8fd3a28da9eca9ffec36948d
+size 1240149
diff --git a/index/_c_Lucene90_0.tim b/index/_c_Lucene90_0.tim
new file mode 100644
index 0000000000000000000000000000000000000000..891d5adfd52c2ba8a9b8a9876f2074135b3f9620
Binary files /dev/null and b/index/_c_Lucene90_0.tim differ
diff --git a/index/_c_Lucene90_0.tip b/index/_c_Lucene90_0.tip
new file mode 100644
index 0000000000000000000000000000000000000000..f1dbd1530a175d89e5fb4c0b67116bdc1817b7a4
Binary files /dev/null and b/index/_c_Lucene90_0.tip differ
diff --git a/index/_c_Lucene90_0.tmd b/index/_c_Lucene90_0.tmd
new file mode 100644
index 0000000000000000000000000000000000000000..fee1dfc6376c303e5864f8aca905970e8bc66d41
Binary files /dev/null and b/index/_c_Lucene90_0.tmd differ
diff --git a/index/_d.fdm b/index/_d.fdm
new file mode 100644
index 0000000000000000000000000000000000000000..9c575771c7a6404658315019e45ced9e2514bff8
Binary files /dev/null and b/index/_d.fdm differ
diff --git a/index/_d.fdt b/index/_d.fdt
new file mode 100644
index 0000000000000000000000000000000000000000..5206c2a8662eb51c91787a12397a33c12b00e906
Binary files /dev/null and b/index/_d.fdt differ
diff --git a/index/_d.fdx b/index/_d.fdx
new file mode 100644
index 0000000000000000000000000000000000000000..1d8f2cf1ab35be1234ac27a2fcb770cc4a333dd9
Binary files /dev/null and b/index/_d.fdx differ
diff --git a/index/_d.fnm b/index/_d.fnm
new file mode 100644
index 0000000000000000000000000000000000000000..351b0a6eeddccf1b8acd8b740ce525fad8ceb83e
Binary files /dev/null and b/index/_d.fnm differ
diff --git a/index/_d.nvd b/index/_d.nvd
new file mode 100644
index 0000000000000000000000000000000000000000..661b3295974d300f895c3ad74d1162813e4b023b
Binary files /dev/null and b/index/_d.nvd differ
diff --git a/index/_d.nvm b/index/_d.nvm
new file mode 100644
index 0000000000000000000000000000000000000000..b115c745767bcbe106596aa124f99c63a52646a1
Binary files /dev/null and b/index/_d.nvm differ
diff --git a/index/_d.si b/index/_d.si
new file mode 100644
index 0000000000000000000000000000000000000000..f29513b61c310bd51edc94f2413758f6af6d8ff2
Binary files /dev/null and b/index/_d.si differ
diff --git a/index/_d_Lucene90_0.doc b/index/_d_Lucene90_0.doc
new file mode 100644
index 0000000000000000000000000000000000000000..7d396ea61e3ff79dd7e755af8be8355ec8230e36
Binary files /dev/null and b/index/_d_Lucene90_0.doc differ
diff --git a/index/_d_Lucene90_0.dvd b/index/_d_Lucene90_0.dvd
new file mode 100644
index 0000000000000000000000000000000000000000..d4a7cbbf7e5cba08fa75d0df43f5fadf5d7654d8
Binary files /dev/null and b/index/_d_Lucene90_0.dvd differ
diff --git a/index/_d_Lucene90_0.dvm b/index/_d_Lucene90_0.dvm
new file mode 100644
index 0000000000000000000000000000000000000000..15d150d265cf77c2a500d90fa19932ba8d1340cb
Binary files /dev/null and b/index/_d_Lucene90_0.dvm differ
diff --git a/index/_d_Lucene90_0.pos b/index/_d_Lucene90_0.pos
new file mode 100644
index 0000000000000000000000000000000000000000..64f55aac98e7346ce1845fb3d5d26ad92837ffbc
--- /dev/null
+++ b/index/_d_Lucene90_0.pos
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf1185b23dfe7d554487554e5b731375c22821508fcf8781aec15d0899984efa
+size 1262499
diff --git a/index/_d_Lucene90_0.tim b/index/_d_Lucene90_0.tim
new file mode 100644
index 0000000000000000000000000000000000000000..6433aaf0b1f1fc2e83e3be404d8d814921d18d79
Binary files /dev/null and b/index/_d_Lucene90_0.tim differ
diff --git a/index/_d_Lucene90_0.tip b/index/_d_Lucene90_0.tip
new file mode 100644
index 0000000000000000000000000000000000000000..3e30a5ec74b6e0f1af1019750cf9669938d144fe
Binary files /dev/null and b/index/_d_Lucene90_0.tip differ
diff --git a/index/_d_Lucene90_0.tmd b/index/_d_Lucene90_0.tmd
new file mode 100644
index 0000000000000000000000000000000000000000..bfb5eee9660fb3c8da2b99034b0f17511cbfc251
Binary files /dev/null and b/index/_d_Lucene90_0.tmd differ
diff --git a/index/_e.fdm b/index/_e.fdm
new file mode 100644
index 0000000000000000000000000000000000000000..454082aa384cfb8086ce2995dc0b15c4ad2ce4bb
Binary files /dev/null and b/index/_e.fdm differ
diff --git a/index/_e.fdt b/index/_e.fdt
new file mode 100644
index 0000000000000000000000000000000000000000..7ca4d2995f1475584639fed152d0f3b05b3f0cfe
Binary files /dev/null and b/index/_e.fdt differ
diff --git a/index/_e.fdx b/index/_e.fdx
new file mode 100644
index 0000000000000000000000000000000000000000..1a8f1e31889530040936442a1374bc34fef044ce
Binary files /dev/null and b/index/_e.fdx differ
diff --git a/index/_e.fnm b/index/_e.fnm
new file mode 100644
index 0000000000000000000000000000000000000000..0ae8e27d7b8442884ed7a9aa8f1314d0b76c4580
Binary files /dev/null and b/index/_e.fnm differ
diff --git a/index/_e.nvd b/index/_e.nvd
new file mode 100644
index 0000000000000000000000000000000000000000..46d107aee53a07b000a3b6465c659bbbd52e8784
Binary files /dev/null and b/index/_e.nvd differ
diff --git a/index/_e.nvm b/index/_e.nvm
new file mode 100644
index 0000000000000000000000000000000000000000..d87516ca46d5757777ff0e898deb51f538432fb2
Binary files /dev/null and b/index/_e.nvm differ
diff --git a/index/_e.si b/index/_e.si
new file mode 100644
index 0000000000000000000000000000000000000000..6a0e026ce203bf654493183bf067a569299dcd06
Binary files /dev/null and b/index/_e.si differ
diff --git a/index/_e_Lucene90_0.doc b/index/_e_Lucene90_0.doc
new file mode 100644
index 0000000000000000000000000000000000000000..be62c65b8cd41158b36e002b180845376063fd01
Binary files /dev/null and b/index/_e_Lucene90_0.doc differ
diff --git a/index/_e_Lucene90_0.dvd b/index/_e_Lucene90_0.dvd
new file mode 100644
index 0000000000000000000000000000000000000000..f5c5094cd6b4e72ecbcb5994beb8603fcd1c4cf9
Binary files /dev/null and b/index/_e_Lucene90_0.dvd differ
diff --git a/index/_e_Lucene90_0.dvm b/index/_e_Lucene90_0.dvm
new file mode 100644
index 0000000000000000000000000000000000000000..d4616fe3b49677c175d5ea4e8c090b986451d261
Binary files /dev/null and b/index/_e_Lucene90_0.dvm differ
diff --git a/index/_e_Lucene90_0.pos b/index/_e_Lucene90_0.pos
new file mode 100644
index 0000000000000000000000000000000000000000..ba777a257c6b66fb680290ffb4d8ca8eada22531
--- /dev/null
+++ b/index/_e_Lucene90_0.pos
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ccaee5e5633b5c35ba7045c5e12e692587380d32c3020b81bac24a28da69ca2
+size 1238216
diff --git a/index/_e_Lucene90_0.tim b/index/_e_Lucene90_0.tim
new file mode 100644
index 0000000000000000000000000000000000000000..fd06a9963205ee25f363ea89841f5f946510a06b
Binary files /dev/null and b/index/_e_Lucene90_0.tim differ
diff --git a/index/_e_Lucene90_0.tip b/index/_e_Lucene90_0.tip
new file mode 100644
index 0000000000000000000000000000000000000000..3ee5b6de12031e8106e3d84edd2d8ad3f7ee0916
Binary files /dev/null and b/index/_e_Lucene90_0.tip differ
diff --git a/index/_e_Lucene90_0.tmd b/index/_e_Lucene90_0.tmd
new file mode 100644
index 0000000000000000000000000000000000000000..41f93e8dff6770fd61e7e136d0e4d2995f7d609a
Binary files /dev/null and b/index/_e_Lucene90_0.tmd differ
diff --git a/index/_f.fdm b/index/_f.fdm
new file mode 100644
index 0000000000000000000000000000000000000000..61b9beb4afe5313305a2310a5e1f4c27e29551ac
Binary files /dev/null and b/index/_f.fdm differ
diff --git a/index/_f.fdt b/index/_f.fdt
new file mode 100644
index 0000000000000000000000000000000000000000..ac01d8eabd06489e5d77169dd76ffa44bfc3baa3
Binary files /dev/null and b/index/_f.fdt differ
diff --git a/index/_f.fdx b/index/_f.fdx
new file mode 100644
index 0000000000000000000000000000000000000000..8862acba6cf02065a84921ecb552aeb75bb529a1
Binary files /dev/null and b/index/_f.fdx differ
diff --git a/index/_f.fnm b/index/_f.fnm
new file mode 100644
index 0000000000000000000000000000000000000000..3145f0ef9470db6ad05b0f4bafb528bf7f936cdf
Binary files /dev/null and b/index/_f.fnm differ
diff --git a/index/_f.nvd b/index/_f.nvd
new file mode 100644
index 0000000000000000000000000000000000000000..3661480f2f798b9fc7390230c99a6dc0c1718781
Binary files /dev/null and b/index/_f.nvd differ
diff --git a/index/_f.nvm b/index/_f.nvm
new file mode 100644
index 0000000000000000000000000000000000000000..f532b7f80e4115225c1cd3c4b1e5ab8edecc5b28
Binary files /dev/null and b/index/_f.nvm differ
diff --git a/index/_f.si b/index/_f.si
new file mode 100644
index 0000000000000000000000000000000000000000..afc11e8ee19889f601d88489a67f808c022303ff
Binary files /dev/null and b/index/_f.si differ
diff --git a/index/_f_Lucene90_0.doc b/index/_f_Lucene90_0.doc
new file mode 100644
index 0000000000000000000000000000000000000000..7ae37621d4fc93fa85bf5a6b1e6c1f63d833e35c
Binary files /dev/null and b/index/_f_Lucene90_0.doc differ
diff --git a/index/_f_Lucene90_0.dvd b/index/_f_Lucene90_0.dvd
new file mode 100644
index 0000000000000000000000000000000000000000..5a947e5ac8ad16994d49ba38e4016abaa29ff611
Binary files /dev/null and b/index/_f_Lucene90_0.dvd differ
diff --git a/index/_f_Lucene90_0.dvm b/index/_f_Lucene90_0.dvm
new file mode 100644
index 0000000000000000000000000000000000000000..d80bee5604226dbab2ade0c7ac5e2e6011f495f3
Binary files /dev/null and b/index/_f_Lucene90_0.dvm differ
diff --git a/index/_f_Lucene90_0.pos b/index/_f_Lucene90_0.pos
new file mode 100644
index 0000000000000000000000000000000000000000..4c264eaac83ef60c64ca947d54515f2b36041f9e
--- /dev/null
+++ b/index/_f_Lucene90_0.pos
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e48a050c215696f91bd8560cdd2ff18a40980ae17d782d160ec7ac18852d4258
+size 1277898
diff --git a/index/_f_Lucene90_0.tim b/index/_f_Lucene90_0.tim
new file mode 100644
index 0000000000000000000000000000000000000000..837aa1d6a84d635012ac75302bc6887c7b6ca284
Binary files /dev/null and b/index/_f_Lucene90_0.tim differ
diff --git a/index/_f_Lucene90_0.tip b/index/_f_Lucene90_0.tip
new file mode 100644
index 0000000000000000000000000000000000000000..e507d49f4e6b51c1265aa6f9b84ff901ec02960e
Binary files /dev/null and b/index/_f_Lucene90_0.tip differ
diff --git a/index/_f_Lucene90_0.tmd b/index/_f_Lucene90_0.tmd
new file mode 100644
index 0000000000000000000000000000000000000000..e41cc7a2aa0a8fc14838ddcd1090808115d94d1a
Binary files /dev/null and b/index/_f_Lucene90_0.tmd differ
diff --git a/index/segments_4 b/index/segments_4
new file mode 100644
index 0000000000000000000000000000000000000000..38729e3ae6f7b3a294aa545b615275ffa8f697d1
Binary files /dev/null and b/index/segments_4 differ
diff --git a/index/write.lock b/index/write.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/packages.txt b/packages.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7083f85c3741aaa661aabe2d5048ef5ebdb13b71
--- /dev/null
+++ b/packages.txt
@@ -0,0 +1 @@
+openjdk-11-jdk