Spaces:
Running
Running
LordFarquaad42
committed on
Commit
·
09cb3f5
1
Parent(s):
722a2d3
adding lectures to embedded database
Browse files- add_data.py +82 -0
- chromadb_linux/chroma.sqlite3 +2 -2
- data/Class Notes/Classes 01 and 02/Classes 1 and 2.pdf +3 -0
- data/Class Notes/Classes 01 and 02/What do we mean by _Vacuously True_.scm +52 -0
- data/Class Notes/Classes 03 and 04/Classes 03 and 04.pdf +3 -0
- data/Class Notes/Classes 05 and 06/Class 05 February 6 and 8 2024.pdf +3 -0
- data/Class Notes/Classes 05 and 06/Class 06 February 8 and 13 2024.pdf +3 -0
- data/Class Notes/Classes 07 and 08/Class 07 CSc 335 February 15 2024.pdf +3 -0
- data/Class Notes/Classes 07 and 08/Class 08 Sections M and R February 20 and 27 2024.pdf +3 -0
- data/Class Notes/Classes 09 and 10/Class 09 CSc 335 Sections M and R February 27 and 29.pdf +3 -0
- data/Class Notes/Classes 09 and 10/Class 10 CSc 335 Sections M and R February 29 and March 5.pdf +3 -0
- data/Class Notes/Classes 10 and 11/Classes 10 and 11 CSc 335 Sections M and R February 29 and March 5 2024.pdf +3 -0
- data/Class Notes/Classes 12 and 13/Classes 12 and 13 CSc 335 Sections M and R March 7 and 12.pdf +3 -0
- data/Class Notes/Classes 13 and 14/Class 13 Part 2 March 14 2024.pdf +3 -0
- data/Class Notes/Classes 13 and 14/Class 14 CSc 335 March 14 2024.pdf +3 -0
- data/Class Notes/Classes 14.5 and 15 and 15.5/Class 15 March 19 2024.pdf +3 -0
- data/Class Notes/Classes 14.5 and 15 and 15.5/Classes 14.5 and 15.5 Some Solutions for Homework 06.pdf +3 -0
- data/Class Notes/Classes 16 and 17/Class 16 March 26 2024.pdf +3 -0
- data/Class Notes/Classes 16 and 17/Class 17 March 28 2024.pdf +3 -0
- data/Class Notes/Classes 18 and 19/Class 18 April 2 2024.pdf +3 -0
- data/Class Notes/Classes 18 and 19/Class 18 Part 2 April 4 2024.pdf +3 -0
- data/Class Notes/Classes 18 and 19/Class 19 April 4 2024.pdf +3 -0
- data/Class Notes/Classes 20 and 21/Class 20 April 9 2024.pdf +3 -0
- data/Class Notes/Classes 20 and 21/Class 21 April 11 2024.pdf +3 -0
- data/Class Notes/Classes 20 and 21/Makeup Class April 10 2024_annotated.pdf +3 -0
- data/Class Notes/Classes 22 and 23/Class 22 April 18 2024.pdf +3 -0
- data/Class Notes/Classes 22 and 23/Class 23 April 30 2024.pdf +3 -0
- data/Class Notes/Classes 22 and 23/HW Session April 17 2024.pdf +3 -0
add_data.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import chromadb
|
2 |
+
from chromadb.utils import embedding_functions
|
3 |
+
|
4 |
+
|
5 |
+
def get_client():
    """Open the on-disk Chroma store and return its "scheme" collection.

    Despite the function's name, the value returned is the collection object
    obtained from ``get_collection``, not the Chroma client itself. The
    collection is bound to a SentenceTransformer embedding function built
    from the mxbai-embed-large-v1 model.
    """
    store = chromadb.PersistentClient(path="./chromadb_linux/")
    embedder = embedding_functions.SentenceTransformerEmbeddingFunction(
        model_name="mixedbread-ai/mxbai-embed-large-v1",  # ~ 0.5 gb
    )
    return store.get_collection(name="scheme", embedding_function=embedder)
|
17 |
+
|
18 |
+
|
19 |
+
def update_collection(iter: int, text: dict, client: chromadb.Collection, source: str = "pdf"):
    """Add one transcribed page to the collection.

    Args:
        iter: Index appended to the description to form the record id.
            (The name shadows the builtin but is kept for existing callers.)
        text: Mapping with a "content" entry (a short description, used as
            the id prefix) and a "text" entry (the transcription stored as
            the document).
        client: Collection object the document is added to.
        source: Value stored under the "source" metadata key. Defaults to
            "pdf", the value that used to be hard-coded, so existing calls
            behave exactly as before.

    Raises:
        KeyError: If ``text`` lacks the "text" or "content" entry.
    """
    client.add(
        documents=[text["text"]],
        metadatas=[{"source": source}],
        # The id is the description followed by the index, e.g. "intro3".
        ids=[text["content"] + str(iter)],
    )
|
21 |
+
|
22 |
+
|
23 |
+
def encode_image(img_path: str):
    """Return the contents of the file at ``img_path`` as base64 text.

    The file is read in binary mode; the base64 output is decoded as UTF-8
    so the result is a ``str``.
    """
    from base64 import b64encode

    with open(img_path, "rb") as handle:
        raw_bytes = handle.read()
    return b64encode(raw_bytes).decode("utf-8")
|
28 |
+
|
29 |
+
|
30 |
+
async def image_to_text(image) -> object:
    """Ask GPT-4 Turbo to transcribe an image and return the parsed JSON reply.

    Args:
        image: Base64 text of the image; it is embedded in a
            ``data:image/jpeg`` URL, so JPEG data is expected.

    Returns:
        The object decoded from the model's JSON reply. The prompt asks for
        two keys, 'content' and 'text', but the reply is not validated here.

    Note:
        The OpenAI client used below is the synchronous one, so awaiting
        this coroutine blocks the event loop until the request finishes.
    """
    from openai import OpenAI
    import json

    client = OpenAI()

    response = client.chat.completions.create(
        model="gpt-4-turbo",
        response_format={"type": "json_object"},
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Transcribe the contents of this image and return a JSON object that contains the text. It must be structured in the following manner: two entries with the following keys: 'content' and 'text'. Content will be a line describing what the content of text will be, and text will be a simple transcription of the image"},
                    {
                        "type": "image_url",
                        "image_url": {
                            # Data URLs are "data:<mediatype>;base64,<data>";
                            # the previous "base64;," had a stray semicolon.
                            "url": f"data:image/jpeg;base64,{image}",
                            "detail": "high",
                        },
                    },
                ],
            }
        ],
    )
    return json.loads(response.choices[0].message.content)
|
56 |
+
|
57 |
+
|
58 |
+
async def start_troggin_off(dir: str):
    """Transcribe every PDF one folder level below ``dir`` into the collection.

    Each immediate sub-folder of ``dir`` is scanned for PDF files. Every page
    of every PDF is rendered to a JPEG, transcribed with ``image_to_text``
    and stored with ``update_collection``.

    Args:
        dir: Directory whose sub-folders contain the PDFs. (The name shadows
            the builtin but is kept for existing callers.)
    """
    import os
    import tempfile
    from pdf2image import convert_from_path

    client = get_client()

    # Running index across all PDFs. The record id is "<description><index>",
    # so restarting the index for each file could make pages from different
    # PDFs collide on the same id.
    page_no = 0

    # Render pages into a scratch directory that is removed afterwards rather
    # than leaving out<N>.jpg files behind in the working directory.
    with tempfile.TemporaryDirectory() as scratch:
        page_jpeg = os.path.join(scratch, "page.jpg")

        # sorted() gives a deterministic processing order; os.listdir's is arbitrary.
        for folder in sorted(os.listdir(dir)):
            folder_path = os.path.join(dir, folder)
            if not os.path.isdir(folder_path):
                continue

            for file in sorted(os.listdir(folder_path)):
                # Case-insensitive so "NOTES.PDF" is not silently skipped.
                if not file.lower().endswith(".pdf"):
                    continue

                print("Processing", file)
                pdf_path = os.path.join(folder_path, file)

                for image in convert_from_path(pdf_path):
                    image.save(page_jpeg, "JPEG")
                    encoded_image = encode_image(page_jpeg)
                    text = await image_to_text(encoded_image)
                    update_collection(page_no, text, client)
                    page_no += 1
|
78 |
+
|
79 |
+
|
80 |
+
if __name__ == "__main__":
    from asyncio import run

    # Script entry point: ingest everything under the class-notes directory.
    run(start_troggin_off("data/Class Notes/"))
|
chromadb_linux/chroma.sqlite3
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2caf10f7d9b3bd7e24249045ec8b980ece782ab4b660016944407afb3df73d19
|
3 |
+
size 19333120
|
data/Class Notes/Classes 01 and 02/Classes 1 and 2.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b36140dff9f4b928bf26a7374945b08eb8d83cb769bba79142f45a7a06632ca2
|
3 |
+
size 950954
|
data/Class Notes/Classes 01 and 02/What do we mean by _Vacuously True_.scm
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
3 |
+
|
4 |
+
;; On the terms 'vacuous' and 'vacuously true'
|
5 |
+
|
6 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
7 |
+
|
8 |
+
;; can you explain why (+) has the value 0?
|
9 |
+
|
10 |
+
;;; well, there are no arguments, and the sum of zero numbers is 0
|
11 |
+
|
12 |
+
;; similarly, can you explain why (and) has the value #t?
|
13 |
+
|
14 |
+
;;; well -- given (and arg1 arg2 ... argk), and returns true if every one of
|
15 |
+
;;; arg1, arg2, ..., argk is true.  For (and) -- i.e., and applied to no arguments -- the set {arg1, arg2, ..., argk}
|
16 |
+
;;; is empty -- it follows then that every one of these arguments is true.
|
17 |
+
|
18 |
+
;;; one says then that (and) is vacuously true
|
19 |
+
|
20 |
+
;;; we will encounter this over and over again in the coming weeks -- you may want
|
21 |
+
;;; to review universal quantifiers: "for every arg in the empty set {}, (f arg) is
|
22 |
+
;;; true whenever (f arg) computes a Boolean value". Intuitively,
|
23 |
+
;;; ask yourself "how could it be false?" Well, there would
|
24 |
+
;;; need to be an argument in the empty set for which the value of (f arg) is false.  But of course
|
25 |
+
;;; there are no arguments in the empty set.
|
26 |
+
|
27 |
+
;;; I like to refer to this as the 'green elephant argument'. The corresponding
|
28 |
+
;;; claim is this: "every green elephant in my office just now is wearing purple
|
29 |
+
;;; boots." This is a true statement, for the simple fact that there are no
|
30 |
+
;;; green elephants in my office at this time -- so -- vacuously -- every one of them is
|
31 |
+
;;; wearing purple boots!
|
32 |
+
|
33 |
+
;;; Another use of the phrase "vacuously true" arises when talking about
|
34 |
+
;;; propositions -- recall the definition of P ==> Q
|
35 |
+
|
36 |
+
;;; P Q P ==> Q
|
37 |
+
;;; --- --- ---------
|
38 |
+
;;; T T T
|
39 |
+
;;; T F F
|
40 |
+
;;; F T T
|
41 |
+
;;; F F T
|
42 |
+
|
43 |
+
;;; The last two lines are described by saying that when the antecedent P
|
44 |
+
;;; is false, then the implication P ==> Q is vacuously true.
|
45 |
+
|
46 |
+
;;; What about (or)? (or arg1 arg2 ... argk) is #t exactly when at least
|
47 |
+
;;; one of arg1, arg2, ..., argk is true. So -- if none are true, then
|
48 |
+
;;; the or evaluates to false.
|
49 |
+
|
50 |
+
;;; So the relevant question is: how many args in {} are true?
|
51 |
+
;;; Clearly, the answer is 0. So (or) _must_ evaluate to false.
|
52 |
+
|
data/Class Notes/Classes 03 and 04/Classes 03 and 04.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1eebfea29fa2f0d862b74eb160f14aefb99fd587681e1a983e205419c13e2391
|
3 |
+
size 1599847
|
data/Class Notes/Classes 05 and 06/Class 05 February 6 and 8 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bdd8b44fb279a3e716b72abf2925b216fb2dc0dc6f29da01843e9cdc87218c1
|
3 |
+
size 1337486
|
data/Class Notes/Classes 05 and 06/Class 06 February 8 and 13 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df1a5444d0ea5808b6d60c06537674341f6b9186f741bd52252afc5e7e82d583
|
3 |
+
size 736640
|
data/Class Notes/Classes 07 and 08/Class 07 CSc 335 February 15 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5c8b6ef88905bb20fbc4aab445420a3dda9059afb4c0b12ef5f15d672a53a75
|
3 |
+
size 1150867
|
data/Class Notes/Classes 07 and 08/Class 08 Sections M and R February 20 and 27 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb199f55c99d919003e53115d37e2f3a772e6fddf7f2765cb9b8c0d9f7f7ba46
|
3 |
+
size 485320
|
data/Class Notes/Classes 09 and 10/Class 09 CSc 335 Sections M and R February 27 and 29.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:adb85d173f0e5302a222cd518f02ebebcd055dd944c312d6a7cd34f618102c78
|
3 |
+
size 396354
|
data/Class Notes/Classes 09 and 10/Class 10 CSc 335 Sections M and R February 29 and March 5.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e72785571ed7b7d0f94099821bf96ff88c5597cb3cbb344cde954946efa43d9
|
3 |
+
size 236475
|
data/Class Notes/Classes 10 and 11/Classes 10 and 11 CSc 335 Sections M and R February 29 and March 5 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2cdd933f08024c7fa84561c5a3e0b81a46958263c79b7355d4abdcdb29ebce09
|
3 |
+
size 462885
|
data/Class Notes/Classes 12 and 13/Classes 12 and 13 CSc 335 Sections M and R March 7 and 12.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4947be546733823436c2d7c5c9dfc784d05636bddd789833bd44fb83b264883
|
3 |
+
size 1278358
|
data/Class Notes/Classes 13 and 14/Class 13 Part 2 March 14 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bddb9e76d3bd6772afbf3ce366aba5ef8fefbb7d08930c9c7246cbd8a35de70c
|
3 |
+
size 502354
|
data/Class Notes/Classes 13 and 14/Class 14 CSc 335 March 14 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a9263e017d5029c303c02642efa92fc61524b27da298af43e9c6f137b59ec7c
|
3 |
+
size 425870
|
data/Class Notes/Classes 14.5 and 15 and 15.5/Class 15 March 19 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26a38bc87090d180e32516fd5b2411d376b6001eb74d6b11b841289de142cc39
|
3 |
+
size 318626
|
data/Class Notes/Classes 14.5 and 15 and 15.5/Classes 14.5 and 15.5 Some Solutions for Homework 06.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38ba9e751d259923355dda46ea207cbcbfa1d0b259c3fa79cadb3f3f372b17ff
|
3 |
+
size 1156711
|
data/Class Notes/Classes 16 and 17/Class 16 March 26 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:028b4f2b42d60e2da65425605d977837c8fb219b10e94c782864c8ca4618f2ed
|
3 |
+
size 275385
|
data/Class Notes/Classes 16 and 17/Class 17 March 28 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0e3a08097afc357c90139477eeba1902d2716ea4f41c8f65b72caae6b1f2b44
|
3 |
+
size 293294
|
data/Class Notes/Classes 18 and 19/Class 18 April 2 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58e0efbcbdee1ca410d7a42230e18c7a2c6ba0f04884b3932601ef7ab2fa4dd8
|
3 |
+
size 306468
|
data/Class Notes/Classes 18 and 19/Class 18 Part 2 April 4 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89642f5abb9e7881053dc1dd5d1d06b811eb6e4028814fa736f032af3850e285
|
3 |
+
size 242600
|
data/Class Notes/Classes 18 and 19/Class 19 April 4 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:564918141cadcb5ece58c329920b3ef75f03fc0a5fa644f24b4c3836ec7f0f2a
|
3 |
+
size 623521
|
data/Class Notes/Classes 20 and 21/Class 20 April 9 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d1a0ae2b66fd9dec1e4a825d4e31e5e12cbfaeabca4fccbd58005004a5b5bd2
|
3 |
+
size 352232
|
data/Class Notes/Classes 20 and 21/Class 21 April 11 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81324345eaacc87eb1739497bc4ddd5520f5d7783ca38417af1cd8d68afa303e
|
3 |
+
size 221162
|
data/Class Notes/Classes 20 and 21/Makeup Class April 10 2024_annotated.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2fe2b8042479a28eac4967418d1112f9e0006fdb7a7ea6dcfd393be9b02f3562
|
3 |
+
size 661558
|
data/Class Notes/Classes 22 and 23/Class 22 April 18 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:056a44609b7094cbe182f8d70806dc4764ef92e97a8596a8111b00f921e6bc3b
|
3 |
+
size 184594
|
data/Class Notes/Classes 22 and 23/Class 23 April 30 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a46473d2873cbed2197d546b52087b94fdab3cf9e09871f6be68b3d9a308af0b
|
3 |
+
size 219806
|
data/Class Notes/Classes 22 and 23/HW Session April 17 2024.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb9afede3ed0c01f579f7f30839ccf267b3c76ef1af635ae9186d9f6c2216cbe
|
3 |
+
size 280945
|