LordFarquaad42 committed on
Commit
09cb3f5
·
1 Parent(s): 722a2d3

adding lectures to embedded database

Browse files
Files changed (28) hide show
  1. add_data.py +82 -0
  2. chromadb_linux/chroma.sqlite3 +2 -2
  3. data/Class Notes/Classes 01 and 02/Classes 1 and 2.pdf +3 -0
  4. data/Class Notes/Classes 01 and 02/What do we mean by _Vacuously True_.scm +52 -0
  5. data/Class Notes/Classes 03 and 04/Classes 03 and 04.pdf +3 -0
  6. data/Class Notes/Classes 05 and 06/Class 05 February 6 and 8 2024.pdf +3 -0
  7. data/Class Notes/Classes 05 and 06/Class 06 February 8 and 13 2024.pdf +3 -0
  8. data/Class Notes/Classes 07 and 08/Class 07 CSc 335 February 15 2024.pdf +3 -0
  9. data/Class Notes/Classes 07 and 08/Class 08 Sections M and R February 20 and 27 2024.pdf +3 -0
  10. data/Class Notes/Classes 09 and 10/Class 09 CSc 335 Sections M and R February 27 and 29.pdf +3 -0
  11. data/Class Notes/Classes 09 and 10/Class 10 CSc 335 Sections M and R February 29 and March 5.pdf +3 -0
  12. data/Class Notes/Classes 10 and 11/Classes 10 and 11 CSc 335 Sections M and R February 29 and March 5 2024.pdf +3 -0
  13. data/Class Notes/Classes 12 and 13/Classes 12 and 13 CSc 335 Sections M and R March 7 and 12.pdf +3 -0
  14. data/Class Notes/Classes 13 and 14/Class 13 Part 2 March 14 2024.pdf +3 -0
  15. data/Class Notes/Classes 13 and 14/Class 14 CSc 335 March 14 2024.pdf +3 -0
  16. data/Class Notes/Classes 14.5 and 15 and 15.5/Class 15 March 19 2024.pdf +3 -0
  17. data/Class Notes/Classes 14.5 and 15 and 15.5/Classes 14.5 and 15.5 Some Solutions for Homework 06.pdf +3 -0
  18. data/Class Notes/Classes 16 and 17/Class 16 March 26 2024.pdf +3 -0
  19. data/Class Notes/Classes 16 and 17/Class 17 March 28 2024.pdf +3 -0
  20. data/Class Notes/Classes 18 and 19/Class 18 April 2 2024.pdf +3 -0
  21. data/Class Notes/Classes 18 and 19/Class 18 Part 2 April 4 2024.pdf +3 -0
  22. data/Class Notes/Classes 18 and 19/Class 19 April 4 2024.pdf +3 -0
  23. data/Class Notes/Classes 20 and 21/Class 20 April 9 2024.pdf +3 -0
  24. data/Class Notes/Classes 20 and 21/Class 21 April 11 2024.pdf +3 -0
  25. data/Class Notes/Classes 20 and 21/Makeup Class April 10 2024_annotated.pdf +3 -0
  26. data/Class Notes/Classes 22 and 23/Class 22 April 18 2024.pdf +3 -0
  27. data/Class Notes/Classes 22 and 23/Class 23 April 30 2024.pdf +3 -0
  28. data/Class Notes/Classes 22 and 23/HW Session April 17 2024.pdf +3 -0
add_data.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+ from chromadb.utils import embedding_functions
3
+
4
+
5
def get_client():
    """Open the on-disk Chroma store and return its "scheme" collection.

    Despite the name, the returned object is the collection, not the
    Chroma client itself. The collection is looked up (not created) and is
    bound to a SentenceTransformer embedding function.
    """
    store = chromadb.PersistentClient(path="./chromadb_linux/")

    # Embedding model used by the collection (~ 0.5 gb).
    embedder = embedding_functions.SentenceTransformerEmbeddingFunction(
        model_name="mixedbread-ai/mxbai-embed-large-v1",
    )

    return store.get_collection(name="scheme", embedding_function=embedder)
17
+
18
+
19
def update_collection(iter: int, text: object, client: chromadb.Collection):
    """Add one transcribed page to the given collection.

    Args:
        iter: Number appended to the record id.
        text: Mapping with a ``"text"`` entry (stored as the document) and a
            ``"content"`` entry (used as the id prefix).
        client: Collection that receives the record.
    """
    document = text["text"]
    # The id is the content description followed by the number.
    record_id = text["content"] + str(iter)

    client.add(
        ids=[record_id],
        documents=[document],
        metadatas=[{"source": "pdf"}],
    )
21
+
22
+
23
def encode_image(img_path: str):
    """Return the contents of the file at *img_path* as base64 text.

    The file is read in binary mode in a single call, base64-encoded, and
    the encoded bytes are decoded to ``str`` as UTF-8.
    """
    from base64 import b64encode

    with open(img_path, "rb") as handle:
        raw = handle.read()
    return b64encode(raw).decode("utf-8")
28
+
29
+
30
async def image_to_text(image) -> dict:
    """Ask GPT-4 Turbo to transcribe a base64-encoded JPEG image.

    The request uses JSON mode and a prompt asking for an object with two
    keys: ``content`` (a line describing the text) and ``text`` (the
    transcription).

    Args:
        image: Base64 text of a JPEG image; it is embedded in a ``data:``
            URL and sent with ``"detail": "high"``.

    Returns:
        The parsed JSON object from the first choice of the reply. The
        prompt requests the ``content`` and ``text`` keys, but their
        presence is not validated here.

    Raises:
        ValueError: If the reply carries no message content, or (as
            ``json.JSONDecodeError``, a ``ValueError`` subclass) if the
            content is not valid JSON.
    """
    import json

    from openai import AsyncOpenAI

    prompt = (
        "Transcribe the contents of this image and return a JSON object that contains the text. "
        "It must be structured in the following manner: two entries with the following keys: "
        "'content' and 'text'. Content will be a line describing what the content of text will be, "
        "and text will be a simple transcription of the image"
    )

    # Use the async client and await the request so this coroutine does not
    # block the event loop while waiting on the network. The client is built
    # with no arguments, so credentials come from its default configuration
    # (presumably the OPENAI_API_KEY environment variable -- confirm).
    async with AsyncOpenAI() as client:
        response = await client.chat.completions.create(
            model="gpt-4-turbo",
            response_format={"type": "json_object"},
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                # RFC 2397 form is "data:<type>;base64,<data>";
                                # there is no ';' between "base64" and ','.
                                "url": f"data:image/jpeg;base64,{image}",
                                "detail": "high",
                            },
                        },
                    ],
                }
            ],
        )

    payload = response.choices[0].message.content
    if payload is None:
        # json.loads(None) would raise an opaque TypeError.
        raise ValueError("model returned no content to parse as JSON")
    return json.loads(payload)
56
+
57
+
58
async def start_troggin_off(dir: str):
    """Transcribe every PDF one folder below *dir* into the collection.

    For each sub-folder of *dir*, every file whose name ends in ``.pdf`` is
    rendered to page images. Each page is saved as a JPEG, base64-encoded,
    transcribed with ``image_to_text`` and stored with
    ``update_collection``. Files directly inside *dir*, and anything nested
    deeper than one folder, are not visited.

    Args:
        dir: Root directory whose immediate sub-folders hold the PDFs.
    """
    import os
    import tempfile

    from pdf2image import convert_from_path

    client = get_client()

    # Page counter shared by all PDFs in this run. update_collection builds
    # the record id from the page description plus this number; restarting
    # the number for each PDF lets two PDFs produce the same id, and Chroma's
    # add() does not overwrite an id that already exists.
    page_no = 0

    # Page JPEGs are scratch data: keep them out of the working directory
    # and have them removed even if a transcription fails part-way.
    with tempfile.TemporaryDirectory() as scratch_dir:
        page_path = os.path.join(scratch_dir, "page.jpg")

        # Sorted so pages are processed in a reproducible order.
        for folder in sorted(os.listdir(dir)):
            folder_path = os.path.join(dir, folder)
            if not os.path.isdir(folder_path):
                continue

            for file in sorted(os.listdir(folder_path)):
                if not file.endswith(".pdf"):
                    continue

                print("Processing", file)
                pdf_path = os.path.join(folder_path, file)
                images = convert_from_path(pdf_path)

                for image in images:
                    # The same scratch file is reused for every page.
                    image.save(page_path, "JPEG")
                    encoded_image = encode_image(page_path)
                    text = await image_to_text(encoded_image)
                    update_collection(page_no, text, client)
                    page_no += 1
78
+
79
+
80
if __name__ == "__main__":
    import asyncio

    # Script entry point: ingest the lecture PDFs under the class-notes folder.
    notes_root = "data/Class Notes/"
    asyncio.run(start_troggin_off(notes_root))
chromadb_linux/chroma.sqlite3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:714af04cf0e3baa968ac6612f2a733861d1ab80d0370d981b3f93fcabf281af5
3
- size 16764928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2caf10f7d9b3bd7e24249045ec8b980ece782ab4b660016944407afb3df73d19
3
+ size 19333120
data/Class Notes/Classes 01 and 02/Classes 1 and 2.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b36140dff9f4b928bf26a7374945b08eb8d83cb769bba79142f45a7a06632ca2
3
+ size 950954
data/Class Notes/Classes 01 and 02/What do we mean by _Vacuously True_.scm ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3
+
4
+ ;; On the terms 'vacuous' and 'vacuously true'
5
+
6
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7
+
8
+ ;; can you explain why (+) has the value 0?
9
+
10
+ ;;; well, there are no arguments, and the sum of zero numbers is 0
11
+
12
+ ;; similarly, can you explain why (and) has the value #t?
13
+
14
+ ;;; well -- given (and arg1 arg2 ... argk), and returns true if every one of
15
+ ;;; arg1, arg2, ... , argk is true. For (and) -- ie, and applied to no arguments -- the set {arg1, arg2, ..., argk}
16
+ ;;; is empty -- it follows then that every one of these arguments is true.
17
+
18
+ ;;; one says then that (and) is vacuously true
19
+
20
+ ;;; we will encounter this over and over again in the coming weeks -- you want
21
+ ;;; to review universal quantifiers: "for every arg in the empty set {}, (f arg) is
22
+ ;;; true whenever (f arg) computes a Boolean value". Intuitively,
23
+ ;;; ask yourself "how could it be false?" Well, there would
24
+ ;;; need to be an argument in the empty set for which the value of (f arg) is false. But of course
25
+ ;;; there are no arguments in the empty set.
26
+
27
+ ;;; I like to refer to this as the 'green elephant argument'. The corresponding
28
+ ;;; claim is this: "every green elephant in my office just now is wearing purple
29
+ ;;; boots." This is a true statement, for the simple fact that there are no
30
+ ;;; green elephants in my office at this time -- so -- vacuously -- every one of them is
31
+ ;;; wearing purple boots!
32
+
33
+ ;;; Another use of the phrase "vacuously true" arises when talking about
34
+ ;;; propositions -- recall the definition of P ==> Q
35
+
36
+ ;;; P Q P ==> Q
37
+ ;;; --- --- ---------
38
+ ;;; T T T
39
+ ;;; T F F
40
+ ;;; F T T
41
+ ;;; F F T
42
+
43
+ ;;; The last two lines are described by saying that when the antecedent P
44
+ ;;; is false, then the implication P ==> Q is vacuously true.
45
+
46
+ ;;; What about (or)? (or arg1 arg2 ... argk) is #t exactly when at least
47
+ ;;; one of arg1, arg2, ..., argk is true. So -- if none are true, then
48
+ ;;; the or evaluates to false.
49
+
50
+ ;;; So the relevant question is: how many args in {} are true?
51
+ ;;; Clearly, the answer is 0. So (or) _must_ evaluate to false.
52
+
data/Class Notes/Classes 03 and 04/Classes 03 and 04.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1eebfea29fa2f0d862b74eb160f14aefb99fd587681e1a983e205419c13e2391
3
+ size 1599847
data/Class Notes/Classes 05 and 06/Class 05 February 6 and 8 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bdd8b44fb279a3e716b72abf2925b216fb2dc0dc6f29da01843e9cdc87218c1
3
+ size 1337486
data/Class Notes/Classes 05 and 06/Class 06 February 8 and 13 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df1a5444d0ea5808b6d60c06537674341f6b9186f741bd52252afc5e7e82d583
3
+ size 736640
data/Class Notes/Classes 07 and 08/Class 07 CSc 335 February 15 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5c8b6ef88905bb20fbc4aab445420a3dda9059afb4c0b12ef5f15d672a53a75
3
+ size 1150867
data/Class Notes/Classes 07 and 08/Class 08 Sections M and R February 20 and 27 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb199f55c99d919003e53115d37e2f3a772e6fddf7f2765cb9b8c0d9f7f7ba46
3
+ size 485320
data/Class Notes/Classes 09 and 10/Class 09 CSc 335 Sections M and R February 27 and 29.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adb85d173f0e5302a222cd518f02ebebcd055dd944c312d6a7cd34f618102c78
3
+ size 396354
data/Class Notes/Classes 09 and 10/Class 10 CSc 335 Sections M and R February 29 and March 5.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e72785571ed7b7d0f94099821bf96ff88c5597cb3cbb344cde954946efa43d9
3
+ size 236475
data/Class Notes/Classes 10 and 11/Classes 10 and 11 CSc 335 Sections M and R February 29 and March 5 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cdd933f08024c7fa84561c5a3e0b81a46958263c79b7355d4abdcdb29ebce09
3
+ size 462885
data/Class Notes/Classes 12 and 13/Classes 12 and 13 CSc 335 Sections M and R March 7 and 12.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4947be546733823436c2d7c5c9dfc784d05636bddd789833bd44fb83b264883
3
+ size 1278358
data/Class Notes/Classes 13 and 14/Class 13 Part 2 March 14 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bddb9e76d3bd6772afbf3ce366aba5ef8fefbb7d08930c9c7246cbd8a35de70c
3
+ size 502354
data/Class Notes/Classes 13 and 14/Class 14 CSc 335 March 14 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a9263e017d5029c303c02642efa92fc61524b27da298af43e9c6f137b59ec7c
3
+ size 425870
data/Class Notes/Classes 14.5 and 15 and 15.5/Class 15 March 19 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26a38bc87090d180e32516fd5b2411d376b6001eb74d6b11b841289de142cc39
3
+ size 318626
data/Class Notes/Classes 14.5 and 15 and 15.5/Classes 14.5 and 15.5 Some Solutions for Homework 06.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38ba9e751d259923355dda46ea207cbcbfa1d0b259c3fa79cadb3f3f372b17ff
3
+ size 1156711
data/Class Notes/Classes 16 and 17/Class 16 March 26 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:028b4f2b42d60e2da65425605d977837c8fb219b10e94c782864c8ca4618f2ed
3
+ size 275385
data/Class Notes/Classes 16 and 17/Class 17 March 28 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0e3a08097afc357c90139477eeba1902d2716ea4f41c8f65b72caae6b1f2b44
3
+ size 293294
data/Class Notes/Classes 18 and 19/Class 18 April 2 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58e0efbcbdee1ca410d7a42230e18c7a2c6ba0f04884b3932601ef7ab2fa4dd8
3
+ size 306468
data/Class Notes/Classes 18 and 19/Class 18 Part 2 April 4 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89642f5abb9e7881053dc1dd5d1d06b811eb6e4028814fa736f032af3850e285
3
+ size 242600
data/Class Notes/Classes 18 and 19/Class 19 April 4 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:564918141cadcb5ece58c329920b3ef75f03fc0a5fa644f24b4c3836ec7f0f2a
3
+ size 623521
data/Class Notes/Classes 20 and 21/Class 20 April 9 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d1a0ae2b66fd9dec1e4a825d4e31e5e12cbfaeabca4fccbd58005004a5b5bd2
3
+ size 352232
data/Class Notes/Classes 20 and 21/Class 21 April 11 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81324345eaacc87eb1739497bc4ddd5520f5d7783ca38417af1cd8d68afa303e
3
+ size 221162
data/Class Notes/Classes 20 and 21/Makeup Class April 10 2024_annotated.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fe2b8042479a28eac4967418d1112f9e0006fdb7a7ea6dcfd393be9b02f3562
3
+ size 661558
data/Class Notes/Classes 22 and 23/Class 22 April 18 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:056a44609b7094cbe182f8d70806dc4764ef92e97a8596a8111b00f921e6bc3b
3
+ size 184594
data/Class Notes/Classes 22 and 23/Class 23 April 30 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a46473d2873cbed2197d546b52087b94fdab3cf9e09871f6be68b3d9a308af0b
3
+ size 219806
data/Class Notes/Classes 22 and 23/HW Session April 17 2024.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb9afede3ed0c01f579f7f30839ccf267b3c76ef1af635ae9186d9f6c2216cbe
3
+ size 280945