Update app.py
Browse files
app.py
CHANGED
@@ -39,19 +39,19 @@ def chunk_document(doc: str, doc_id: int, desired_chunk_size: int = 100, max_chu
|
|
39 |
if chunk:
|
40 |
yield (doc_id, chunk_number, chunk)
|
41 |
|
42 |
-
def chunk_documents(docs:
|
43 |
chunks = []
|
44 |
for doc_id, doc in enumerate(docs):
|
45 |
chunks.extend(chunk_document(doc, doc_id, desired_chunk_size, max_chunk_size))
|
46 |
return chunks
|
47 |
|
48 |
-
from typing import
|
49 |
import numpy as np
|
50 |
from rank_bm25 import BM25Okapi
|
51 |
from sentence_transformers import SentenceTransformer
|
52 |
import torch
|
53 |
class Retriever:
|
54 |
-
def __init__(self, docs:
|
55 |
|
56 |
self.chunks = chunk_documents(docs)
|
57 |
self.docs = [chunk[2] for chunk in self.chunks]
|
|
|
39 |
if chunk:
|
40 |
yield (doc_id, chunk_number, chunk)
|
41 |
|
42 |
+
def chunk_documents(docs: list[str], desired_chunk_size: int = 100, max_chunk_size: int = 3000):
    """Chunk every document in *docs* and return all chunks in one flat list.

    Each document is passed to ``chunk_document`` together with its position
    in *docs* (used as the ``doc_id``) and the two size parameters, which are
    forwarded unchanged. The items produced for all documents are collected
    in document order.

    Args:
        docs: Documents to split; the index of each entry becomes its doc_id.
        desired_chunk_size: Forwarded to ``chunk_document``.
        max_chunk_size: Forwarded to ``chunk_document``.

    Returns:
        A list of whatever ``chunk_document`` yields -- judging by the visible
        tail of that generator, ``(doc_id, chunk_number, chunk)`` tuples.
    """
    return [
        piece
        for doc_id, doc in enumerate(docs)
        for piece in chunk_document(doc, doc_id, desired_chunk_size, max_chunk_size)
    ]
|
47 |
|
48 |
+
#from typing import list
|
49 |
import numpy as np
|
50 |
from rank_bm25 import BM25Okapi
|
51 |
from sentence_transformers import SentenceTransformer
|
52 |
import torch
|
53 |
class Retriever:
|
54 |
+
def __init__(self, docs: list[str]):
|
55 |
|
56 |
self.chunks = chunk_documents(docs)
|
57 |
self.docs = [chunk[2] for chunk in self.chunks]
|