HemanthSai7 commited on
Commit
7a89bde
0 Parent(s):
Files changed (43) hide show
  1. .gitattributes +41 -0
  2. .github/workflows/checkfilesize.yaml +16 -0
  3. .github/workflows/githubtohfsync.yaml +20 -0
  4. .gitignore +137 -0
  5. Dockerfile +16 -0
  6. README.md +56 -0
  7. StudybotAPI/app.py +1 -0
  8. StudybotAPI/backend/__init__.py +40 -0
  9. StudybotAPI/backend/core/ExceptionHandlers.py +27 -0
  10. StudybotAPI/backend/core/Exceptions.py +48 -0
  11. StudybotAPI/backend/core/__init__.py +0 -0
  12. StudybotAPI/backend/core/configEnv.py +24 -0
  13. StudybotAPI/backend/ingestion/__init__.py +3 -0
  14. StudybotAPI/backend/ingestion/embeddings.py +55 -0
  15. StudybotAPI/backend/ingestion/prepare_data.py +79 -0
  16. StudybotAPI/backend/ingestion/preprocess_data.py +35 -0
  17. StudybotAPI/backend/retriever/__init__.py +2 -0
  18. StudybotAPI/backend/retriever/ops.py +25 -0
  19. StudybotAPI/backend/retriever/pipeline.py +72 -0
  20. StudybotAPI/backend/routes.py +65 -0
  21. StudybotAPI/backend/schemas/FrontendResponseSchema.py +12 -0
  22. StudybotAPI/backend/schemas/__init__.py +2 -0
  23. StudybotAPI/backend/schemas/chatschema.py +13 -0
  24. StudybotAPI/backend/utils/__init__.py +1 -0
  25. StudybotAPI/backend/utils/chain_loader.py +40 -0
  26. StudybotAPI/backend/utils/prompt.txt +12 -0
  27. StudybotAPI/backend/utils/prompts.py +62 -0
  28. StudybotAPI/config.yml +10 -0
  29. StudybotAPI/requirements.txt +15 -0
  30. frontend/components/__init__.py +2 -0
  31. frontend/components/authors.py +13 -0
  32. frontend/components/user_greetings.py +6 -0
  33. frontend/layouts/__init__.py +0 -0
  34. frontend/layouts/mainlayout.py +19 -0
  35. frontend/layouts/st_page_layouts.json +14 -0
  36. frontend/pages/2_🤖_bot.py +9 -0
  37. frontend/requirements.txt +1 -0
  38. frontend/test.py +26 -0
  39. frontend/🏡_Home.py +65 -0
  40. notebooks/Untitled6.ipynb +0 -0
  41. notebooks/embeddings.ipynb +0 -0
  42. notebooks/selfrag.ipynb +0 -0
  43. test.py +26 -0
.gitattributes ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png
37
+ .png
38
+ frontend/images/architecture.png filter=lfs diff=lfs merge=lfs -text
39
+ StudybotAPI/assets/*.png filter=lfs diff=lfs merge=lfs -text
40
+ StudybotAPI/backend/data/History_1.pdf filter=lfs diff=lfs merge=lfs -text
41
+
.github/workflows/checkfilesize.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Check file size
2
+ on: # or directly `on: [push]` to run the action on every push on any branch
3
+ pull_request:
4
+ branches: [main]
5
+
6
+ # to run this workflow manually from the Actions tab
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ sync-to-hub:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - name: Check large files
14
+ uses: ActionsDesk/lfs-warning@v2.0
15
+ with:
16
+ filesizelimit: 10485760 # this is 10MB so we can sync to HF Spaces
.github/workflows/githubtohfsync.yaml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face hub
2
+ on:
3
+ push:
4
+ branches: [main]
5
+
6
+ # to run this workflow manually from the Actions tab
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ sync-to-hub:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v3
14
+ with:
15
+ fetch-depth: 0
16
+ lfs: true
17
+ - name: Push to hub
18
+ env:
19
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
+ run: git push https://HemanthSai7:$HF_TOKEN@huggingface.co/spaces/HemanthSai7/StudybotAPI main
.gitignore ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ marvin.toml
2
+ .marvin-history
3
+ marvin.egg-info
4
+
5
+ # Byte-compiled / optimized / DLL files
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+
10
+ # C extensions
11
+ *.so
12
+
13
+ # Distribution / packaging
14
+ .Python
15
+ build/
16
+ develop-eggs/
17
+ # dist/
18
+ downloads/
19
+ eggs/
20
+ .eggs/
21
+ lib/
22
+ lib64/
23
+ parts/
24
+ sdist/
25
+ var/
26
+ wheels/
27
+ pip-wheel-metadata/
28
+ share/python-wheels/
29
+ *.egg-info/
30
+ .installed.cfg
31
+ *.egg
32
+ MANIFEST
33
+
34
+ # PyInstaller
35
+ # Usually these files are written by a python script from a template
36
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
37
+ *.manifest
38
+ *.spec
39
+
40
+ # Installer logs
41
+ pip-log.txt
42
+ pip-delete-this-directory.txt
43
+
44
+ # Unit test / coverage reports
45
+ htmlcov/
46
+ .tox/
47
+ .nox/
48
+ .coverage
49
+ .coverage.*
50
+ .cache
51
+ nosetests.xml
52
+ coverage.xml
53
+ *.cover
54
+ *.py,cover
55
+ .hypothesis/
56
+ .pytest_cache/
57
+
58
+ # Translations
59
+ *.mo
60
+ *.pot
61
+
62
+ # Django stuff:
63
+ *.log
64
+ local_settings.py
65
+ db.sqlite3
66
+ db.sqlite3-journal
67
+
68
+ # Flask stuff:
69
+ instance/
70
+ .webassets-cache
71
+
72
+ # vector store
73
+ studybot/vectorstore/
74
+
75
+ # Scrapy stuff:
76
+ .scrapy
77
+
78
+ # Sphinx documentation
79
+ docs/_build/
80
+
81
+ # PyBuilder
82
+ target/
83
+
84
+ # Jupyter Notebook
85
+ .ipynb_checkpoints
86
+
87
+ # IPython
88
+ profile_default/
89
+ ipython_config.py
90
+
91
+ # pyenv
92
+ .python-version
93
+
94
+ # pipenv
95
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
97
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
98
+ # install all needed dependencies.
99
+ #Pipfile.lock
100
+
101
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
102
+ __pypackages__/
103
+
104
+ # Celery stuff
105
+ celerybeat-schedule
106
+ celerybeat.pid
107
+
108
+ # SageMath parsed files
109
+ *.sage.py
110
+
111
+ # Environments
112
+ .env
113
+ .venv
114
+ env/
115
+ venv/
116
+ ENV/
117
+ env.bak/
118
+ venv.bak/
119
+ cpcli-env/
120
+
121
+ # Spyder project settings
122
+ .spyderproject
123
+ .spyproject
124
+
125
+ # Rope project settings
126
+ .ropeproject
127
+
128
+ # mkdocs documentation
129
+ /site
130
+
131
+ # mypy
132
+ .mypy_cache/
133
+ .dmypy.json
134
+ dmypy.json
135
+
136
+ # Pyre type checker
137
+ .pyre/
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10.9
2
+
3
+ RUN useradd -m -u 1000 user
4
+ USER user
5
+ ENV HOME=/home/user \
6
+ PATH=/home/user/.local/bin:$PATH
7
+
8
+ COPY --chown=user ./StudybotAPI $HOME/StudybotAPI
9
+
10
+ WORKDIR $HOME/StudybotAPI
11
+
12
+ RUN mkdir $HOME/.cache
13
+
14
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
15
+
16
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: StudybotAPI
3
+ emoji: 😻
4
+ colorFrom: gray
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ # StudyBot x Streamlit
11
+
12
+ ✨ Streamlit is an open-source app framework for Machine Learning and Data Science teams. Create beautiful data apps in hours, not weeks. All in pure Python. ✨
13
+
14
+ ## Installation
15
+
16
+ ```bash
17
+ pip install -r requirements.txt
18
+ ```
19
+
20
+ ## Start development server
21
+
22
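+ > Before starting, **remember to set the required credentials** (`CLARIFAI_PAT`, `USER_ID`, `APP_ID`, `MODEL_ID`, `MODEL_VERSION_ID`, `QDRANT_API_KEY`) as environment variables or in a `.env` file inside `StudybotAPI`.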
23
+
24
+ Run the following command:
25
+
26
+ ```bash
27
+ cd StudybotAPI
28
+ uvicorn app:app --reload
29
+ ```
30
+
31
+ Now go to [http://localhost:8000](http://localhost:8000) (uvicorn's default port) and start chatting with your bot! The server will automatically reload when you change the code.
32
+
33
+ ## Motive
34
+ When studying a theoretical subject with a lot of concepts, dates, and important events, it is hard to remember them all, no matter how hard we try to memorize them. So I thought of making a bot that can help with quick revision of the subject. For example, if we are studying history and forget what happened in the revolt of 1857, we can ask the bot **"What happened in 1857?"** and it will give us a brief answer. This helps in quick revision of the subject.
35
+
36
+ ## How to use
37
+ Input the prompt in the text box and press enter. The bot will give you the answer. If you want to ask another question, just enter the question and the bot will try to answer.
38
+
39
+ ## Screenshots
40
+ ![image](StudybotAPI/assets/ss1.png)
41
+ ![image](StudybotAPI/assets/ss2.png)
42
+ ![image](StudybotAPI/assets/ss3.png)
43
+
44
+ ## How it works
45
+ ![image](StudybotAPI/assets/flowchart.png)
46
+
47
+ ## Tech Stack
48
+ ![Python](https://img.shields.io/badge/python-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54)
49
+ ![FastAPI](https://img.shields.io/badge/FastAPI-005571?style=for-the-badge&logo=fastapi)
50
+ ![JavaScript](https://img.shields.io/badge/javascript-%23323330.svg?style=for-the-badge&logo=javascript&logoColor=%23F7DF1E)
51
+ ![HTML5](https://img.shields.io/badge/html5-%23E34F26.svg?style=for-the-badge&logo=html5&logoColor=white)
52
+ ![GitHub Actions](https://img.shields.io/badge/github%20actions-%232671E5.svg?style=for-the-badge&logo=githubactions&logoColor=white)
53
+ ![Langchain](https://img.shields.io/badge/langchain-%23E34F26.svg?style=for-the-badge&logo=langchains&logoColor=white)
54
+ ![Huggingface](https://img.shields.io/badge/huggingface-%23E34F26.svg?style=for-the-badge&logo=huggingface&logoColor=white)
55
+ ![Streamlit](https://img.shields.io/badge/streamlit-%23E34F26.svg?style=for-the-badge&logo=streamlit&logoColor=white)
56
+ ![Docker](https://img.shields.io/badge/docker-%23E34F26.svg?style=for-the-badge&logo=docker&logoColor=white)
StudybotAPI/app.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from backend import app
StudybotAPI/backend/__init__.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import box
3
+ import yaml
4
+
5
+ from fastapi import FastAPI
6
+
7
+ from backend.ingestion import *
8
+
9
+ # from langchain.llms.huggingface_pipeline import HuggingFacePipeline
10
+
11
+
12
+ app = FastAPI(title="StudyBot API", version="0.1.0", description="API for StudyBot Project")
13
+
14
+ from backend import routes
15
+ # from backend.retriever import EmbeddingModel
16
+
17
+
18
# Best-effort startup configuration: any exception raised below is printed
# and the module import continues (app.state.emb is then left unset).
try:
    # NOTE(review): "/.cache" is at the filesystem root, while the Dockerfile
    # creates $HOME/.cache — confirm which location is intended.
    os.environ["TRANSFORMERS_CACHE"] = "/.cache"

    # "config.yml" is opened relative to the current working directory.
    with open("config.yml", "r", encoding="utf8") as ymlfile:
        cfg = box.Box(yaml.safe_load(ymlfile))
        # Shared Embeddings helper, read later through app.state.emb.
        app.state.emb = Embeddings(cfg)

    # Disabled local-LLM alternatives, kept for reference:
    # llm = HuggingFacePipeline(pipeline=EmbeddingModel._initialize_pipeline())
    # llm = LlamaCpp(
    #     streaming=True,
    #     model_path="models/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    #     max_tokens=1500,
    #     temperature=0.4,
    #     top_p=1,
    #     gpu_layers=0,
    #     stream=True,
    #     verbose=False,
    #     n_threads=int(os.cpu_count() / 2),
    #     n_ctx=4096
    # )

except Exception as e:
    print(e)
StudybotAPI/backend/core/ExceptionHandlers.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from backend import app
2
+ from .Exceptions import *
3
+
4
+ from fastapi.responses import JSONResponse
5
+ from fastapi.requests import Request
6
+ from fastapi import status
7
+
8
@app.exception_handler(ModelDeploymentException)
async def model_deploying_exception_handler(request: Request, exc: ModelDeploymentException):
    """Answer a ModelDeploymentException with an HTTP 503 JSON response.

    The response body is the ``repr`` string of the exception.
    """
    body = repr(exc)
    return JSONResponse(content=body, status_code=status.HTTP_503_SERVICE_UNAVAILABLE)
14
+
15
@app.exception_handler(InfoNotProvidedException)
async def info_not_provided_exception_handler(request: Request, exc: InfoNotProvidedException):
    """Answer an InfoNotProvidedException with an HTTP 400 JSON response.

    The response body is the ``repr`` string of the exception.
    """
    body = repr(exc)
    return JSONResponse(content=body, status_code=status.HTTP_400_BAD_REQUEST)
21
+
22
@app.exception_handler(DataNotUploadedException)
async def data_not_uploaded_exception_handler(request: Request, exc: DataNotUploadedException):
    """Answer a DataNotUploadedException with an HTTP 400 JSON response.

    The response body is the ``repr`` string of the exception.
    """
    body = repr(exc)
    return JSONResponse(content=body, status_code=status.HTTP_400_BAD_REQUEST)
StudybotAPI/backend/core/Exceptions.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from backend.schemas import FrontendResponseModel
2
+
3
+
4
class ModelDeploymentException(Exception):
    """Signal that the model backend could not serve a request.

    Constructing the exception mutates ``response_result`` in place: its
    ``"status"`` becomes ``"Error"`` and the first entry of its ``"message"``
    list is set to a fixed "model is deploying" notice.

    Args:
        response_result: Mutable mapping with ``"status"`` and ``"message"``
            (a list of strings) keys. It is accessed by subscript, so a
            plain dict is expected.
    """

    def __init__(self, response_result: dict):
        self.response_result = response_result
        self.set_status()
        super().__init__()

    def set_status(self):
        """Write the error status and notice into ``self.response_result``."""
        notice = "Model is deploying. Please try again later."
        self.response_result["status"] = "Error"
        messages = self.response_result["message"]
        if messages:
            messages[0] = notice
        else:
            # An empty list has no slot 0 to overwrite; append instead of
            # failing with IndexError while building the exception.
            messages.append(notice)

    def __repr__(self):
        # The text is sent to clients by the exception handler, so it is kept
        # exactly as-is even though it does not match the class name.
        return "exception.ModelDeployingException()"
16
+
17
+
18
+
19
class InfoNotProvidedException(Exception):
    """Signal that the caller omitted required information.

    Constructing the exception mutates ``response_result`` in place: its
    ``"status"`` becomes ``"Error"``, the first ``"message"`` entry is set to
    a fixed notice, and ``message`` is appended after it.

    Args:
        response_result: Mutable mapping with ``"status"`` and ``"message"``
            (a list of strings) keys. It is accessed by subscript, so a
            plain dict is expected.
        message: Extra detail; also used as the exception's own text.
    """

    def __init__(self, response_result: dict, message: str):
        self.response_result = response_result
        self.message = message
        self.set_status()
        super().__init__(message)

    def set_status(self):
        """Write the error status and both messages into ``self.response_result``."""
        notice = "Information not provided."
        self.response_result["status"] = "Error"
        messages = self.response_result["message"]
        if messages:
            messages[0] = notice
        else:
            # An empty list has no slot 0 to overwrite; append instead of
            # failing with IndexError while building the exception.
            messages.append(notice)
        messages.append(self.message)

    def __repr__(self):
        return "exception.InfoNotProvidedException()"
33
+
34
+
35
class DataNotUploadedException(Exception):
    """Signal that an uploaded file could not be received or stored.

    Constructing the exception mutates ``response_result`` in place: its
    ``"status"`` becomes ``"Error"`` and a fixed notice is appended to its
    ``"message"`` list.

    Args:
        response_result: Mutable mapping with ``"status"`` and ``"message"``
            (a list of strings) keys. It is accessed by subscript, so a
            plain dict is expected.
    """

    def __init__(self, response_result: dict):
        self.response_result = response_result
        self.set_status()
        # The previous super(ModelDeploymentException, self) call raised
        # TypeError, because this class is not a ModelDeploymentException.
        super().__init__()

    def set_status(self):
        """Write the error status and notice into ``self.response_result``."""
        self.response_result["status"] = "Error"
        self.response_result["message"].append(
            "Data not uploaded. Please upload a file."
        )

    def __repr__(self):
        return "exception.DataNotUploadedException()"
StudybotAPI/backend/core/__init__.py ADDED
File without changes
StudybotAPI/backend/core/configEnv.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Config class for handling env variables.
2
+ """
3
+ from functools import lru_cache
4
+ from pydantic import BaseSettings
5
+
6
+
7
# Settings object for the API's credentials. Every field is a required string
# (no defaults are declared).
class Settings(BaseSettings):
    QDRANT_API_KEY: str  # not referenced by the code visible in this commit
    # The five values below are passed to the Clarifai LLM wrapper in
    # backend/utils/chain_loader.py.
    APP_ID: str
    USER_ID: str
    MODEL_ID: str
    CLARIFAI_PAT: str
    MODEL_VERSION_ID: str

    class Config:
        # Relative path, so the file is looked up from the working directory.
        env_file = ".env"
17
+
18
+
19
@lru_cache()
def get_settings():
    """Build the ``Settings`` object.

    ``lru_cache`` memoises the call, so repeated calls return the same instance.
    """
    settings = Settings()
    return settings
22
+
23
+
24
+ config = get_settings()
StudybotAPI/backend/ingestion/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .embeddings import Embeddings
2
+ from .prepare_data import *
3
+ from .preprocess_data import *
StudybotAPI/backend/ingestion/embeddings.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.vectorstores import Qdrant
2
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+
5
+ import shutil
6
+ import warnings
7
+ from backend.ingestion import *
8
+
9
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
10
+
11
+
12
class Embeddings:
    """Chunk documents and index them in an in-memory Qdrant collection.

    ``cfg`` must expose ``EMBEDDINGS``, ``DEVICE``, ``NORMALIZE_EMBEDDINGS``
    and ``VECTOR_DB`` attributes; these are the only ones read here.
    """

    def __init__(self, cfg):
        self.cfg = cfg

    def split_docs(self, documents, chunk_size=1000, chunk_overlap=150):
        """Split ``documents`` with a RecursiveCharacterTextSplitter and return the chunks."""
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )
        return splitter.split_documents(documents)

    def store_embeddings(self, docs):
        """Embed ``docs`` and return a Qdrant vector store built from them.

        The directory named by ``cfg.VECTOR_DB`` is removed first (errors
        ignored). The store itself lives in memory, and ``cfg.VECTOR_DB`` is
        reused as the collection name.
        """
        embedding_model = HuggingFaceBgeEmbeddings(
            model_name=self.cfg.EMBEDDINGS,
            model_kwargs={"device": self.cfg.DEVICE},
            encode_kwargs={"normalize_embeddings": self.cfg.NORMALIZE_EMBEDDINGS},
        )

        shutil.rmtree(self.cfg.VECTOR_DB, ignore_errors=True)

        chunks = self.split_docs(docs)

        # On-disk (path=...) and remote (host=..., prefer_grpc=True) variants
        # were disabled in favour of the in-memory location.
        store = Qdrant.from_documents(
            chunks,
            embedding_model,
            location=":memory:",
            collection_name=self.cfg.VECTOR_DB,
        )

        print(f"Vector store created at {self.cfg.VECTOR_DB}")
        return store
48
+
49
+
50
+ # with open("config.yml", "r", encoding="utf8") as ymlfile:
51
+ # cfg = box.Box(yaml.safe_load(ymlfile))
52
+ # emb = Embeddings(cfg)
53
+
54
+ # docs=PDFDataLoader(cfg.DATA_PATH).load()
55
+ # emb.store_embeddings(docs)
StudybotAPI/backend/ingestion/prepare_data.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Iterator
3
+
4
+ from langchain.schema import Document
5
+ from langchain.document_loaders.base import BaseLoader
6
+ from langchain.document_loaders import PyPDFLoader, TextLoader, DirectoryLoader
7
+
8
+ from .preprocess_data import make_texts_tokenisation_safe
9
+
10
+
11
class DataLoader(BaseLoader):
    """Base loader that records a ``metadata`` dict alongside the loaded documents.

    Subclasses provide ``lazy_load``; ``load`` materialises its result and
    stores the document count under ``metadata["num_documents"]``.
    """

    def __init__(self, data_dir: str):
        self.metadata = {}
        self.data_dir = data_dir

    def lazy_load(self) -> Iterator[Document]:
        """Placeholder that always raises; subclasses must override it."""
        message = f"{self.__class__.__name__} does not implement lazy_load()"
        raise NotImplementedError(message)

    def load(self):
        """Return all documents from ``lazy_load`` as a list and record how many there are."""
        documents = [*self.lazy_load()]
        self.metadata["num_documents"] = len(documents)
        return documents
25
+
26
+
27
class PDFDataLoader(DataLoader):
    """Load a PDF into LangChain documents.

    ``data_dir`` is handed directly to ``PyPDFLoader``; callers in this
    project pass the path of a single PDF file.
    """

    def __init__(self, data_dir: str):
        super().__init__(data_dir)
        self.metadata = {
            "data_dir": data_dir,
            "loader": "PDFDataLoader",
            "num_documents": None,
        }

    @make_texts_tokenisation_safe
    def lazy_load(self) -> Iterator[Document]:
        """Return the PDF's documents, each tagged with a ``file_type`` metadata entry.

        ``file_type`` is the extension of the document's ``source`` path.
        On any loading error the error is printed and an empty list is
        returned. The previous ``None`` result made the decorator and
        ``load()`` fail with an unrelated ``TypeError``.
        """
        try:
            # document = DirectoryLoader(
            #     self.data_dir, glob="*.pdf", loader_cls=PyPDFLoader
            # ).load()
            document = PyPDFLoader(self.data_dir).load()
            for doc in document:
                doc.metadata["file_type"] = os.path.splitext(doc.metadata["source"])[-1]
            return document

        except Exception as e:
            print(f"Error loading : {e}")
            return []
50
+
51
+
52
class TextDataLoader(DataLoader):
    """Load every ``*.txt`` file matched by ``DirectoryLoader`` under ``data_dir``."""

    def __init__(self, data_dir: str):
        super().__init__(data_dir)
        self.metadata = {
            "data_dir": data_dir,
            "loader": "TextDataLoader",
            "num_documents": None,
        }

    @make_texts_tokenisation_safe
    def lazy_load(self) -> Iterator[Document]:
        """Return the text documents, each tagged with a ``file_type`` metadata entry.

        ``file_type`` is the extension of the document's ``source`` path.
        On any loading error the error is printed and an empty list is
        returned. The previous ``None`` result made the decorator and
        ``load()`` fail with an unrelated ``TypeError``.
        """
        try:
            document = DirectoryLoader(
                self.data_dir, glob="*.txt", loader_cls=TextLoader
            ).load()
            for doc in document:
                doc.metadata["file_type"] = os.path.splitext(doc.metadata["source"])[-1]
            return document

        except Exception as e:
            print(f"Error loading : {e}")
            return []
74
+
75
+
76
+ # pdf_loader = PDFDataLoader(data_dir="E:/Projects/Hackathons/StudyBot/data")
77
+
78
+ # documents = pdf_loader.load()
79
+ # print(documents)
StudybotAPI/backend/ingestion/preprocess_data.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tiktoken
2
+
3
def make_texts_tokenisation_safe(func):
    """Decorate a document-returning callable so special tokens are stripped.

    Every special-token string of tiktoken's ``cl100k_base`` encoding is
    removed from each returned document's ``page_content`` (modified in
    place). The encoding is looked up once, when the decorator is applied.

    Args:
        func: Callable returning an iterable of objects that have a
            ``page_content`` string attribute, or ``None``.

    Returns:
        A wrapper with the same call signature that returns ``func``'s result
        after cleaning. A ``None`` result is passed through unchanged.
    """
    import functools  # local import keeps this block self-contained

    encoding = tiktoken.get_encoding("cl100k_base")
    special_tokens_set = encoding.special_tokens_set

    def remove_special_tokens(text):
        for token in special_tokens_set:
            text = text.replace(token, "")
        return text

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def wrapper(*args, **kwargs):
        documents = func(*args, **kwargs)
        if documents is None:
            # Nothing to clean; avoid a TypeError from iterating None.
            return documents
        for document in documents:
            document.page_content = remove_special_tokens(document.page_content)
        return documents

    return wrapper
19
+
20
+
21
+ # def remove_whitespace(func):
22
+ # def wrapper(*args, **kwargs):
23
+ # result = func(*args, **kwargs)
24
+ # if isinstance(result, str):
25
+ # return result.replace(" ", "").replace("\t", "").replace("\n", "")
26
+ # elif isinstance(result, bytes):
27
+ # return (
28
+ # result.decode("utf-8")
29
+ # .replace(" ", "")
30
+ # .replace("\t", "")
31
+ # .replace("\n", "")
32
+ # )
33
+ # return result
34
+
35
+ # return wrapper
StudybotAPI/backend/retriever/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .ops import *
2
+ from .pipeline import *
StudybotAPI/backend/retriever/ops.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from backend.schemas import *
2
+
3
+ from backend.core.Exceptions import *
4
+ from backend.core.ExceptionHandlers import *
5
+ from backend import app
6
+
7
+ from clarifai_grpc.grpc.api.status import status_code_pb2
8
+
9
def ops_inference(response_result: FrontendResponseModel, question: str):
    """Run the question through the QA chain and store the answer in ``response_result``.

    On success ``response_result["result"]`` receives the ``Inference``
    payload (answer text plus source documents) as a dict.

    Args:
        response_result: Mutable response mapping with ``"status"``,
            ``"message"`` and ``"result"`` keys; updated in place.
        question: The user's prompt.

    Raises:
        InfoNotProvidedException: If ``question`` is empty or only whitespace.
        ModelDeploymentException: If calling the chain or building the result
            fails for any reason. This includes ``app.state.qa_chain`` not
            being set yet.
    """
    # Whitespace-only prompts carry no question either, so they are rejected
    # together with the empty string instead of being sent to the model.
    if not question or not question.strip():
        raise InfoNotProvidedException(response_result, "Come on, I'm not telepathic. I can't read your mind. Please provide me with a question.")

    try:
        llm_response = app.state.qa_chain(question)
        output = Inference(
            answer=llm_response["result"].strip(),
            source_documents=llm_response["source_documents"],
        )

        response_result["result"] = output.dict()
    except Exception as e:
        response_result["status"] = "error"
        response_result["message"].append(str(e))
        # Chain the cause so the original traceback is preserved.
        raise ModelDeploymentException(response_result) from e
StudybotAPI/backend/retriever/pipeline.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+ from transformers import (
4
+ BitsAndBytesConfig,
5
+ AutoModelForCausalLM,
6
+ AutoTokenizer,
7
+ GenerationConfig,
8
+ pipeline,
9
+ )
10
+
11
+ import warnings
12
+
13
+ warnings.filterwarnings("ignore")
14
+
15
+
16
class EmbeddingModel:
    """Bundle a tokenizer and a 4-bit quantised causal language model.

    NOTE(review): despite the class name, the code loads an
    ``AutoModelForCausalLM`` and builds a "text-generation" pipeline.
    """

    def __init__(self, model_name, generation_config):
        self.model_name = model_name
        self.generation_config = generation_config
        # Both loaders read self.model_name, so it is assigned first.
        self.tokenizer = self._initialize_tokenizer()
        self.model = self._initialize_model()

    def _initialize_tokenizer(self):
        """Load the fast tokenizer and reuse its EOS token as the padding token."""
        tok = AutoTokenizer.from_pretrained(self.model_name, use_fast=True)
        tok.pad_token = tok.eos_token
        return tok

    def _initialize_model(self):
        """Load the causal LM with an NF4, double-quantised 4-bit configuration."""
        bnb_settings = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )
        return AutoModelForCausalLM.from_pretrained(
            self.model_name,
            torch_dtype=torch.float16,
            trust_remote_code=True,
            device_map="auto",
            quantization_config=bnb_settings,
        )

    def _initialize_generation_config(self):
        """Return the model's pretrained GenerationConfig with project overrides applied."""
        gen_cfg = GenerationConfig.from_pretrained(self.model_name)
        overrides = (
            ("max_new_tokens", 1024),
            ("temperature", 0.0001),
            ("top_p", 0.95),
            ("do_sample", True),
            ("repetition_penalty", 1.15),
        )
        for attr_name, attr_value in overrides:
            setattr(gen_cfg, attr_name, attr_value)
        return gen_cfg

    def _initialize_pipeline(self):
        """Build a text-generation pipeline from the stored model, tokenizer and generation config."""
        return pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            return_full_text=True,
            generation_config=self.generation_config,
        )
63
+
64
+
65
+ # if __name__ == "__main__":
66
+ # MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
67
+
68
+ # text_generator = MyTextGenerator(
69
+ # MODEL_NAME, MyTextGenerator._initialize_generation_config()
70
+ # )
71
+ # generated_text = text_generator.generate_text()
72
+ # print(generated_text)
StudybotAPI/backend/routes.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from pathlib import Path
4
+ from typing import Callable
5
+ from tempfile import NamedTemporaryFile
6
+
7
+ from fastapi import Request, BackgroundTasks
8
+ from fastapi import UploadFile
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+
11
+ from backend import app
12
+ from backend.schemas import *
13
+ from backend.retriever import *
14
+ from backend.utils import *
15
+
16
+
17
# Register the CORS middleware with every origin, method and header allowed.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive — confirm this is intended outside of development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
24
+
25
+
26
@app.get("/", tags=["Home"])
def api_home():
    """Return the static welcome payload served at the API root."""
    welcome = {"detail": "Welcome to Studybot API"}
    return welcome
29
+
30
+
31
@app.post("/api/upload", response_model=DataResponseModel,summary="Upload", tags=["Resource Server"])
async def upload_data(file: UploadFile = File(...)):
    """Receive an uploaded file and rebuild the QA chain from it.

    The upload is copied to a named temporary file, keeping the original
    filename's suffix, and that path is handed to ``llm_chain_loader``.
    The temporary file is removed once the loader has finished or failed.

    Raises:
        DataNotUploadedException: If the upload cannot be copied to disk.
    """

    response_result = {
        "status": "success",
        "message": ["Data uploaded successfully."]
    }

    try:
        suffix = Path(file.filename).suffix
        with NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            shutil.copyfileobj(file.file, tmp)
            # tmp.name is already absolute, so the join returns it unchanged.
            tmp_path = os.path.join(os.getcwd(), tmp.name)
    except Exception as e:
        response_result["status"] = "error"
        response_result["message"][0]=str(e)
        raise DataNotUploadedException(response_result) from e

    finally:
        file.file.close()

    try:
        await llm_chain_loader(DATA_PATH=tmp_path)
    finally:
        # delete=False leaves the file on disk; remove it so repeated uploads
        # do not accumulate in the temp directory.
        Path(tmp_path).unlink(missing_ok=True)
    return response_result
54
+
55
+
56
@app.post("/api/inference", summary="Inference", response_model=FrontendResponseModel, tags=["Resource Server"])
def inference(data: Chat):
    """Answer the prompt in ``data.promptMessage`` using the loaded QA chain.

    Starts from a "success" envelope; ``ops_inference`` fills in the result
    in place or raises one of the project's exceptions.
    """
    response_result = dict(status="success", message=[""], result={})
    ops_inference(response_result, data.promptMessage)
    return response_result
StudybotAPI/backend/schemas/FrontendResponseSchema.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from pydantic import BaseModel
3
+
4
+
5
# Response envelope used as the response_model of the /api/inference route.
class FrontendResponseModel(BaseModel):
    status: str  # the route starts with "success"
    message: List[str]  # list of human-readable messages
    result: dict  # filled with the Inference payload by ops_inference
9
+
10
# Response envelope used as the response_model of the /api/upload route.
class DataResponseModel(BaseModel):
    status: str  # the route starts with "success"
    message: List[str]  # list of human-readable messages
StudybotAPI/backend/schemas/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .chatschema import *
2
+ from .FrontendResponseSchema import *
StudybotAPI/backend/schemas/chatschema.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from fastapi import File, UploadFile
3
+
4
+
5
# Answer payload built by ops_inference from the QA chain's output.
class Inference(BaseModel):
    answer: str  # stripped "result" text from the chain
    source_documents: list  # the chain's "source_documents" value
8
+
9
# Request body of the /api/inference route.
class Chat(BaseModel):
    promptMessage: str  # the user's question, passed on to ops_inference
11
+
12
# Wrapper around an uploaded file.
# NOTE(review): not referenced by the code visible in this commit — confirm it is still needed.
class Upload(BaseModel):
    file : UploadFile
StudybotAPI/backend/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .chain_loader import *
StudybotAPI/backend/utils/chain_loader.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from backend.ingestion import *
2
+ from backend import app
3
+ from backend.core.configEnv import config
4
+
5
+ from langchain.chains import (
6
+ LLMChain,
7
+ SimpleSequentialChain,
8
+ RetrievalQA,
9
+ ConversationalRetrievalChain,
10
+ )
11
+ from langchain.llms import Clarifai
12
+ from langchain.prompts import PromptTemplate
13
+
14
+
15
async def llm_chain_loader(DATA_PATH: str):
    """Build the retrieval QA chain for a PDF and publish it on ``app.state.qa_chain``.

    Steps: load the PDF at ``DATA_PATH``, index it through
    ``app.state.emb.store_embeddings``, read the prompt template stored next
    to this module, and wire a Clarifai LLM to a top-2 similarity retriever
    in a "stuff" RetrievalQA chain that also returns source documents.

    Args:
        DATA_PATH: Path handed to ``PDFDataLoader``.
    """
    from pathlib import Path  # local import keeps this block self-contained

    docs = PDFDataLoader(DATA_PATH).load()
    db = app.state.emb.store_embeddings(docs)

    # Resolve prompt.txt relative to this file rather than the process's
    # working directory, so the loader works wherever the server is started.
    prompt_path = Path(__file__).resolve().parent / "prompt.txt"
    template_text = prompt_path.read_text(encoding="utf8")

    prompt = PromptTemplate(template=template_text, input_variables=["context", "question"])

    llm = Clarifai(
        pat=config.CLARIFAI_PAT,
        user_id=config.USER_ID,
        app_id=config.APP_ID,
        model_id=config.MODEL_ID,
        model_version_id=config.MODEL_VERSION_ID,
    )

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_type="similarity",search_kwargs={"k": 2}),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )

    app.state.qa_chain = qa_chain
StudybotAPI/backend/utils/prompt.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [INST] <<SYS>>
2
+ You are a revolutionary educational AI bot to assist students in quick revisions of theoretical subjects, which often involve numerous concepts, dates, and important events. You need to answer such that it aids in recalling key information for efficient study sessions. If you do not know the answer, reply with 'I am sorry, I don't have enough information.'
3
+ ALWAYS return a "SOURCES" part in your answer.
4
+ The "SOURCES" part should be a reference to the source of the document from which you got your answer.
5
+ <</SYS>>
6
+
7
+ {context}
8
+
9
+ Consider a student engaged in the study of any theoretical subject, where the abundance of concepts and events poses a challenge to memorization. The aim is to overcome this hurdle and be capable of providing brief answers to specific queries. For example, if a student forgets a key concept, date, or event, they can ask the bot a question like "What is [specific query]?" for a concise answer.
10
+ Note that students can also ask multiple questions in a single query. For example, "What is [specific query 1]?, What is [specific query 2]?, What is [specific query 3]?".
11
+
12
+ {question} [/INST]
StudybotAPI/backend/utils/prompts.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt templates for the Studybot subject-specific chains.
#
# Every template exposes the same two placeholders, ``{context}`` and
# ``{question}``. The system instructions sit between ``<<SYS>>`` and
# ``<</SYS>>`` inside the ``[INST] ... [/INST]`` wrapper.

social_sciences_teacher = """
[INST] <<SYS>>
You are a revolutionary educational AI bot to assist students in quick revisions of theoretical subjects, which often involve numerous concepts, dates, and important events. You need to answer such that it aids in recalling key information for efficient study sessions. If you do not know the answer reply with 'I am sorry, I don't have enough information.'
ALWAYS return a "SOURCES" part in your answer.
The "SOURCES" part should be a reference to the source of the document from which you got your answer.
<</SYS>>

{context}

Consider a student engaged in the study of any theoretical subject, where the abundance of concepts and events poses a challenge to memorization. The aim is to overcome this hurdle and be capable of providing brief answers to specific queries. For example, if a student forgets a key concept, date, or event, they can ask the bot a question like "What is [specific query]?" for a concise answer.
Note that students can also ask multiple questions in a single query. For example, "What is [specific query 1]?, What is [specific query 2]?, What is [specific query 3]?".

{question} [/INST]
"""


english_teacher = """
[INST] <<SYS>>
You are an innovative language learning AI designed to assist students in improving their English language skills. Your goal is to provide helpful and engaging responses that aid in language comprehension, grammar, and vocabulary building. If you are unable to answer a question, respond with 'I am sorry, I don't have enough information.'
ALWAYS include a "RESOURCES" section in your answer, providing guidance on where students can find additional information or practice materials.
<</SYS>>

{context}

Imagine a student immersed in the study of the English language, aiming to enhance their understanding of grammar, vocabulary, and overall language proficiency. Your role is to assist them by providing concise and informative answers to specific language-related queries. For instance, if a student is uncertain about a grammatical rule or the meaning of a word, they can ask you questions like "What is [specific query]?" for clear and detailed responses.
Keep in mind that students may pose multiple questions within a single query. For example, "What is the meaning of [specific query 1]?, How do I use [specific query 2] in a sentence?, Can you explain the grammar rule for [specific query 3]?".

{question} [/INST]
"""


science_teacher = """
[INST] <<SYS>>
You are a cutting-edge science education AI, dedicated to helping students grasp complex scientific concepts and theories. Your mission is to provide insightful and understandable explanations to foster a deeper understanding of various scientific topics. If you don't have enough information to answer a question, respond with 'I am sorry, I don't have enough information.'
Always include a "REFERENCES" section in your answer, directing students to relevant sources for further exploration.
<</SYS>>

{context}

Envision a student engrossed in the study of science, navigating through intricate theories and phenomena. Your purpose is to assist them by offering clear and concise answers to their scientific inquiries. For instance, if a student is struggling to comprehend a particular scientific concept or needs clarification on an experiment, they can pose questions like "What is [specific query]?" for detailed responses.
It's important to note that students might present multiple questions within a single query. For instance, "Explain [specific query 1]?, How does [specific query 2] work?, Can you provide examples of [specific query 3]?".

{question} [/INST]
"""

# One entry per subject prompt. Each ``name`` now matches the subject named in
# its ``description`` and the template it points at (the previous names were
# "Math Teacher", "Spanish Teacher" and "Calculus Teacher").
prompt_infos = [
    {
        "name": "Social Sciences Teacher",
        "description": "Good for answering questions about Social Sciences subject like History, Geography, Civics, etc.",
        "prompt_template": social_sciences_teacher,
    },
    {
        "name": "English Teacher",
        "description": "Good for answering questions about English Language",
        "prompt_template": english_teacher,
    },
    {
        "name": "Science Teacher",
        "description": "Good for answering questions about Science subjects like Physics, Chemistry, Biology, etc.",
        "prompt_template": science_teacher,
    },
]
StudybotAPI/config.yml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ CHUNK_SIZE: 1000
2
+ CHUNK_OVERLAP: 100
3
+ NUM_RESULTS: 3
4
+ EMBEDDINGS: "BAAI/llm-embedder"
5
+ VECTOR_DB: "./vectorstore/studybotstore"
6
+ NORMALIZE_EMBEDDINGS: True
7
+ COLLECTION_NAME: "studybotstore"
8
+ DEVICE: "cpu"
9
+ VECTOR_SPACE: "cosine"
10
+ CACHE_FOLDER: "./cache"
StudybotAPI/requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.99.1
2
+ uvicorn
3
+ requests
4
+ langchain==0.0.346
5
+ pydantic==1.10.2
6
+ pypdf
7
+ python-box
8
+ qdrant-client
9
+ torch
10
+ transformers
11
+ sentence_transformers
12
+ clarifai
13
+ Pillow
14
+ tiktoken
15
+ python-multipart
frontend/components/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .authors import *
2
+ from .user_greetings import *
frontend/components/authors.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import streamlit as st
3
+
4
+
5
def authors():
    """Render a divider followed by the author's GitHub link in the sidebar."""
    follow_links = """
Follow me on:

Github → [@HemanthSai7](https://github.com/HemanthSai7)
"""
    sidebar = st.sidebar
    sidebar.divider()
    sidebar.info(follow_links)
frontend/components/user_greetings.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
def user_greetings():
    """Show the welcome and feedback messages in an expanded sidebar panel."""
    greeting_lines = (
        "Welcome to Studybot! This is a tool to help you revise your subjects. You can use the sidebar to navigate to the different pages. Have fun!",
        "If you have any feedback, please contact me on [LinkedIn](https://www.linkedin.com/in/hemanthsai7/) or [GitHub](https://github.com/HemanthSai7).",
    )
    with st.sidebar.expander("👋 Greetings!", expanded=True):
        for line in greeting_lines:
            st.write(line)
frontend/layouts/__init__.py ADDED
File without changes
frontend/layouts/mainlayout.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import streamlit as st
3
+ from typing import Callable
4
+ from components import authors, user_greetings
5
+
6
+
7
def mainlayout(func: Callable):
    """Decorate a Streamlit page function with the shared Studybot page chrome.

    Each time the returned wrapper is called it:

    1. loads ``layouts/st_page_layouts.json`` and passes the entry named after
       ``func`` (falling back to the ``"home"`` entry) to
       ``st.set_page_config``;
    2. renders the title, ``user_greetings()`` and ``authors()``;
    3. calls ``func`` with the wrapper's arguments and returns its result.

    Args:
        func: Page-rendering callable; its ``__name__`` selects the layout.

    Returns:
        The wrapping callable, carrying ``func``'s name and docstring.
    """
    import functools  # local import so the block does not rely on new file-level imports

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        with open("layouts/st_page_layouts.json", "r", encoding="utf-8") as f:
            st_page_layouts = json.load(f)

        # Pages without a dedicated layout entry reuse the "home" layout.
        layout_key = func.__name__ if func.__name__ in st_page_layouts else "home"
        st.set_page_config(**st_page_layouts[layout_key])
        st.markdown('# :rainbow[Welcome to Studybot]🚀')
        user_greetings()
        authors()

        return func(*args, **kwargs)

    return wrapper
frontend/layouts/st_page_layouts.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "home": {
3
+ "page_title": "Studybot",
4
+ "layout": "wide",
5
+ "page_icon": "🏡",
6
+ "initial_sidebar_state": "expanded"
7
+ },
8
+ "bot": {
9
+ "page_title": "Chatbot",
10
+ "layout": "wide",
11
+ "page_icon": "💻",
12
+ "initial_sidebar_state": "expanded"
13
+ }
14
+ }
frontend/pages/2_🤖_bot.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ from layouts.mainlayout import mainlayout
4
+
5
@mainlayout
def bot():
    """Chatbot page body: show the revision subheader."""
    st.subheader("Revise your subjects with Studybot!")


bot()
frontend/requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ streamlit
frontend/test.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ import requests
4
+
5
+ # from studybot import qa_chain
6
+ # st.write(qa_chain)
7
+
8
# Base URL of the deployed Studybot API.
API_BASE_URL = "https://hemanthsai7-studybotapi.hf.space/api"
# Seconds to wait for the API before giving up. ``requests`` applies no
# timeout by default, so a stalled server would otherwise block the script.
UPLOAD_TIMEOUT = 300
INFERENCE_TIMEOUT = 120

upload_pdf = st.file_uploader("Upload PDF", type="pdf")
if upload_pdf is not None:
    files = {"file": upload_pdf}
    try:
        response = requests.post(
            f"{API_BASE_URL}/upload", files=files, timeout=UPLOAD_TIMEOUT
        )
    except requests.RequestException as exc:
        # Connection failures and timeouts are shown instead of crashing the page.
        st.error(f"Upload failed: {exc}")
    else:
        st.write(response)

query = st.text_input("Question", key="question")
st.write(st.session_state)

if st.button("Ask"):
    try:
        answer = requests.post(
            f"{API_BASE_URL}/inference",
            json={"promptMessage": query},
            timeout=INFERENCE_TIMEOUT,
        ).json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a response body that is not valid JSON.
        st.error(f"Inference request failed: {exc}")
    else:
        st.write(answer)
frontend/🏡_Home.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ from layouts.mainlayout import mainlayout
4
+
5
+
6
# Static page copy, kept at module level so ``home`` reads as a short outline.
_INTRO_HTML = """
<p style='text-align: justify;'>
<b>Studybot</b> is a <b>free</b> and <b>open-source</b> tool for <b>automated</b> and <b>personalized</b> learning.
It is a <b>chatbot</b> that helps you to <b>learn</b> and <b>memorize</b> new information in a <b>fun</b> and <b>easy</b> way.
Studybot is <b>free</b> and <b>open-source</b>, so you can use it <b>without any limitations</b> and <b>without any costs</b>.
You can also <b>customize</b> it to your needs and <b>contribute</b> to its development.
<b>Enjoy</b> your learning with Studybot!
</p>
"""

_HOW_IT_WORKS_MD = """
- When you upload a document, it will be divided into smaller chunks and stored in a special type of database called a vector index that allows for semantic search and retrieval. I'm using Qdrant vector database for this purpose.

- When you ask a question, Studybot will search through the document chunks and find the most relevant ones using the vector index. Then, it will use Mistral-7B-instruct to generate a final answer.

"""

_FAQ_MD = """
- **What is the best way to upload a document?**<br>
The best way to upload a document is to upload a PDF file. Studybot will automatically divide it into smaller chunks. You can also upload a text file. In this case, Studybot will divide it into smaller chunks.

- **What is the best way to ask questions?**<br>
The best way to ask questions is to ask questions that are related to the document you uploaded. If you ask questions that are not related to the document you uploaded, Studybot will not be able to answer them.

- **Is my data safe?**<br>
Yes, your data is safe. Studybot does not store your documents or questions. All uploaded data is deleted after you close the browser tab since it is stored in the RAM memory. However, if you want to be sure that your data is safe, you can use the `Clear data 🧹` button to delete all uploaded data.

- **Why does it take so long to index my document?**<br>
When you upload a document, it is divided into smaller chunks and stored in a special type of database called a vector index that allows for semantic search and retrieval. It takes some time to index your document because it has to be divided into smaller chunks and stored in the vector index. However, once your document is indexed, it will be much faster to search through it.

- **Are the answers 100% accurate?**<br>
- No, the answers are not 100% accurate. Studybot uses Mistral-7B to generate answers. Mistral-7B is a powerful language model, but it sometimes makes mistakes and is prone to hallucinations. Also, Studybot uses semantic search to find the most relevant chunks and does not see the entire document, which means that it may not be able to find all the relevant information and may not be able to answer all questions (especially summary-type questions or questions that require a lot of context from the document).

- But for most of the time, Studybot is very accurate and can answer most questions. Always check with the sources to make sure that the answers are correct.

- **What is the best way to contribute to Studybot?**<br>
The best way to contribute to Studybot is to create an issue on GitHub. I will be happy to answer your questions and help you with your contributions.
"""


@mainlayout
def home():
    """Home page body: intro blurb, two expanders, and the architecture image.

    Renders, in order: a subheader, the introductory HTML paragraph, the
    "How does it work?" expander (open by default), the FAQ expander, a
    divider, a centered heading, and ``images/architecture.png``.
    """
    st.subheader("Revise your subjects with Studybot!")

    st.markdown(_INTRO_HTML, unsafe_allow_html=True)

    with st.expander("How does it work?", expanded=True):
        st.markdown(_HOW_IT_WORKS_MD)

    with st.expander("FAQs 🤔"):
        st.markdown(_FAQ_MD, unsafe_allow_html=True)

    st.divider()
    # Centered architecture heading; the closing tag now matches the opening <h2>.
    st.markdown("<h2 style='text-align: center; color: black;'>Studybot Architecture</h2>", unsafe_allow_html=True)
    st.image("images/architecture.png")


home()
notebooks/Untitled6.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/embeddings.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/selfrag.ipynb ADDED
File without changes
test.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class A:
    """Holder for a single ``bill`` value, exposed through a property."""

    def __init__(self):
        # Backing field for the ``bill`` property; starts at 1.
        self._bill = 1

    @property
    def bill(self):
        """Return the stored bill value."""
        return self._bill

    @bill.setter
    def bill(self, new_bill):
        # Assignment is permitted; the value is stored unchanged.
        self._bill = new_bill
13
+
14
class B(A):
    """Subclass of ``A`` whose bill starts at 2."""

    def __init__(self):
        super().__init__()
        # Overwrite the backing field that A.__init__ set to 1.
        self._bill = 2
18
+
19
# Alternative check using the subclass (left disabled):
# b = B()
# print(b.bill)

# Assign through the property setter, then read the value back via the getter.
a = A()
a.bill = 3
print(a.bill)
24
+
25
+
26
+