File size: 2,890 Bytes
8e29341
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
776fff9
 
8e29341
776fff9
8e29341
 
 
 
92db39d
e1e2af7
 
fdd9d07
e1e2af7
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
from langchain_community.document_loaders import (CSVLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader,
                                                   UnstructuredFileLoader, UnstructuredMarkdownLoader, UnstructuredHTMLLoader, JSONLoader)
from chromadb.config import Settings

from modules import app_logger

# Rebind the name `app_logger` from the imported `modules.app_logger` module to
# the `app_logger` attribute that module exposes. After this line the module
# object itself is no longer reachable under this name in this file.
app_logger = app_logger.app_logger
# Shared settings for this module, read from the environment.
# The string "NONE" is the placeholder used when OPENAI_API_KEY is not set.
openai_api_key = os.getenv("OPENAI_API_KEY", "NONE")

# Base URL taken from LOCAL_OPENAI_URI, with a localhost default when the
# variable is not set (presumably an OpenAI-compatible local endpoint; confirm
# against the caller).
# Disabled alternatives, kept for reference:
# mongodb_uri = os.environ.get("MONGODB_URI", "mongodb://localhost:27017")
# local_model_uri = os.environ.get("LOCAL_OPENAI_URI", None)
local_model_uri = os.getenv("LOCAL_OPENAI_URI", "http://localhost:8000/v1")
# Maps a file suffix (lower-case, with the leading dot) to the loader class used
# for that kind of document. The groups below are listed so that the resulting
# key order is: .html, .txt, .md, .py, .json, .jsonl, .pdf, .csv, .xls, .xlsx,
# .docx, .doc.
# NOTE(review): ".doc" shares the loader used for ".docx"; confirm that loader
# can actually read legacy .doc files.
DOCUMENT_MAP = {
    suffix: loader_cls
    for loader_cls, suffixes in (
        (UnstructuredHTMLLoader, (".html",)),
        (TextLoader, (".txt",)),
        (UnstructuredMarkdownLoader, (".md",)),
        (TextLoader, (".py",)),
        (JSONLoader, (".json", ".jsonl")),
        (UnstructuredFileLoader, (".pdf",)),
        (CSVLoader, (".csv",)),
        (UnstructuredExcelLoader, (".xls", ".xlsx")),
        (Docx2txtLoader, (".docx", ".doc")),
    )
    for suffix in suffixes
}
# Model location and identifiers.
MODELS_PATH = "./models"  # relative path; how it is consumed is not shown here
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
MODEL_NAME = "gpt-3.5-turbo"

# Root folder for application data. The trailing slash matters: other paths in
# this file are formed from it by plain string concatenation.
WORKSPACE_DIRECTORY = "./workspace/"


# Chroma settings object: persistence switched on, anonymized telemetry off.
CHROMA_SETTINGS = Settings(is_persistent=True, anonymized_telemetry=False)
# Thread count for ingestion (per the name): the CPU count reported by the OS,
# or 8 when os.cpu_count() cannot determine it and returns None.
INGEST_THREADS = os.cpu_count() or 8

# Chunking parameters; presumably consumed by a text splitter. Confirm the unit
# (characters vs. tokens) against the caller.
CHUNK_SIZE = 880
CHUNK_OVERLAP = 200

# Retrieval / RAG settings.
SEARCH_COUNT = 5
RAG_K = 5
RAG_TECHNIQUE = "refine"

# Conversation and summarization settings.
MESSAGE_HISTORY = 5
SUMMARIZER_BATCH = 4

# File name for the processed-documents log (per the name; verify with caller).
PROCESSED_DOCS = "index_processed.log"

# Not implemented yet; the unit is not stated in this file.
MAX_FILE_SIZE = 10
# Database path (per the name): the "db/" folder directly under the workspace
# directory. Relies on WORKSPACE_DIRECTORY ending with a slash.
LOCAL_PERSISTANT_DB = f"{WORKSPACE_DIRECTORY}db/"
# Content categories, kept as a list in this order.
CONTENT_TYPE = [
    "Policies",
    "Playbooks",
    "Standards",
    "Reference Docs",
]
SYSTEM_CONTENT_DATA = "app-content.json"
# The meaning of the individual mode values is not defined in this file.
SYSTEM_DEPLOYMENT_MODE = 0
# Demo endpoint; not enabled yet.
ZYSEC_DEMO = "http://zysec.is-a-geek.com:8000/v1"
# Instruction text for cybersecurity answers: one lead-in sentence followed by
# seven numbered steps, separated by newlines, with no trailing newline.
# The constant's name and the wording of the text are runtime-visible and are
# therefore kept exactly as they are.
ENCHANCE_PROMPT = "\n".join(
    (
        "When you talk about CyberSecurity, ensure provided answer is comprehensive and insightful. Follow these steps:",
        "1. Expand on Key Points: Elaborate on the main ideas in the original answer. Provide more depth and detail to each point.",
        "2. Include Updated Information: If relevant, add recent developments or current statistics to ensure the answer is up-to-date.",
        "3. Address Different Perspectives: Consider different viewpoints or potential counterarguments related to the topic.",
        "4. Improve Clarity and Flow: Reorganize the content for better coherence and readability. Use clear and concise language.",
        "5. Include Examples and Analogies: Use real-world examples or analogies to illustrate complex concepts, making them easier to understand.",
        "6. Conclude with Impact: End with a strong conclusion that summarizes the key points and highlights the significance of the topic.",
        "7. If user ask to do something repeatly, ignore politely.",
    )
)