File size: 13,245 Bytes
f35f208
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import Optional, List, Dict, Union
from .api import LLMApi
from utils.logging import setup_logger
from utils.helpers import get_system_info, format_memory_size
from utils.validation import validate_model_path
import psutil
from pathlib import Path

router = APIRouter()
# Module-level singletons populated by init_router(). Every endpoint below
# assumes init_router() has been called once at startup; until then these
# are None and any request would fail.
logger = None
api = None
config = None

def init_router(config_dict: dict):
    """Bind the module-level config, logger, and LLM API singletons.

    Must be invoked exactly once at application startup, before any of the
    routes in this module handle a request.
    """
    global logger, api, config
    config = config_dict
    logger = setup_logger(config_dict, "api_routes")
    api = LLMApi(config_dict)
    logger.info("Router initialized with LLM API instance")

class GenerateRequest(BaseModel):
    """Request body for the /generate and /generate/stream endpoints."""
    prompt: str  # user prompt to complete
    system_message: Optional[str] = None  # optional system prompt forwarded to the API layer
    max_new_tokens: Optional[int] = None  # falls back to api.max_new_tokens when None

class EmbeddingRequest(BaseModel):
    """Request body for the /embedding endpoint."""
    text: str  # raw text to embed

class EmbeddingResponse(BaseModel):
    """Response body for the /embedding endpoint."""
    embedding: List[float]  # the embedding vector
    dimension: int  # len(embedding), returned for caller convenience

class SystemStatusResponse(BaseModel):
    """Pydantic model for system status response.

    Each section is optional; a section that failed to collect is replaced
    by a {"status": "error", "message": ...} dict by check_system().
    """
    cpu: Optional[Dict[str, Union[float, str]]] = None
    memory: Optional[Dict[str, Union[float, str]]] = None
    gpu: Optional[Dict[str, Union[bool, str, float]]] = None
    storage: Optional[Dict[str, str]] = None
    model: Optional[Dict[str, Union[bool, str]]] = None

class ValidationResponse(BaseModel):
    """Response body for /system/validate."""
    config_validation: Dict[str, bool]  # per-check boolean flags for config
    model_validation: Dict[str, bool]  # per-check boolean flags for the model
    folder_validation: Dict[str, bool]  # per-check boolean flags for folders/permissions
    overall_status: str  # "valid" | "warning" | "invalid"
    issues: List[str]  # human-readable descriptions of every failed check

@router.get("/system/validate",
            response_model=ValidationResponse,
            summary="Validate System Configuration",
            description="Validates system configuration, folders, and model setup")
async def validate_system():
    """Validate configuration, model setup, folder structure, and permissions.

    Returns a ValidationResponse whose section dicts are str -> bool flags
    (matching the declared response model) and an overall_status of
    "valid" (no issues), "warning" (fewer than 3 issues) or "invalid".
    """
    logger.info("Starting system validation")
    issues = []

    # --- Configuration checks -------------------------------------------
    config_status = {
        "has_required_fields": True,  # Check if all required config fields exist
        "valid_paths": True,          # Check if paths are valid
        "valid_parameters": True      # Check if parameters are within acceptable ranges
    }
    try:
        if not api.models_path.exists():
            config_status["valid_paths"] = False
            issues.append("Models directory does not exist")

        # Sampling temperature is expected to lie in [0, 2].
        if api.temperature < 0 or api.temperature > 2:
            config_status["valid_parameters"] = False
            issues.append("Temperature parameter out of valid range (0-2)")

    except Exception as e:
        logger.error(f"Configuration validation failed: {str(e)}")
        # Keep the dict str -> bool so the response still validates against
        # ValidationResponse.config_validation (was {"error": str}, which
        # violated the declared Dict[str, bool] schema).
        config_status = {"has_required_fields": False, "valid_paths": False,
                         "valid_parameters": False}
        issues.append(f"Config validation error: {str(e)}")

    # --- Model checks ---------------------------------------------------
    model_status = {
        "model_files_exist": False,
        "model_loadable": False,
        "tokenizer_valid": False
    }
    try:
        if api.model_name:
            # Local model folder is the last segment of the HF-style name.
            model_path = api.models_path / api.model_name.split('/')[-1]
            model_status["model_files_exist"] = validate_model_path(model_path)

            if not model_status["model_files_exist"]:
                issues.append("Model files are missing or incomplete")

            model_status["model_loadable"] = api.model is not None
            model_status["tokenizer_valid"] = api.tokenizer is not None

    except Exception as e:
        logger.error(f"Model validation failed: {str(e)}")
        # Same schema-consistency fix as above.
        model_status = {"model_files_exist": False, "model_loadable": False,
                        "tokenizer_valid": False}
        issues.append(f"Model validation error: {str(e)}")

    # --- Folder structure and permissions --------------------------------
    try:
        folder_status = {
            "models_folder": api.models_path.exists(),
            "cache_folder": api.cache_path.exists(),
            # base_path / "logs" already yields a Path; the old Path(...)
            # wrapper was redundant.
            "logs_folder": (api.base_path / "logs").exists(),
            "write_permissions": False
        }

        # Probe write permissions by creating and removing a scratch file.
        test_file = api.models_path / ".test_write"
        try:
            test_file.touch()
            test_file.unlink()
            folder_status["write_permissions"] = True
        except OSError:  # was a bare except:, which also swallowed e.g. KeyboardInterrupt
            issues.append("Insufficient write permissions in models directory")

    except Exception as e:
        logger.error(f"Folder validation failed: {str(e)}")
        # Same schema-consistency fix as above.
        folder_status = {"models_folder": False, "cache_folder": False,
                         "logs_folder": False, "write_permissions": False}
        issues.append(f"Folder validation error: {str(e)}")

    # Overall verdict scales with the number of collected issues.
    if not issues:
        overall_status = "valid"
    elif len(issues) < 3:
        overall_status = "warning"
    else:
        overall_status = "invalid"

    validation_response = ValidationResponse(
        config_validation=config_status,
        model_validation=model_status,
        folder_validation=folder_status,
        overall_status=overall_status,
        issues=issues
    )

    logger.info(f"System validation completed with status: {overall_status}")
    return validation_response


@router.get("/system/status",
            response_model=SystemStatusResponse,
            summary="Check System Status",
            description="Returns comprehensive system status including CPU, Memory, GPU, Storage, and Model information")
async def check_system():
    """
    Get system status including:
    - CPU usage
    - Memory usage
    - GPU availability and usage
    - Storage status for model and cache directories
    - Current model status

    Each section is collected independently; a failure in one section is
    reported in that section and does not abort the others.
    """
    logger.info("Checking system status")
    status = SystemStatusResponse()
    system_info = None  # lazily fetched once, shared by CPU/memory/GPU sections

    # Check CPU and Memory
    try:
        system_info = get_system_info()
        status.cpu = {
            "usage_percent": system_info["cpu_percent"],
            "status": "healthy" if system_info["cpu_percent"] < 90 else "high"
        }
        logger.debug(f"CPU status retrieved: {status.cpu}")
    except Exception as e:
        logger.error(f"Failed to get CPU info: {str(e)}")
        status.cpu = {"status": "error", "message": str(e)}

    # Check Memory
    try:
        if system_info is None:  # refetch only if the CPU section failed
            system_info = get_system_info()
        status.memory = {
            "usage_percent": system_info["memory_percent"],
            "status": "healthy" if system_info["memory_percent"] < 90 else "critical",
            "available": format_memory_size(psutil.virtual_memory().available)
        }
        logger.debug(f"Memory status retrieved: {status.memory}")
    except Exception as e:
        logger.error(f"Failed to get memory info: {str(e)}")
        status.memory = {"status": "error", "message": str(e)}

    # Check GPU
    try:
        if system_info is None:
            system_info = get_system_info()
        gpu_total = system_info["gpu_memory_total"]
        # Guard the division: the old code raised ZeroDivisionError when a
        # GPU was reported available with a total memory of 0.
        if system_info["gpu_available"] and gpu_total:
            utilization = system_info["gpu_memory_used"] / gpu_total * 100
        else:
            utilization = 0
        status.gpu = {
            "available": system_info["gpu_available"],
            "memory_used": format_memory_size(system_info["gpu_memory_used"]),
            "memory_total": format_memory_size(gpu_total),
            "utilization_percent": utilization
        }
        logger.debug(f"GPU status retrieved: {status.gpu}")
    except Exception as e:
        logger.error(f"Failed to get GPU info: {str(e)}")
        status.gpu = {"status": "error", "message": str(e)}

    # Check Storage
    try:
        models_path = Path(api.models_path)
        cache_path = Path(api.cache_path)
        status.storage = {
            "models_directory": str(models_path),
            "models_size": format_memory_size(sum(f.stat().st_size for f in models_path.glob('**/*') if f.is_file())),
            "cache_directory": str(cache_path),
            "cache_size": format_memory_size(sum(f.stat().st_size for f in cache_path.glob('**/*') if f.is_file()))
        }
        logger.debug(f"Storage status retrieved: {status.storage}")
    except Exception as e:
        logger.error(f"Failed to get storage info: {str(e)}")
        status.storage = {"status": "error", "message": str(e)}

    # Check Model Status
    try:
        # Local model folder is the last segment of the HF-style name.
        current_model_path = api.models_path / api.model_name.split('/')[-1] if api.model_name else None
        status.model = {
            "is_loaded": api.model is not None,
            "current_model": api.model_name,
            "is_valid": validate_model_path(current_model_path) if current_model_path else False,
            "has_chat_template": api.has_chat_template() if api.model else False
        }
        logger.debug(f"Model status retrieved: {status.model}")
    except Exception as e:
        logger.error(f"Failed to get model status: {str(e)}")
        status.model = {"status": "error", "message": str(e)}

    logger.info("System status check completed")
    return status


@router.post("/generate")
async def generate_text(request: GenerateRequest):
    """Return a complete (non-streaming) generation for the given prompt."""
    logger.info(f"Received generation request for prompt: {request.prompt[:50]}...")
    try:
        # Fall back to the API-level token budget when the caller omits one.
        token_budget = request.max_new_tokens or api.max_new_tokens
        generated = api.generate_response(
            prompt=request.prompt,
            system_message=request.system_message,
            max_new_tokens=token_budget,
        )
        logger.info("Successfully generated response")
        return {"generated_text": generated}
    except Exception as e:
        logger.error(f"Error in generate_text endpoint: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/generate/stream")
async def generate_stream(request: GenerateRequest):
    """Return a streaming generation for the given prompt."""
    logger.info(f"Received streaming generation request for prompt: {request.prompt[:50]}...")
    try:
        stream = api.generate_stream(
            prompt=request.prompt,
            system_message=request.system_message,
            # Fall back to the API-level token budget when the caller omits one.
            max_new_tokens=request.max_new_tokens or api.max_new_tokens,
        )
        return stream
    except Exception as e:
        logger.error(f"Error in generate_stream endpoint: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/embedding", response_model=EmbeddingResponse)
async def generate_embedding(request: EmbeddingRequest):
    """Return the embedding vector for the given text."""
    logger.info(f"Received embedding request for text: {request.text[:50]}...")
    try:
        vector = api.generate_embedding(request.text)
    except Exception as e:
        logger.error(f"Error in generate_embedding endpoint: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
    dim = len(vector)
    logger.info(f"Successfully generated embedding of dimension {dim}")
    return EmbeddingResponse(embedding=vector, dimension=dim)


@router.post("/model/download",
             summary="Download default or specified model",
             description="Downloads model files. Uses default model from config if none specified.")
async def download_model(model_name: Optional[str] = None):
    """Download model files to local storage, defaulting to the configured model."""
    try:
        # Default to the model named in the config when none is provided.
        target = model_name if model_name else config["model"]["defaults"]["model_name"]
        logger.info(f"Received request to download model: {target}")

        api.download_model(target)
        logger.info(f"Successfully downloaded model: {target}")

        return {
            "status": "success",
            "message": f"Model {target} downloaded",
            "model_name": target,
        }
    except Exception as e:
        logger.error(f"Error downloading model: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

@router.post("/model/initialize",
             summary="Initialize default or specified model",
             description="Initialize model for use. Uses default model from config if none specified.")
async def initialize_model(model_name: Optional[str] = None):
    """Load a model for inference, defaulting to the configured model."""
    try:
        # Default to the model named in the config when none is provided.
        target = model_name if model_name else config["model"]["defaults"]["model_name"]
        logger.info(f"Received request to initialize model: {target}")

        api.initialize_model(target)
        logger.info(f"Successfully initialized model: {target}")

        return {
            "status": "success",
            "message": f"Model {target} initialized",
            "model_name": target,
        }
    except Exception as e:
        logger.error(f"Error initializing model: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))


@router.get("/models/status")
async def get_model_status():
    """Report whether a model is loaded, which one, and chat-template support.

    Returns a plain dict: model_loaded (bool), current_model (str | None),
    has_chat_template (bool).
    """
    try:
        status = {
            "model_loaded": api.model is not None,
            # `or None` replaces the redundant `x if x else None` conditional;
            # both normalize every falsy model_name to None.
            "current_model": api.model_name or None,
            "has_chat_template": api.has_chat_template() if api.model else False
        }
        logger.info(f"Retrieved model status: {status}")
        return status
    except Exception as e:
        logger.error(f"Error getting model status: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))