# LLMServer / main / _app.py
# Origin: Hugging Face Space by AurelioAguirre — commit 5d274cb ("running dummy App"), 4.45 kB
import yaml
import sys
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
from .api import LLMApi
from .routes import router, init_router
from .utils.logging import setup_logger
from huggingface_hub import login
from pathlib import Path
from dotenv import load_dotenv
import os
def validate_hf():
    """
    Validate Hugging Face authentication.

    Loads environment variables from a local .env file when present, then
    attempts to log in to Hugging Face using the HF_TOKEN environment variable.

    Returns:
        bool: True if authentication succeeded, False if no token was found
        or the login attempt failed.
    """
    # Bug fix: the original read a module-level `config` that is only bound
    # inside the __main__ branch, so calling validate_hf() standalone raised
    # NameError. Load the configuration explicitly instead.
    config = load_config()
    logger = setup_logger(config, "hf_validation")

    # Check for .env file
    env_path = Path('.env')
    if env_path.exists():
        logger.info("Found .env file, loading environment variables")
        load_dotenv()
    else:
        logger.warning("No .env file found. Fine if you're on Huggingface, but you need one to run locally on your PC.")

    # Check for HF token
    hf_token = os.getenv('HF_TOKEN')
    if not hf_token:
        logger.error("No HF_TOKEN found in environment variables")
        return False

    try:
        # Attempt login; raises on invalid/expired tokens
        login(token=hf_token)
        logger.info("Successfully authenticated with Hugging Face")
        return True
    except Exception as e:
        # Best-effort: report the failure but let the caller decide what to do
        logger.error(f"Failed to authenticate with Hugging Face: {str(e)}")
        return False
def load_config():
    """Read and parse the server configuration from main/config.yaml."""
    config_path = "main/config.yaml"
    with open(config_path, "r") as config_file:
        parsed = yaml.safe_load(config_file)
    return parsed
def create_app():
    """Build and configure the FastAPI application.

    Loads the YAML configuration, attaches the CORS middleware, wires the
    API router under the versioned prefix, and returns the app instance.
    """
    config = load_config()
    logger = setup_logger(config, "main")
    logger.info("Starting LLM API server")

    api_cfg = config["api"]
    app = FastAPI(
        title="LLM API",
        description="API for Large Language Model operations",
        version=api_cfg["version"],
    )

    # Cross-origin policy comes straight from the config file
    cors_cfg = api_cfg["cors"]
    app.add_middleware(
        CORSMiddleware,
        allow_origins=cors_cfg["origins"],
        allow_credentials=cors_cfg["credentials"],
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Hand the config to the route module, then mount under e.g. "<prefix>/<version>"
    init_router(config)
    versioned_prefix = f"{api_cfg['prefix']}/{api_cfg['version']}"
    app.include_router(router, prefix=versioned_prefix)

    logger.info("FastAPI application created successfully")
    return app
def test_locally():
    """Run an end-to-end smoke test of the LLM API for local development.

    Exercises, in order: model download, model initialization, embedding
    generation, regular response generation, and streaming generation,
    logging progress at each step.
    """
    config = load_config()
    logger = setup_logger(config, "test")
    logger.info("Starting local tests")

    api = LLMApi(config)
    model_name = config["model"]["defaults"]["model_name"]
    logger.info(f"Testing with model: {model_name}")

    # --- Download ---
    logger.info("Testing model download...")
    api.download_model(model_name)
    logger.info("Download complete")

    # --- Initialization ---
    logger.info("Initializing model...")
    api.initialize_model(model_name)
    logger.info("Model initialized")

    # --- Embeddings (Norwegian sample text) ---
    test_text = "Dette er en test av embeddings generering fra en teknisk tekst om HMS rutiner på arbeidsplassen."
    logger.info("Testing embedding generation...")
    vector = api.generate_embedding(test_text)
    logger.info(f"Generated embedding of length: {len(vector)}")
    logger.info(f"First few values: {vector[:5]}")

    # --- Regular (non-streaming) generation ---
    test_prompts = [
        "Tell me what happens in a nuclear reactor.",
    ]
    logger.info("Testing regular generation:")
    for user_prompt in test_prompts:
        logger.info(f"Prompt: {user_prompt}")
        reply = api.generate_response(
            prompt=user_prompt,
            system_message="You are a helpful assistant."
        )
        logger.info(f"Response: {reply}")

    # --- Streaming generation (chunks printed directly to stdout) ---
    logger.info("Testing streaming generation:")
    logger.info(f"Prompt: {test_prompts[0]}")
    stream = api.generate_stream(
        prompt=test_prompts[0],
        system_message="You are a helpful assistant."
    )
    for token_chunk in stream:
        print(token_chunk, end="", flush=True)
    print("\n")

    logger.info("Local tests completed")
# Module-level ASGI application instance; uvicorn imports it via "main.app:app".
app = create_app()
if __name__ == "__main__":
    config = load_config()
    # NOTE(review): Hugging Face auth check is currently disabled — re-enable
    # if token-gated models are required; presumably fine on HF Spaces itself.
    #validate_hf()
    # "python ... test" runs the local smoke tests instead of starting the server.
    if len(sys.argv) > 1 and sys.argv[1] == "test":
        test_locally()
    else:
        uvicorn.run(
            # Import string (not the app object) is required for reload mode.
            "main.app:app",
            host=config["server"]["host"],
            port=config["server"]["port"],
            log_level="trace",  # NOTE(review): extremely verbose; use "info" outside development
            reload=True,  # development setting: auto-restart on code changes
            workers=1,  # NOTE(review): uvicorn ignores `workers` when reload=True — confirm intent
            access_log=False,
            use_colors=True
        )