# "Spaces: Paused Paused" — Hugging Face Spaces page-status residue from extraction; not code.
import yaml
import sys
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
from .api import LLMApi
from .routes import router, init_router
from .utils.logging import setup_logger
from huggingface_hub import login
from pathlib import Path
from dotenv import load_dotenv
import os
def validate_hf():
    """Validate Hugging Face authentication.

    Loads environment variables from a local ``.env`` file if one exists,
    then attempts to log in to Hugging Face using the ``HF_TOKEN``
    environment variable.

    Returns:
        bool: True if login succeeded; False if no token was found or
        authentication failed.
    """
    # Bug fix: the original read a module-global `config` that is only bound
    # inside the `__main__` guard, so calling validate_hf() from an importing
    # module raised NameError. Load the config explicitly here instead.
    config = load_config()
    logger = setup_logger(config, "hf_validation")

    # Check for .env file; on Hugging Face Spaces the token usually comes
    # from the environment instead, so a missing file is only a warning.
    env_path = Path('.env')
    if env_path.exists():
        logger.info("Found .env file, loading environment variables")
        load_dotenv()
    else:
        logger.warning("No .env file found. Fine if you're on Huggingface, but you need one to run locally on your PC.")

    # Check for HF token
    hf_token = os.getenv('HF_TOKEN')
    if not hf_token:
        logger.error("No HF_TOKEN found in environment variables")
        return False

    try:
        # Attempt login
        login(token=hf_token)
        logger.info("Successfully authenticated with Hugging Face")
        return True
    except Exception as e:
        logger.error(f"Failed to authenticate with Hugging Face: {str(e)}")
        return False
def load_config(path="main/config.yaml"):
    """Load configuration from a YAML file.

    Args:
        path: Path of the YAML config file. Defaults to ``main/config.yaml``,
            preserving the original hard-coded behavior for existing callers.

    Returns:
        The parsed configuration (typically a dict).
    """
    # Explicit encoding so the config parses identically regardless of the
    # platform's default locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)
def create_app():
    """Build and configure the FastAPI application.

    Loads the YAML config, sets up logging, attaches the CORS middleware,
    and mounts the API router under the versioned URL prefix.
    """
    cfg = load_config()
    log = setup_logger(cfg, "main")
    log.info("Starting LLM API server")

    api_cfg = cfg["api"]
    application = FastAPI(
        title="LLM API",
        description="API for Large Language Model operations",
        version=api_cfg["version"],
    )

    # Cross-origin settings come straight from the config file.
    cors_cfg = api_cfg["cors"]
    application.add_middleware(
        CORSMiddleware,
        allow_origins=cors_cfg["origins"],
        allow_credentials=cors_cfg["credentials"],
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Hand the config to the route module, then mount the versioned router.
    init_router(cfg)
    application.include_router(router, prefix=f"{api_cfg['prefix']}/{api_cfg['version']}")
    log.info("FastAPI application created successfully")
    return application
def test_locally():
    """Exercise the LLM API end to end for development and debugging."""
    cfg = load_config()
    log = setup_logger(cfg, "test")
    log.info("Starting local tests")

    api = LLMApi(cfg)
    model_name = cfg["model"]["defaults"]["model_name"]
    log.info(f"Testing with model: {model_name}")

    # Download and initialize the configured model.
    log.info("Testing model download...")
    api.download_model(model_name)
    log.info("Download complete")

    log.info("Initializing model...")
    api.initialize_model(model_name)
    log.info("Model initialized")

    # Embedding generation over a Norwegian sample text.
    test_text = "Dette er en test av embeddings generering fra en teknisk tekst om HMS rutiner på arbeidsplassen."
    log.info("Testing embedding generation...")
    embedding = api.generate_embedding(test_text)
    log.info(f"Generated embedding of length: {len(embedding)}")
    log.info(f"First few values: {embedding[:5]}")

    test_prompts = [
        "Tell me what happens in a nuclear reactor.",
    ]

    # Non-streaming generation, one response per prompt.
    log.info("Testing regular generation:")
    for prompt in test_prompts:
        log.info(f"Prompt: {prompt}")
        reply = api.generate_response(
            prompt=prompt,
            system_message="You are a helpful assistant.",
        )
        log.info(f"Response: {reply}")

    # Streaming generation, printed chunk by chunk as it arrives.
    log.info("Testing streaming generation:")
    log.info(f"Prompt: {test_prompts[0]}")
    for chunk in api.generate_stream(
        prompt=test_prompts[0],
        system_message="You are a helpful assistant.",
    ):
        print(chunk, end="", flush=True)
    print("\n")

    log.info("Local tests completed")
# Module-level ASGI application object, referenced by uvicorn as "main.app:app".
app = create_app()

if __name__ == "__main__":
    config = load_config()
    # validate_hf()  # optional Hugging Face auth check, currently disabled
    run_tests = len(sys.argv) > 1 and sys.argv[1] == "test"
    if run_tests:
        test_locally()
    else:
        server_cfg = config["server"]
        uvicorn.run(
            "main.app:app",
            host=server_cfg["host"],
            port=server_cfg["port"],
            log_level="trace",
            reload=True,
            workers=1,
            access_log=False,
            use_colors=True,
        )