Spaces:

TeamGenKI
/

LLMServer

Paused

App Files Community

AurelioAguirre committed 27 days ago

Commit

9174d0d

•

1 Parent(s): 78ba59d

Trying Python 3.9, again

Browse files

Files changed (4) hide show

Dockerfile +2 -7
main/_app.py +0 -149
main/app.py +140 -33
requirements.txt +49 -2

Dockerfile CHANGED Viewed

@@ -1,4 +1,4 @@
-FROM python:3.10-slim
 # Set working directory
 WORKDIR /app
@@ -7,9 +7,7 @@ WORKDIR /app
 COPY requirements.txt .
 # Install dependencies and clean cache
-RUN pip install --no-cache-dir -r requirements.txt && \
-    rm -rf /root/.cache/huggingface && \
-    rm -rf /root/.cache/pip
 # Copy only what's needed
 COPY main/ ./main/
@@ -18,9 +16,6 @@ COPY main/ ./main/
 ENV PYTHONPATH=/app
 ENV PYTHONUNBUFFERED=1
-# Clear any cache that might have been created
-RUN rm -rf ~/.cache/huggingface
 # Expose the port
 EXPOSE 7680

+FROM python:3.9
 # Set working directory
 WORKDIR /app
 COPY requirements.txt .
 # Install dependencies and clean cache
+RUN pip install --no-cache-dir -r requirements.txt
 # Copy only what's needed
 COPY main/ ./main/
 ENV PYTHONPATH=/app
 ENV PYTHONUNBUFFERED=1
 # Expose the port
 EXPOSE 7680

main/_app.py DELETED Viewed

@@ -1,149 +0,0 @@
-import yaml
-import sys
-from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
-import uvicorn
-from .api import LLMApi
-from .routes import router, init_router
-from .utils.logging import setup_logger
-from huggingface_hub import login
-from pathlib import Path
-from dotenv import load_dotenv
-import os
-def validate_hf():
-    """
-    Validate Hugging Face authentication.
-    Checks for .env file, loads environment variables, and attempts HF login if token exists.
-    """
-    logger = setup_logger(config, "hf_validation")
-    # Check for .env file
-    env_path = Path('.env')
-    if env_path.exists():
-        logger.info("Found .env file, loading environment variables")
-        load_dotenv()
-    else:
-        logger.warning("No .env file found. Fine if you're on Huggingface, but you need one to run locally on your PC.")
-    # Check for HF token
-    hf_token = os.getenv('HF_TOKEN')
-    if not hf_token:
-        logger.error("No HF_TOKEN found in environment variables")
-        return False
-    try:
-        # Attempt login
-        login(token=hf_token)
-        logger.info("Successfully authenticated with Hugging Face")
-        return True
-    except Exception as e:
-        logger.error(f"Failed to authenticate with Hugging Face: {str(e)}")
-        return False
-def load_config():
-    """Load configuration from yaml file"""
-    with open("main/config.yaml", "r") as f:
-        return yaml.safe_load(f)
-def create_app():
-    config = load_config()
-    logger = setup_logger(config, "main")
-    logger.info("Starting LLM API server")
-    app = FastAPI(
-        title="LLM API",
-        description="API for Large Language Model operations",
-        version=config["api"]["version"]
-    )
-    # Add CORS middleware
-    app.add_middleware(
-        CORSMiddleware,
-        allow_origins=config["api"]["cors"]["origins"],
-        allow_credentials=config["api"]["cors"]["credentials"],
-        allow_methods=["*"],
-        allow_headers=["*"],
-    )
-    # Initialize routes with config
-    init_router(config)
-    app.include_router(router, prefix=f"{config['api']['prefix']}/{config['api']['version']}")
-    logger.info("FastAPI application created successfully")
-    return app
-def test_locally():
-    """Run local tests for development and debugging"""
-    config = load_config()
-    logger = setup_logger(config, "test")
-    logger.info("Starting local tests")
-    api = LLMApi(config)
-    model_name = config["model"]["defaults"]["model_name"]
-    logger.info(f"Testing with model: {model_name}")
-    # Test download
-    logger.info("Testing model download...")
-    api.download_model(model_name)
-    logger.info("Download complete")
-    # Test initialization
-    logger.info("Initializing model...")
-    api.initialize_model(model_name)
-    logger.info("Model initialized")
-    # Test embedding
-    test_text = "Dette er en test av embeddings generering fra en teknisk tekst om HMS rutiner på arbeidsplassen."
-    logger.info("Testing embedding generation...")
-    embedding = api.generate_embedding(test_text)
-    logger.info(f"Generated embedding of length: {len(embedding)}")
-    logger.info(f"First few values: {embedding[:5]}")
-    # Test generation
-    test_prompts = [
-        "Tell me what happens in a nuclear reactor.",
-    ]
-    # Test regular generation
-    logger.info("Testing regular generation:")
-    for prompt in test_prompts:
-        logger.info(f"Prompt: {prompt}")
-        response = api.generate_response(
-            prompt=prompt,
-            system_message="You are a helpful assistant."
-        )
-        logger.info(f"Response: {response}")
-    # Test streaming generation
-    logger.info("Testing streaming generation:")
-    logger.info(f"Prompt: {test_prompts[0]}")
-    for chunk in api.generate_stream(
-            prompt=test_prompts[0],
-            system_message="You are a helpful assistant."
-    ):
-        print(chunk, end="", flush=True)
-    print("\n")
-    logger.info("Local tests completed")
-app = create_app()
-if __name__ == "__main__":
-    config = load_config()
-    #validate_hf()
-    if len(sys.argv) > 1 and sys.argv[1] == "test":
-        test_locally()
-    else:
-        uvicorn.run(
-            "main.app:app",
-            host=config["server"]["host"],
-            port=config["server"]["port"],
-            log_level="trace",
-            reload=True,
-            workers=1,
-            access_log=False,
-            use_colors=True
-        )

main/app.py CHANGED Viewed

@@ -1,42 +1,149 @@
 import sys
-print("Python Version:", sys.version)
-print("Starting application...")
 from fastapi import FastAPI
-print("FastAPI imported successfully")
-app = FastAPI(
-    title="Minimal Test API",
-    version="1.0.0",
-    default_response_class=None  # Disable automatic response serialization
-)
-print("FastAPI app created")
-@app.get("/")
-async def root():
-    return {"message": "Server is running"}
-@app.on_event("startup")
-async def startup_event():
-    print("FastAPI startup event triggered")
-print("Routes defined")
-if __name__ == "__main__":
-    print("Starting uvicorn server...")
-    import uvicorn
-    config = uvicorn.Config(
-        "main.app:app",
-        host="0.0.0.0",
-        port=7680,
-        workers=1,
-        log_level="info",
-        reload=False,
-        proxy_headers=False,
-        server_header=False,
-        date_header=False
     )
-    server = uvicorn.Server(config)
-    server.run()

+import yaml
 import sys
 from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+import uvicorn
+from .api import LLMApi
+from .routes import router, init_router
+from .utils.logging import setup_logger
+from huggingface_hub import login
+from pathlib import Path
+from dotenv import load_dotenv
+import os
+def validate_hf():
+    """
+    Validate Hugging Face authentication.
+    Checks for .env file, loads environment variables, and attempts HF login if token exists.
+    """
+    logger = setup_logger(config, "hf_validation")
+    # Check for .env file
+    env_path = Path('.env')
+    if env_path.exists():
+        logger.info("Found .env file, loading environment variables")
+        load_dotenv()
+    else:
+        logger.warning("No .env file found. Fine if you're on Huggingface, but you need one to run locally on your PC.")
+    # Check for HF token
+    hf_token = os.getenv('HF_TOKEN')
+    if not hf_token:
+        logger.error("No HF_TOKEN found in environment variables")
+        return False
+    try:
+        # Attempt login
+        login(token=hf_token)
+        logger.info("Successfully authenticated with Hugging Face")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to authenticate with Hugging Face: {str(e)}")
+        return False
+def load_config():
+    """Load configuration from yaml file"""
+    with open("main/config.yaml", "r") as f:
+        return yaml.safe_load(f)
+def create_app():
+    config = load_config()
+    logger = setup_logger(config, "main")
+    logger.info("Starting LLM API server")
+    app = FastAPI(
+        title="LLM API",
+        description="API for Large Language Model operations",
+        version=config["api"]["version"]
+    )
+    # Add CORS middleware
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=config["api"]["cors"]["origins"],
+        allow_credentials=config["api"]["cors"]["credentials"],
+        allow_methods=["*"],
+        allow_headers=["*"],
     )
+    # Initialize routes with config
+    init_router(config)
+    app.include_router(router, prefix=f"{config['api']['prefix']}/{config['api']['version']}")
+    logger.info("FastAPI application created successfully")
+    return app
+def test_locally():
+    """Run local tests for development and debugging"""
+    config = load_config()
+    logger = setup_logger(config, "test")
+    logger.info("Starting local tests")
+    api = LLMApi(config)
+    model_name = config["model"]["defaults"]["model_name"]
+    logger.info(f"Testing with model: {model_name}")
+    # Test download
+    logger.info("Testing model download...")
+    api.download_model(model_name)
+    logger.info("Download complete")
+    # Test initialization
+    logger.info("Initializing model...")
+    api.initialize_model(model_name)
+    logger.info("Model initialized")
+    # Test embedding
+    test_text = "Dette er en test av embeddings generering fra en teknisk tekst om HMS rutiner på arbeidsplassen."
+    logger.info("Testing embedding generation...")
+    embedding = api.generate_embedding(test_text)
+    logger.info(f"Generated embedding of length: {len(embedding)}")
+    logger.info(f"First few values: {embedding[:5]}")
+    # Test generation
+    test_prompts = [
+        "Tell me what happens in a nuclear reactor.",
+    ]
+    # Test regular generation
+    logger.info("Testing regular generation:")
+    for prompt in test_prompts:
+        logger.info(f"Prompt: {prompt}")
+        response = api.generate_response(
+            prompt=prompt,
+            system_message="You are a helpful assistant."
+        )
+        logger.info(f"Response: {response}")
+    # Test streaming generation
+    logger.info("Testing streaming generation:")
+    logger.info(f"Prompt: {test_prompts[0]}")
+    for chunk in api.generate_stream(
+            prompt=test_prompts[0],
+            system_message="You are a helpful assistant."
+    ):
+        print(chunk, end="", flush=True)
+    print("\n")
+    logger.info("Local tests completed")
+app = create_app()
+if __name__ == "__main__":
+    config = load_config()
+    #validate_hf()
+    if len(sys.argv) > 1 and sys.argv[1] == "test":
+        test_locally()
+    else:
+        uvicorn.run(
+            "main.app:app",
+            host=config["server"]["host"],
+            port=config["server"]["port"],
+            log_level="trace",
+            reload=True,
+            workers=1,
+            access_log=False,
+            use_colors=True
+        )

requirements.txt CHANGED Viewed

@@ -1,2 +1,49 @@
-fastapi==0.115.5
-uvicorn==0.32.1

+annotated-types==0.7.0
+anyio==4.6.2.post1
+certifi==2024.8.30
+charset-normalizer==3.4.0
+click==8.1.7
+exceptiongroup==1.2.2
+fastapi==0.115.6
+filelock==3.16.1
+fsspec==2024.10.0
+h11==0.14.0
+huggingface-hub==0.26.3
+idna==3.10
+Jinja2==3.1.4
+MarkupSafe==3.0.2
+mpmath==1.3.0
+networkx==3.2.1
+numpy==2.0.2
+nvidia-cublas-cu12==12.4.5.8
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+nvidia-cuda-runtime-cu12==12.4.127
+nvidia-cudnn-cu12==9.1.0.70
+nvidia-cufft-cu12==11.2.1.3
+nvidia-curand-cu12==10.3.5.147
+nvidia-cusolver-cu12==11.6.1.9
+nvidia-cusparse-cu12==12.3.1.170
+nvidia-nccl-cu12==2.21.5
+nvidia-nvjitlink-cu12==12.4.127
+nvidia-nvtx-cu12==12.4.127
+packaging==24.2
+psutil==6.1.0
+pydantic==2.10.3
+pydantic_core==2.27.1
+python-dotenv==1.0.1
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.32.3
+safetensors==0.4.5
+sniffio==1.3.1
+starlette==0.41.3
+sympy==1.13.1
+tokenizers==0.20.3
+torch==2.5.1
+tqdm==4.67.1
+transformers==4.46.3
+triton==3.1.0
+typing_extensions==4.12.2
+urllib3==2.2.3
+uvicorn==0.32.1