saifeddinemk committed
Commit 5aaa320 · Parent(s): 79b52c8

Fixed app v2

Files changed (1):
  1. app.py +69 -73

app.py CHANGED
@@ -1,86 +1,82 @@
- from fastapi import FastAPI
  from pydantic import BaseModel
  from typing import List
- from transformers import pipeline
-
- # Initialize FastAPI app
  app = FastAPI()
-
- unmasker = pipeline("fill-mask", model="s2w-ai/CyBERTuned-SecurityLLM")
-
- # Define request model
- class LogRequest(BaseModel):
-     log: str
-
- # Define response model
- class ThreatResponse(BaseModel):
-     log: str
-     prompt: str
-     #threat_level_predictions: List[str]
-     #threat_type_predictions: List[str]
-     #detected_threat_level: str
-     #detected_threat_type: str
-     pred : List[object]
-
- # Function to predict masked words for threat level and type
- def predict_threat(log: str, unmasker, topk=5) -> List[List[str]]:
-     # Create prompt with masked tokens for threat level and threat type
-     prompt = f"{log} Source Ip : <mask> Dest Ip : <mask> , Threat Level : <mask> , Threat Type : <mask>"
-
-     # Predict top options for each <mask>
-     predictions = unmasker(prompt, top_k=topk)
-
-     # Extract top predictions for each <mask>
-     #threat_level_predictions = [pred["token_str"].strip() for pred in predictions[:topk]]
-     #threat_type_predictions = [pred["token_str"].strip() for pred in predictions[topk:2*topk]]
-
-     return predictions
-
- def get_maximum_predictions(data):
-     # Initialize list to store maximum values for each prediction array
-     max_predictions = []
-
-     # Loop over each prediction array in "pred"
-     for index, predictions in enumerate(data["pred"]):
-         max_score = float('-inf')
-         max_prediction = None
-
-         # Find the prediction with the highest score in the current array
-         for pred in predictions:
-             if pred["score"] > max_score:
-                 max_score = pred["score"]
-                 max_prediction = pred["token_str"].strip()
-
-         # Append the result with the max prediction for this array
-         max_predictions.append({
-             f"max_prediction_{index + 1}": max_prediction
-         })
-
-     return max_predictions
-
- # Get result
-
- # FastAPI endpoint for detecting threat level and type
- @app.post("/detect_threat", response_model=ThreatResponse)
- async def detect_threat(log_request: LogRequest):
-     log = log_request.log
-
-     # Predict the threat level and type for the given log entry
-     predictions = predict_threat(log, unmasker)
-
-     # Extract top predictions for threat level and type
-     ##threat_level_predictions = predictions[0] if len(predictions) > 0 else ["Unknown"]
-     ## threat_type_predictions = predictions[1] if len(predictions) > 1 else ["Unknown"]
-
-     # Use the top prediction as the most likely threat level and type
-     ##detected_threat_level = threat_level_predictions[0] if threat_level_predictions else "Unknown"
-     #detected_threat_type = threat_type_predictions[0] if threat_type_predictions else "Unknown"
-
-     # Prepare response
-     response = ThreatResponse(
-         log=log,
-         prompt=f"{log} Source Ip : <mask> \n Dest Ip : <mask> \n , Threat Level : <mask> \n Threat Type : <mask>",
-         pred=predictions
-     )
-
-     return response
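For reference, the removed version above exposed a fill-mask `/detect_threat` endpoint taking a single `log` string. A minimal sketch of how a client would have called it (the host, port, and example log line are assumptions, not part of the commit):

```python
# Hypothetical client call against the old /detect_threat endpoint,
# assuming the app was served locally with uvicorn on port 8000.
import requests

resp = requests.post(
    "http://localhost:8000/detect_threat",
    json={"log": "Failed password for root from 203.0.113.7 port 22"},  # example log line
)
print(resp.json()["pred"])  # list of fill-mask predictions returned by the old API
```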
 
+ import torch
+ import json
+ from fastapi import FastAPI, HTTPException
  from pydantic import BaseModel
+ from transformers import AutoModelForCausalLM, AutoTokenizer
  from typing import List

+ # Initialize the FastAPI app
  app = FastAPI()

+ # Model and tokenizer paths and loading
+ model_path = "WhiteRabbitNeo/WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B"
+ output_file_path = "/home/user/conversations.jsonl"
+
+ model = AutoModelForCausalLM.from_pretrained(
+     model_path,
+     torch_dtype=torch.float16,
+     device_map="auto",
+     load_in_4bit=False,
+     trust_remote_code=False,
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+
+ # Function to generate text
+ def generate_text(instruction):
+     tokens = tokenizer.encode(instruction)
+     tokens = torch.LongTensor(tokens).unsqueeze(0)
+     tokens = tokens.to("cuda")
+
+     instance = {
+         "input_ids": tokens,
+         "top_p": 1.0,
+         "temperature": 0.75,
+         "generate_len": 2048,
+         "top_k": 50,
+     }
+
+     length = len(tokens[0])
+     with torch.no_grad():
+         rest = model.generate(
+             input_ids=tokens,
+             max_length=length + instance["generate_len"],
+             use_cache=True,
+             do_sample=True,
+             top_p=instance["top_p"],
+             temperature=instance["temperature"],
+             top_k=instance["top_k"],
+             num_return_sequences=1,
+             pad_token_id=tokenizer.eos_token_id,
+         )
+     output = rest[0][length:]
+     string = tokenizer.decode(output, skip_special_tokens=True)
+     return f"{string}"
+
+ # Data model for FastAPI input
+ class UserInput(BaseModel):
+     conversation: str
+     user_input: str
+
+ @app.post("/generate/")
+ async def generate_response(user_input: UserInput):
+     try:
+         # Construct the prompt
+         conversation = user_input.conversation
+         llm_prompt = f"{conversation}{user_input.user_input}<|im_end|>\n<|im_start|>assistant\nSure! Let me provide a complete and a thorough answer to your question, with functional and production-ready code.\n"
+
+         # Generate response
+         answer = generate_text(llm_prompt)
+
+         # Update conversation for future requests
+         updated_conversation = f"{llm_prompt}{answer}<|im_end|>\n<|im_start|>user\n"
+
+         return {
+             "response": answer,
+             "updated_conversation": updated_conversation
+         }
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ # Run the app
+ # To start the server, use the command: uvicorn filename:app --host 0.0.0.0 --port 8000
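The new `/generate/` endpoint expects the caller to manage the running conversation and to feed `updated_conversation` back in on the next turn. A minimal client sketch under stated assumptions: the host/port, the initial ChatML-style `<|im_start|>` preamble, and the example questions are illustrative and not part of the commit.

```python
# Hypothetical client for the /generate/ endpoint added in this commit,
# assuming the app runs locally via: uvicorn app:app --host 0.0.0.0 --port 8000
import requests

# Assumed ChatML-style preamble; the commit leaves the initial conversation
# string entirely up to the caller.
conversation = (
    "<|im_start|>system\nYou are a helpful coding assistant.<|im_end|>\n"
    "<|im_start|>user\n"
)

# First turn: send the conversation so far plus the new user input.
payload = {"conversation": conversation, "user_input": "Write a function that parses an nginx access log."}
first = requests.post("http://localhost:8000/generate/", json=payload).json()
print(first["response"])

# Second turn: reuse updated_conversation returned by the server so the
# model sees the full history.
payload = {
    "conversation": first["updated_conversation"],
    "user_input": "Now return the parsed fields as a dataclass.",
}
second = requests.post("http://localhost:8000/generate/", json=payload).json()
print(second["response"])
```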