|
--- |
|
library_name: transformers |
|
tags: [] |
|
--- |
|
|
|
# Inference

The first snippet defines helper functions that generate synthetic dashboard data and flatten it into plain text; the second snippet loads the fine-tuned model and runs generation on that data.
|
|
|
```python |
|
import random |
|
|
|
def generate_random_data():
    """Build one randomized snapshot of security-dashboard metrics.

    Returns:
        dict: Top-level scalar counts (users, scans, lines of code, ...)
        plus two nested dicts:
          - "Confirmed_Vulnerabilities": per-category finding counts.
          - "Trend_Percentages": per-metric percentage change, rounded
            to 2 decimal places.
    """
    return {
        "Users": random.randint(5, 20),
        "Groups": random.randint(10, 30),
        "Projects/Repositories": random.randint(4000, 5000),
        "Scans": random.randint(40, 100),
        "Lines_of_Code": random.randint(25000000, 35000000),
        "Vulnerabilities": random.randint(7000, 8000),
        "False_Positives": random.randint(10, 30),
        "True_Positives": random.randint(150, 200),
        "Confirmed_Vulnerabilities": {
            "Secret": random.randint(0, 200),
            "PII": random.randint(0, 200),
            "SAST": random.randint(0, 200),
            "SCA": random.randint(0, 200),
            "IaC": random.randint(0, 200),
            "Container": random.randint(0, 200),
            "API": random.randint(0, 200),
            "Compliance": random.randint(0, 200),
            "Malware": random.randint(0, 225)
        },
        "Trend_Percentages": {
            "Scans": round(random.uniform(-100, 100), 2),
            # BUG FIX: these two ranges were uniform(-100, -100), a degenerate
            # interval that always produced exactly -100.0; use (-100, 100)
            # like the neighboring Scans/True_Positives entries.
            "Lines_of_Code": round(random.uniform(-100, 100), 2),
            "Vulnerabilities": round(random.uniform(-100, 100), 2),
            "False_Positives": round(random.uniform(-100, 1000), 2),
            "True_Positives": round(random.uniform(-100, 100), 2),
            "Secret": round(random.uniform(-100, 1500), 2),
            "PII": round(random.uniform(-100, 1500), 2),
            "SAST": round(random.uniform(-100, 1500), 2),
            "SCA": round(random.uniform(-100, 1500), 2),
            "IaC": round(random.uniform(-100, 1500), 2),
            "Compliance": round(random.uniform(-100, 1500), 2),
            "Malware": round(random.uniform(-100, 1500), 2),
            # NOTE(review): "Container" and "API" exist in
            # Confirmed_Vulnerabilities but have no trend entry here —
            # confirm whether that omission is intentional.
        }
    }
|
|
|
def json_to_text(data, prefix=""):
    """Flatten a (possibly nested) dict into a comma-separated sentence.

    Scalar entries render as "<prefix><key> is <value>"; nested dicts
    recurse with "<key> of " appended to the prefix, so
    {"a": 1, "b": {"c": 2}} becomes "a is 1, b of c is 2".

    Args:
        data (dict): The JSON object to convert.
        prefix (str): Prefix carried into nested keys during recursion.

    Returns:
        str: Simplified text representation of the JSON.
    """
    return ", ".join(
        json_to_text(value, prefix=f"{prefix}{key} of ")
        if isinstance(value, dict)
        else f"{prefix}{key} is {value}"
        for key, value in data.items()
    )
|
``` |
|
|
|
```python |
|
# Load model directly
import time

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = "Mr-Vicky-01/gpt-data-reasoning_1"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
finetuned_model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

# Synthetic dashboard metrics from the helper defined above.
random_data = generate_random_data()

alpaca_prompt = f"""Below is an instruction that provides a data analysis task. Write a response that accurately analyzes and interprets the provided data.

### Instruction:
{json_to_text(random_data)}

### Response:
"""

start = time.time()

# Tokenize the prompt and move model + inputs to GPU when available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
finetuned_model.to(device)
inputs = tokenizer(alpaca_prompt, return_tensors="pt", truncation=True).input_ids.to(device)

# Increase max_new_tokens if needed
generated_ids = finetuned_model.generate(
    inputs,
    max_new_tokens=256,
    top_p=0.95,
    top_k=2,
    temperature=0.2,
    do_sample=True,
    pad_token_id=50259,
    eos_token_id=50259,
    num_return_sequences=1,
)

print(str(random_data))
print("\n")
# Keep only the model's answer: text after the response marker, before <eos>.
decoded = tokenizer.decode(generated_ids[0])
print(decoded.split('### Response:')[1].split('<eos>')[0].strip())

end = time.time()
print(f'time taken:{end - start}')
|
``` |
|
|