File size: 3,355 Bytes
f3232c4 90114d3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
---
library_name: transformers
tags: []
---
## Inference
```python
import random
import json
def generate_random_data():
    """Build one random security-dashboard snapshot to feed the model.

    Returns:
        dict: Top-level integer counters plus two nested dicts:
            ``Confirmed_Vulnerabilities`` (integer count per category) and
            ``Trend_Percentages`` (floats rounded to two decimals).
    """
    return {
        "Users": random.randint(5, 20),
        "Groups": random.randint(10, 30),
        "Projects/Repositories": random.randint(4000, 5000),
        "Scans": random.randint(40, 100),
        "Lines_of_Code": random.randint(25000000, 35000000),
        "Vulnerabilities": random.randint(7000, 8000),
        "False_Positives": random.randint(10, 30),
        "True_Positives": random.randint(150, 200),
        "Confirmed_Vulnerabilities": {
            "Secret": random.randint(0, 200),
            "PII": random.randint(0, 200),
            "SAST": random.randint(0, 200),
            "SCA": random.randint(0, 200),
            "IaC": random.randint(0, 200),
            "Container": random.randint(0, 200),
            "API": random.randint(0, 200),
            "Compliance": random.randint(0, 200),
            "Malware": random.randint(0, 225),
        },
        # NOTE(review): "Container" and "API" have counts above but no trend
        # entry here; left as-is because the expected schema is not visible.
        "Trend_Percentages": {
            "Scans": round(random.uniform(-100, 100), 2),
            # The upper bound used to be -100, which pinned these two trends
            # to a constant -100.0; use the same +/-100 range as "Scans".
            "Lines_of_Code": round(random.uniform(-100, 100), 2),
            "Vulnerabilities": round(random.uniform(-100, 100), 2),
            "False_Positives": round(random.uniform(-100, 1000), 2),
            "True_Positives": round(random.uniform(-100, 100), 2),
            "Secret": round(random.uniform(-100, 1500), 2),
            "PII": round(random.uniform(-100, 1500), 2),
            "SAST": round(random.uniform(-100, 1500), 2),
            "SCA": round(random.uniform(-100, 1500), 2),
            "IaC": round(random.uniform(-100, 1500), 2),
            "Compliance": round(random.uniform(-100, 1500), 2),
            "Malware": round(random.uniform(-100, 1500), 2),
        },
    }
def json_to_semi_structured_text(data):
    """
    Convert JSON data into a semi-structured text format for training T5-Flan.

    Args:
        data (dict | str): The JSON object to convert, or a string holding
            it. Strings are parsed as JSON first; if that fails, single
            quotes are swapped for double quotes and parsing is retried.
    Returns:
        str: Semi-structured text representation of the JSON, one
            ``Key: value`` line per entry; nested dicts become a ``Key:``
            header followed by ``- sub_key: sub_value`` lines.
    Raises:
        ValueError: If ``data`` is a string that cannot be parsed as JSON.
    """
    # Only strings need parsing; a dict is used as-is instead of relying on a
    # swallowed AttributeError from calling ``.replace`` on it.
    if isinstance(data, str):
        try:
            data = json.loads(data)
        except json.JSONDecodeError:
            # Fallback for single-quoted, dict-repr style payloads. Done only
            # after strict parsing fails so valid JSON containing apostrophes
            # is not corrupted by the quote swap.
            try:
                data = json.loads(data.replace("'", '"'))
            except json.JSONDecodeError as err:
                raise ValueError("data is not a valid JSON string") from err

    text_output = []
    for key, value in data.items():
        if isinstance(value, dict):
            # Nested dictionaries: the header keeps its underscores, unlike
            # the direct pairs below; the output format is left unchanged.
            text_output.append(f"{key.capitalize()}:")
            for sub_key, sub_value in value.items():
                text_output.append(f"- {sub_key}: {sub_value}")
        else:
            # Direct key-value pairs
            text_output.append(f"{key.replace('_', ' ').capitalize()}: {value}")
    return "\n".join(text_output)
```
```python
# Load model directly
import time

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("Mr-Vicky-01/T5-data-reasoning")
model = AutoModelForSeq2SeqLM.from_pretrained("Mr-Vicky-01/T5-data-reasoning")

# `device` was never defined in the original snippet (NameError); pick the GPU
# when one is available and fall back to CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Move the model once, outside the timed region, so the reported time covers
# tokenization and generation only.
model.to(device)

# Build the prompt from a random dashboard snapshot rendered as text.
data_inp = json_to_semi_structured_text(generate_random_data())
inp = "Summarize and reason: " + data_inp

start = time.time()
inputs = tokenizer(inp, return_tensors="pt", truncation=True).to(device)
# Greedy decoding (do_sample=False), capped at 256 tokens.
outputs = model.generate(**inputs, max_length=256, do_sample=False)
# Drop special tokens so only the generated text is printed.
answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(answer)
end = time.time()
print(f"Time taken: {end - start}")
print('\n\n')
print("input: "+inp)
```