import os
import re

from huggingface_hub import InferenceClient
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from sentence_transformers.cross_encoder import CrossEncoder
from transformers import pipeline

def setupDB(domain, hasLLM):
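    """Build the retrieval context for one domain: a two-slot conversation
    history, a cross-encoder for script-intent scoring, the support and
    instruction Chroma stores, and optional per-PDF stores.
    Returns everything bundled in a single `para` dict."""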
    history = ["", ""]  # history[0] = previous query, history[1] = previous answer
    crossmodel = CrossEncoder("cross-encoder/stsb-distilroberta-base")
support_db = nandGetChroma(domain)
insts_db = nandGetChroma("insts")
pdf_dbs = []
if domain == 'en':
pdfs = [] #"pdf_0em", "pdf_1em", "pdf_2em", "pdf_3em","pdf_4em"]
for onepdf in pdfs:
pdfdb = nandGetChroma(onepdf)
pdf_dbs.append(pdfdb)
    para = {
        'history': history,
        'disnum': 10,
        'domain': domain,
        'crossmodel': crossmodel,
        'insts_db': insts_db,
        'support_db': support_db,
        'pdf_dbs': pdf_dbs,
        'hasLLM': hasLLM,
    }
return para
def remapScore(domain, inscore):
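    """Remap a Chroma similarity distance (lower is better) onto a 0-100
    display score, with per-domain quadratic coefficients."""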
    xin = 1 - inscore
    # Quadratic remap y = a*x^2 + b*x; both curves reach 100 at xin = 1
    if domain == 'ch':
        a, b = -0.2, 1.2
    else:
        a, b = -1.2, 2.2
    y = a * xin * xin + b * xin
    return int(y * 100)
def process_query(iniquery, para):
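    """Answer one user query: translate Chinese input to English, score
    script intent with the cross-encoder, retrieve matching documents from
    the Chroma stores, and, when hasLLM is set, build a prompt and call the
    LLM. Returns (itisscript, answer_text)."""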
query = re.sub("<br>", "", iniquery)
ch2en, query = toEn(query)
if ch2en:
print(f"Received from connected users : {query}")
else:
print(f"Received from connected users : {query}", end='')
disnum = para['disnum']
domain = para['domain']
history = para['history']
crossmodel = para['crossmodel']
insts_db = para['insts_db']
support_db = para['support_db']
pdf_dbs = para['pdf_dbs']
hasLLM = para['hasLLM']
ret = ""
needScriptScores = crossmodel.predict([["write a perl ECO script", query]])
print(f"THE QUERY SCORE for creating eco script: score={needScriptScores[0]}")
allapis = []
threshold = 0.45
itisscript = 0
if needScriptScores[0] > threshold:
itisscript = 1
print(f"THE QUERY REQUIRES CREATING AN ECO SCRIPT score={needScriptScores[0]} > {threshold}")
retinsts = insts_db.similarity_search_with_score(query, k=10)
accu = 0
for inst in retinsts:
instdoc = inst[0]
instscore = inst[1]
instname = instdoc.metadata['source']
otherfile = re.sub("^insts", "src_en", instname)
otherfile = re.sub("\.\d+", "", otherfile)
if not otherfile in allapis:
allapis.append(otherfile)
modfile = otherfile.replace("\\", "/")
apisize = os.path.getsize(modfile)
accu += apisize
print(f"INST: {instname} SCORE: {instscore} API-size: {apisize} Accu: {accu}")
results = []
docs = support_db.similarity_search_with_score(query, k=8)
for doc in docs:
results.append([doc[0], doc[1]])
    for onepdfdb in pdf_dbs:
        pdocs = onepdfdb.similarity_search_with_score(query, k=8)
        for doc in pdocs:
            results.append([doc[0], doc[1] + 0.2])  # small distance penalty so PDF hits rank below curated docs
results.sort(key=lambda x: x[1])
docnum = len(results)
index = 1
for ii in range(docnum):
doc = results[ii][0]
source = doc.metadata['source']
path = source #source.replace("\\", "/")
#print(f"path={path}")
if path in allapis:
print(f"dont use path={path}, it's in instruction list")
continue
prefix = "Help:"
if re.search("api\.", source):
prefix = "API:"
elif re.search("man\.", source):
prefix = "Manual:"
elif re.search("\.pdf$", source):
prefix = "PDF:";
score = remapScore(domain, results[ii][1])
retcont = doc.page_content
if re.search("\.pdf$", source):
page = doc.metadata['page'] + 1
subpage = doc.metadata['subpage']
retcont += f"\n<a target='_blank' href='/AI/{path}#page={page}'>PDF{page} {subpage}</a>\n"
ret += f"Return {index} ({score}) {prefix} {retcont}\n"
if len(ret) > 6000:
break
index += 1
if index > disnum:
break
if hasLLM:
context = "Context information is below\n---------------------\n"
if len(allapis):
context += scriptExamples()
for oneapi in allapis:
modfile = oneapi.replace("\\", "/")
cont = GetContent(modfile)
cont = re.sub("</h3>", " API Detail:", cont)
cont = re.sub('<.*?>', '', cont)
cont = re.sub('Examples:.*', '', cont, flags=re.DOTALL)
context += cont
else:
context += "GOF is abreviation of Gats On the Fly, it is netlist process platform.\n";
context += "ECO is abbrevation of engineering change order.\n";
context += "LEC is abbrevation of logic equivalence checking.\n";
context += "Netlist ECO is to change netlist incrementally by tool or manually.\n";
context += "Automatic ECO is to use GOF ECO to do functional netlist ECO automatically.\n";
context += ret
prompt = f"{context}\n"
prompt += "------------------------------------------\n"
if len(allapis):
prompt += "Given the context information and not prior knowledge, creat a Perl ECO script by following the format and sequence in the script examples provided above.\n"
#prompt += "1. Following the format in the script examples provided above.\n"
#prompt += "2. Following the API sequence in the script examples above, for instance, APIs get_spare_cells and map_spare_cells should be after fix_design.\n"
else:
prompt += "Given the context information and not prior knowledge, answer the query.\n"
prompt += f"Query: {query}\n"
llmout = llmGenerate(prompt)
history[0] = query
history[1] = llmout
#return llmout
outlen = len(llmout)
prolen = len(prompt)
print(f"Prompt len: {prolen} LLMOUT len: {outlen} itisscript: {itisscript}")
        return itisscript, llmout
        # Debug variant (unreachable): returns the prompt together with the LLM output
        # allret = "LLM_OUTPUT_START:" + llmout + "\nEND OF LLM OUTPUT\n" + prompt
        # return itisscript, allret
    return itisscript, ret
def toEn(intxt):
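    """Translate Chinese input to English with Helsinki-NLP/opus-mt-zh-en and
    repair common domain-term mistranslations; return (1, translated), or
    (0, original) if no Chinese characters are found."""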
pattern = re.compile(r'[\u4e00-\u9fff]+')
if pattern.search(intxt):
translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-zh-en")
ini_text = translator(intxt, max_length=500)[0]['translation_text']
out_text = re.sub("ECO foot", "ECO Script", ini_text)
out_text = re.sub("web-based", "netlist", out_text)
out_text = re.sub(r"\bweb\b", "netlist", out_text)
out_text = re.sub(r"\bwebsheet\b", "netlist", out_text)
out_text = re.sub(r"\bweblists?\b", "netlist", out_text)
print(f"AFTER RESULT: {out_text}")
return 1, out_text
return 0, intxt
def nandGetChroma(domain):
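    """Open the persisted Chroma store for `domain` with the sentence-transformer
    embedding model registered for it in nandState()."""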
models,allState = nandState()
chdb = allState[domain]['chroma']
print(f"domain: {domain} has chroma dir {chdb}")
model_ind = allState[domain]['model']
model_name = models[model_ind]
embedding_function = SentenceTransformerEmbeddings(model_name=model_name)
chroma_db = Chroma(persist_directory=chdb, embedding_function=embedding_function)
return chroma_db
def nandState():
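    """Return the embedding-model table and the per-domain state (source dir,
    status JSON, Chroma dir, embedding model key, chunking flag), including
    the twelve pdf_{n}em sub-stores."""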
models = {'em': "all-MiniLM-L6-v2",
'en': "all-mpnet-base-v2",
'ch': "shibing624/text2vec-base-chinese-sentence"}
    # chunk=1 splits large PDF pages and Chinese pages into smaller (~1000-byte) chunks
allState = {'insts':{'cstate':{},'pstate':{},'dir':'insts','json':'filestatus.insts.json','chroma':'chroma_db_insts','model':'en','chunk':0},
'en':{'cstate':{},'pstate':{},'dir':'src_en','json':'filestatus.english.json','chroma':'chroma_db_en','model':'en','chunk':0},
'ch':{'cstate':{},'pstate':{},'dir':'src_ch','json':'filestatus.chinese.json','chroma':'chroma_db_ch','model':'ch','chunk':1}
}
for ind in range(12):
name = f"pdf_{ind}em"
allState[name] = {'cstate':{},'pstate':{},'dir':f"pdf_sub{ind}",'json':f"filestatus.{name}.json",'chroma':f"chroma_db_{name}",'model':'em','chunk':1}
return models, allState
def formatPrompt(message, history):
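    """Fold the previous query/answer pair into the current message so the LLM
    can resolve follow-up questions; return the message unchanged when there
    is no history yet."""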
    if history[0]:
        prompt = "Create a new query based on the previous query/answer pair and the current query:\n"
        prompt += f"Previous query: {history[0]}\n"
        prompt += f"Previous answer: {history[1]}\n"
        prompt += f"Current query: {message}\n"
        prompt += "New query:"
        return prompt
    return message
def llmNewQuery(prompt, history):
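    """Rewrite the prompt into a standalone query using the conversation history."""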
newpend = formatPrompt(prompt, history)
newquery = llmGenerate(newpend)
return newquery
def llmGenerate(prompt, temperature=0.001, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0):
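    """Stream a completion for `prompt` from Mistral-7B-Instruct-v0.2 via the
    Hugging Face Inference API and return the concatenated output text."""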
#temperature = float(temperature)
#if temperature < 1e-2:
# temperature = 1e-2
top_p = float(top_p)
generate_kwargs = dict(
temperature=temperature,
max_new_tokens=max_new_tokens,
top_p=top_p,
repetition_penalty=repetition_penalty,
do_sample=True,
seed=42,
)
llmclient = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
stream = llmclient.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
output = ""
for response in stream:
output += response.token.text
#yield output
return output
def thoseRemove():
those = ["redundant"]
return those
def GetContent(file):
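    """Read `file` and return its full text content."""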
fcont = ""
with open(file) as f:
fcont = f.read()
return fcont
def scriptExamples():
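    """Return annotated GOF Perl ECO script examples that are prepended to the
    LLM context when the query asks for an ECO script."""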
exp = """
#The first ECO script example for manual ECO:
use strict;
setup_eco("eco_example");
read_library("tsmc.5nm.lib");
read_design("-imp", "implementation.gv");
set_top("topmod");
change_pin("u_abc/state_reg_0_/D", "INVX1", "", "-");
change_pin("u_abc/state_reg_1_/D", "INVX1", "", "-");
change_pin("u_abc/state_reg_2_/D", "INVX1", "", "-");
report_eco(); # ECO report
check_design();
write_verilog("eco_verilog.v");# Write out ECO result in Verilog
#End of the manual ECO script example
#The second ECO script example for automatic ECO:
use strict;
setup_eco("eco_example");# Setup ECO name
read_library("tsmc.5nm.lib");# Read in standard library
# SVF files are optional, best to be used when the design involves multibit flops
#read_svf("-ref", "reference.svf.txt");
#read_svf("-imp", "implementation.svf.txt");
read_design("-ref", "reference.gv");
read_design("-imp", "implementation.gv");
set_top("topmod");# Set the top module
# Preserve DFT Test Logic
set_ignore_output("scan_out*");
set_pin_constant("scan_enable", 0);
set_pin_constant("scan_mode", 0);
fix_design();
report_eco(); # ECO report
check_design();
write_verilog("eco_verilog.v");# Write out ECO result in Verilog
run_lec(); # Run GOF LEC to generate Formality help files
#End of automatic ECO script example
#The third ECO script example is for automatic metal only ECO:
use strict;
setup_eco("eco_example");# Setup ECO name
read_library("tsmc.5nm.lib");# Read in standard library
# SVF files are optional, best to be used when the design involves multibit flops
#read_svf("-ref", "reference.svf.txt");
#read_svf("-imp", "implementation.svf.txt");
read_design("-ref", "reference.gv");# Read in Reference Netlist
read_design("-imp", "implementation.gv");
set_top("topmod");# Set the top module
set_ignore_output("scan_out*");
set_pin_constant("scan_enable", 0);
set_pin_constant("scan_mode", 0);
read_lef("tsmc.lef"); # Read LEF
read_def("topmod.def"); # Read Design Exchange Format file
fix_design(); # Must run before get_spare_cells and map_spare_cells
get_spare_cells("*/*_SPARE*");
map_spare_cells();
report_eco(); # ECO report
check_design();# Check if the ECO causes any issue, like floating nets
write_verilog("eco_verilog.v");# Write out ECO result in Verilog
write_perl("eco_result.pl");# Write out result in Perl script
run_lec(); # Run GOF LEC to generate Formality help files
#End of automatic ECO script example
#The fourth ECO script example is the same as the third ECO script, except fix_design
# list_file option to load in the ECO points list file converted from RTL-to-RTL LEC result
fix_design("-list_file", "the_eco_points.txt");
#The 5th ECO script example is the same as the 3rd ECO script, except fix_design
# Enable flatten mode ECO. The default mode is hierarchical. The flatten mode is for small fixes whose
# changes cross module boundaries
fix_design("-flatten");
#The 6th ECO script is similar to the third ECO script, but it dumps formality help file after LEC
run_lec(); # Run GOF LEC to generate Formality help files
write_compare_points("compare_points.report");
write_formality_help_files("fm_dir/formality_help"); # formality_help files are generated in fm_dir folder
#The 7th ECO script is similar to the third ECO script, but it uses gate array spare cells
fix_design(); # Must run before get_spare_cells and map_spare_cells
# Enable Gate Array Spare Cells Metal Only ECO Flow, map_spare_cells will map to Gate Array Cells only
get_spare_cells("-gate_array", "G*", "-gate_array_filler", "GFILL*|GDCAP*");
map_spare_cells();
#The 8th ECO script is similar to the third ECO script, but it uses only deleted gates or freed up gates in ECO as spare cells
fix_design(); # Must run before get_spare_cells and map_spare_cells
get_spare_cells("-addfreed");
map_spare_cells();
#The 9th ECO script is manual ECO, find all memory hierarchically and tie the pin TEST_SHIFT of memory to net "TEST_EN"
use strict;
setup_eco("eco_example");
read_library("tsmc.3nm.lib");
read_design("-imp", "from_backend.gv");
set_top("topmod");
# Get all memories hierarchically, instance naming, "U_HMEM*"
my @mems = get_cells("-hier", "U_HMEM*");
foreach my $mem (@mems){
change_pin("$mem/TEST_SHIFT", "TEST_EN");
}
report_eco(); # ECO report
check_design();
write_verilog("mem_eco.v");
"""
return exp
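
# A minimal usage sketch, assuming the persisted 'en' Chroma stores and the
# embedding/cross-encoder models are already available locally; hasLLM=0
# skips the LLM call and returns raw retrieval results. The query text is
# only an illustration.
if __name__ == "__main__":
    para = setupDB("en", 0)
    itisscript, answer = process_query("What is a netlist ECO?", para)
    print(f"itisscript={itisscript}")
    print(answer)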