Change Liao
committed on
Commit
·
0952648
1
Parent(s):
94b4e2b
update 教學jupyter notebook
Browse files- Langchain_demo.ipynb +126 -4
Langchain_demo.ipynb
CHANGED
@@ -11,8 +11,27 @@
|
|
11 |
{
|
12 |
"cell_type": "markdown",
|
13 |
"id": "0e42d7d7-8815-4c76-ad6c-f5d09719e17b",
|
14 |
-
"metadata": {
|
|
|
|
|
15 |
"source": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
"### pip install everything\n",
|
17 |
"我會提供我的 requirements.txt, 讓大家安裝相同的 virtualenv; 當然也可以使用其他的 virtual environment"
|
18 |
]
|
@@ -1385,7 +1404,70 @@
|
|
1385 |
"id": "225fbc10-a54f-4a01-b580-592437b55234",
|
1386 |
"metadata": {},
|
1387 |
"outputs": [],
|
1388 |
-
"source": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1389 |
},
|
1390 |
{
|
1391 |
"cell_type": "markdown",
|
@@ -1405,7 +1487,33 @@
|
|
1405 |
"metadata": {},
|
1406 |
"outputs": [],
|
1407 |
"source": [
|
1408 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1409 |
]
|
1410 |
},
|
1411 |
{
|
@@ -1426,7 +1534,21 @@
|
|
1426 |
"metadata": {},
|
1427 |
"outputs": [],
|
1428 |
"source": [
|
1429 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1430 |
]
|
1431 |
}
|
1432 |
],
|
|
|
11 |
{
|
12 |
"cell_type": "markdown",
|
13 |
"id": "0e42d7d7-8815-4c76-ad6c-f5d09719e17b",
|
14 |
+
"metadata": {
|
15 |
+
"jp-MarkdownHeadingCollapsed": true
|
16 |
+
},
|
17 |
"source": [
|
18 |
+
"### 寫在前面\n",
|
19 |
+
"Gen-AI 是一種可以幫你省力, 但又不能完全依賴的工具. AI的三種時代:\n",
|
20 |
+
"* ANI: 弱人工智能, 能幫人類但是比人類弱\n",
|
21 |
+
"* AGI: 跟人類做得一樣好\n",
|
22 |
+
"* ASI: 做得比人還好\n",
|
23 |
+
"\n",
|
24 |
+
"現在是ANI 時代, 所以, 你的客戶可能對Gen-AI 有錯誤的期待. 以為它可以完美地取代掉人類, 或是會得到100%正確的結果, 這些都是不對的.\n",
|
25 |
+
"\n",
|
26 |
+
"正確的態度是:\n",
|
27 |
+
"你把它當成是一個 `不會累而且很厲害的社會新鮮人` , 可以做很多事, 但產出的結果你一定要多看一下, 多驗證一下. 這個觀念, 你需要去訓練跟教育你的客戶.\n",
|
28 |
+
"\n",
|
29 |
+
"### 為啥要弄成程式來做Gen-AI?\n",
|
30 |
+
"已經有很多整合好Gen-AI的app, 大家也都自己有用ChatGPT, 那為啥還要在程式層級去使用?\n",
|
31 |
+
"答案: \n",
|
32 |
+
"* 整合到現有開發的系統\n",
|
33 |
+
"* Local LLM(Gen-AI) 的使用.\n",
|
34 |
+
" \n",
|
35 |
"### pip install everything\n",
|
36 |
"我會提供我的 requirements.txt, 讓大家安裝相同的 virtualenv; 當然也可以使用其他的 virtual environment"
|
37 |
]
|
|
|
1404 |
"id": "225fbc10-a54f-4a01-b580-592437b55234",
|
1405 |
"metadata": {},
|
1406 |
"outputs": [],
|
1407 |
+
"source": [
|
1408 |
+
"##存文件進去\n",
|
1409 |
+
"def initial_croma_db(db_name, files_path, file_ext, collection_name):\n",
|
1410 |
+
" _db_name = db_name\n",
|
1411 |
+
"\n",
|
1412 |
+
" documents = multidocs_loader(files_path, file_ext)\n",
|
1413 |
+
" ##embedded 是一種向量化的model, azure 有提供\n",
|
1414 |
+
" embeddings = OpenAIEmbeddings(\n",
|
1415 |
+
" deployment=\"CivetGPT_embedding\",\n",
|
1416 |
+
" model=\"text-embedding-ada-002\",\n",
|
1417 |
+
" openai_api_base=\"https://civet-project-001.openai.azure.com/\",\n",
|
1418 |
+
" openai_api_type=\"azure\",\n",
|
1419 |
+
" openai_api_key = \"0e3e5b666818488fa1b5cb4e4238ffa7\",\n",
|
1420 |
+
" chunk_size=1\n",
|
1421 |
+
" )\n",
|
1422 |
+
"\n",
|
1423 |
+
" chroma_db = Chroma.from_documents(\n",
|
1424 |
+
" documents,\n",
|
1425 |
+
" embeddings,\n",
|
1426 |
+
" collection_name = collection_name,\n",
|
1427 |
+
" persist_directory= root_file_path+ persist_db,\n",
|
1428 |
+
" chroma_db_impl=chroma_db_impl\n",
|
1429 |
+
" )\n",
|
1430 |
+
"\n",
|
1431 |
+
" chroma_db.persist()\n",
|
1432 |
+
" print('vectorstore done!')\n",
|
1433 |
+
"\n",
|
1434 |
+
"#詢問問題\n",
|
1435 |
+
"def local_vector_search(question_str,\n",
|
1436 |
+
" chat_history,\n",
|
1437 |
+
" collection_name = hr_collection_name):\n",
|
1438 |
+
" embedding = get_openaiembeddings()\n",
|
1439 |
+
" vectorstore = Chroma( embedding_function=embedding,\n",
|
1440 |
+
" collection_name=collection_name,\n",
|
1441 |
+
" persist_directory=root_file_path+persist_db,\n",
|
1442 |
+
" )\n",
|
1443 |
+
"\n",
|
1444 |
+
" memory = ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True, ai_prefix = \"AI超級助理\")\n",
|
1445 |
+
"\n",
|
1446 |
+
" llm = AzureOpenAI(\n",
|
1447 |
+
" deployment_name = global_deployment_id,\n",
|
1448 |
+
" model_name= global_model_name,\n",
|
1449 |
+
" temperature = 0.0)\n",
|
1450 |
+
"\n",
|
1451 |
+
" chat_llm = AzureChatOpenAI(\n",
|
1452 |
+
" deployment_name = global_deployment_id,\n",
|
1453 |
+
" model_name= global_model_name,\n",
|
1454 |
+
" temperature = 0.0)\n",
|
1455 |
+
"\n",
|
1456 |
+
" prompt = PromptTemplate(\n",
|
1457 |
+
" template=get_prompt_template_string(),\n",
|
1458 |
+
" input_variables=[\"question\",\"chat_history\"]\n",
|
1459 |
+
" )\n",
|
1460 |
+
" prompt.format(question=question_str,chat_history=chat_history)\n",
|
1461 |
+
" km_chain = ConversationalRetrievalChain.from_llm(\n",
|
1462 |
+
" llm=chat_llm,\n",
|
1463 |
+
" retriever=vectorstore.as_retriever(),\n",
|
1464 |
+
" memory=memory,\n",
|
1465 |
+
" condense_question_prompt=prompt,\n",
|
1466 |
+
" )\n",
|
1467 |
+
" \n",
|
1468 |
+
" result=km_chain(question_str)\n",
|
1469 |
+
" print(result)"
|
1470 |
+
]
|
1471 |
},
|
1472 |
{
|
1473 |
"cell_type": "markdown",
|
|
|
1487 |
"metadata": {},
|
1488 |
"outputs": [],
|
1489 |
"source": [
|
1490 |
+
"def agent_demo():\n",
|
1491 |
+
" #其他chat_llm 的宣告需要自己寫\n",
|
1492 |
+
" \n",
|
1493 |
+
" km_tool = Tool(\n",
|
1494 |
+
" name='Knowledge Base',\n",
|
1495 |
+
" func=km_chain.run,\n",
|
1496 |
+
" description='一個非常有用的工具, 當要查詢任何公司政策以及鴻海相關資料都使用這個工具'\n",
|
1497 |
+
" )\n",
|
1498 |
+
"\n",
|
1499 |
+
" math_math = LLMMathChain(llm=llm,verbose=True)\n",
|
1500 |
+
" math_tool = Tool(\n",
|
1501 |
+
" name='Calculator',\n",
|
1502 |
+
" func=math_math.run,\n",
|
1503 |
+
" description='Useful for when you need to answer questions about math.'\n",
|
1504 |
+
" )\n",
|
1505 |
+
"\n",
|
1506 |
+
" tools=[math_tool,km_tool]\n",
|
1507 |
+
" agent=initialize_agent(\n",
|
1508 |
+
" agent=AgentType.OPENAI_FUNCTIONS,\n",
|
1509 |
+
" tools=tools,\n",
|
1510 |
+
" llm=chat_llm,\n",
|
1511 |
+
" verbose=True,\n",
|
1512 |
+
" memory=memory,\n",
|
1513 |
+
" max_iterations=30,\n",
|
1514 |
+
" )\n",
|
1515 |
+
" result=agent.run(question_str)\n",
|
1516 |
+
" print(result)\n"
|
1517 |
]
|
1518 |
},
|
1519 |
{
|
|
|
1534 |
"metadata": {},
|
1535 |
"outputs": [],
|
1536 |
"source": [
|
1537 |
+
"chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler])\n",
|
1538 |
+
"chain.invoke({\"number\":2})\n",
|
1539 |
+
"chain.invoke({\"number\":2}, {\"callbacks\":[handler]})"
|
1540 |
+
]
|
1541 |
+
},
|
1542 |
+
{
|
1543 |
+
"cell_type": "markdown",
|
1544 |
+
"id": "bb0ef293-e9f1-4fb0-bd05-5daecbecc982",
|
1545 |
+
"metadata": {},
|
1546 |
+
"source": [
|
1547 |
+
"# Local LLM\n",
|
1548 |
+
"目前在虎躍雲上有GPU Inference Server\n",
|
1549 |
+
"ip:\n",
|
1550 |
+
"\n",
|
1551 |
+
"### 擁有的Open Source LLM\n"
|
1552 |
]
|
1553 |
}
|
1554 |
],
|