Spaces:
Runtime error
Runtime error
File size: 22,075 Bytes
75df934 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\Program\\Anaconda\\envs\\python_project\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"d:\\Program\\Anaconda\\envs\\python_project\\lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n",
"d:\\Program\\Anaconda\\envs\\python_project\\lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n"
]
}
],
"source": [
"import google.generativeai as genai\n",
"import arxiv_bot_utils as utils\n",
"import os\n",
"from getpass import getpass\n",
"import json\n",
"# Just ordinary imports.\n",
"# The content is "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"models/gemini-1.0-pro\n",
"models/gemini-1.0-pro-001\n",
"models/gemini-1.0-pro-latest\n",
"models/gemini-1.0-pro-vision-latest\n",
"models/gemini-1.5-pro-latest\n",
"models/gemini-pro\n",
"models/gemini-pro-vision\n"
]
}
],
"source": [
"# Reuse GEMINI_API_KEY when the environment already provides it and only fall\n",
"# back to an interactive prompt when it is missing, so an exported key is not\n",
"# overwritten and the cell can run without user input.\n",
"gemini_api_key = os.getenv(\"GEMINI_API_KEY\")\n",
"if not gemini_api_key:\n",
"    gemini_api_key = getpass(\"Input your API key: \").strip()\n",
"if not gemini_api_key:\n",
"    raise ValueError(\n",
"        \"Gemini API Key not provided. Please provide GEMINI_API_KEY as an environment variable\"\n",
"    )\n",
"# Also store the key in this process's environment.\n",
"os.environ['GEMINI_API_KEY'] = gemini_api_key\n",
"genai.configure(api_key=gemini_api_key)\n",
"# Print every model that advertises the generateContent method\n",
"# (the models themselves live on the server).\n",
"for m in genai.list_models():\n",
"    if 'generateContent' in m.supported_generation_methods:\n",
"        print(m.name)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Generation parameters passed to the model constructed below.\n",
"config = genai.GenerationConfig(temperature=0.7, max_output_tokens=2048)\n",
"\n",
"# Every listed harm category is given the same BLOCK_NONE threshold.\n",
"# NOTE(review): both HARM_CATEGORY_DANGEROUS and HARM_CATEGORY_DANGEROUS_CONTENT\n",
"# are listed - confirm that both are intended.\n",
"safety_settings = [\n",
"    {\"category\": category, \"threshold\": \"BLOCK_NONE\"}\n",
"    for category in (\n",
"        \"HARM_CATEGORY_DANGEROUS\",\n",
"        \"HARM_CATEGORY_HARASSMENT\",\n",
"        \"HARM_CATEGORY_HATE_SPEECH\",\n",
"        \"HARM_CATEGORY_SEXUALLY_EXPLICIT\",\n",
"        \"HARM_CATEGORY_DANGEROUS_CONTENT\",\n",
"    )\n",
"]\n",
"model = genai.GenerativeModel(\n",
"    \"gemini-pro\",\n",
"    safety_settings=safety_settings,\n",
"    generation_config=config,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def extract_keyword_prompt(query):\n",
"    \"\"\"Build the first-stage prompt that asks the model to pick one of three actions.\n",
"\n",
"    The prompt requests a JSON reply whose keys depend on the action: `title` and\n",
"    `author` for a specific document, `keywords` and `description` for a topic,\n",
"    or `answer` for anything else.\n",
"\n",
"    Args:\n",
"        query: The guest question substituted into the template.\n",
"\n",
"    Returns:\n",
"        The formatted prompt string.\n",
"    \"\"\"\n",
"    # The template lines that end in a backslash are continued onto the next\n",
"    # line, so no newline is emitted at those points.\n",
"    template = \"\"\"[INST] SYSTEM: You are an assistant that choose only one action below based on guest question.\n",
" 1. If the guest question is asking for a single specific document or article with explicit title, you need to respond the information in JSON format with 2 keys \"title\", \"author\" if found any above. The authors are separated with the word 'and'. \n",
" 2. If the guest question is asking for relevant informations about a topic, you need to respond the information in JSON format with 2 keys \"keywords\", \"description\", include a list of keywords represent the main academic topic, \\\n",
" and a description about the main topic. You may paraphrase the keywords to add more. \\\n",
" 3. If the guest is not asking for any informations or documents, you need to respond with a polite answer in JSON format with 1 key \"answer\".\n",
" QUESTION: '{query}'\n",
" [/INST]\n",
" ANSWER: \n",
" \"\"\"\n",
"    return template.format(query=query)\n",
"\n",
"def make_answer_prompt(input, contexts):\n",
"    \"\"\"Build the second-stage prompt that asks the model for the final reply.\n",
"\n",
"    Args:\n",
"        input: The guest question (the name shadows the builtin; kept for callers).\n",
"        contexts: Text describing the information found for the question.\n",
"\n",
"    Returns:\n",
"        The formatted prompt string.\n",
"    \"\"\"\n",
"    template = \"\"\"[INST] You are a library assistant that help to search articles and documents based on user's question.\n",
" From guest's question, you have found some records and documents that may help. Now you need to answer the guest with the information found.\n",
" If no information found in the database, you may generate some other recommendation related to user's question using your own knowledge. Each article or paper must have a link to the pdf download page.\n",
" You should answer in a conversational form politely.\n",
" QUESTION: '{input}'\n",
" INFORMATION: '{contexts}'\n",
" [/INST]\n",
" ANSWER:\n",
" \"\"\"\n",
"    return template.format(input=input, contexts=contexts)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def _format_records(paper_info):\n",
"    \"\"\"Convert SQL rows into the context string and the record list.\n",
"\n",
"    Reads the title, author and link from positions 2, 3 and 6 of each row.\n",
"    One record is written per line so a link never runs into the next title.\n",
"\n",
"    Returns:\n",
"        (context, records): newline-joined text and a list of [title, author, link].\n",
"    \"\"\"\n",
"    records = [[row[2], row[3], row[6]] for row in (paper_info or [])]\n",
"    context = \"\\n\".join(\n",
"        \"Title: {}, Author: {}, Link: {}\".format(*record) for record in records\n",
"    )\n",
"    return context, records\n",
"\n",
"\n",
"def response(args):\n",
"    \"\"\"Create the response context for the parsed first-stage reply.\n",
"\n",
"    Args:\n",
"        args: Dict parsed from the model's JSON reply. Keys are checked in this\n",
"            order: \"answer\", \"keywords\" (with \"description\"), \"title\" (with an\n",
"            optional \"author\").\n",
"\n",
"    Returns:\n",
"        A (context, records) pair. `records` is None for a direct answer, a list\n",
"        of [title, author, link] when papers were found, and a non-empty string\n",
"        when nothing was found or crawling failed.\n",
"    \"\"\"\n",
"    not_found = (\"Information not found\", \"Information not found\")\n",
"    if not isinstance(args, dict):\n",
"        return not_found\n",
"\n",
"    if \"answer\" in args:\n",
"        return args['answer'], None  # answer directly, no lookup needed\n",
"\n",
"    if \"keywords\" in args:\n",
"        query_texts = args[\"description\"]\n",
"        keywords = args[\"keywords\"]\n",
"        results = utils.db.query_relevant(keywords=keywords, query_texts=query_texts)\n",
"        ids = results['metadatas'][0]\n",
"        if len(ids) == 0:\n",
"            # Nothing matched: crawl, store the new records, then query again.\n",
"            new_records = utils.crawl_arxiv(keyword_list=keywords, max_results=10)\n",
"            if isinstance(new_records, str):\n",
"                # A str result is treated as an error message, not as records.\n",
"                return \"Error occured, information not found\", new_records\n",
"            print(\"Got new records: \",len(new_records))\n",
"            utils.db.add(new_records)\n",
"            utils.sqldb.add(new_records)\n",
"            results = utils.db.query_relevant(keywords=keywords, query_texts=query_texts)\n",
"            ids = results['metadatas'][0]\n",
"            print(\"Re-queried on chromadb, results: \",ids)\n",
"        paper_ids = [meta['paper_id'] for meta in ids]\n",
"        paper_info = utils.sqldb.query_id(paper_ids)\n",
"        print(paper_info)\n",
"        context, records = _format_records(paper_info)\n",
"        return (context, records) if records else not_found\n",
"\n",
"    if \"title\" in args:\n",
"        title = args['title']\n",
"        # The prompt only asks for \"author\" when one was found, so it may be absent.\n",
"        # NOTE(review): assumes authors_str_to_list accepts an empty string - confirm.\n",
"        authors = utils.authors_str_to_list(args.get('author') or \"\")\n",
"        paper_info = utils.sqldb.query(title = title,author = authors)\n",
"        # `not paper_info` also covers a None result, which len() would reject.\n",
"        if not paper_info:\n",
"            new_records = utils.crawl_exact_paper(title=title,author=authors)\n",
"            if isinstance(new_records, str):\n",
"                # A str result is treated as an error message, not as records.\n",
"                return \"Error occured, information not found\", \"Information not found\"\n",
"            print(\"Got new records: \",len(new_records))\n",
"            utils.db.add(new_records)\n",
"            utils.sqldb.add(new_records)\n",
"            paper_info = utils.sqldb.query(title = title,author = authors)\n",
"            print(\"Re-queried on chromadb, results: \",paper_info)\n",
"        context, records = _format_records(paper_info)\n",
"        return (context, records) if records else not_found\n",
"\n",
"    # None of the expected keys: report \"not found\" instead of falling through\n",
"    # and returning None, which callers cannot unpack into two values.\n",
"    return not_found"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def full_chain_single_question(input_prompt):\n",
"    \"\"\"Run the two-stage question pipeline for a single question.\n",
"\n",
"    Stage one asks the model for a JSON reply, which is passed through\n",
"    utils.trimming, parsed and handed to response(). Stage two asks the model\n",
"    to phrase a final answer from the returned context; it is skipped when\n",
"    response() returns a falsy second value.\n",
"\n",
"    Args:\n",
"        input_prompt: The question text.\n",
"\n",
"    Returns:\n",
"        (temp_answer, answer): the raw first-stage model text (None if the first\n",
"        model call failed) and either the final answer or an\n",
"        \"Error occured: ...\" message.\n",
"    \"\"\"\n",
"    # Bound before the try block so the except handler can always return it.\n",
"    temp_answer = None\n",
"    try:\n",
"        first_prompt = extract_keyword_prompt(input_prompt)\n",
"        temp_answer = model.generate_content(first_prompt).text\n",
"\n",
"        args = json.loads(utils.trimming(temp_answer))\n",
"        contexts, results = response(args)\n",
"        if not results:\n",
"            # Direct answer: the context itself is the final answer.\n",
"            print(contexts)\n",
"            answer = contexts\n",
"        else:\n",
"            output_prompt = make_answer_prompt(input_prompt,contexts)\n",
"            answer = model.generate_content(output_prompt).text\n",
"        return temp_answer, answer\n",
"    except Exception as e:\n",
"        print(e)\n",
"        return temp_answer, \"Error occured: \" + str(e)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[('1903.04824v1', 'computer science', 'Proceedings of the Fifth International Conference on Cloud and Robotics (ICCR2018)', ' Huaxi, Zhang, Jacques Malenfan', '2019-03-12', '2019-03-12', 'http://arxiv.org/pdf/1903.04824v1'), ('1709.07597v1', 'economics', 'Inverse Reinforcement Learning with Conditional Choice Probabilities', 'Mohit Sharma, Kris M. Kitani, Joachim Groege', '2017-09-22', '2017-09-22', 'http://arxiv.org/pdf/1709.07597v1')]\n",
"Sure, here are some key papers on model predictive control for nonlinear systems:\n",
"\n",
"* **Nonlinear Model Predictive Control: A Survey** by Garcia, P.D., Prett, D.M., and Morari, M. (1989)\n",
"* **Model Predictive Control for Nonlinear Systems** by Camacho, E.F. and Bordons, C. (1999)\n",
"* **Nonlinear Model Predictive Control** by Rawlings, J.B. and Mayne, D.Q. (2009)\n",
"\n",
"As for recent reviews on the application of control theory to robotics, here are a few:\n",
"\n",
"* **Control of Robot Manipulators** by Spong, M.W., Hutchinson, S., and Vidyasagar, M. (2006)\n",
"* **Robotics: Modelling, Planning and Control** by Siciliano, B., Sciavicco, L., Villani, L., and Oriolo, G. (2010)\n",
"* **Control of Robot Arms** by Featherstone, R. (2014)\n",
"\n",
"I hope this information is helpful. Please let me know if you have any other questions.\n"
]
}
],
"source": [
"# Manual check of the second pipeline stage: `args` is a hard-coded JSON reply,\n",
"# so only response() and the final answer prompt are exercised here.\n",
"input_prompt = \"Can you suggest some key papers on model predictive control for nonlinear systems, and are there any recent reviews on the application of control theory to robotics?\"\n",
"# JSON text carrying the `keywords` and `description` keys that response() reads.\n",
"args = \"{\\n \\\"keywords\\\": [\\\"Model predictive control\\\", \\\"Nonlinear systems\\\", \\\"Robotics\\\", \\\"Control theory\\\"],\\n \\\"description\\\": \\\"Model predictive control (MPC) is a control algorithm that uses a model of the system to predict future behavior and optimize the control inputs. MPC is particularly well-suited for nonlinear systems, as it can handle the complex dynamics of these systems. In recent years, MPC has been increasingly applied to robotics, as it can improve the performance and safety of robotic systems. Control theory is a branch of mathematics that deals with the analysis and design of control systems. Control theory has been applied to a wide range of problems in robotics, including motion planning, trajectory tracking, and force control.\\\"\\n}\"\n",
"# Rebinds `args` from the JSON text to the parsed dict.\n",
"args = json.loads(args)\n",
"contexts, results = response(args)\n",
"if not results:\n",
" # Falsy `results`: print the context as the direct answer.\n",
" print(contexts)\n",
"else:\n",
" # Otherwise ask the model to phrase an answer from the returned context.\n",
" output_prompt = make_answer_prompt(input_prompt,contexts)\n",
" answer = model.generate_content(output_prompt).text\n",
" print(answer)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'desired': 'Natural Language Processing (Computer Science)', 'question': 'What are some recent papers on deep learning architectures for text classification, and can you recommend any surveys or reviews on the topic?'}\n",
"0\n",
"[('1808.08121v1', 'computer science', 'An Improvement of Data Classification Using Random Multimodel Deep Learning (RMDL)', 'Mojtaba Heidarysafa, Kamran Kowsari, Donald E. Brown, Kiana Jafari Meimandi, Laura E. Barne', '2018-08-23', '2018-08-23', 'http://arxiv.org/pdf/1808.08121v1'), ('1904.08067v5', 'computer science', 'Text Classification Algorithms: A Survey', 'Kamran Kowsari, Kiana Jafari Meimandi, Mojtaba Heidarysafa, Sanjana Mendu, Laura E. Barnes, Donald E. Brow', '2020-05-20', '2019-04-17', 'http://arxiv.org/pdf/1904.08067v5'), ('2202.09144v1', 'computer science', 'Modelling the semantics of text in complex document layouts using graph transformer networks', 'Thomas Roland Barillot, Jacob Saks, Polena Lilyanova, Edward Torgas, Yachen Hu, Yuanqing Liu, Varun Balupuri, Paul Gaskel', '2022-02-18', '2022-02-18', 'http://arxiv.org/pdf/2202.09144v1')]\n",
"1\n",
"[('1601.04187v1', 'computer science', 'Conversion of Artificial Recurrent Neural Networks to Spiking Neural Networks for Low-power Neuromorphic Hardware', 'Peter U. Diehl, Guido Zarrella, Andrew Cassidy, Bruno U. Pedroni, Emre Neftc', '2016-01-16', '2016-01-16', 'http://arxiv.org/pdf/1601.04187v1'), ('1801.01093v3', 'economics', 'Comparing the Forecasting Performances of Linear Models for Electricity Prices with High RES Penetration', 'Angelica Gianfreda, Francesco Ravazzolo, Luca Rossin', '2019-11-12', '2018-01-03', 'http://arxiv.org/pdf/1801.01093v3'), ('2302.11093v1', 'electrical engineering and system science', 'Use Cases for Time-Frequency Image Representations and Deep Learning Techniques for Improved Signal Classification', 'Mehmet Parla', '2023-02-22', '2023-02-22', 'http://arxiv.org/pdf/2302.11093v1')]\n",
"2\n",
"[('1505.07907v4', 'economics', 'Linking Economic Complexity, Institutions and Income Inequality', 'D. Hartmann, M. R. Guevara, C. Jara-Figueroa, M. Aristaran, C. A. Hidalg', '2017-01-04', '2015-05-29', 'http://arxiv.org/pdf/1505.07907v4'), ('2107.06855v2', 'economics', 'Comparing Intellectual property policy in the Global North and South -- A one-size-fits-all policy for economic prosperity?', 'S Sidhartha Narayan, Malavika Ranjan, Madhumitha Raghurama', '2021-08-10', '2021-07-14', 'http://arxiv.org/pdf/2107.06855v2'), ('1910.11780v1', 'economics', 'Inequality in Turkey: Looking Beyond Growth', 'Bayram Cakir, Ipek Ergu', '2019-10-25', '2019-10-25', 'http://arxiv.org/pdf/1910.11780v1')]\n",
"3\n",
"[('1607.06583v2', 'computer science', \"Classification of Alzheimer's Disease Structural MRI Data by Deep Learning Convolutional Neural Networks\", 'Saman Sarraf, Ghassem Tofigh', '2017-05-19', '2016-07-22', 'http://arxiv.org/pdf/1607.06583v2'), ('2101.10265v1', 'computer science', 'Superiorities of Deep Extreme Learning Machines against Convolutional Neural Networks', 'Gokhan Altan, Yakup Kutl', '2021-01-21', '2021-01-21', 'http://arxiv.org/pdf/2101.10265v1'), ('2208.03143v1', 'computer science', 'Deep Learning and Health Informatics for Smart Monitoring and Diagnosis', 'Amin Gasm', '2022-08-05', '2022-08-05', 'http://arxiv.org/pdf/2208.03143v1')]\n",
"4\n",
"[('2302.06584v3', 'computer science', 'Thermodynamic AI and the fluctuation frontier', 'Patrick J. Coles, Collin Szczepanski, Denis Melanson, Kaelan Donatella, Antonio J. Martinez, Faris Sbah', '2023-06-13', '2023-02-09', 'http://arxiv.org/pdf/2302.06584v3'), ('2307.12298v1', 'computer science', 'Stabilization and Dissipative Information Transfer of a Superconducting Kerr-Cat Qubit', 'Ufuk Korkmaz, Deniz Türkpenç', '2023-07-23', '2023-07-23', 'http://arxiv.org/pdf/2307.12298v1'), ('2106.10421v1', 'computer science', 'QFCNN: Quantum Fourier Convolutional Neural Network', 'Feihong Shen, Jun Li', '2021-06-19', '2021-06-19', 'http://arxiv.org/pdf/2106.10421v1')]\n",
"5\n",
"[('2308.16539v2', 'computer science', 'On a Connection between Differential Games, Optimal Control, and Energy-based Models for Multi-Agent Interactions', 'Christopher Diehl, Tobias Klosek, Martin Krüger, Nils Murzyn, Torsten Bertra', '2023-10-16', '2023-08-31', 'http://arxiv.org/pdf/2308.16539v2'), ('2404.12474v1', 'computer science', 'Learning a Stable, Safe, Distributed Feedback Controller for a Heterogeneous Platoon of Vehicles', 'Michael H. Shaham, Taskin Padi', '2024-04-18', '2024-04-18', 'http://arxiv.org/pdf/2404.12474v1'), ('2008.13221v1', 'computer science', 'Human-in-the-Loop Methods for Data-Driven and Reinforcement Learning Systems', 'Vinicius G. Goeck', '2020-08-30', '2020-08-30', 'http://arxiv.org/pdf/2008.13221v1')]\n",
"6\n",
"[('1911.06206v3', 'economics', 'Bayesian state-space modeling for analyzing heterogeneous network effects of US monetary policy', 'Niko Hauzenberger, Michael Pfarrhofe', '2020-09-10', '2019-11-14', 'http://arxiv.org/pdf/1911.06206v3'), ('2302.14114v1', 'economics', 'Econometric assessment of the monetary policy shocks in Morocco: Evidence from a Bayesian Factor-Augmented VAR', 'Marouane Daou', '2023-02-27', '2023-02-27', 'http://arxiv.org/pdf/2302.14114v1'), ('2311.11858v1', 'economics', 'Theory coherent shrinkage of Time-Varying Parameters in VARs', 'Andrea Renzett', '2023-11-20', '2023-11-20', 'http://arxiv.org/pdf/2311.11858v1')]\n",
"7\n",
"[('2310.03365v2', 'computer science', 'Swin-Tempo: Temporal-Aware Lung Nodule Detection in CT Scans as Video Sequences Using Swin Transformer-Enhanced UNet', 'Hossein Jafari, Karim Faez, Hamidreza Amindava', '2023-10-14', '2023-10-05', 'http://arxiv.org/pdf/2310.03365v2'), ('1808.08531v1', 'computer science', 'DeepTracker: Visualizing the Training Process of Convolutional Neural Networks', 'Dongyu Liu, Weiwei Cui, Kai Jin, Yuxiao Guo, Huamin Q', '2018-08-26', '2018-08-26', 'http://arxiv.org/pdf/1808.08531v1'), ('2105.10448v1', 'computer science', 'Distinguishing artefacts: evaluating the saturation point of convolutional neural networks', 'Ric Real, James Gopsill, David Jones, Chris Snider, Ben Hick', '2021-05-21', '2021-05-21', 'http://arxiv.org/pdf/2105.10448v1')]\n",
"8\n",
"Got new records: 10\n",
"Re-queried on chromadb, results: []\n",
"None\n",
"9\n",
"[('2403.07017v1', 'computer science', 'Mathematics of multi-agent learning systems at the interface of game theory and artificial intelligence', 'Long Wang, Feng Fu, Xingru Che', '2024-03-09', '2024-03-09', 'http://arxiv.org/pdf/2403.07017v1'), ('2210.02205v1', 'computer science', 'Game Theoretic Rating in N-player general-sum games with Equilibria', 'Luke Marris, Marc Lanctot, Ian Gemp, Shayegan Omidshafiei, Stephen McAleer, Jerome Connor, Karl Tuyls, Thore Graepe', '2022-10-05', '2022-10-05', 'http://arxiv.org/pdf/2210.02205v1'), ('2212.05357v3', 'economics', 'On Blockchain We Cooperate: An Evolutionary Game Perspective', 'Luyao Zhang, Xinyu Tia', '2023-01-19', '2022-12-10', 'http://arxiv.org/pdf/2212.05357v3')]\n"
]
}
],
"source": [
"# Load the test questions; each entry is read through its 'desired' and\n",
"# 'question' keys below. The encoding is explicit so decoding does not depend\n",
"# on the platform's default locale encoding.\n",
"with open(\"test_questions.txt\", \"r\", encoding=\"utf-8\") as infile:\n",
"    data = json.load(infile)\n",
"print(data[0])\n",
"\n",
"# Run the full chain on every question and collect both model outputs.\n",
"test_log = []\n",
"for i,t in enumerate(data):\n",
"    print(i)\n",
"    temp_answer, answer = full_chain_single_question(t['question'])\n",
"    test_log.append({'desired topic':t['desired'],\n",
"                     'question':t['question'],\n",
"                     'first answer':temp_answer,\n",
"                     'final answer':answer})\n",
"with open(\"test_results.json\", \"w\", encoding=\"utf-8\") as outfile:\n",
"    json.dump(test_log,outfile)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|