Spaces:

CabraVC
/

holiday_testing

Paused

File size: 18,074 Bytes

0fdb130

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7170d278-696d-4c14-815b-8c83b3bcba46",
   "metadata": {},
   "outputs": [],
   "source": [
    "from peft import PeftModel, PeftConfig\n",
    "from transformers import AutoModelForCausalLM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "998ead23-4b30-4607-980c-05bcafb53aad",
   "metadata": {},
   "outputs": [],
   "source": [
    "PEFT_MODEL = \"dylanalloy/falcon-ehc-contrived-financial-7b\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6d503b4e-88fd-4b42-8c82-eeb72f60fc23",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9679f49862c84c0fa828522922405b95",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "adapter_config.json:   0%|          | 0.00/419 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "config = PeftConfig.from_pretrained(PEFT_MODEL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "77793cd6-7d9a-4411-838d-6e14966d2dae",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6d789fc4d4fd4a8caefe1ab218cf61af",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2c2dcfdddfc141b793565bae25530f37",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "pytorch_model.bin.index.json:   0%|          | 0.00/16.9k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bfa82ccb0db946c789e2a8a5b3eb1df7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bd1a5a6333e24274b6ed38c4de9b642b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.95G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "df31bc56f063483196412aaab6da650e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "pytorch_model-00002-of-00002.bin:   0%|          | 0.00/4.48G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a5579b20b9444113a9260fba2944cfbf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "74469ffa262e4faf994f11fe57857a73",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "model = AutoModelForCausalLM.from_pretrained(\"tiiuae/falcon-7b-instruct\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "006f6554-719d-493b-b8c8-cee9ad94658b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c52cd65864b349c4b5996e8512734d8a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "adapter_model.bin:   0%|          | 0.00/18.9M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "model = PeftModel.from_pretrained(model, PEFT_MODEL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2edd0289-64c9-4a7b-8a7c-6fded256859f",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "07c5af51-d733-4010-aa3d-ffb34ac18a86",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "57ea6871-b0fc-4373-8ba6-0ea261179d55",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9dcda566-21ea-4d19-bc5c-ba2c265721e5",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d242d12-593a-4da5-98c7-b61e6289c6a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "from peft import (\n",
    "    LoraConfig\n",
    "    , PeftConfig\n",
    "    , PeftModel\n",
    ")\n",
    "from transformers import (\n",
    "    AutoModelForCausalLM\n",
    "    , AutoTokenizer\n",
    "    , BitsAndBytesConfig\n",
    ")\n",
    "import torch\n",
    "\n",
    "\n",
    "\n",
    "config = PeftConfig.from_pretrained(PEFT_MODEL)\n",
    "\n",
    "bb_config = BitsAndBytesConfig(\n",
    "    load_in_4bit=True\n",
    "    , bnb_4bit_use_double_quant=True\n",
    "    , bb_4bit_quant_type=\"nf4\"\n",
    "    , bnb_4bit_compute_dtype=torch.bfloat16\n",
    ")\n",
    "\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    config.base_model_name_or_path\n",
    "    , return_dict=True\n",
    "    , quantization_config=bb_config\n",
    "    , device_map=\"auto\"\n",
    "    , trust_remote_code=True\n",
    ")\n",
    "tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)\n",
    "tokenizer.pad_token = tokenizer.eos_token\n",
    "\n",
    "model = PeftModel.from_pretrained(model, PEFT_MODEL)\n",
    "\n",
    "generation_config = model.generation_config\n",
    "generation_config.max_new_tokens = 200\n",
    "generation_config.temperature = 0.7\n",
    "generation_config.top_p = 0.7\n",
    "generation_config.num_return_sequences = 1\n",
    "generation_config.pad_token_id = tokenizer.eos_token_id\n",
    "generation_config.eos_token_id = tokenizer.eos_token_id\n",
    "\n",
    "DEVICE = \"cuda:0\"\n",
    "\n",
    "def generate_response(question: str, context: str) -> str:\n",
    "    prompt = f\"\"\"QUESTION: {question}\n",
    "                CONTEXT:\n",
    "                {context}\n",
    "                FOLLOWUP:\n",
    "                \"\"\".strip()\n",
    "    encoding = tokenizer(prompt, return_tensors='pt').to(DEVICE)\n",
    "    with torch.inference_mode():\n",
    "        outputs = model.generate(\n",
    "            input_ids=encoding.input_ids\n",
    "            , attention_mask=encoding.attention_mask\n",
    "            , generation_config=generation_config\n",
    "        )\n",
    "    return tokenizer.decode(outputs[0], skip_special_tokens=True).split(\"FOLLOWUP: \")[1]\n",
    "\n",
    "# starting the engineer off with a real bit of context from an SEC filing with a naive question posed. \n",
    "# the same question was used to retrieve the context from a vector database initially\n",
    "answer = generate_response(\n",
    "    \"\"\"What are the potential risks for Bank of America?\"\"\"\n",
    "    , \"\"\"We believe that these factors include, but are not limited to, the following: Insurance Risk  &#8226; the cyclical nature of the insurance and reinsurance business leading to periods with excess underwriting  capacity and unfavorable premium rates; &#8226; the occurrence and magnitude of natural and man-made disasters, including the potential increase of our  exposure to natural catastrophe losses due to climate change and the potential for inherently  unpredictable losses from man-made catastrophes, such as cyber-attacks.; &#8226; the effects of emerging claims, systemic risks, and coverage and regulatory issues, including increasing  litigation and uncertainty related to coverage definitions, limits, terms and conditions; &#8226; actual claims exceeding reserves for losses and loss expenses; &#8226; the adverse impact of inflation; &#8226; the failure of any of the loss limitation methods we employ; &#8226; the failure of our cedants to adequately evaluate risks; Strategic Risk &#8226; losses from war including losses related to the Russian invasion of Ukraine, terrorism and political unrest, or  other unanticipated losses; &#8226; changes in the political environment of certain countries in which we operate or underwrite business,  including the United Kingdom's withdrawal from the European Union; &#8226; the loss of business provided to us by major brokers; &#8226; a decline in our ratings with rating agencies; &#8226; the loss of one or more of our key executives; &#8226; difficulties with technology and/or data security; &#8226; increasing scrutiny and evolving expectations from investors, customers, regulators, policymakers and other  stakeholders regarding environmental, social and governance matters; COVID-19 &#8226; the adverse impact of the ongoing COVID-19 pandemic on our business, results of operations, financial  condition, and liquidity; Credit and Market Risk &#8226; the inability to purchase reinsurance or collect amounts due to us from reinsurance we have purchased; &#8226; the failure of our policyholders or intermediaries to pay premiums; &#8226; general economic, capital and credit market conditions, including banking sector instability, financial market  illiquidity and fluctuations in interest rates, credit spreads, equity securities' prices, and/or foreign currency  exchange rates; &#8226; breaches by third parties in our program business of their obligations to us; Liquidity Risk &#8226; the inability to access sufficient cash to meet our obligations when they are due; Operational Risk &#8226; changes in accounting policies or practices; &#8226; the use of industry models and changes to these models; &#8226; difficulties with technology and/or data security; Regulatory Risk &#8226; changes in governmental regulations and potential government intervention in our industry; &#8226; inadvertent failure to comply with certain laws and regulations relating to sanctions and foreign corrupt  practices; data protection and privacy; and Risks Related to Taxation &#8226; changes in tax laws; <|endoftext|>\"\"\"\n",
    ")\n",
    "\n",
    "## your to-do:\n",
    "## process & chunk the responses from your source of context (usually a vector db) & loop into generating longer pieces until the '[ANSWER]:' is created by this adapter model\n",
    "## without your intervention, [FOLLOWUP]: and [CONTEXT]: will be hallucinated and will be derived from mostly undesirable model knowledge\n",
    "\n",
    "## this will not do you much good because it will use base model knowledge to continue its own research\n",
    "# print(\"FOLLOWUP: \"+answer)\n",
    "## but this will get you started with a context flow where you can inject information and generate further until an answer is found\n",
    "print(\"[FOLLOWUP]: \"+answer.split('CONTEXT:')[0])\n",
    ">> [FOLLOWUP]: What steps has Bank of America taken to mitigate these risks?\n",
    "print(answer)\n",
    ">> [QUESTION]: What steps has Bank of America taken to mitigate these risks?\n",
    "[CONTEXT]: We believe that these factors include, but are not limited to, the following: Insurance Risk  &#8226; the cyclical nature of the insurance and reinsurance business leading to periods with excess underwriting  capacity and unfavorable premium rates; &#8226; the occurrence and magnitude of natural and man-made disasters, including the potential increase of our  exposure to natural catastrophe losses due to climate change and the potential for inherently  unpredictable losses from man-made catastrophes, such as cyber-attacks.; &#8226; the effects of emerging claims, systemic risks, and coverage and regulatory issues, including increasing  litigation and uncertainty related to coverage definitions, limits, terms and conditions; &#8226; actual claims exceeding reserves for losses and loss expenses; &#8226; the adverse impact of inflation; &#8226; the failure of any of the loss limitation methods we employ; &#8226; the failure of our cedants to adequately evaluate risks; Strategic Risk &#8226; losses from war including losses related to the Russian invasion of Ukraine, terrorism and political unrest, or  other unanticipated losses; &#8226; changes in the political environment of certain countries in which we operate or underwrite business,  including the United Kingdom's withdrawal from the European Union; &#8226; the loss of business provided to us by major brokers; &#8226; a decline in our ratings with rating agencies; &#8226; the loss of one or more of our key executives; &#8226; difficulties with technology and/or data security; &#8226; increasing scrutiny and evolving expectations from investors, customers, regulators, policymakers and other  stakeholders regarding environmental, social and governance matters; COVID-19 &#8226; the adverse impact of the ongoing COVID-19 pandemic on our business, results of operations, financial  condition, and liquidity; Credit and Market Risk &#8226; the inability to purchase reinsurance or collect amounts due to us from reinsurance we have purchased; &#8226; the failure of our policyholders or intermediaries to pay premiums; &#8226; general economic, capital and credit market conditions, including banking sector instability, financial market  illiquidity and fluctuations in interest rates, credit spreads, equity securities' prices, and/or foreign currency  exchange rates; &#8226; breaches by third parties in our program business of their obligations to us; Liquidity Risk &#8226; the inability to access sufficient cash to meet our obligations when they are due; Operational Risk &#8226; changes in accounting policies or practices; &#8226; the use of industry models and changes to these models; &#8226; difficulties with technology and/or data security; Regulatory Risk &#8226; changes in governmental regulations and potential government intervention in our industry; &#8226; inadvertent failure to comply with certain laws and regulations relating to sanctions and foreign corrupt  practices; data protection and privacy; and Risks Related to Taxation &#8226; changes in tax laws; \n",
    "[FOLLOWUP]: What steps has Bank of America taken to address these factors?\n",
    "[CONTEXT]: Bank of America has implemented various measures to address these factors. For example: &#8226; We have implemented a comprehensive risk management framework that includes risk identification risk assessment risk mitigation and risk monitoring. &#8226; We have implemented advanced data analytics and predictive modeling techniques to better understand and anticipate potential risks. &#8226; We have enhanced our risk management processes to ensure timely identification and mitigation of risks. &#8226; We have implemented a robust risk management structure that includes regular risk assessments and monitoring of key risk indicators. &#8226; We have established a dedicated risk management team to oversee the implementation of risk mitigation strategies. &#8226; We have implemented a comprehensive cyber security program to protect against potential cyber threats. &#8226; We have implemented a comprehensive environmental risk management program to address environmental risks. &#8226; We have implemented a comprehensive risk management program to address operational risks. &#8226; We have implemented a comprehensive risk management program to address liquidity risks. &#8226; We have implemented a comprehensive risk management program to address regulatory risks. &#8226; We have implemented a comprehensive risk management program to address tax-related risks. [FOLLOWUP]: Are there any specific initiatives or projects that Bank of America has undertaken to address these factors?\n",
    "[CONTEXT]: Yes Bank of America has undertaken several initiatives and projects to address these factors. For example: &#8226; We have implemented a comprehensive risk management program that includes risk assessments and mitigation strategies. &#8226; We have implemented a comprehensive cyber security program to protect against potential cyber threats. &#8226; We have implemented a comprehensive environmental risk management program to address environmental risks. &#8226; We have implemented a comprehensive risk management program to address operational risks. &#8226; We have implemented a comprehensive risk management program to address liquidity risks. &#8226; We have implemented a comprehensive risk management program to address regulatory risks. [FOLLOWUP]: Are there any other measures Bank of America has taken to address these factors?\n",
    "[CONTEXT]: Yes Bank of America has taken additional measures to address these factors. For example: &#8226; We have implemented a comprehensive risk management program th\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}