andymbryant commited on
Commit
2bb5de3
1 Parent(s): 0affa33

added hf config and dotenv

Browse files
.github/workflows/check_size.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Check file size
2
+ on:
3
+ pull_request:
4
+ branches: [main]
5
+
6
+ workflow_dispatch:
7
+
8
+ jobs:
9
+ sync-to-hub:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - name: Check large files
13
+ uses: ActionsDesk/lfs-warning@v2.0
14
+ with:
15
+ filesizelimit: 10485760
.github/workflows/sync_hf.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face hub
2
+ on:
3
+ push:
4
+ branches: [main]
5
+
6
+ workflow_dispatch:
7
+
8
+ jobs:
9
+ sync-to-hub:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v3
13
+ with:
14
+ fetch-depth: 0
15
+ lfs: true
16
+ - name: Push to hub
17
+ env:
18
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
19
+ run: git push https://andymbryant:$HF_TOKEN@huggingface.co/spaces/andymbryant/data-mapper main
.gitignore CHANGED
@@ -1,3 +1,4 @@
1
  venv/
2
  __pycache__/
3
  *.pyc
 
 
1
  venv/
2
  __pycache__/
3
  *.pyc
4
+ .env
README.md CHANGED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ colorTo: coral
3
+ colorFrom: green
4
+ emoji: ✨
5
+ license: bsd-2-clause
6
+ title: Data Mapper
7
+ sdk: gradio
8
+ ---
requirements.txt CHANGED
@@ -4,3 +4,4 @@ ipykernel
4
  pandas
5
  tabulate
6
  gradio
 
 
4
  pandas
5
  tabulate
6
  gradio
7
+ python-dotenv
src/core.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import pandas as pd
3
  from langchain.output_parsers import PydanticOutputParser
4
  from langchain.prompts import ChatPromptTemplate
@@ -10,7 +11,7 @@ from src.types import TableMapping
10
  from src.vars import NUM_ROWS_TO_RETURN
11
  from src.prompt import DATA_SCIENTIST_PROMPT_STR, SPEC_WRITER_PROMPT_STR, ENGINEER_PROMPT_STR
12
 
13
- os.environ["OPENAI_API_KEY"] = "sk-***REDACTED***"
14
 
15
  DATA_DIR_PATH = os.path.join(os.path.dirname(__file__), 'data')
16
  SYNTHETIC_DATA_DIR_PATH = os.path.join(DATA_DIR_PATH, 'synthetic')
@@ -20,7 +21,7 @@ TRANSFORM_MODEL = ChatOpenAI(
20
  temperature=0,
21
  )
22
 
23
- natural_language_model = ChatOpenAI(
24
  model_name='gpt-4',
25
  temperature=0.1,
26
  )
@@ -46,7 +47,7 @@ def get_table_mapping(source_df, template_df) -> TableMapping:
46
 
47
  def get_code_spec(table_mapping: TableMapping) -> str:
48
  writer_prompt = ChatPromptTemplate.from_template(SPEC_WRITER_PROMPT_STR)
49
- writer_chain = writer_prompt | natural_language_model | StrOutputParser()
50
  return writer_chain.invoke({"table_mapping": str(table_mapping)})
51
 
52
 
 
1
  import os
2
+ from dotenv import load_dotenv
3
  import pandas as pd
4
  from langchain.output_parsers import PydanticOutputParser
5
  from langchain.prompts import ChatPromptTemplate
 
11
  from src.vars import NUM_ROWS_TO_RETURN
12
  from src.prompt import DATA_SCIENTIST_PROMPT_STR, SPEC_WRITER_PROMPT_STR, ENGINEER_PROMPT_STR
13
 
14
+ load_dotenv()
15
 
16
  DATA_DIR_PATH = os.path.join(os.path.dirname(__file__), 'data')
17
  SYNTHETIC_DATA_DIR_PATH = os.path.join(DATA_DIR_PATH, 'synthetic')
 
21
  temperature=0,
22
  )
23
 
24
+ NATURAL_LANGUAGE_MODEL = ChatOpenAI(
25
  model_name='gpt-4',
26
  temperature=0.1,
27
  )
 
47
 
48
  def get_code_spec(table_mapping: TableMapping) -> str:
49
  writer_prompt = ChatPromptTemplate.from_template(SPEC_WRITER_PROMPT_STR)
50
+ writer_chain = writer_prompt | NATURAL_LANGUAGE_MODEL | StrOutputParser()
51
  return writer_chain.invoke({"table_mapping": str(table_mapping)})
52
 
53
 
src/notebooks/brainstorm.ipynb CHANGED
@@ -20,7 +20,9 @@
20
  "from pygments import highlight\n",
21
  "from pygments.lexers import PythonLexer\n",
22
  "from pygments.formatters import HtmlFormatter\n",
23
- "import json"
 
 
24
  ]
25
  },
26
  {
@@ -30,7 +32,6 @@
30
  "outputs": [],
31
  "source": [
32
  "langchain.debug = True\n",
33
- "os.environ[\"OPENAI_API_KEY\"] = \"sk-***REDACTED***\"\n",
34
  "data_dir_path = os.path.join(os.getcwd(), \"data\")"
35
  ]
36
  },
 
20
  "from pygments import highlight\n",
21
  "from pygments.lexers import PythonLexer\n",
22
  "from pygments.formatters import HtmlFormatter\n",
23
+ "import json\n",
24
+ "from dotenv import load_dotenv\n",
25
+ "load_dotenv()"
26
  ]
27
  },
28
  {
 
32
  "outputs": [],
33
  "source": [
34
  "langchain.debug = True\n",
 
35
  "data_dir_path = os.path.join(os.getcwd(), \"data\")"
36
  ]
37
  },
src/notebooks/brainstorm2.ipynb CHANGED
@@ -14,7 +14,9 @@
14
  "from langchain.tools import PythonAstREPLTool\n",
15
  "from langchain.chat_models import ChatOpenAI\n",
16
  "from pydantic import BaseModel, Field\n",
17
- "from langchain.memory import ConversationBufferMemory"
 
 
18
  ]
19
  },
20
  {
@@ -24,7 +26,6 @@
24
  "outputs": [],
25
  "source": [
26
  "langchain.debug = True\n",
27
- "os.environ[\"OPENAI_API_KEY\"] = \"sk-***REDACTED***\"\n",
28
  "data_dir_path = os.path.join(os.getcwd())\n",
29
  "pd.set_option('display.max_rows', 20)\n",
30
  "pd.set_option('display.max_columns', 20)\n",
 
14
  "from langchain.tools import PythonAstREPLTool\n",
15
  "from langchain.chat_models import ChatOpenAI\n",
16
  "from pydantic import BaseModel, Field\n",
17
+ "from langchain.memory import ConversationBufferMemory\n",
18
+ "from dotenv import load_dotenv\n",
19
+ "load_dotenv()"
20
  ]
21
  },
22
  {
 
26
  "outputs": [],
27
  "source": [
28
  "langchain.debug = True\n",
 
29
  "data_dir_path = os.path.join(os.getcwd())\n",
30
  "pd.set_option('display.max_rows', 20)\n",
31
  "pd.set_option('display.max_columns', 20)\n",
src/notebooks/brainstorm3.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 34,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -17,7 +17,9 @@
17
  "from langchain.memory import ConversationBufferMemory\n",
18
  "from langchain.schema.output_parser import StrOutputParser\n",
19
  "import json\n",
20
- "import gradio as gr"
 
 
21
  ]
22
  },
23
  {
@@ -27,7 +29,6 @@
27
  "outputs": [],
28
  "source": [
29
  "langchain.debug = True\n",
30
- "os.environ[\"OPENAI_API_KEY\"] = \"sk-***REDACTED***\"\n",
31
  "data_dir_path = os.path.join(os.getcwd())\n",
32
  "pd.set_option('display.max_rows', 20)\n",
33
  "pd.set_option('display.max_columns', 20)\n",
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": null,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
17
  "from langchain.memory import ConversationBufferMemory\n",
18
  "from langchain.schema.output_parser import StrOutputParser\n",
19
  "import json\n",
20
+ "import gradio as gr\n",
21
+ "from dotenv import load_dotenv\n",
22
+ "load_dotenv()"
23
  ]
24
  },
25
  {
 
29
  "outputs": [],
30
  "source": [
31
  "langchain.debug = True\n",
 
32
  "data_dir_path = os.path.join(os.getcwd())\n",
33
  "pd.set_option('display.max_rows', 20)\n",
34
  "pd.set_option('display.max_columns', 20)\n",
src/notebooks/brainstorm4.ipynb CHANGED
@@ -2,9 +2,20 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 4,
6
  "metadata": {},
7
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
8
  "source": [
9
  "import os\n",
10
  "import pandas as pd\n",
@@ -17,7 +28,9 @@
17
  "from langchain.prompts import ChatPromptTemplate\n",
18
  "from langchain.tools import PythonAstREPLTool\n",
19
  "from langchain.chat_models import ChatOpenAI\n",
20
- "from langchain.schema.output_parser import StrOutputParser"
 
 
21
  ]
22
  },
23
  {
@@ -27,8 +40,6 @@
27
  "outputs": [],
28
  "source": [
29
  "langchain.debug = False\n",
30
- "# Throwaway key with strict usage limit\n",
31
- "os.environ[\"OPENAI_API_KEY\"] = \"sk-***REDACTED***\"\n",
32
  "pd.set_option('display.max_columns', 20)\n",
33
  "pd.set_option('display.max_rows', 20)"
34
  ]
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 90,
6
  "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "data": {
10
+ "text/plain": [
11
+ "True"
12
+ ]
13
+ },
14
+ "execution_count": 90,
15
+ "metadata": {},
16
+ "output_type": "execute_result"
17
+ }
18
+ ],
19
  "source": [
20
  "import os\n",
21
  "import pandas as pd\n",
 
28
  "from langchain.prompts import ChatPromptTemplate\n",
29
  "from langchain.tools import PythonAstREPLTool\n",
30
  "from langchain.chat_models import ChatOpenAI\n",
31
+ "from langchain.schema.output_parser import StrOutputParser\n",
32
+ "from dotenv import load_dotenv\n",
33
+ "load_dotenv()"
34
  ]
35
  },
36
  {
 
40
  "outputs": [],
41
  "source": [
42
  "langchain.debug = False\n",
 
 
43
  "pd.set_option('display.max_columns', 20)\n",
44
  "pd.set_option('display.max_rows', 20)"
45
  ]