Spaces:
Runtime error
Runtime error
andymbryant
committed on
Commit
•
2bb5de3
1
Parent(s):
0affa33
added hf config and dotenv
Browse files
- .github/workflows/check_size.yaml +15 -0
- .github/workflows/sync_hf.yaml +19 -0
- .gitignore +1 -0
- README.md +8 -0
- requirements.txt +1 -0
- src/core.py +4 -3
- src/notebooks/brainstorm.ipynb +3 -2
- src/notebooks/brainstorm2.ipynb +3 -2
- src/notebooks/brainstorm3.ipynb +4 -3
- src/notebooks/brainstorm4.ipynb +16 -5
.github/workflows/check_size.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Check file size
|
2 |
+
on:
|
3 |
+
pull_request:
|
4 |
+
branches: [main]
|
5 |
+
|
6 |
+
workflow_dispatch:
|
7 |
+
|
8 |
+
jobs:
|
9 |
+
sync-to-hub:
|
10 |
+
runs-on: ubuntu-latest
|
11 |
+
steps:
|
12 |
+
- name: Check large files
|
13 |
+
uses: ActionsDesk/lfs-warning@v2.0
|
14 |
+
with:
|
15 |
+
filesizelimit: 10485760
|
.github/workflows/sync_hf.yaml
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Sync to Hugging Face hub
|
2 |
+
on:
|
3 |
+
push:
|
4 |
+
branches: [main]
|
5 |
+
|
6 |
+
workflow_dispatch:
|
7 |
+
|
8 |
+
jobs:
|
9 |
+
sync-to-hub:
|
10 |
+
runs-on: ubuntu-latest
|
11 |
+
steps:
|
12 |
+
- uses: actions/checkout@v3
|
13 |
+
with:
|
14 |
+
fetch-depth: 0
|
15 |
+
lfs: true
|
16 |
+
- name: Push to hub
|
17 |
+
env:
|
18 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
19 |
+
run: git push https://andymbryant:$HF_TOKEN@huggingface.co/spaces/andymbryant/data-mapper main
|
.gitignore
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
venv/
|
2 |
__pycache__/
|
3 |
*.pyc
|
|
|
|
1 |
venv/
|
2 |
__pycache__/
|
3 |
*.pyc
|
4 |
+
.env
|
README.md
CHANGED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
colorTo: coral
|
3 |
+
colorFrom: green
|
4 |
+
emoji: ✨
|
5 |
+
license: BSD 2-Clause License
|
6 |
+
title: Data Mapper
|
7 |
+
sdk: gradio
|
8 |
+
---
|
requirements.txt
CHANGED
@@ -4,3 +4,4 @@ ipykernel
|
|
4 |
pandas
|
5 |
tabulate
|
6 |
gradio
|
|
|
|
4 |
pandas
|
5 |
tabulate
|
6 |
gradio
|
7 |
+
python-dotenv
|
src/core.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import os
|
|
|
2 |
import pandas as pd
|
3 |
from langchain.output_parsers import PydanticOutputParser
|
4 |
from langchain.prompts import ChatPromptTemplate
|
@@ -10,7 +11,7 @@ from src.types import TableMapping
|
|
10 |
from src.vars import NUM_ROWS_TO_RETURN
|
11 |
from src.prompt import DATA_SCIENTIST_PROMPT_STR, SPEC_WRITER_PROMPT_STR, ENGINEER_PROMPT_STR
|
12 |
|
13 |
-
|
14 |
|
15 |
DATA_DIR_PATH = os.path.join(os.path.dirname(__file__), 'data')
|
16 |
SYNTHETIC_DATA_DIR_PATH = os.path.join(DATA_DIR_PATH, 'synthetic')
|
@@ -20,7 +21,7 @@ TRANSFORM_MODEL = ChatOpenAI(
|
|
20 |
temperature=0,
|
21 |
)
|
22 |
|
23 |
-
|
24 |
model_name='gpt-4',
|
25 |
temperature=0.1,
|
26 |
)
|
@@ -46,7 +47,7 @@ def get_table_mapping(source_df, template_df) -> TableMapping:
|
|
46 |
|
47 |
def get_code_spec(table_mapping: TableMapping) -> str:
|
48 |
writer_prompt = ChatPromptTemplate.from_template(SPEC_WRITER_PROMPT_STR)
|
49 |
-
writer_chain = writer_prompt |
|
50 |
return writer_chain.invoke({"table_mapping": str(table_mapping)})
|
51 |
|
52 |
|
|
|
1 |
import os
|
2 |
+
from dotenv import load_dotenv
|
3 |
import pandas as pd
|
4 |
from langchain.output_parsers import PydanticOutputParser
|
5 |
from langchain.prompts import ChatPromptTemplate
|
|
|
11 |
from src.vars import NUM_ROWS_TO_RETURN
|
12 |
from src.prompt import DATA_SCIENTIST_PROMPT_STR, SPEC_WRITER_PROMPT_STR, ENGINEER_PROMPT_STR
|
13 |
|
14 |
+
load_dotenv()
|
15 |
|
16 |
DATA_DIR_PATH = os.path.join(os.path.dirname(__file__), 'data')
|
17 |
SYNTHETIC_DATA_DIR_PATH = os.path.join(DATA_DIR_PATH, 'synthetic')
|
|
|
21 |
temperature=0,
|
22 |
)
|
23 |
|
24 |
+
NATURAL_LANGUAGE_MODEL = ChatOpenAI(
|
25 |
model_name='gpt-4',
|
26 |
temperature=0.1,
|
27 |
)
|
|
|
47 |
|
48 |
def get_code_spec(table_mapping: TableMapping) -> str:
|
49 |
writer_prompt = ChatPromptTemplate.from_template(SPEC_WRITER_PROMPT_STR)
|
50 |
+
writer_chain = writer_prompt | NATURAL_LANGUAGE_MODEL | StrOutputParser()
|
51 |
return writer_chain.invoke({"table_mapping": str(table_mapping)})
|
52 |
|
53 |
|
src/notebooks/brainstorm.ipynb
CHANGED
@@ -20,7 +20,9 @@
|
|
20 |
"from pygments import highlight\n",
|
21 |
"from pygments.lexers import PythonLexer\n",
|
22 |
"from pygments.formatters import HtmlFormatter\n",
|
23 |
-
"import json"
|
|
|
|
|
24 |
]
|
25 |
},
|
26 |
{
|
@@ -30,7 +32,6 @@
|
|
30 |
"outputs": [],
|
31 |
"source": [
|
32 |
"langchain.debug = True\n",
|
33 |
-
"os.environ[\"OPENAI_API_KEY\"] = \"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\"\n",
|
34 |
"data_dir_path = os.path.join(os.getcwd(), \"data\")"
|
35 |
]
|
36 |
},
|
|
|
20 |
"from pygments import highlight\n",
|
21 |
"from pygments.lexers import PythonLexer\n",
|
22 |
"from pygments.formatters import HtmlFormatter\n",
|
23 |
+
"import json\n",
|
24 |
+
"from dotenv import load_dotenv\n",
|
25 |
+
"load_dotenv()"
|
26 |
]
|
27 |
},
|
28 |
{
|
|
|
32 |
"outputs": [],
|
33 |
"source": [
|
34 |
"langchain.debug = True\n",
|
|
|
35 |
"data_dir_path = os.path.join(os.getcwd(), \"data\")"
|
36 |
]
|
37 |
},
|
src/notebooks/brainstorm2.ipynb
CHANGED
@@ -14,7 +14,9 @@
|
|
14 |
"from langchain.tools import PythonAstREPLTool\n",
|
15 |
"from langchain.chat_models import ChatOpenAI\n",
|
16 |
"from pydantic import BaseModel, Field\n",
|
17 |
-
"from langchain.memory import ConversationBufferMemory"
|
|
|
|
|
18 |
]
|
19 |
},
|
20 |
{
|
@@ -24,7 +26,6 @@
|
|
24 |
"outputs": [],
|
25 |
"source": [
|
26 |
"langchain.debug = True\n",
|
27 |
-
"os.environ[\"OPENAI_API_KEY\"] = \"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\"\n",
|
28 |
"data_dir_path = os.path.join(os.getcwd())\n",
|
29 |
"pd.set_option('display.max_rows', 20)\n",
|
30 |
"pd.set_option('display.max_columns', 20)\n",
|
|
|
14 |
"from langchain.tools import PythonAstREPLTool\n",
|
15 |
"from langchain.chat_models import ChatOpenAI\n",
|
16 |
"from pydantic import BaseModel, Field\n",
|
17 |
+
"from langchain.memory import ConversationBufferMemory\n",
|
18 |
+
"from dotenv import load_dotenv\n",
|
19 |
+
"load_dotenv()"
|
20 |
]
|
21 |
},
|
22 |
{
|
|
|
26 |
"outputs": [],
|
27 |
"source": [
|
28 |
"langchain.debug = True\n",
|
|
|
29 |
"data_dir_path = os.path.join(os.getcwd())\n",
|
30 |
"pd.set_option('display.max_rows', 20)\n",
|
31 |
"pd.set_option('display.max_columns', 20)\n",
|
src/notebooks/brainstorm3.ipynb
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
@@ -17,7 +17,9 @@
|
|
17 |
"from langchain.memory import ConversationBufferMemory\n",
|
18 |
"from langchain.schema.output_parser import StrOutputParser\n",
|
19 |
"import json\n",
|
20 |
-
"import gradio as gr"
|
|
|
|
|
21 |
]
|
22 |
},
|
23 |
{
|
@@ -27,7 +29,6 @@
|
|
27 |
"outputs": [],
|
28 |
"source": [
|
29 |
"langchain.debug = True\n",
|
30 |
-
"os.environ[\"OPENAI_API_KEY\"] = \"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\"\n",
|
31 |
"data_dir_path = os.path.join(os.getcwd())\n",
|
32 |
"pd.set_option('display.max_rows', 20)\n",
|
33 |
"pd.set_option('display.max_columns', 20)\n",
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
|
|
17 |
"from langchain.memory import ConversationBufferMemory\n",
|
18 |
"from langchain.schema.output_parser import StrOutputParser\n",
|
19 |
"import json\n",
|
20 |
+
"import gradio as gr\n",
|
21 |
+
"from dotenv import load_dotenv\n",
|
22 |
+
"load_dotenv()"
|
23 |
]
|
24 |
},
|
25 |
{
|
|
|
29 |
"outputs": [],
|
30 |
"source": [
|
31 |
"langchain.debug = True\n",
|
|
|
32 |
"data_dir_path = os.path.join(os.getcwd())\n",
|
33 |
"pd.set_option('display.max_rows', 20)\n",
|
34 |
"pd.set_option('display.max_columns', 20)\n",
|
src/notebooks/brainstorm4.ipynb
CHANGED
@@ -2,9 +2,20 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
"source": [
|
9 |
"import os\n",
|
10 |
"import pandas as pd\n",
|
@@ -17,7 +28,9 @@
|
|
17 |
"from langchain.prompts import ChatPromptTemplate\n",
|
18 |
"from langchain.tools import PythonAstREPLTool\n",
|
19 |
"from langchain.chat_models import ChatOpenAI\n",
|
20 |
-
"from langchain.schema.output_parser import StrOutputParser"
|
|
|
|
|
21 |
]
|
22 |
},
|
23 |
{
|
@@ -27,8 +40,6 @@
|
|
27 |
"outputs": [],
|
28 |
"source": [
|
29 |
"langchain.debug = False\n",
|
30 |
-
"# Throwaway key with strict usage limit\n",
|
31 |
-
"os.environ[\"OPENAI_API_KEY\"] = \"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\"\n",
|
32 |
"pd.set_option('display.max_columns', 20)\n",
|
33 |
"pd.set_option('display.max_rows', 20)"
|
34 |
]
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 90,
|
6 |
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"data": {
|
10 |
+
"text/plain": [
|
11 |
+
"True"
|
12 |
+
]
|
13 |
+
},
|
14 |
+
"execution_count": 90,
|
15 |
+
"metadata": {},
|
16 |
+
"output_type": "execute_result"
|
17 |
+
}
|
18 |
+
],
|
19 |
"source": [
|
20 |
"import os\n",
|
21 |
"import pandas as pd\n",
|
|
|
28 |
"from langchain.prompts import ChatPromptTemplate\n",
|
29 |
"from langchain.tools import PythonAstREPLTool\n",
|
30 |
"from langchain.chat_models import ChatOpenAI\n",
|
31 |
+
"from langchain.schema.output_parser import StrOutputParser\n",
|
32 |
+
"from dotenv import load_dotenv\n",
|
33 |
+
"load_dotenv()"
|
34 |
]
|
35 |
},
|
36 |
{
|
|
|
40 |
"outputs": [],
|
41 |
"source": [
|
42 |
"langchain.debug = False\n",
|
|
|
|
|
43 |
"pd.set_option('display.max_columns', 20)\n",
|
44 |
"pd.set_option('display.max_rows', 20)"
|
45 |
]
|