Commit
·
e233c57
1
Parent(s):
134c095
Add LC_code and make everything run again.
Browse files- CF_Code.py +5 -3
- CF_CodeCollab.yaml +2 -6
- CF_CodeCritic.py +3 -4
- CF_CodeDebug.yaml +2 -6
- CF_CodeReflect.yaml +2 -6
- CF_CodeTesting.py +2 -2
- CF_CodeTesting.yaml +2 -2
- CodeTesting.py +2 -2
- LC_Code.py +8 -0
- LC_Code.yaml +84 -0
- __init__.py +34 -23
- src/evaluation/testing_utils_codeforces.py +2 -2
CF_Code.py
CHANGED
@@ -1,8 +1,10 @@
|
|
|
|
1 |
import os
|
2 |
|
3 |
-
from
|
4 |
-
|
5 |
-
|
|
|
6 |
|
7 |
|
8 |
class CF_Code(OpenAIChatAtomicFlow):
|
|
|
1 |
+
import importlib
|
2 |
import os
|
3 |
|
4 |
+
from local.OpenAIChatAtomicFlow import OpenAIChatAtomicFlow
|
5 |
+
|
6 |
+
# flow_module = importlib.import_module(os.getenv("OpenAIChatAtomicFlow_module"))
|
7 |
+
# OpenAIChatAtomicFlow = getattr(flow_module, "OpenAIChatAtomicFlow")
|
8 |
|
9 |
|
10 |
class CF_Code(OpenAIChatAtomicFlow):
|
CF_CodeCollab.yaml
CHANGED
@@ -22,9 +22,7 @@ output_keys:
|
|
22 |
- "code"
|
23 |
|
24 |
subflows_config:
|
25 |
-
- _target_:
|
26 |
-
repository_id: ${oc.env:CC_FLOWS}
|
27 |
-
class_name: CF_Code
|
28 |
overrides:
|
29 |
name: "CodeGenerator"
|
30 |
model_name: "gpt-4"
|
@@ -64,9 +62,7 @@ subflows_config:
|
|
64 |
output_keys:
|
65 |
- "code"
|
66 |
- "end_of_interaction"
|
67 |
-
- _target_:
|
68 |
-
repository_id: ${oc.env:CC_FLOWS}
|
69 |
-
class_name: CF_CodeCritic
|
70 |
overrides:
|
71 |
name: CodeCritic
|
72 |
output_data_transformations:
|
|
|
22 |
- "code"
|
23 |
|
24 |
subflows_config:
|
25 |
+
- _target_: ${oc.env:CC_flows_module}.CF_Code.instantiate_from_default_config
|
|
|
|
|
26 |
overrides:
|
27 |
name: "CodeGenerator"
|
28 |
model_name: "gpt-4"
|
|
|
62 |
output_keys:
|
63 |
- "code"
|
64 |
- "end_of_interaction"
|
65 |
+
- _target_: ${oc.env:CC_flows_module}.CF_CodeCritic.instantiate_from_default_config
|
|
|
|
|
66 |
overrides:
|
67 |
name: CodeCritic
|
68 |
output_data_transformations:
|
CF_CodeCritic.py
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
import os
|
|
|
2 |
|
3 |
-
|
4 |
-
|
5 |
-
OpenAIChatAtomicFlow = load_class(repository_id, "OpenAIChatAtomicFlow")
|
6 |
-
|
7 |
|
8 |
|
9 |
class CF_CodeCritic(OpenAIChatAtomicFlow):
|
|
|
1 |
import os
|
2 |
+
import importlib
|
3 |
|
4 |
+
flow_module = importlib.import_module(os.getenv("OpenAIChatAtomicFlow_module"))
|
5 |
+
OpenAIChatAtomicFlow = getattr(flow_module, "OpenAIChatAtomicFlow")
|
|
|
|
|
6 |
|
7 |
|
8 |
class CF_CodeCritic(OpenAIChatAtomicFlow):
|
CF_CodeDebug.yaml
CHANGED
@@ -23,9 +23,7 @@ output_keys:
|
|
23 |
- "code"
|
24 |
|
25 |
subflows_config:
|
26 |
-
- _target_:
|
27 |
-
repository_id: ${oc.env:CC_FLOWS}
|
28 |
-
class_name: CF_Code
|
29 |
overrides:
|
30 |
name: "CodeGenerator"
|
31 |
model_name: "gpt-4"
|
@@ -61,8 +59,6 @@ subflows_config:
|
|
61 |
output_keys:
|
62 |
- "code"
|
63 |
- "end_of_interaction"
|
64 |
-
- _target_:
|
65 |
-
repository_id: ${oc.env:CC_FLOWS}
|
66 |
-
class_name: CF_CodeTesting
|
67 |
overrides:
|
68 |
name: "CodeTestingCritic"
|
|
|
23 |
- "code"
|
24 |
|
25 |
subflows_config:
|
26 |
+
- _target_: ${oc.env:CC_flows_module}.CF_Code.instantiate_from_default_config
|
|
|
|
|
27 |
overrides:
|
28 |
name: "CodeGenerator"
|
29 |
model_name: "gpt-4"
|
|
|
59 |
output_keys:
|
60 |
- "code"
|
61 |
- "end_of_interaction"
|
62 |
+
- _target_: ${oc.env:CC_flows_module}.CF_CodeTesting.instantiate_from_default_config
|
|
|
|
|
63 |
overrides:
|
64 |
name: "CodeTestingCritic"
|
CF_CodeReflect.yaml
CHANGED
@@ -22,9 +22,7 @@ output_keys:
|
|
22 |
- "code"
|
23 |
|
24 |
subflows_config:
|
25 |
-
- _target_:
|
26 |
-
repository_id: ${oc.env:CC_FLOWS}
|
27 |
-
class_name: CF_Code
|
28 |
overrides:
|
29 |
name: "CodeGenerator"
|
30 |
model_name: "gpt-4"
|
@@ -48,6 +46,4 @@ subflows_config:
|
|
48 |
- "code"
|
49 |
- "end_of_interaction"
|
50 |
|
51 |
-
- _target_:
|
52 |
-
repository_id: ${oc.env:CC_FLOWS}
|
53 |
-
class_name: FixedReply_CodeReflect
|
|
|
22 |
- "code"
|
23 |
|
24 |
subflows_config:
|
25 |
+
- _target_: ${oc.env:CC_flows_module}.CF_Code.instantiate_from_default_config
|
|
|
|
|
26 |
overrides:
|
27 |
name: "CodeGenerator"
|
28 |
model_name: "gpt-4"
|
|
|
46 |
- "code"
|
47 |
- "end_of_interaction"
|
48 |
|
49 |
+
- _target_: ${oc.env:CC_flows_module}.FixedReply_CodeReflect.instantiate_from_default_config
|
|
|
|
CF_CodeTesting.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
from typing import Any, Dict
|
2 |
|
3 |
-
from flows import
|
4 |
from .src.evaluation import testing_utils_codeforces
|
5 |
from .CodeTesting import CodeTesting
|
6 |
|
7 |
-
log =
|
8 |
|
9 |
# ToDo: Add a flag to control whether hidden, public, or both tests should be used for evaluation
|
10 |
|
|
|
1 |
from typing import Any, Dict
|
2 |
|
3 |
+
from flows import logging
|
4 |
from .src.evaluation import testing_utils_codeforces
|
5 |
from .CodeTesting import CodeTesting
|
6 |
|
7 |
+
log = logging.get_logger(__name__)
|
8 |
|
9 |
# ToDo: Add a flag to control whether hidden, public, or both tests should be used for evaluation
|
10 |
|
CF_CodeTesting.yaml
CHANGED
@@ -9,10 +9,10 @@ output_keys:
|
|
9 |
- "all_tests_passed"
|
10 |
- "testing_results_summary"
|
11 |
output_data_transformations:
|
12 |
-
- _target_:
|
13 |
input_key: "raw_response.public_tests_results"
|
14 |
output_key: "all_tests_passed"
|
15 |
-
- _target_:
|
16 |
output_key: "testing_results_summary"
|
17 |
|
18 |
single_test_error_message: True
|
|
|
9 |
- "all_tests_passed"
|
10 |
- "testing_results_summary"
|
11 |
output_data_transformations:
|
12 |
+
- _target_: ${oc.env:CC_flows_module}.src.data_transformations.CorrectnessFlag # ToDo: This import style will not work if the flow is synced under the current implementation, because the outer directory is then named by a hash rather than by the flow name. Figure out how to do the import robustly (e.g., using relative imports).
|
13 |
input_key: "raw_response.public_tests_results"
|
14 |
output_key: "all_tests_passed"
|
15 |
+
- _target_: ${oc.env:CC_flows_module}.src.data_transformations.TestingResultsSummaryGeneration
|
16 |
output_key: "testing_results_summary"
|
17 |
|
18 |
single_test_error_message: True
|
CodeTesting.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
from copy import deepcopy
|
2 |
from typing import Optional, Any, List, Dict
|
3 |
|
4 |
-
from flows import
|
5 |
from flows.base_flows.abstract import AtomicFlow
|
6 |
|
7 |
-
log =
|
8 |
|
9 |
|
10 |
class CodeTesting(AtomicFlow):
|
|
|
1 |
from copy import deepcopy
|
2 |
from typing import Optional, Any, List, Dict
|
3 |
|
4 |
+
from flows import logging
|
5 |
from flows.base_flows.abstract import AtomicFlow
|
6 |
|
7 |
+
log = logging.get_logger(__name__)
|
8 |
|
9 |
|
10 |
class CodeTesting(AtomicFlow):
|
LC_Code.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
from local.OpenAIChatAtomicFlow import OpenAIChatAtomicFlow
|
4 |
+
|
5 |
+
|
6 |
+
class LC_Code(OpenAIChatAtomicFlow):
|
7 |
+
def __init__(self, **kwargs):
|
8 |
+
super().__init__(**kwargs)
|
LC_Code.yaml
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: "Code_Flow"
|
2 |
+
verbose: True
|
3 |
+
description: "ToDO: add description"
|
4 |
+
|
5 |
+
model_name: "gpt-4"
|
6 |
+
generation_parameters:
|
7 |
+
n: 1
|
8 |
+
max_tokens: 3000
|
9 |
+
temperature: 0.3
|
10 |
+
|
11 |
+
model_kwargs:
|
12 |
+
top_p: 0.2
|
13 |
+
frequency_penalty: 0
|
14 |
+
presence_penalty: 0
|
15 |
+
|
16 |
+
system_message_prompt_template:
|
17 |
+
_target_: langchain.PromptTemplate
|
18 |
+
template: |2-
|
19 |
+
Your goal is to provide executable Python code that solves a coding interview problem. The code should correctly handle all corner cases in order to pass the hidden test cases, which are used to evaluate the correctness of the solution.
|
20 |
+
|
21 |
+
The user will specify the problem by providing you with:
|
22 |
+
- the problem statement
|
23 |
+
- example test cases
|
24 |
+
- the constraints of the problem
|
25 |
+
|
26 |
+
The user will provide you with a task and an output format that you will strictly follow.
|
27 |
+
input_variables: []
|
28 |
+
template_format: jinja2
|
29 |
+
|
30 |
+
human_message_prompt_template:
|
31 |
+
_target_: langchain.PromptTemplate
|
32 |
+
template: "{{query}}"
|
33 |
+
input_variables:
|
34 |
+
- "query"
|
35 |
+
template_format: jinja2
|
36 |
+
|
37 |
+
query_message_prompt_template:
|
38 |
+
_target_: langchain.PromptTemplate
|
39 |
+
template: |2-
|
40 |
+
# Problem statement
|
41 |
+
{{problem_description}}
|
42 |
+
|
43 |
+
{{io_description}}
|
44 |
+
|
45 |
+
# Constraints
|
46 |
+
{{constraints}}
|
47 |
+
|
48 |
+
|
49 |
+
Return Python code that solves the problem. The code should extend the following stub:
|
50 |
+
```python
|
51 |
+
{{python_stub}}
|
52 |
+
```
|
53 |
+
without changing the method signatures.
|
54 |
+
Reply in the following format:
|
55 |
+
```python
|
56 |
+
{{code_placeholder}}
|
57 |
+
```
|
58 |
+
input_variables:
|
59 |
+
- "problem_description"
|
60 |
+
- "io_description"
|
61 |
+
- "constraints"
|
62 |
+
- "python_stub"
|
63 |
+
partial_variables:
|
64 |
+
code_placeholder: "{{python_code}}"
|
65 |
+
template_format: jinja2
|
66 |
+
|
67 |
+
input_data_transformations: []
|
68 |
+
input_keys:
|
69 |
+
- "problem_description"
|
70 |
+
- "io_description"
|
71 |
+
- "constraints"
|
72 |
+
- "python_stub"
|
73 |
+
|
74 |
+
output_data_transformations:
|
75 |
+
- _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
|
76 |
+
regex: '(?<=```python)([\s\S]*?)(?=```)'
|
77 |
+
regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
78 |
+
input_key: "raw_response"
|
79 |
+
output_key: "code"
|
80 |
+
strip: True
|
81 |
+
assert_unique: True
|
82 |
+
verbose: True
|
83 |
+
output_keys:
|
84 |
+
- "code"
|
__init__.py
CHANGED
@@ -1,5 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# cf-code
|
2 |
from .CF_Code import CF_Code
|
|
|
|
|
3 |
|
4 |
# cf-code_reflect
|
5 |
from .FixedReply_CodeReflect import FixedReply_CodeReflect
|
@@ -10,30 +21,30 @@ from .CF_CodeCritic import CF_CodeCritic
|
|
10 |
from .CF_CodeCollab import CF_CodeCollab
|
11 |
|
12 |
# cf-plan-code (and cf-plan_oracle-code)
|
13 |
-
from .CF_Plan import CF_Plan
|
14 |
-
from .CF_CodeWithPlan import CF_CodeWithPlan
|
15 |
-
from .CF_Plan_Code import CF_Plan_Code
|
16 |
-
|
17 |
-
# cf-plan_reflect-code
|
18 |
-
from .FixedReply_PlanReflect import FixedReply_PlanReflect
|
19 |
-
from .CF_PlanReflect import CF_PlanReflect
|
20 |
-
from .CF_PlanReflect_Code import CF_PlanReflect_Code
|
21 |
-
|
22 |
-
# cf-plan_collab-code
|
23 |
-
from .CF_PlanCritic import CF_PlanCritic
|
24 |
-
from .CF_PlanCollab import CF_PlanCollab
|
25 |
-
from .CF_PlanCollab_Code import CF_PlanCollab_Code
|
26 |
-
|
27 |
# cf-code_debug
|
28 |
from .CF_CodeTesting import CF_CodeTesting
|
29 |
from .CF_CodeDebug import CF_CodeDebug
|
30 |
|
31 |
-
# cf-code_debug_collab
|
32 |
-
from .CF_CodeCriticWrongAttempt import CF_CodeCriticWrongAttempt
|
33 |
-
from .CF_CodeDebugCritic import CF_CodeDebugCritic
|
34 |
-
from .CF_CodeDebugCollab import CF_CodeDebugCollab
|
35 |
-
|
36 |
-
# cf-plan_oracle-code_debug_collab
|
37 |
-
from .CF_CodeCriticWrongAttemptWithPlan import CF_CodeCriticWrongAttemptWithPlan
|
38 |
-
from .CF_CodeDebugCriticWithPlan import CF_CodeDebugCriticWithPlan
|
39 |
-
from .CF_CodeDebugCollabWithPlan import CF_CodeDebugCollabWithPlan
|
|
|
1 |
+
import os
|
2 |
+
from flows import flow_verse
|
3 |
+
|
4 |
+
dependencies = [
|
5 |
+
{"url": os.getenv("OpenAIChatAtomicFlow_repo"), "revision": "main"},
|
6 |
+
# {"url": os.getenv("CC_flows_repo"), "revision": "main"}
|
7 |
+
]
|
8 |
+
flow_verse.sync_dependencies(dependencies)
|
9 |
+
|
10 |
# cf-code
|
11 |
from .CF_Code import CF_Code
|
12 |
+
# lc-code
|
13 |
+
from .LC_Code import LC_Code
|
14 |
|
15 |
# cf-code_reflect
|
16 |
from .FixedReply_CodeReflect import FixedReply_CodeReflect
|
|
|
21 |
from .CF_CodeCollab import CF_CodeCollab
|
22 |
|
23 |
# cf-plan-code (and cf-plan_oracle-code)
|
24 |
+
# from .CF_Plan import CF_Plan
|
25 |
+
# from .CF_CodeWithPlan import CF_CodeWithPlan
|
26 |
+
# from .CF_Plan_Code import CF_Plan_Code
|
27 |
+
#
|
28 |
+
# # cf-plan_reflect-code
|
29 |
+
# from .FixedReply_PlanReflect import FixedReply_PlanReflect
|
30 |
+
# from .CF_PlanReflect import CF_PlanReflect
|
31 |
+
# from .CF_PlanReflect_Code import CF_PlanReflect_Code
|
32 |
+
#
|
33 |
+
# # cf-plan_collab-code
|
34 |
+
# from .CF_PlanCritic import CF_PlanCritic
|
35 |
+
# from .CF_PlanCollab import CF_PlanCollab
|
36 |
+
# from .CF_PlanCollab_Code import CF_PlanCollab_Code
|
37 |
+
#
|
38 |
# cf-code_debug
|
39 |
from .CF_CodeTesting import CF_CodeTesting
|
40 |
from .CF_CodeDebug import CF_CodeDebug
|
41 |
|
42 |
+
# # cf-code_debug_collab
|
43 |
+
# from .CF_CodeCriticWrongAttempt import CF_CodeCriticWrongAttempt
|
44 |
+
# from .CF_CodeDebugCritic import CF_CodeDebugCritic
|
45 |
+
# from .CF_CodeDebugCollab import CF_CodeDebugCollab
|
46 |
+
#
|
47 |
+
# # cf-plan_oracle-code_debug_collab
|
48 |
+
# from .CF_CodeCriticWrongAttemptWithPlan import CF_CodeCriticWrongAttemptWithPlan
|
49 |
+
# from .CF_CodeDebugCriticWithPlan import CF_CodeDebugCriticWithPlan
|
50 |
+
# from .CF_CodeDebugCollabWithPlan import CF_CodeDebugCollabWithPlan
|
src/evaluation/testing_utils_codeforces.py
CHANGED
@@ -19,9 +19,9 @@ import threading
|
|
19 |
|
20 |
from src.datasets.schema import assert_test_format_codeforces
|
21 |
|
22 |
-
import logging
|
23 |
|
24 |
-
log = logging.
|
25 |
lock = threading.Lock()
|
26 |
|
27 |
|
|
|
19 |
|
20 |
from src.datasets.schema import assert_test_format_codeforces
|
21 |
|
22 |
+
from flows import logging
|
23 |
|
24 |
+
log = logging.get_logger(__name__)
|
25 |
lock = threading.Lock()
|
26 |
|
27 |
|