Spaces:
Sleeping
Sleeping
File size: 5,749 Bytes
3860419 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
"""
Module `collect` - Data Handling and RudderStack Integration
This module provides functionalities to handle and send learning data to RudderStack
for the purpose of analysis and to improve the gpt-engineer system. The data is sent
only when the user gives consent to share.
Functions:
send_learning(learning): Sends learning data to RudderStack.
collect_learnings(prompt, model, temperature, config, memory, review): Processes and sends learning data.
collect_and_send_human_review(prompt, model, temperature, config, memory): Collects human feedback and sends it.
Dependencies:
hashlib: For generating SHA-256 hash (only referenced by the commented-out `steps_file_hash` helper).
typing: For type annotations.
gpt_engineer.core: Core functionalities of gpt-engineer.
gpt_engineer.applications.cli.learning: Handles the extraction of learning data.
Notes:
Data sent to RudderStack is not shared with third parties and is used solely to
improve gpt-engineer and allow it to handle a broader range of use cases.
Consent logic is in gpt_engineer/learning.py.
"""
from typing import Tuple
from gpt_engineer.applications.cli.learning import (
Learning,
Review,
extract_learning,
human_review_input,
)
from gpt_engineer.core.default.disk_memory import DiskMemory
from gpt_engineer.core.prompt import Prompt
def send_learning(learning: Learning):
    """
    Forward a single learning record to RudderStack as a "learning" event.

    Parameters
    ----------
    learning : Learning
        The record to transmit. Its ``session`` attribute is used as the
        RudderStack user id and ``to_dict()`` supplies the event properties.

    Notes
    -----
    This function is only called if consent is given to share data.
    Data is not shared to a third party. It is used with the sole purpose of
    improving gpt-engineer, and letting it handle more use cases.
    Consent logic is in gpt_engineer/learning.py.
    """
    # Imported lazily so the analytics package is only loaded when data is
    # actually being sent.
    import rudderstack.analytics as rudder_analytics

    # Configure the client before every send.
    rudder_analytics.write_key = "2Re4kqwL61GDp7S8ewe6K5dbogG"
    rudder_analytics.dataPlaneUrl = "https://gptengineerezm.dataplane.rudderstack.com"

    session_id = learning.session
    event_properties = learning.to_dict()  # type: ignore

    rudder_analytics.track(
        user_id=session_id,
        event="learning",
        properties=event_properties,
    )
def collect_learnings(
    prompt: Prompt,
    model: str,
    temperature: float,
    config: any,  # NOTE(review): builtin `any` is not a type; likely meant typing.Any
    memory: DiskMemory,
    review: Review,
):
    """
    Collect the learning data and send it to RudderStack for analysis.

    Parameters
    ----------
    prompt : Prompt
        The initial prompt that was provided to the model.
    model : str
        The name of the model used for generating the response.
    temperature : float
        The temperature setting used in the model's response generation.
    config : any
        Configuration parameters used for the learning session; passed through
        unchanged to ``extract_learning``.
    memory : DiskMemory
        An instance of DiskMemory, passed through to ``extract_learning``.
    review : Review
        An instance of Review containing human feedback on the model's response.

    Returns
    -------
    None

    Notes
    -----
    The learning is built with ``extract_learning`` and sent with
    ``send_learning``. If that raises ``RuntimeError`` (presumably because the
    event is too large for RudderStack), the tail of ``learnings.logs`` is
    replaced with a "[REMOVED ...]" marker and the send is retried once. A
    second ``RuntimeError`` is reported on stdout and swallowed, so telemetry
    problems never abort the caller.
    """
    learnings = extract_learning(prompt, model, temperature, config, memory, review)
    try:
        send_learning(learnings)
    except RuntimeError:
        # try to remove some parts of learning that might be too big
        # rudderstack max event size is 32kb
        max_size = 32 << 10  # 32KB in bytes
        current_size = len(learnings.to_json().encode("utf-8"))  # get size in bytes

        # Clamp at zero: if the serialized learning is not actually above the
        # limit, a negative overflow would make `remove_length` small or
        # negative, and `logs[:-remove_length]` would then keep an arbitrary
        # prefix (or nothing at all for 0) instead of trimming the tail.
        overflow = max(current_size - max_size, 0)

        # Add some extra characters for the "[REMOVED...]" string and for safety margin
        remove_length = overflow + len(f"[REMOVED {overflow} CHARACTERS]") + 100

        # `remove_length` is now always positive, so this drops a tail of the logs.
        learnings.logs = (
            learnings.logs[:-remove_length]
            + f"\n\n[REMOVED {remove_length} CHARACTERS]"
        )

        print(
            "WARNING: learning too big, removing some parts. "
            "Please report if this results in a crash."
        )
        try:
            send_learning(learnings)
        except RuntimeError:
            # Best effort only: give up on telemetry rather than crash the run.
            print(
                "Sending learnings crashed despite truncation. Progressing without saving learnings."
            )
# def steps_file_hash():
# """
# Compute the SHA-256 hash of the steps file.
#
# Returns
# -------
# str
# The SHA-256 hash of the steps file.
# """
# with open(steps.__file__, "r") as f:
# content = f.read()
# return hashlib.sha256(content.encode("utf-8")).hexdigest()
def collect_and_send_human_review(
    prompt: Prompt,
    model: str,
    temperature: float,
    config: Tuple[str, ...],
    memory: DiskMemory,
):
    """
    Ask the user for a review and, if one is given, submit it as a learning.

    Parameters
    ----------
    prompt : Prompt
        The initial prompt that was provided to the model.
    model : str
        The name of the model used for generating the response.
    temperature : float
        The temperature setting used in the model's response generation.
    config : Tuple[str, ...]
        Configuration parameters used for the learning session.
    memory : DiskMemory
        An instance of DiskMemory, forwarded to ``collect_learnings``.

    Returns
    -------
    None

    Notes
    -----
    The review is obtained from ``human_review_input``. When that returns a
    falsy value nothing is sent; otherwise the review and all arguments are
    handed to ``collect_learnings``.
    """
    feedback = human_review_input()
    if not feedback:
        # No review provided, so there is nothing to collect or send.
        return

    collect_learnings(prompt, model, temperature, config, memory, feedback)
|