Corianas committed on
Commit
bef2eac
·
verified ·
1 Parent(s): 6a97afa

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +80 -0
README.md ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ ---
6
+ This is a ReAct-style model trained from meta-llama/Meta-Llama-3-8B.
7
+
8
+ The dataset was parsed with:
9
+ ```
10
def extract_trajectory_info(data):
    """
    Extract the question and the numbered ReAct steps from one data entry.

    The trajectory text is expected to contain markers of the form
    "Thought N: ...", "Action N: ..." and "Observation N: ...".

    Parameters:
    data (dict): The data entry; the 'question' and 'trajectory' fields are read.

    Returns:
    dict: 'question' (str, '' when absent) plus 'thoughts', 'actions' and
    'observations', each a list of whitespace-stripped strings in order of
    appearance (empty lists when no marker is found).
    """
    import re  # local import so the snippet runs on its own

    question = data.get('question', '')
    # A missing or None trajectory is treated as empty text instead of
    # letting re.findall raise TypeError.
    trajectory = data.get('trajectory') or ''

    def find_steps(label, next_label):
        # Stop at the next *numbered* marker ("Action 2:"), not at the bare
        # word: step text such as "Action Comics" or "Thought leaders" must
        # not cut the capture short.
        pattern = rf'{label} \d+: (.+?)(?={next_label} \d+:|\Z)'
        return [step.strip() for step in re.findall(pattern, trajectory, re.DOTALL)]

    return {
        'question': question,
        'thoughts': find_steps('Thought', 'Action'),
        'actions': find_steps('Action', 'Observation'),
        'observations': find_steps('Observation', 'Thought'),
    }
39
+ # Sample usage: parse the first training example (`ds` is defined outside this snippet)
40
+ extracted_info = extract_trajectory_info(ds["train"][0])
41
+ ```
42
+ The parsed examples were then rebuilt into a new dataset with:
43
+ ```
44
# Tool instructions placed at the start of every generated example.
preamble = """Tools available:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing the keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.
"""


def _format_example(info, step):
    """Return the example text whose final thought/action is number `step`.

    `info` is the dict returned by extract_trajectory_info. For step 0 the
    text is preamble, question, thought and action. For later steps the
    earlier thought/action/observation triples are inserted before a
    "---" separator and the current thought/action.
    """
    thoughts = info['thoughts']
    actions = info['actions']
    observations = info['observations']

    text = f"{preamble}---\nQuestion: {info['question']}\n"
    if step == 0:
        text += f"Thought: {thoughts[0]}\n"
        text += f"Action: {actions[0]}\nPAUSE\n\n\n\n"
        return text

    # NOTE(review): earlier steps are written newest-first (step-1 down to 0)
    # and without a space after "Thought:". Both are kept exactly as-is
    # because this is the format the dataset was built with; confirm whether
    # chronological order was intended before changing either.
    for past in range(step - 1, -1, -1):
        text += f"Thought:{thoughts[past]}\n"
        text += f"Action: {actions[past]}\nPAUSE\n"
        text += f"Observation: {observations[past]}\n"

    text += f"---\nThought: {thoughts[step]}\n"
    text += f"Action: {actions[step]}\nPAUSE\n\n\n\n"
    return text


dataset = []
# Walk every example in the training split; each one yields one text per step.
for entry in ds['train']:
    extracted_info = extract_trajectory_info(entry)

    # Step j needs thought j, action j and observations 0..j-1. Stop at the
    # last step for which all of them exist, so a trajectory that parsed
    # into lists of unequal length cannot raise IndexError.
    complete_steps = min(
        len(extracted_info['thoughts']),
        len(extracted_info['actions']),
        len(extracted_info['observations']) + 1,
    )
    for j in range(complete_steps):
        out = _format_example(extracted_info, j)

        # Show each generated example while building the dataset.
        print(out)
        dataset.append(out)
80
+ ```