Fraser-Greenlee
committed on
Commit
•
bbad868
1
Parent(s):
c623bcf
update dreamcoder
Browse files
dreamcoder/domains/list/main.py
CHANGED
@@ -260,24 +260,23 @@ def list_options(parser):
|
|
260 |
parser.add_argument("--random-seed", type=int, default=17)
|
261 |
|
262 |
|
263 |
-
def main(
|
264 |
"""
|
265 |
Takes the return value of the `commandlineArguments()` function as input and
|
266 |
trains/tests the model on manipulating sequences of numbers.
|
267 |
"""
|
268 |
-
random.seed(
|
269 |
|
270 |
-
dataset = args.pop("dataset")
|
271 |
tasks = {
|
272 |
"Lucas-old": lambda: retrieveJSONTasks("data/list_tasks.json") + sortBootstrap(),
|
273 |
"bootstrap": make_list_bootstrap_tasks,
|
274 |
"sorting": sortBootstrap,
|
275 |
-
|
276 |
-
"Lucas-
|
277 |
-
"Lucas-
|
|
|
278 |
}[dataset]()
|
279 |
|
280 |
-
maxTasks = args.pop("maxTasks")
|
281 |
if maxTasks and len(tasks) > maxTasks:
|
282 |
necessaryTasks = [] # maxTasks will not consider these
|
283 |
if dataset.startswith("Lucas2.0") and dataset != "Lucas2.0-depth1":
|
@@ -344,67 +343,4 @@ def main(args):
|
|
344 |
return all( len(xs) == 1 and xs[0] == y for xs, y in t.examples )
|
345 |
eprint("Removed", sum(isIdentityTask(t) for t in tasks), "tasks that were just the identity function")
|
346 |
tasks = [t for t in tasks if not isIdentityTask(t) ]
|
347 |
-
|
348 |
-
prims = {"base": basePrimitives,
|
349 |
-
"McCarthy": McCarthyPrimitives,
|
350 |
-
"common": bootstrapTarget_extra,
|
351 |
-
"noLength": no_length,
|
352 |
-
"rich": primitives}[args.pop("primitives")]()
|
353 |
-
haveLength = not args.pop("noLength")
|
354 |
-
haveMap = not args.pop("noMap")
|
355 |
-
haveUnfold = not args.pop("noUnfold")
|
356 |
-
eprint(f"Including map as a primitive? {haveMap}")
|
357 |
-
eprint(f"Including length as a primitive? {haveLength}")
|
358 |
-
eprint(f"Including unfold as a primitive? {haveUnfold}")
|
359 |
-
baseGrammar = Grammar.uniform([p
|
360 |
-
for p in prims
|
361 |
-
if (p.name != "map" or haveMap) and \
|
362 |
-
(p.name != "unfold" or haveUnfold) and \
|
363 |
-
(p.name != "length" or haveLength)])
|
364 |
-
|
365 |
-
extractor = {
|
366 |
-
"learned": LearnedFeatureExtractor,
|
367 |
-
}[args.pop("extractor")]
|
368 |
-
extractor.H = args.pop("hidden")
|
369 |
-
|
370 |
-
timestamp = datetime.datetime.now().isoformat()
|
371 |
-
outputDirectory = "experimentOutputs/list/%s"%timestamp
|
372 |
-
os.system("mkdir -p %s"%outputDirectory)
|
373 |
-
|
374 |
-
args.update({
|
375 |
-
"featureExtractor": extractor,
|
376 |
-
"outputPrefix": "%s/list"%outputDirectory,
|
377 |
-
"evaluationTimeout": 0.0005,
|
378 |
-
})
|
379 |
-
|
380 |
-
|
381 |
-
eprint("Got {} list tasks".format(len(tasks)))
|
382 |
-
split = args.pop("split")
|
383 |
-
if split:
|
384 |
-
train_some = defaultdict(list)
|
385 |
-
for t in tasks:
|
386 |
-
necessary = train_necessary(t)
|
387 |
-
if not necessary:
|
388 |
-
continue
|
389 |
-
if necessary == "some":
|
390 |
-
train_some[t.name.split()[0]].append(t)
|
391 |
-
else:
|
392 |
-
t.mustTrain = True
|
393 |
-
for k in sorted(train_some):
|
394 |
-
ts = train_some[k]
|
395 |
-
random.shuffle(ts)
|
396 |
-
ts.pop().mustTrain = True
|
397 |
-
|
398 |
-
test, train = testTrainSplit(tasks, split)
|
399 |
-
if True:
|
400 |
-
test = [t for t in test
|
401 |
-
if t.name not in EASYLISTTASKS]
|
402 |
-
|
403 |
-
eprint(
|
404 |
-
"Alotted {} tasks for training and {} for testing".format(
|
405 |
-
len(train), len(test)))
|
406 |
-
else:
|
407 |
-
train = tasks
|
408 |
-
test = []
|
409 |
-
|
410 |
-
explorationCompression(baseGrammar, train, testingTasks=test, **args)
|
|
|
260 |
parser.add_argument("--random-seed", type=int, default=17)
|
261 |
|
262 |
|
263 |
+
def main(dataset='Lucas-old', maxTasks=10_000):
|
264 |
"""
|
265 |
Takes the return value of the `commandlineArguments()` function as input and
|
266 |
trains/tests the model on manipulating sequences of numbers.
|
267 |
"""
|
268 |
+
random.seed(9)
|
269 |
|
|
|
270 |
tasks = {
|
271 |
"Lucas-old": lambda: retrieveJSONTasks("data/list_tasks.json") + sortBootstrap(),
|
272 |
"bootstrap": make_list_bootstrap_tasks,
|
273 |
"sorting": sortBootstrap,
|
274 |
+
# removed as file over 10MB
|
275 |
+
# "Lucas-depth1": lambda: retrieveJSONTasks("data/list_tasks2.json")[:105],
|
276 |
+
# "Lucas-depth2": lambda: retrieveJSONTasks("data/list_tasks2.json")[:4928],
|
277 |
+
# "Lucas-depth3": lambda: retrieveJSONTasks("data/list_tasks2.json"),
|
278 |
}[dataset]()
|
279 |
|
|
|
280 |
if maxTasks and len(tasks) > maxTasks:
|
281 |
necessaryTasks = [] # maxTasks will not consider these
|
282 |
if dataset.startswith("Lucas2.0") and dataset != "Lucas2.0-depth1":
|
|
|
343 |
return all( len(xs) == 1 and xs[0] == y for xs, y in t.examples )
|
344 |
eprint("Removed", sum(isIdentityTask(t) for t in tasks), "tasks that were just the identity function")
|
345 |
tasks = [t for t in tasks if not isIdentityTask(t) ]
|
346 |
+
return tasks
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dreamcoder/domains/list/makeListTasks.py
CHANGED
@@ -85,7 +85,7 @@ def make_list_task(name, examples, **params):
|
|
85 |
yield Task(name, program_type, examples, cache=cache)
|
86 |
|
87 |
|
88 |
-
def make_list_tasks(n_examples):
|
89 |
import listroutines as lr
|
90 |
|
91 |
for routine in lr.find(count=100): # all routines
|
|
|
85 |
yield Task(name, program_type, examples, cache=cache)
|
86 |
|
87 |
|
88 |
+
def make_list_tasks(n_examples=4):
|
89 |
import listroutines as lr
|
90 |
|
91 |
for routine in lr.find(count=100): # all routines
|
dreamcoder/domains/text/makeTextTasks.py
CHANGED
@@ -91,12 +91,10 @@ def randomWords(ds, minimum=1, lb=2, ub=4):
|
|
91 |
return s
|
92 |
|
93 |
|
94 |
-
def makeTasks():
|
95 |
import random
|
96 |
random.seed(9)
|
97 |
|
98 |
-
NUMBEROFEXAMPLES = 4
|
99 |
-
|
100 |
problems = []
|
101 |
|
102 |
def toList(s): return [c for c in s]
|
@@ -108,7 +106,7 @@ def makeTasks():
|
|
108 |
if isinstance(x, list):
|
109 |
return [preprocess(z) for z in x]
|
110 |
if isinstance(x, str):
|
111 |
-
return
|
112 |
if isinstance(x, bool):
|
113 |
return x
|
114 |
assert False
|
@@ -126,25 +124,25 @@ def makeTasks():
|
|
126 |
if d1 != d2:
|
127 |
problem("Replace '%s' w/ '%s'" % (d1, d2),
|
128 |
[((x,), x.replace(d1, d2))
|
129 |
-
for _ in range(
|
130 |
for x in [randomWords(d1)]],
|
131 |
needToTrain=False)
|
132 |
for d in delimiters:
|
133 |
problem("drop first word delimited by '%s'" % d,
|
134 |
[((x,), d.join(x.split(d)[1:]))
|
135 |
-
for _ in range(
|
136 |
for x in [randomWords(d)]],
|
137 |
needToTrain=True)
|
138 |
for n in [0, 1, -1]:
|
139 |
problem("nth (n=%d) word delimited by '%s'" % (n, d),
|
140 |
[((x,), x.split(d)[n])
|
141 |
-
for _ in range(
|
142 |
for x in [randomWords(d)]],
|
143 |
needToTrain=True)
|
144 |
for d1 in delimiters:
|
145 |
problem("Append two words delimited by '%s'" % (d1),
|
146 |
[((x, y), x + d1 + y)
|
147 |
-
for _ in range(
|
148 |
for x in [randomWord()]
|
149 |
for y in [randomWord()]],
|
150 |
needToTrain=True)
|
@@ -154,20 +152,20 @@ def makeTasks():
|
|
154 |
:len(delimiters)]:
|
155 |
problem("Append two words delimited by '%s%s'" % (d1, d2),
|
156 |
[((x, y), x + d1 + d2 + y)
|
157 |
-
for _ in range(
|
158 |
for x in [randomWord()]
|
159 |
for y in [randomWord()]],
|
160 |
needToTrain=True)
|
161 |
for n in range(1, 6):
|
162 |
problem("Drop last %d characters" % n,
|
163 |
[((x,), x[:-n])
|
164 |
-
for _ in range(
|
165 |
for x in [randomWord(minimum=n)]],
|
166 |
needToTrain=True)
|
167 |
if n > 1:
|
168 |
problem("Take first %d characters" % n,
|
169 |
[((x,), x[:n])
|
170 |
-
for _ in range(
|
171 |
for x in [randomWord(minimum=n)]],
|
172 |
needToTrain=True)
|
173 |
for d1, d2 in randomPermutation(
|
@@ -176,7 +174,7 @@ def makeTasks():
|
|
176 |
:len(delimiters)]:
|
177 |
problem("Extract word delimited by '%s' - '%s'" % (d1, d2),
|
178 |
[((a + d1 + b + d2 + c + d + e,), b)
|
179 |
-
for _ in range(int(
|
180 |
for d in [d1, d2]
|
181 |
for a in [randomWord()]
|
182 |
for b in [randomWord()]
|
@@ -187,7 +185,7 @@ def makeTasks():
|
|
187 |
for n in range(len(delimiters)):
|
188 |
problem("First letters of words (%s)" % ("I" * (1 + n)),
|
189 |
[((x,), "".join(map(lambda z: z[0], x.split(' '))))
|
190 |
-
for _ in range(
|
191 |
for x in [randomWords(' ')]
|
192 |
],
|
193 |
needToTrain=True)
|
@@ -195,27 +193,27 @@ def makeTasks():
|
|
195 |
for d in delimiters:
|
196 |
problem("Take first character and append '%s'" % d,
|
197 |
[((x,), x[0] + d)
|
198 |
-
for _ in range(
|
199 |
for x in [randomWord()]],
|
200 |
needToTrain=True)
|
201 |
|
202 |
for n in range(len(delimiters)):
|
203 |
problem("Abbreviate separate words (%s)" % ("I" * (n + 1)),
|
204 |
[((x, y), "%s.%s." % (x[0], y[0]))
|
205 |
-
for _ in range(
|
206 |
for y in [randomWord()]
|
207 |
for x in [randomWord()]])
|
208 |
d = delimiters[n]
|
209 |
problem("Abbreviate words separated by '%s'" % d,
|
210 |
[((x + d + y,), "%s.%s." % (x[0], y[0]))
|
211 |
-
for _ in range(
|
212 |
for y in [randomWord()]
|
213 |
for x in [randomWord()]])
|
214 |
|
215 |
for n in range(len(delimiters)):
|
216 |
problem("Append 2 strings (%s)" % ('I' * (n + 1)),
|
217 |
[((x, y), x + y)
|
218 |
-
for _ in range(
|
219 |
for y in [randomWord()]
|
220 |
for x in [randomWord()]],
|
221 |
needToTrain=True)
|
@@ -224,33 +222,33 @@ def makeTasks():
|
|
224 |
w = randomWord(minimum=3)
|
225 |
problem("Prepend '%s'" % w,
|
226 |
[((x,), w + x)
|
227 |
-
for _ in range(
|
228 |
for x in [randomWord()]])
|
229 |
w = randomWord(minimum=3)
|
230 |
problem("Append '%s'" % w,
|
231 |
[((x,), x + w)
|
232 |
-
for _ in range(
|
233 |
for x in [randomWord()]])
|
234 |
w = randomWord(minimum=3)
|
235 |
problem("Prepend '%s' to first word" % w,
|
236 |
[((x + ' ' + y,), w + x)
|
237 |
-
for _ in range(
|
238 |
for x in [randomWord()]
|
239 |
for y in [randomWord()]])
|
240 |
|
241 |
for n in range(1,6):
|
242 |
problem("parentheses around a single word (%s)"%('I'*n),
|
243 |
[((w,),"(%s)"%w)
|
244 |
-
for _ in range(
|
245 |
for w in [randomWord()] ])
|
246 |
problem("parentheses around first word",
|
247 |
[((w + " " + s,),"(%s)"%w)
|
248 |
-
for _ in range(
|
249 |
for w in [randomWord()]
|
250 |
for s in [randomWords(" ")] ])
|
251 |
problem("parentheses around second word",
|
252 |
[((s,), "(%s)"%(s.split(" ")[1]))
|
253 |
-
for _ in range(
|
254 |
for s in [randomWords(" ")] ])
|
255 |
|
256 |
allowed = [d for d in delimiters if d not in "()"]
|
@@ -258,7 +256,7 @@ def makeTasks():
|
|
258 |
problem("parentheses around word delimited by '%s' & '%s'"%(d1,d2),
|
259 |
[((prefix + d1 + word + d2 + suffix,),
|
260 |
prefix + d1 + '(' + word + ')' + d2 + suffix)
|
261 |
-
for _ in range(
|
262 |
for prefix in [randomWords("", lb=0, ub=1)]
|
263 |
for suffix in [randomWords(allowed, ub=2, lb=1)]
|
264 |
for word in [randomWord()] ])
|
@@ -267,7 +265,7 @@ def makeTasks():
|
|
267 |
w = randomWord(minimum=3)
|
268 |
problem("ensure suffix `%s`"%w,
|
269 |
[ ((s + (w if f else ""),), s + w)
|
270 |
-
for _ in range(
|
271 |
for s in [randomWords(" ")]
|
272 |
for f in [random.choice([True,False])] ])
|
273 |
|
|
|
91 |
return s
|
92 |
|
93 |
|
94 |
+
def makeTasks(n_examples = 4):
|
95 |
import random
|
96 |
random.seed(9)
|
97 |
|
|
|
|
|
98 |
problems = []
|
99 |
|
100 |
def toList(s): return [c for c in s]
|
|
|
106 |
if isinstance(x, list):
|
107 |
return [preprocess(z) for z in x]
|
108 |
if isinstance(x, str):
|
109 |
+
return x
|
110 |
if isinstance(x, bool):
|
111 |
return x
|
112 |
assert False
|
|
|
124 |
if d1 != d2:
|
125 |
problem("Replace '%s' w/ '%s'" % (d1, d2),
|
126 |
[((x,), x.replace(d1, d2))
|
127 |
+
for _ in range(n_examples)
|
128 |
for x in [randomWords(d1)]],
|
129 |
needToTrain=False)
|
130 |
for d in delimiters:
|
131 |
problem("drop first word delimited by '%s'" % d,
|
132 |
[((x,), d.join(x.split(d)[1:]))
|
133 |
+
for _ in range(n_examples)
|
134 |
for x in [randomWords(d)]],
|
135 |
needToTrain=True)
|
136 |
for n in [0, 1, -1]:
|
137 |
problem("nth (n=%d) word delimited by '%s'" % (n, d),
|
138 |
[((x,), x.split(d)[n])
|
139 |
+
for _ in range(n_examples)
|
140 |
for x in [randomWords(d)]],
|
141 |
needToTrain=True)
|
142 |
for d1 in delimiters:
|
143 |
problem("Append two words delimited by '%s'" % (d1),
|
144 |
[((x, y), x + d1 + y)
|
145 |
+
for _ in range(n_examples)
|
146 |
for x in [randomWord()]
|
147 |
for y in [randomWord()]],
|
148 |
needToTrain=True)
|
|
|
152 |
:len(delimiters)]:
|
153 |
problem("Append two words delimited by '%s%s'" % (d1, d2),
|
154 |
[((x, y), x + d1 + d2 + y)
|
155 |
+
for _ in range(n_examples)
|
156 |
for x in [randomWord()]
|
157 |
for y in [randomWord()]],
|
158 |
needToTrain=True)
|
159 |
for n in range(1, 6):
|
160 |
problem("Drop last %d characters" % n,
|
161 |
[((x,), x[:-n])
|
162 |
+
for _ in range(n_examples)
|
163 |
for x in [randomWord(minimum=n)]],
|
164 |
needToTrain=True)
|
165 |
if n > 1:
|
166 |
problem("Take first %d characters" % n,
|
167 |
[((x,), x[:n])
|
168 |
+
for _ in range(n_examples)
|
169 |
for x in [randomWord(minimum=n)]],
|
170 |
needToTrain=True)
|
171 |
for d1, d2 in randomPermutation(
|
|
|
174 |
:len(delimiters)]:
|
175 |
problem("Extract word delimited by '%s' - '%s'" % (d1, d2),
|
176 |
[((a + d1 + b + d2 + c + d + e,), b)
|
177 |
+
for _ in range(int(n_examples / 2))
|
178 |
for d in [d1, d2]
|
179 |
for a in [randomWord()]
|
180 |
for b in [randomWord()]
|
|
|
185 |
for n in range(len(delimiters)):
|
186 |
problem("First letters of words (%s)" % ("I" * (1 + n)),
|
187 |
[((x,), "".join(map(lambda z: z[0], x.split(' '))))
|
188 |
+
for _ in range(n_examples)
|
189 |
for x in [randomWords(' ')]
|
190 |
],
|
191 |
needToTrain=True)
|
|
|
193 |
for d in delimiters:
|
194 |
problem("Take first character and append '%s'" % d,
|
195 |
[((x,), x[0] + d)
|
196 |
+
for _ in range(n_examples)
|
197 |
for x in [randomWord()]],
|
198 |
needToTrain=True)
|
199 |
|
200 |
for n in range(len(delimiters)):
|
201 |
problem("Abbreviate separate words (%s)" % ("I" * (n + 1)),
|
202 |
[((x, y), "%s.%s." % (x[0], y[0]))
|
203 |
+
for _ in range(n_examples)
|
204 |
for y in [randomWord()]
|
205 |
for x in [randomWord()]])
|
206 |
d = delimiters[n]
|
207 |
problem("Abbreviate words separated by '%s'" % d,
|
208 |
[((x + d + y,), "%s.%s." % (x[0], y[0]))
|
209 |
+
for _ in range(n_examples)
|
210 |
for y in [randomWord()]
|
211 |
for x in [randomWord()]])
|
212 |
|
213 |
for n in range(len(delimiters)):
|
214 |
problem("Append 2 strings (%s)" % ('I' * (n + 1)),
|
215 |
[((x, y), x + y)
|
216 |
+
for _ in range(n_examples)
|
217 |
for y in [randomWord()]
|
218 |
for x in [randomWord()]],
|
219 |
needToTrain=True)
|
|
|
222 |
w = randomWord(minimum=3)
|
223 |
problem("Prepend '%s'" % w,
|
224 |
[((x,), w + x)
|
225 |
+
for _ in range(n_examples)
|
226 |
for x in [randomWord()]])
|
227 |
w = randomWord(minimum=3)
|
228 |
problem("Append '%s'" % w,
|
229 |
[((x,), x + w)
|
230 |
+
for _ in range(n_examples)
|
231 |
for x in [randomWord()]])
|
232 |
w = randomWord(minimum=3)
|
233 |
problem("Prepend '%s' to first word" % w,
|
234 |
[((x + ' ' + y,), w + x)
|
235 |
+
for _ in range(n_examples)
|
236 |
for x in [randomWord()]
|
237 |
for y in [randomWord()]])
|
238 |
|
239 |
for n in range(1,6):
|
240 |
problem("parentheses around a single word (%s)"%('I'*n),
|
241 |
[((w,),"(%s)"%w)
|
242 |
+
for _ in range(n_examples)
|
243 |
for w in [randomWord()] ])
|
244 |
problem("parentheses around first word",
|
245 |
[((w + " " + s,),"(%s)"%w)
|
246 |
+
for _ in range(n_examples)
|
247 |
for w in [randomWord()]
|
248 |
for s in [randomWords(" ")] ])
|
249 |
problem("parentheses around second word",
|
250 |
[((s,), "(%s)"%(s.split(" ")[1]))
|
251 |
+
for _ in range(n_examples)
|
252 |
for s in [randomWords(" ")] ])
|
253 |
|
254 |
allowed = [d for d in delimiters if d not in "()"]
|
|
|
256 |
problem("parentheses around word delimited by '%s' & '%s'"%(d1,d2),
|
257 |
[((prefix + d1 + word + d2 + suffix,),
|
258 |
prefix + d1 + '(' + word + ')' + d2 + suffix)
|
259 |
+
for _ in range(n_examples)
|
260 |
for prefix in [randomWords("", lb=0, ub=1)]
|
261 |
for suffix in [randomWords(allowed, ub=2, lb=1)]
|
262 |
for word in [randomWord()] ])
|
|
|
265 |
w = randomWord(minimum=3)
|
266 |
problem("ensure suffix `%s`"%w,
|
267 |
[ ((s + (w if f else ""),), s + w)
|
268 |
+
for _ in range(n_examples)
|
269 |
for s in [randomWords(" ")]
|
270 |
for f in [random.choice([True,False])] ])
|
271 |
|