Niv Sardi committed on
Commit
4b45b50
1 Parent(s): 861b56b

ts: add simple REST endpoint

Browse files
Files changed (1) hide show
  1. deno/index.ts +52 -55
deno/index.ts CHANGED
@@ -1,27 +1,11 @@
1
  import PQueue from "https://deno.land/x/p_queue@1.0.1/mod.ts"
 
2
 
3
  import * as CSV from './csv.ts';
4
  import Puppet from './puppet.ts';
5
  import selectors from './selectors.ts';
6
 
7
  const puppet = new Puppet();
8
- const queue = new PQueue({
9
- concurrency: 10,
10
- timeout: 60000
11
- })
12
- let count = 0
13
- let statInterval
14
- queue.addEventListener("active", () =>
15
- console.log(`Working on item #${++count}. Size: ${queue.size} Pending: ${queue.pending}`))
16
- queue.addEventListener("next", () =>
17
- console.log(`task finished, Size: ${queue.size} Pending: ${queue.pending}`))
18
-
19
- queue.addEventListener("idle", async () => {
20
- clearInterval(statInterval)
21
- await puppet.close()
22
- console.log("all done")
23
- })
24
-
25
  async function get_logos(page, selector): {}[] {
26
  const logos = await page.$$(selector) || [];
27
  for (const i in logos) {
@@ -36,14 +20,8 @@ async function get_logos(page, selector): {}[] {
36
  return logos;
37
  }
38
 
39
-
40
- function process(o: { id: int, url: string, bco: string, name: string }): Promise<void> {
41
- const promises: Promise<void>[] = [];
42
-
43
- return puppet.run(async page => {
44
- const url = o.url.replace('http:', 'https:');
45
- promises.push(new Promise<void>((accept, _reject) => {
46
- page.once('load', async () => {
47
  try {
48
  const imgs = await get_logos(page, selectors.img_logo);
49
  const ids = await get_logos(page, selectors.id_logo);
@@ -60,49 +38,68 @@ function process(o: { id: int, url: string, bco: string, name: string }): Promis
60
  || (bb.height < 10)
61
  || (bb.x + bb.width < 0)
62
  || (bb.y + bb.height < 0)) continue;
63
- console.log('got bb', o.bco, bb)
64
 
65
  try {
66
- await logos[i].screenshot({ path: `./data/logos/${o.bco}.logo${i}.png` })
 
 
 
 
67
  annotations +=
68
- `${o.id} ${bb.x + bb.width / 2} ${bb.y + bb.height / 2} ${bb.width} ${bb.height}\n`
69
  } catch (e) {
70
  console.error(`couldn't screenshot logo: ${e}`);
71
  }
72
  }
73
  if (logos.length) {
74
- await Deno.writeTextFile(`./data/${o.bco}.chrome.full.txt`, annotations);
75
- }
76
- await page.screenshot({ path: `./data/${o.bco}.chrome.full.png`, fullPage: true })
77
- console.log(`screenshot ok for ${o.name}`);
 
78
  } catch (err) {
79
  console.error(`error in screenshot: ${err}`);
80
  }
81
- accept()
82
- })
83
- }))
 
 
 
 
 
 
 
 
 
84
 
85
- try {
86
- await page.goto(url)
87
- .catch(() => page.goto(o.url))
88
- } catch (e) {
89
- console.error(`got error: ${e}`);
 
 
90
  }
91
- await Promise.all(promises);
 
92
  })
93
- }
 
 
 
 
 
 
 
94
 
95
- async function run() {
96
- let text;
97
- try {
98
- text = await Deno.readTextFile("./data/entities.csv")
99
- } catch (e) {
100
- console.error(`couldn't read csv: ${e}`)
101
- }
102
- if (!text) return setTimeout(run, 1000)
103
- statInterval = setInterval(() =>
104
- console.log(`Size: ${queue.size} Pending: ${queue.pending}`), 1000);
105
 
106
- CSV.parse(text, o => queue.add(() => process(o)))
107
- }
108
- run()
 
 
 
1
  import PQueue from "https://deno.land/x/p_queue@1.0.1/mod.ts"
2
+ import { Application, Router } from "https://deno.land/x/oak@v9.0.0/mod.ts";
3
 
4
  import * as CSV from './csv.ts';
5
  import Puppet from './puppet.ts';
6
  import selectors from './selectors.ts';
7
 
8
  const puppet = new Puppet();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  async function get_logos(page, selector): {}[] {
10
  const logos = await page.$$(selector) || [];
11
  for (const i in logos) {
 
20
  return logos;
21
  }
22
 
23
+ async function fetch_logos(page, id, dest) {
24
+ console.error(`getting logos for: ${id}`)
 
 
 
 
 
 
25
  try {
26
  const imgs = await get_logos(page, selectors.img_logo);
27
  const ids = await get_logos(page, selectors.id_logo);
 
38
  || (bb.height < 10)
39
  || (bb.x + bb.width < 0)
40
  || (bb.y + bb.height < 0)) continue;
41
+ console.log('got bb', bb)
42
 
43
  try {
44
+ await logos[i].screenshot({
45
+ path: dest
46
+ .replace('images', 'logos')
47
+ .replace('.png', `.${i}.png`)
48
+ })
49
  annotations +=
50
+ `${id} ${bb.x + bb.width / 2} ${bb.y + bb.height / 2} ${bb.width} ${bb.height}\n`
51
  } catch (e) {
52
  console.error(`couldn't screenshot logo: ${e}`);
53
  }
54
  }
55
  if (logos.length) {
56
+ await Deno.writeTextFile(dest
57
+ .replace('images', 'labels')
58
+ .replace('png', 'txt'),
59
+ annotations);
60
+ }
61
  } catch (err) {
62
  console.error(`error in screenshot: ${err}`);
63
  }
64
+ }
65
+
66
+ const app = new Application();
67
+ const router = new Router();
68
+
69
+ const stats = {
70
+ in_flight: 0,
71
+ done: 0
72
+ }
73
+ router.post('/screenshot', async (ctx) => {
74
+ const {request, response} = ctx;
75
+ const q = await request.body().value;
76
 
77
+ stats.in_flight++;
78
+ const ret = await puppet.run(async page => {
79
+ console.error('running', q, stats)
80
+ await page.goto(q.url, {waitUntil: 'networkidle2', timeout: 60000})
81
+ await page.screenshot({ path: q.path, fullPage: true })
82
+ if (q.logos) {
83
+ await fetch_logos(page, q.id, q.logos)
84
  }
85
+ console.error(`screenshot ok: ${q.path}`)
86
+ return {response: 'ok'}
87
  })
88
+ stats.in_flight--;
89
+ stats.done++
90
+ response.body = ret
91
+ })
92
+ router.post('/bco', async (ctx) => {
93
+ const {request, response} = ctx;
94
+ const q = await request.body().value;
95
+ const ret = await process(q)
96
 
97
+ console.error(`ret: ${ret}`)
98
+ response.body = ret
99
+ });
 
 
 
 
 
 
 
100
 
101
+ app.use(router.routes())
102
+ app.use(router.allowedMethods())
103
+ const addr = '0.0.0.0:8000'
104
+ console.error(`listen on ${addr}`)
105
+ app.listen(addr)