-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path1-scope.process.js
More file actions
110 lines (101 loc) · 3.7 KB
/
Copy path1-scope.process.js
File metadata and controls
110 lines (101 loc) · 3.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
// IMPORTS
import { Env } from '../helper/env.helper.js';
import { logger } from '../helper/logger.helper.js';
import { MongoDB } from '../dao/mongodb.dao.js';
import { Process } from './process.process.js';
import { GitHubApiRequest } from '../model/github-api-request.model.js';
import { GitHubApiClient } from '../helper/github-api-client.helper.js';
import { GitHubApiQueue } from '../helper/github-api-queue.helper.js';
/**
 * Represents a process to scope the search space.
 *
 * Starting from one repository-size range, the process asks the GitHub
 * repository search endpoint how many repositories match each range. A range
 * with more matches than `MAX_RESULTS_PER_PAGE` is split in two and queried
 * again; every other non-empty range is persisted with `mongoDb.saveRange`.
 */
class ProcessScope extends Process {
  /**
   * Builds the dependencies (environment, MongoDB access, one GitHub API
   * client per configured token, and the request queue) and sets the tuning
   * constants used by `process()`.
   */
  constructor() {
    super();

    // Dependencies.
    this.env = new Env(); // Environment variables.
    this.mongoDb = new MongoDB(
      this.env.getMongoDbUrl(),
      this.env.getMongoDbName(),
    ); // MongoDB connection.
    this.githubApiClients = this.env
      .getGitHubApiTokens()
      .map((token) => new GitHubApiClient(token)); // GitHub search API clients (one per token).
    this.githubApiQueue = new GitHubApiQueue(this.githubApiClients); // GitHub search API queue.
    this.idleTime = 0; // Milliseconds spent without queueing a request or receiving a response.

    // Constants.
    this.QUEUE_WAITING_TIME = 1000; // Polling interval (ms) while the range queue is empty.
    this.IDLE_TIME_MAX = 1000 * 60 * 1; // 1 minute.
    this.MIN_RANGE_SIZE = 500; // 500 KB
    this.MAX_RANGE_SIZE = 1000000; // 1 GB
    this.MIN_STARS = 100;
    // NOTE(review): presumably mirrors the GitHub Search API cap of 1,000
    // results per query rather than a page size; name kept for compatibility.
    this.MAX_RESULTS_PER_PAGE = 1000;
  }

  /**
   * Executes the process to scope the search space.
   *
   * Connects to MongoDB, then repeatedly drains the queue of size ranges into
   * GitHub search requests. Response callbacks either persist a range or push
   * its two halves back onto the range queue. Once the range queue has been
   * empty and no response has arrived for `IDLE_TIME_MAX` milliseconds, the
   * API queue is stopped and MongoDB is disconnected.
   */
  process() {
    this.githubApiQueue.start();
    const rangeQueue = [{ min: this.MIN_RANGE_SIZE, max: this.MAX_RANGE_SIZE }];

    // Builds the search request for one size range, including the callback
    // that decides whether the range is skipped, kept, or split.
    const buildRequest = ({ min, max }) => {
      // FILTER: size min..max && stars >= MIN_STARS
      const query = `size:${min}..${max} stars:>=${this.MIN_STARS}`;
      const url = `https://api.github.com/search/repositories?q=${encodeURIComponent(query)}`;
      return new GitHubApiRequest(url, {}, (result) => {
        // A response counts as activity: without this reset, a long run of
        // responses that cause no split could let the idle timer expire while
        // requests are still pending in the API queue.
        this.idleTime = 0;

        const totalCount = result.data.total_count;
        if (totalCount > 0) {
          const isIndivisible = min === max;
          if (isIndivisible || totalCount <= this.MAX_RESULTS_PER_PAGE) {
            // Range kept: it either cannot be split or is small enough.
            // NOTE(review): if saveRange returns a promise, its rejection is
            // not handled here; confirm against the MongoDB DAO.
            this.mongoDb.saveRange(min, max, totalCount);
          } else {
            // Range divisible, split in two non-overlapping halves.
            const mid = Math.floor((min + max) / 2);
            rangeQueue.push({ min, max: mid }, { min: mid + 1, max });
          }
        }
        // totalCount === 0: no result, range skipped.

        logger.info(
          `[scope] Range: range=${min}..${max}, total_count=${totalCount}`,
        );
      });
    };

    // Polling loop over the range queue.
    const scope = () => {
      if (rangeQueue.length === 0) {
        // Stops the queue loop if the idle time is over the limit.
        if (this.idleTime >= this.IDLE_TIME_MAX) {
          this.githubApiQueue.stop();
          this.mongoDb.disconnect();
          return;
        }
        // Nothing to do yet: waits for callbacks to produce new ranges.
        this.idleTime += this.QUEUE_WAITING_TIME;
        setTimeout(scope, this.QUEUE_WAITING_TIME);
        return;
      }

      // Work is available: resets the idle time, then drains every pending
      // range into a request (splice(0) empties the queue in FIFO order).
      this.idleTime = 0;
      const requests = rangeQueue.splice(0).map(buildRequest);
      this.githubApiQueue.push(...requests);
      setTimeout(scope, 0); // Loops.
    };

    this.mongoDb
      .connect()
      .then(() => {
        scope(); // Process entry point.
      })
      .catch((error) => {
        logger.error(`[scope] ${error.message}`);
        // The API queue was started above; stops it so a failed start-up does
        // not leave it running with nothing to process.
        this.githubApiQueue.stop();
      });
  }
}
// Script entry point: builds the scoping process and runs it immediately.
// The binding is never reassigned, hence `const`.
const processScope = new ProcessScope();
processScope.process();