I created a small Node.js script that takes the name of a Wikipedia article as its argument (and, optionally, a Wikipedia edition; "en" is the default) and creates a Git repository in which each change made to the article is represented by a commit.
Here are the code rules that I tried to follow:
- I attempted to structure the code so that it looks as much as possible like a basic binding between the Wikipedia API and the nodegit library
- I'm not very familiar with promises, but I tried to use them when possible
- I used `argparse` to provide a good user experience with parameters.
The code is on GitHub as well so one can look there if there's a need for details about dependencies.
I would be interested to have feedback about the following aspects:
- Is the code structured well enough for a developer to understand what it's doing? In particular, is there too much code complexity (and indentation)?
- Am I using too many dependencies? Not enough?
#!/usr/bin/env node
var pjson = require('./package.json');
var path = require('path');
var nodegit = require('nodegit');
var promisify = require('promisify-node');
var fse = promisify(require('fs-extra'));
var https = require("https");
var moment = require("moment");
var winston = require("winston");
var ArgumentParser = require("argparse").ArgumentParser;
// Command-line interface: one positional article name, an optional
// Wikipedia edition and an optional verbosity switch.
var argparser = new ArgumentParser({
  description: pjson.name,
  version: pjson.version
});
argparser.addArgument('--language', {
  nargs: 1,
  defaultValue: 'en',
  help: 'The Wikipedia language version to use (ex: en, fr, etc.)'
});
argparser.addArgument('-vvv', {
  nargs: 0,
  help: 'Verbose log'
});
argparser.addArgument('articleName');
var args = argparser.parseArgs();

// Fallback settings used when nothing more specific applies.
var defaults = {
  commitMessageLength: 100,
  logLevel: 'info'
};

// Console logger; its level is raised to "verbose" when -vvv was given.
var consoleLevel = args.vvv ? 'verbose' : defaults.logLevel;
var log = new winston.Logger({
  transports: [
    new winston.transports.Console({ level: consoleLevel })
  ]
});

// Host of the selected Wikipedia edition, shared by the API URL and the
// local directory layout.
var wikiHost = args.language + '.wikipedia.org';

// Name of the single file that is rewritten for every revision.
var fileName = args.articleName + '.wiki';

// Query asking the API for timestamp, user, comment and content of the
// article's revisions.
var apiRoot = 'https://' + wikiHost + '/w/api.php';
var url = apiRoot + '?' + [
  'action=query',
  'format=json',
  'prop=revisions',
  'titles=' + encodeURIComponent(args.articleName),
  'rvprop=timestamp%7Cuser%7Ccomment%7Ccontent',
  'rvlimit=max'
].join('&');

// Location of the generated repository, relative to the working directory.
var repoDir = './' + wikiHost + '/' + args.articleName;
var repoPath = path.resolve(process.cwd(), repoDir);

// Shared state, assigned later by the code that initialises the repository
// and downloads the history.
var repo;
var revisions;
var currentRevisionId;
/**
 * Writes the revision at `revisions[currentRevisionId]` into the working
 * directory of `repo`, commits it, then advances `currentRevisionId` and
 * repeats for the next revision until all of them have been committed.
 *
 * Reads the module-level `revisions`, `currentRevisionId`, `repo`,
 * `fileName`, `defaults`, `repoPath` and `log`; increments
 * `currentRevisionId` after each successful commit.
 *
 * @returns {Promise} Settles once this revision and every following one has
 *   been committed, or once a failure has been logged. Callers may ignore it.
 */
function createCommitForCurrentRevision() {
  var revision = revisions[currentRevisionId];
  // The API omits `*`, `comment` or `user` when the corresponding part of a
  // revision is hidden, so fall back to neutral values instead of throwing.
  var fileContent = revision['*'] || '';
  var message = (revision.comment || '').substring(0, defaults.commitMessageLength);
  var author = revision.user || 'unknown';
  var date = revision.timestamp;
  // Kept local so nothing leaks into the global scope.
  var index;
  var treeOid;

  log.verbose("Creating commit for revision " + currentRevisionId);

  return fse.writeFile(path.join(repo.workdir(), fileName), fileContent)
    .then(function() {
      return repo.refreshIndex();
    })
    .then(function(idx) {
      index = idx;
      return index.addByPath(fileName);
    })
    .then(function() {
      return index.write();
    })
    .then(function() {
      return index.writeTree();
    })
    .then(function(oid) {
      treeOid = oid;
      if (currentRevisionId === 0) {
        // First commit: there is no HEAD to use as a parent yet.
        return [];
      }
      return nodegit.Reference.nameToId(repo, "HEAD")
        .then(function(head) {
          return repo.getCommit(head);
        })
        .then(function(parent) {
          return [parent];
        });
    })
    .then(function(parents) {
      var timestamp = moment(date, moment.ISO_8601);
      // NOTE(review): the last argument is the signature's time-zone offset
      // in minutes; 60 is kept from the original code - confirm it is wanted.
      var authorSignature = nodegit.Signature.create(author, author + "@test.com", timestamp.unix(), 60);
      return repo.createCommit("HEAD", authorSignature, authorSignature, message, treeOid, parents);
    })
    .then(function(commitId) {
      log.verbose("New commit created: ", commitId);
      currentRevisionId++;
      if (currentRevisionId < revisions.length) {
        return createCommitForCurrentRevision();
      }
      log.info('The article\'s revision history was saved in ' + repoPath);
    })
    .catch(function(err) {
      // Report the failure once and make the process exit with an error code.
      log.error('Could not create commit for revision ' + currentRevisionId, err);
      process.exitCode = 1;
    });
}
/**
 * Logs a fatal problem and makes the process exit with a non-zero status
 * once pending work has drained.
 *
 * @param {string} message - Description of what went wrong.
 * @param {Error} [err] - Underlying error, when there is one.
 */
function reportFailure(message, err) {
  if (err) {
    log.error(message, err);
  }
  else {
    log.error(message);
  }
  process.exitCode = 1;
}

// Entry point: wipe any previous output, create an empty repository, download
// the article history and turn its revisions into commits.
log.verbose("Cleaning previous local repository if existing");
fse.removeSync(repoPath);
promisify(fse.ensureDir)(repoPath)
  .then(function() {
    return nodegit.Repository.init(repoPath, 0);
  })
  .then(function(repoCreated) {
    log.verbose("Created empty repository " + repoCreated);
    repo = repoCreated;
    log.verbose("Retrieving article history from " + url);
    https.get(url, function(res) {
      if (res.statusCode !== 200) {
        // Drain the response so the socket is released.
        res.resume();
        reportFailure('Unexpected HTTP status ' + res.statusCode + ' while retrieving ' + url);
        return;
      }
      var body = '';
      // Decode at the stream level: concatenating raw Buffer chunks would
      // corrupt multi-byte characters that straddle a chunk boundary.
      res.setEncoding('utf8');
      res.on('data', function(chunk) {
        body += chunk;
      });
      res.on('end', function() {
        log.verbose("Article history has been retrieved");
        var response;
        try {
          response = JSON.parse(body);
        }
        catch (err) {
          reportFailure('The article history is not valid JSON', err);
          return;
        }
        var pages = (response && response.query && response.query.pages) || {};
        var pageIds = Object.keys(pages);
        if (pageIds.length === 0) {
          reportFailure('The API response does not contain any page');
          return;
        }
        pageIds.forEach(function(pageId) {
          var page = pages[pageId];
          if (!page.revisions || page.revisions.length === 0) {
            // No revision list in the response, e.g. for an unknown title.
            reportFailure('No revision found for article "' + args.articleName + '"');
            return;
          }
          // Reverse the order returned by the API before committing.
          revisions = page.revisions.reverse();
          currentRevisionId = 0;
          createCommitForCurrentRevision();
        });
      });
    }).on('error', function(err) {
      reportFailure('Could not retrieve article history from ' + url, err);
    });
  })
  .catch(function(err) {
    reportFailure('Could not initialise the repository in ' + repoPath, err);
  });
-
1 · Thank you so much for such a useful tool! – Victor Yarema, commented May 17, 2024 at 19:26
-
@VictorYarema Happy to help :-) – Bruno Pérel, commented May 18, 2024 at 20:14