Commit bfff820a authored by Rene Saarsoo's avatar Rene Saarsoo
Browse files

Begin script to extract users data from comments database.

Most comments have both author and userId fields, but some earlier
comments lack the latter, so we query the Sencha Forum database to
look up the IDs by username.

Also rename the users.name field to users.username, which is clearer.
parent d48bf764
Loading
Loading
Loading
Loading

comments/.gitignore

0 → 100644
+2 −0
Original line number Diff line number Diff line
config.js
node_modules

comments/database.js

0 → 100644
+73 −0
Original line number Diff line number Diff line

/**
 * Defines comment schema and connects to database
 */

var mongoose = require('mongoose'),
    config = require('./config');

CommentSchema = new mongoose.Schema({
    sdk:         String,
    version:     String,

    author:      String,
    userId:      Number,
    content:     String,
    contentHtml: String,
    createdAt:   Date,
    downVotes:   Array,
    emailHash:   String,
    rating:      Number,
    target:      Array,
    upVotes:     Array,
    deleted:     Boolean,
    updates:     Array,
    mod:         Boolean,
    title:       String,
    url:         String
});

// Helper method for adding new comments.
// When moderator posts comment, mark it automatically as read.
CommentSchema.methods.saveNew = function(user, next) {
    var comment = this;
    if (user.moderator) {
        comment.save(function(err) {
            var meta = new Meta({
                userId: user.userid,
                commentId: comment._id,
                metaType: 'read'
            });
            meta.save(next);
        });
    }
    else {
        comment.save(next);
    }
};

Comment = mongoose.model('Comment', CommentSchema);

Subscription = mongoose.model('Subscription', new mongoose.Schema({
    sdk:         String,
    version:     String,

    createdAt:   Date,
    userId:      Number,
    email:       String,
    target:      Array
}));

Meta = mongoose.model('Meta', new mongoose.Schema({
    sdk:         String,
    version:     String,

    createdAt:   Date,
    userId:      Number,
    commentId:   String,
    metaType:    String
}));

mongoose.connect(config.mongoDb, function(err, ok) {
    console.log("Connected to DB")
});
+2 −2
Original line number Diff line number Diff line
@@ -12,7 +12,7 @@ CREATE TABLE comments (

CREATE TABLE users (
    id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    username VARCHAR(255) NOT NULL,
    external_id INT NOT NULL, -- (link to Sencha Forum database)
    email VARCHAR(255) NOT NULL, -- (from subscriptions)
    email_hash VARCHAR(255) NOT NULL,
@@ -108,7 +108,7 @@ GROUP BY target.cls
-- get users with most upvotes

SELECT
    user.name,
    user.username,
    SUM(c.vote) AS votes
FROM users JOIN voted_comments c ON c.user_id = users.id
GROUP BY user.id
+75 −0
Original line number Diff line number Diff line
/**
 * Extracts users data from comments database
 */

var config = require('./config');
require('./database');

var mysql = require('mysql');
var db = mysql.createConnection({
    host: config.db.host,
    user: config.db.user,
    password: config.db.password,
    database: config.db.dbName
});


Comment.find({}, function(err, comments) {
    if (err) throw err;

    var usersMap = extractUniqueUsers(comments);
    var users = objectValues(usersMap);

    console.log(users.length + " users found.");

    var usersWithoutId = users.filter(function(u){ return !u.external_id; });
    console.log(usersWithoutId.length + " without external_id.");

    var usernames = usersWithoutId.map(function(u){ return u.username; });
    db.query("SELECT userid, username FROM user WHERE username IN (?)", [usernames], function(err, rows) {
        if (err) throw err;

        rows.forEach(function(r) {
            if (usersMap[r.username]) {
                usersMap[r.username].external_id = r.userid;
            }
            else {
                // Turns out that Sencha Forum DB contains users zoob
                // and Zoob.  Only Zoob has commented in docs, but our
                // SELECT query compares strings case-insensitively
                // resulting in bot zoob and Zoob being selected.
                // So simply ignore zoob and other users like him.
            }
        });

        var usersWithoutId = users.filter(function(u){ return !u.external_id; });
        console.log(usersWithoutId.length + " without external_id in the end.");

        process.exit();
    });
});

function extractUniqueUsers(comments) {
    var usersMap = {};

    comments.forEach(function(c) {
        var record = {
            username: c.author,
            email_hash: c.emailHash,
            external_id: c.userId,
            moderator: c.moderator
        };
        // always overwrite with latest user data
        usersMap[c.author] = record;
    });

    return usersMap;
}

function objectValues(obj) {
    var values = [];
    for (var i in obj) {
        values.push(obj[i]);
    }
    return values;
}

comments/package.json

0 → 100644
+12 −0
Original line number Diff line number Diff line
{
    "name": "jsduck_comments",
    "version": "2.0.0",
    "description": "Commenting backend for JSDuck Documentation",
    "author": "Rene Saarsoo <nene@triin.net>",
    "dependencies": {
        // The github page of node-mysql recommends me to install the
        // 2.0 version despite it being alpha.
        "mysql": "2.0.0-alpha3",
        "mongoose": ""
    }
}
Loading