Commit 16cd5271 authored by Rene Saarsoo's avatar Rene Saarsoo
Browse files

Optimize use of V8 engine.

parent 1f80f72c
Loading
Loading
Loading
Loading
+43 −53
Original line number Diff line number Diff line
@@ -2,6 +2,10 @@ require 'v8'
require 'json'
require 'singleton'

# Expose the `native` instance variable of V8::Object wrappers through a
# public reader, so callers can work with the underlying value directly.
V8::Object.class_eval do
  attr_reader :native
end

module JsDuck

  # Uses the Esprima.js engine through V8 to tokenize a JavaScript string.
@@ -10,71 +14,57 @@ module JsDuck

    # Creates the V8 context and loads the two scripts it needs, each
    # exactly once: Esprima itself and the esprima_wrapper.js helper
    # (presumably the script that defines EsprimaWrapper).
    def initialize
      @v8 = V8::Context.new

      # esprima/esprima.js is located relative to the path obtained by
      # stripping four trailing components from this file's absolute path.
      @v8.load(File.expand_path("../../../../esprima/esprima.js", __FILE__))

      # esprima_wrapper.js lives in the same directory as this file.
      @v8.load(File.expand_path("../esprima_wrapper.js", __FILE__))
    end

    # Input must be a String.
    def tokenize(input)
      @v8['js'] = @input = input
      program = JSON.parse(@v8.eval(<<-EOS), :max_nesting => false)
        var out = esprima.parse(js, {tokens: true, comment: true});
        JSON.stringify({tokens: out.tokens, comments: out.comments});
      EOS
      doc_comments = program["comments"].find_all {|c| doc_comment?(c) }
      return merge_tokens(program["tokens"], doc_comments).map {|tok| to_jsduck_token(tok) }
    end

    private
      out = @v8.eval("EsprimaWrapper.parse(js)")

      len = out["type"].length
      out_type = out["type"].native
      out_value = out["value"].native
      out_linenr = out["linenr"].native
      out_value_array = out["valueArray"].native

      lock = V8::C::Locker.new

    # True if comment is a /** doc-comment */
    def doc_comment?(comment)
      comment["type"] == "Block" && !!(comment["value"] =~ /^\*/)
      type_array = [
        :number,
        :string,
        :ident,
        :regex,
        :operator,
        :keyword,
        :doc_comment,
      ]

      value_array = []
      for i in (0..(out_value_array.Length()-1))
        value_array << out_value_array.Get(i).AsciiValue();
      end

    # Combines tokens and comments arrays into one array
    # while keeping them in correct order.
    def merge_tokens(tokens, comments)
      result = []
      com = comments.shift
      tok = tokens.shift
      while com || tok
        if !com || tok && (tok["range"][0] < com["range"][0])
          result << tok
          tok = tokens.shift
      tokens = []
      for i in (0..(len-1))
        t = type_array[out_type.Get(i)]
        if t == :doc_comment
          tokens << { :type => t, :value => out_value.Get(i).AsciiValue(), :linenr => out_linenr.Get(i) }
        elsif t == :keyword
          kw = value_array[out_value.Get(i)].to_sym
          tokens << { :type => kw, :value => kw }
        else
          result << com
          com = comments.shift
        end
          tokens << { :type => t, :value => value_array[out_value.Get(i)] }
        end
      result
      end

    # Converts Esprima token to JSDuck token
    def to_jsduck_token(tok)
      case tok["type"]
      when "Numeric"
        {:type => :number, :value => tok["value"]}
      when "String"
        {:type => :string, :value => tok["value"].gsub(/\A['"]|['"]\Z/m, "")}
      when "Identifier", "Boolean", "Null"
        {:type => :ident, :value => tok["value"]}
      when "RegularExpression"
        {:type => :regex, :value => tok["value"]}
      when "Punctuator"
        {:type => :operator, :value => tok["value"]}
      when "Keyword"
        kw = tok["value"].to_sym
        {:type => kw, :value => kw}
      when "Block"
        {
          :type => :doc_comment,
          :value => "/*#{tok['value']}*/",
          :linenr => @input[0...tok["range"][0]].count("\n") + 1,
        }
      else
        throw "Unknown Esprima token type #{tok['type']}"
      end
      lock.delete

      tokens
    end

  end
+100 −0
Original line number Diff line number Diff line
var EsprimaWrapper = (function() {

    // Maps Esprima token/comment type names to small integer codes.
    // The codes are positions in the type_array list on the Ruby side
    // (number, string, ident, regex, operator, keyword, doc_comment),
    // so the two must be kept in sync.
    var typeMap = {
        "Numeric": 0,
        "String": 1,
        "Identifier": 2,
        "Boolean": 2,
        "Null": 2,
        "RegularExpression": 3,
        "Punctuator": 4,
        "Keyword": 5,
        "Block": 6
    };

    // Converts a list of Esprima tokens (with doc-comments merged in, in
    // source order) into four parallel structures:
    //
    //   type       - numeric type code of each token (see typeMap)
    //   value      - for "Block" comments the full comment text including
    //                the /* */ delimiters; for every other token an index
    //                into valueArray
    //   linenr     - 1-based line number for "Block" comments, 0 otherwise
    //   valueArray - the distinct token values, in order of first appearance
    //
    // `full_input` is the complete source text the token ranges refer to.
    // Throws an Error when a token has a type that typeMap does not know.
    function exportTokens(tokens, full_input) {
        var hasOwn = Object.prototype.hasOwnProperty;
        // Lookup from "$"-prefixed token value to its index in valueArray.
        // The prefix keeps values such as "__proto__" from colliding with
        // special object properties, which would corrupt the indexes.
        var valueIndex = {};
        var valueArray = [];
        var type = [], value = [], linenr = [];
        // Line counting is incremental: only the text between the previous
        // comment and the current one is scanned for newlines.
        var lastComIndex = 0;
        var lastComLineNr = 1;

        for (var i=0, len=tokens.length; i<len; i++) {
            var t = tokens[i];

            if (!hasOwn.call(typeMap, t.type)) {
                throw new Error("Unknown Esprima token type " + t.type);
            }
            type[i] = typeMap[t.type];

            var v = t.value;
            if (t.type === "Block") {
                // Esprima strips the comment delimiters; put them back.
                value[i] = "/*" + v + "*/";
                linenr[i] = (full_input.slice(lastComIndex, t.range[0]).match(/\n/g) || []).length + lastComLineNr;
                lastComIndex = t.range[0];
                lastComLineNr = linenr[i];
            }
            else {
                if (t.type === "String") {
                    // Drop the surrounding quotes of string literals.
                    v = v.replace(/^['"]|['"]$/g, "");
                }
                var key = "$" + v;
                if (!hasOwn.call(valueIndex, key)) {
                    valueIndex[key] = valueArray.length;
                    valueArray.push(String(v));
                }
                value[i] = valueIndex[key];
                linenr[i] = 0;
            }
        }

        return {type: type, value: value, linenr: linenr, valueArray: valueArray};
    }

    // Returns only the /** doc-comments */ from a list of Esprima comments:
    // comments of type "Block" whose text starts with an asterisk.
    function filterDocComments(comments) {
        return comments.filter(function(comment) {
            return comment.type === "Block" && /^\*/.test(comment.value);
        });
    }

    // Interleaves the tokens and comments lists into a single list ordered
    // by start offset (range[0]). Each input list is consumed front to
    // back; when a token and a comment start at the same offset the
    // comment is emitted first.
    function mergeTokens(tokens, comments) {
        var merged = [];
        var tokPos = 0, comPos = 0;
        var nextTok = tokens[tokPos];
        var nextCom = comments[comPos];

        while (nextCom || nextTok) {
            var takeComment = nextCom && !(nextTok && (nextTok.range[0] < nextCom.range[0]));
            if (takeComment) {
                merged.push(nextCom);
                comPos += 1;
                nextCom = comments[comPos];
            }
            else {
                merged.push(nextTok);
                tokPos += 1;
                nextTok = tokens[tokPos];
            }
        }

        return merged;
    }

    // Runs Esprima over the JavaScript source `js`, keeps only the
    // /** doc-comments */, merges them with the tokens in source order
    // and returns the result in the exportTokens format.
    function parse(js) {
        var ast = esprima.parse(js, {tokens: true, comment: true});
        var docComments = filterDocComments(ast.comments);
        var ordered = mergeTokens(ast.tokens, docComments);

        return exportTokens(ordered, js);
    }

    return {parse: parse};

})();