From 3fe0dc0ec70c6f02bb65d7d8fd2305c748d1482f Mon Sep 17 00:00:00 2001 From: Rene Saarsoo Date: Thu, 13 Jun 2013 18:41:17 +0300 Subject: [PATCH] Extract Js::Associator class from Js::Parser. Intending to use the same logic in RKellyParser. --- lib/jsduck/js/associator.rb | 222 ++++++++++++++++++++++++++++++++++++ lib/jsduck/js/parser.rb | 208 +-------------------------------- 2 files changed, 227 insertions(+), 203 deletions(-) create mode 100644 lib/jsduck/js/associator.rb diff --git a/lib/jsduck/js/associator.rb b/lib/jsduck/js/associator.rb new file mode 100644 index 00000000..dc6294d8 --- /dev/null +++ b/lib/jsduck/js/associator.rb @@ -0,0 +1,222 @@ +require 'jsduck/logger' + +module JsDuck + module Js + + # Associates comments with syntax nodes. + class Associator + + def initialize(input) + @input = input + + # Initialize line number counting + @start_index = 0 + @start_linenr = 1 + end + + # Analyzes the comments and AST nodes and returns array of + # hashes like this: + # + # { + # :comment => "The contents of the comment", + # :code => {...AST data structure for code following the comment...}, + # :linenr => 12, // Beginning with 1 + # :type => :doc_comment, // or :plain_comment + # } + # + def associate(ast) + @ast = ast + + @ast["comments"] = merge_comments(@ast["comments"]) + locate_comments + end + + private + + # Merges consecutive line-comments and Establishes links between + # comments, so we can easily use comment["next"] to get to the + # next comment. + def merge_comments(original_comments) + result = [] + + comment = original_comments[0] + i = 0 + + while comment + i += 1 + next_comment = original_comments[i] + + if next_comment && mergeable?(comment, next_comment) + # Merge next comment to current one + comment["value"] += "\n" + next_comment["value"] + comment["range"][1] = next_comment["range"][1] + else + # Create a link and continue with next comment + comment["next"] = next_comment + result << comment + comment = next_comment + end + end + + result + end + + # Two comments can be merged if they are both line-comments and + # they are separated only by whitespace (only one newline at the + # end of the first comment is allowed) + def mergeable?(c1, c2) + if c1["type"] == "Line" && c2["type"] == "Line" + /\A(\r\n|\n|\r)?[ \t]*\z/ =~ @input.slice((c1["range"][1])..(c2["range"][0]-1)) + else + false + end + end + + def locate_comments + @ast["comments"].map do |comment| + # Detect comment type and strip * at the beginning of doc-comment + value = comment["value"] + if comment["type"] == "Block" && value =~ /\A\*/ + type = :doc_comment + value = value.slice(1, value.length-1) + else + type = :plain_comment + end + + { + :comment => value, + :code => stuff_after(comment), + :linenr => line_number(comment["range"][0]), + :type => type, + } + end + end + + # Given index inside input string, returns the corresponding line number + def line_number(index) + # To speed things up, remember the index until which we counted, + # then next time just begin counting from there. This way we + # only count each line once. + @start_linenr = @input[@start_index...index].count("\n") + @start_linenr + @start_index = index + return @start_linenr + end + + # Sees if there is some code following the comment. + # Returns the code found. But if the comment is instead + # followed by another comment, returns nil. + def stuff_after(comment) + code = code_after(comment["range"], @ast) + if code && comment["next"] + return code["range"][0] < comment["next"]["range"][0] ? code : nil + else + code + end + end + + # Looks for code following the given range. + # + # The second argument is the parent node within which we perform + # our search. + def code_after(range, parent) + # Look through all child nodes of parent... + child_nodes(parent).each do |node| + if less(range, node["range"]) + # If node is after our range, then that's it. There could + # be comments in our way, but that's taken care of in + # #stuff_after method. + return node + elsif within(range, node["range"]) + # Our range is within the node --> recurse + return code_after(range, node) + end + end + + return nil + end + + + # True if range A is less than range B + def less(a, b) + return a[1] <= b[0] + end + + # True if range A is greater than range B + def greater(a, b) + return a[0] >= b[1] + end + + # True if range A is within range B + def within(a, b) + return b[0] <= a[0] && a[1] <= b[1] + end + + + # Returns array of child nodes of given node + def child_nodes(node) + properties = NODE_TYPES[node["type"]] + + unless properties + Logger.fatal("Unknown node type: "+node["type"]) + exit(1) + end + + properties.map {|p| node[p] }.compact.flatten + end + + # All possible node types in Esprima-created abstract syntax tree + # + # Each node type maps to list of properties of that node into + # which we can recurse for further parsing. + NODE_TYPES = { + "Program" => ["body"], + + "BlockStatement" => ["body"], + "BreakStatement" => [], + "ContinueStatement" => [], + "DoWhileStatement" => ["body", "test"], + "DebuggerStatement" => [], + "EmptyStatement" => [], + "ExpressionStatement" => ["expression"], + "ForStatement" => ["init", "test", "update", "body"], + "ForInStatement" => ["left", "right", "body"], + "IfStatement" => ["test", "consequent", "alternate"], + "LabeledStatement" => ["body"], + "ReturnStatement" => ["argument"], + "SwitchStatement" => ["discriminant", "cases"], + "SwitchCase" => ["test", "consequent"], + "ThrowStatement" => ["argument"], + "TryStatement" => ["block", "handlers", "finalizer"], + "CatchClause" => ["param", "body"], + "WhileStatement" => ["test", "body"], + "WithStatement" => ["object", "body"], + + "FunctionDeclaration" => ["id", "params", "body"], + "VariableDeclaration" => ["declarations"], + "VariableDeclarator" => ["id", "init"], + + "AssignmentExpression" => ["left", "right"], + "ArrayExpression" => ["elements"], + "BinaryExpression" => ["left", "right"], + "CallExpression" => ["callee", "arguments"], + "ConditionalExpression" => ["test", "consequent", "alternate"], + "FunctionExpression" => ["body"], + + "LogicalExpression" => ["left", "right"], + "MemberExpression" => ["object", "property"], + "NewExpression" => ["callee", "arguments"], + "ObjectExpression" => ["properties"], + "Property" => ["key", "value"], + + "SequenceExpression" => ["expressions"], + "ThisExpression" => [], + "UnaryExpression" => ["argument"], + "UpdateExpression" => ["argument"], + + "Identifier" => [], + "Literal" => [], + } + end + + end +end diff --git a/lib/jsduck/js/parser.rb b/lib/jsduck/js/parser.rb index 78f5a5b2..17688a39 100644 --- a/lib/jsduck/js/parser.rb +++ b/lib/jsduck/js/parser.rb @@ -1,5 +1,5 @@ require 'jsduck/js/esprima' -require 'jsduck/logger' +require 'jsduck/js/associator' module JsDuck module Js @@ -10,213 +10,15 @@ module JsDuck # Initializes the parser with JavaScript source code to be parsed. def initialize(input, options = {}) @input = input - - # Initialize line number counting - @start_index = 0 - @start_linenr = 1 end - # Parses JavaScript source code and returns array of hashes like this: - # - # { - # :comment => "The contents of the comment", - # :code => {...AST data structure for code following the comment...}, - # :linenr => 12, // Beginning with 1 - # :type => :doc_comment, // or :plain_comment - # } - # + # Parses JavaScript source code and associates comments with AST + # nodes, returning array of docsets. def parse - @ast = Js::Esprima.parse(@input) - - @ast["comments"] = merge_comments(@ast["comments"]) - locate_comments - end - - private - - # Merges consecutive line-comments and Establishes links between - # comments, so we can easily use comment["next"] to get to the - # next comment. - def merge_comments(original_comments) - result = [] - - comment = original_comments[0] - i = 0 - - while comment - i += 1 - next_comment = original_comments[i] - - if next_comment && mergeable?(comment, next_comment) - # Merge next comment to current one - comment["value"] += "\n" + next_comment["value"] - comment["range"][1] = next_comment["range"][1] - else - # Create a link and continue with next comment - comment["next"] = next_comment - result << comment - comment = next_comment - end - end - - result - end - - # Two comments can be merged if they are both line-comments and - # they are separated only by whitespace (only one newline at the - # end of the first comment is allowed) - def mergeable?(c1, c2) - if c1["type"] == "Line" && c2["type"] == "Line" - /\A(\r\n|\n|\r)?[ \t]*\z/ =~ @input.slice((c1["range"][1])..(c2["range"][0]-1)) - else - false - end - end - - def locate_comments - @ast["comments"].map do |comment| - # Detect comment type and strip * at the beginning of doc-comment - value = comment["value"] - if comment["type"] == "Block" && value =~ /\A\*/ - type = :doc_comment - value = value.slice(1, value.length-1) - else - type = :plain_comment - end - - { - :comment => value, - :code => stuff_after(comment), - :linenr => line_number(comment["range"][0]), - :type => type, - } - end - end - - # Given index inside input string, returns the corresponding line number - def line_number(index) - # To speed things up, remember the index until which we counted, - # then next time just begin counting from there. This way we - # only count each line once. - @start_linenr = @input[@start_index...index].count("\n") + @start_linenr - @start_index = index - return @start_linenr - end - - # Sees if there is some code following the comment. - # Returns the code found. But if the comment is instead - # followed by another comment, returns nil. - def stuff_after(comment) - code = code_after(comment["range"], @ast) - if code && comment["next"] - return code["range"][0] < comment["next"]["range"][0] ? code : nil - else - code - end + ast = Js::Esprima.parse(@input) + Js::Associator.new(@input).associate(ast) end - # Looks for code following the given range. - # - # The second argument is the parent node within which we perform - # our search. - def code_after(range, parent) - # Look through all child nodes of parent... - child_nodes(parent).each do |node| - if less(range, node["range"]) - # If node is after our range, then that's it. There could - # be comments in our way, but that's taken care of in - # #stuff_after method. - return node - elsif within(range, node["range"]) - # Our range is within the node --> recurse - return code_after(range, node) - end - end - - return nil - end - - - # True if range A is less than range B - def less(a, b) - return a[1] <= b[0] - end - - # True if range A is greater than range B - def greater(a, b) - return a[0] >= b[1] - end - - # True if range A is within range B - def within(a, b) - return b[0] <= a[0] && a[1] <= b[1] - end - - - # Returns array of child nodes of given node - def child_nodes(node) - properties = NODE_TYPES[node["type"]] - - unless properties - Logger.fatal("Unknown node type: "+node["type"]) - exit(1) - end - - properties.map {|p| node[p] }.compact.flatten - end - - # All possible node types in Esprima-created abstract syntax tree - # - # Each node type maps to list of properties of that node into - # which we can recurse for further parsing. - NODE_TYPES = { - "Program" => ["body"], - - "BlockStatement" => ["body"], - "BreakStatement" => [], - "ContinueStatement" => [], - "DoWhileStatement" => ["body", "test"], - "DebuggerStatement" => [], - "EmptyStatement" => [], - "ExpressionStatement" => ["expression"], - "ForStatement" => ["init", "test", "update", "body"], - "ForInStatement" => ["left", "right", "body"], - "IfStatement" => ["test", "consequent", "alternate"], - "LabeledStatement" => ["body"], - "ReturnStatement" => ["argument"], - "SwitchStatement" => ["discriminant", "cases"], - "SwitchCase" => ["test", "consequent"], - "ThrowStatement" => ["argument"], - "TryStatement" => ["block", "handlers", "finalizer"], - "CatchClause" => ["param", "body"], - "WhileStatement" => ["test", "body"], - "WithStatement" => ["object", "body"], - - "FunctionDeclaration" => ["id", "params", "body"], - "VariableDeclaration" => ["declarations"], - "VariableDeclarator" => ["id", "init"], - - "AssignmentExpression" => ["left", "right"], - "ArrayExpression" => ["elements"], - "BinaryExpression" => ["left", "right"], - "CallExpression" => ["callee", "arguments"], - "ConditionalExpression" => ["test", "consequent", "alternate"], - "FunctionExpression" => ["body"], - - "LogicalExpression" => ["left", "right"], - "MemberExpression" => ["object", "property"], - "NewExpression" => ["callee", "arguments"], - "ObjectExpression" => ["properties"], - "Property" => ["key", "value"], - - "SequenceExpression" => ["expressions"], - "ThisExpression" => [], - "UnaryExpression" => ["argument"], - "UpdateExpression" => ["argument"], - - "Identifier" => [], - "Literal" => [], - } end end -- GitLab