Commit 742f5b0a authored by Rene Saarsoo's avatar Rene Saarsoo
Browse files

Move CSS lexer/parser to css/ subdir.

parent 75f8c2d3
Loading
Loading
Loading
Loading
+203 −0
Original line number Diff line number Diff line
require 'strscan'

module JsDuck
  module Css

    # Tokenizes CSS or SCSS code into lexical tokens.
    #
    # Each token is a hash with :type and :value fields.
    # Types and example values for them are as follows:
    #
    # - :number      -- "25.8"
    # - :percentage  -- "25%"
    # - :dimension   -- "2em"
    # - :string      -- '"Hello world"'
    # - :ident       -- "foo-bar"
    # - :at_keyword  -- "@mixin"
    # - :hash        -- "#00FF66"
    # - :var         -- "$foo"
    # - :operator    -- "/"
    # - :delim       -- "{"
    # - :doc_comment -- " My comment " (the surrounding /** and */ are stripped)
    #
    # Notice that doc-comments are recognized as tokens while normal
    # comments are ignored just as whitespace.
    #
    class Lexer
      # Initializes lexer with input string.
      def initialize(input)
        @input = StringScanner.new(input)
        # Lookahead buffer of already tokenized, but not yet consumed tokens.
        @buffer = []
      end

      # Tests if given pattern matches the tokens that follow at current
      # position.
      #
      # Takes list of strings and symbols.  Symbols are compared to
      # token type, while strings to token value.  For example:
      #
      #     look(:ident, ":", :dimension)
      #
      # Returns false when there are fewer tokens left than requested.
      def look(*tokens)
        buffer_tokens(tokens.length)
        tokens.each_with_index.all? do |expected, i|
          tok = @buffer[i]
          if !tok
            false
          elsif expected.instance_of?(Symbol)
            tok[:type] == expected
          else
            tok[:value] == expected
          end
        end
      end

      # Returns the value of next token, moving the current token cursor
      # also to next token.
      #
      # When full=true, returns full token as hash like so:
      #
      #     {:type => :ident, :value => "foo"}
      #
      # For doc-comments the full token also contains the field :linenr,
      # pointing to the line where the doc-comment began.
      #
      # Must only be called while #empty? is false; with no tokens
      # left there is nothing to shift from the buffer and the call
      # fails with NoMethodError.
      def next(full=false)
        buffer_tokens(1)
        tok = @buffer.shift
        # advance the scanpointer to the position after this token
        @input.pos = tok[:pos]
        full ? tok : tok[:value]
      end

      # True when no more tokens.
      def empty?
        buffer_tokens(1)
        return !@buffer.first
      end

      # Ensures next n tokens are read in buffer
      #
      # At the end of buffering the initial position scanpointer is
      # restored.  Only the #next method will advance the scanpointer in
      # a way that's visible outside this class.
      def buffer_tokens(n)
        prev_pos = @input.pos
        # Continue tokenizing from where the last buffered token ended.
        @input.pos = @buffer.last[:pos] if @buffer.last
        while @buffer.length < n
          tok = next_token
          # nil means end of input - no point in asking for more.
          break unless tok
          # remember scanpointer position after each token
          tok[:pos] = @input.pos
          @buffer << tok
        end
        @input.pos = prev_pos
      end

      # Parses out next token from input stream.
      #
      # Whitespace and non-doc comments are skipped.  Returns nil
      # when the end of input is reached.
      def next_token
        while !@input.eos? do
          skip_white
          if @input.check(IDENT)
            return {
              :type => :ident,
              :value => @input.scan(IDENT)
            }
          elsif @input.check(/'/)
            # The \\? before \Z lets an unterminated string that ends
            # with a lone backslash still be consumed; otherwise the
            # scan would fail and the lexer would never advance.
            return {
              :type => :string,
              :value => @input.scan(/'([^'\\]|\\.)*('|\\?\Z)/m)
            }
          elsif @input.check(/"/)
            return {
              :type => :string,
              :value => @input.scan(/"([^"\\]|\\.)*("|\\?\Z)/m)
            }
          elsif @input.check(/\//)
            # Several things begin with slash:
            # - doc-comments, normal comments, division-operators
            if @input.check(/\/\*\*[^\/]/)
              # Line number has to be taken before the comment is consumed.
              linenr = current_linenr
              return {
                :type => :doc_comment,
                :linenr => linenr,
                :value => @input.scan_until(/\*\/|\Z/).sub(/\A\/\*\*/, "").sub(/\*\/\Z/, "")
              }
            elsif @input.check(/\/\*/)
              # skip multiline comment
              @input.scan_until(/\*\/|\Z/)
            elsif @input.check(/\/\//)
              # skip line comment
              @input.scan_until(/\n|\Z/)
            else
              return {
                :type => :operator,
                :value => @input.scan(/\//)
              }
            end
          elsif @input.check(NUM)
            nr = @input.scan(NUM)
            if @input.check(/%/)
              return {
                :type => :percentage,
                :value => nr + @input.scan(/%/)
              }
            elsif @input.check(IDENT)
              return {
                :type => :dimension,
                :value => nr + @input.scan(IDENT)
              }
            else
              return {
                :type => :number,
                :value => nr
              }
            end
          elsif @input.check(/@/)
            return maybe(:at_keyword, /@/, IDENT)
          elsif @input.check(/#/)
            return maybe(:hash, /#/, NAME)
          elsif @input.check(/\$/)
            return maybe(:var, /\$/, IDENT)
          elsif @input.check(/./)
            return {
              :type => :delim,
              :value => @input.scan(/./)
            }
          end
        end
      end

      # Returns token of given type when both regexes match.
      # Otherwise returns :delim token with value of first regex match.
      # First regex must always match.
      def maybe(token_type, before_re, after_re)
        before = @input.scan(before_re)
        if @input.check(after_re)
          return {
            :type => token_type,
            :value => before + @input.scan(after_re)
          }
        else
          return {
            :type => :delim,
            :value => before
          }
        end
      end

      # Skips over any whitespace at current position.
      def skip_white
        @input.scan(/\s+/)
      end

      # Simplified token syntax based on:
      # http://www.w3.org/TR/CSS21/syndata.html
      IDENT = /-?[_a-z][_a-z0-9-]*/i
      NAME = /[_a-z0-9-]+/i
      NUM = /[0-9]*\.[0-9]+|[0-9]+/

      private

      # Calculates the line number (starting with 1) of the current
      # scanpointer position.
      #
      # The scanpointer is a byte offset, so the consumed part of the
      # input has to be cut out by bytes.  Slicing by character index
      # overshoots when multibyte characters precede the position.
      def current_linenr
        source = @input.string
        consumed = if source.respond_to?(:byteslice)
          source.byteslice(0, @input.pos)
        else
          # Strings without #byteslice are indexed by bytes already.
          source[0...@input.pos]
        end
        consumed.count("\n") + 1
      end

    end

  end
end
+121 −0
Original line number Diff line number Diff line
require 'jsduck/css/lexer'

module JsDuck
  module Css

    # Parses CSS/SCSS source, collecting every doc-comment together
    # with a description of the code that follows it.
    class Parser
      # Creates parser for the given CSS/SCSS source string.
      # The options argument is accepted but not used here.
      def initialize(input, options = {})
        @lex = Css::Lexer.new(input)
        @docs = []
      end

      # Parses the whole CSS block and returns same kind of structure
      # that JavaScript parser does.
      #
      # Returns array of hashes, one per doc-comment, with fields
      # :comment, :linenr, :code and :type.  Tokens that aren't part
      # of a doc-comment or the code block following it are skipped.
      def parse
        while !@lex.empty? do
          if look(:doc_comment)
            comment = @lex.next(true)
            @docs << {
              :comment => comment[:value],
              :linenr => comment[:linenr],
              :code => code_block,
              :type => :doc_comment,
            }
          else
            @lex.next
          end
        end
        @docs
      end

      # <code-block> := <mixin-declaration> | <var-declaration> | <property>
      def code_block
        if look("@mixin")
          mixin_declaration
        elsif look(:var, ":")
          var_declaration
        else
          # Default to property like in JsParser.
          {:tagname => :property}
        end
      end

      # <mixin-declaration> := "@mixin" <ident>
      #
      # The :name is nil when no identifier follows "@mixin".
      def mixin_declaration
        match("@mixin")
        return {
          :tagname => :css_mixin,
          :name => look(:ident) ? match(:ident) : nil,
        }
      end

      # <var-declaration> := <var> ":" <css-value>
      def var_declaration
        name = match(:var)
        match(":")
        value_list = css_value
        return {
          :tagname => :css_var,
          :name => name,
          :default => value_list.map {|v| v[:value] }.join(" "),
          :type => value_type(value_list),
        }
      end

      # <css-value> := ...anything up to... [ ";" | "}" | "!default" ]
      #
      # Also stops at the end of input, so that a declaration without
      # a terminator doesn't read past the last token.
      def css_value
        val = []
        while !@lex.empty? && !look(";") && !look("}") && !look("!", "default")
          val << @lex.next(true)
        end
        val
      end

      # Determines type of CSS value, based on its first token.
      #
      # Returns the type name as string, or nil when the value is
      # empty or its type isn't recognized.
      def value_type(val)
        first = val[0]
        # An empty value (like in "$foo: ;") has no type.
        return nil unless first

        case first[:type]
        when :number
          "number"
        when :dimension
          "length"
        when :percentage
          "percentage"
        when :string
          "string"
        when :hash
          "color"
        when :ident
          case first[:value]
          when "true", "false"
            "boolean"
          when "rgb", "rgba", "hsl", "hsla", "transparent",
            "black", "silver", "gray", "white", "maroon",
            "red", "purple", "fuchsia", "green", "lime", "olive",
            "yellow", "navy", "blue", "teal", "aqua", "orange"
            "color"
          end
        end
      end

      # Matches all arguments, returns the value of last match
      # When the whole sequence doesn't match, raises RuntimeError.
      def match(*args)
        if look(*args)
          last = nil
          args.length.times { last = @lex.next }
          last
        else
          # raise (not throw) - this is an error, not a non-local jump.
          raise "Expected: " + args.join(", ")
        end
      end

      # Shorthand for lookahead on the lexer.
      def look(*args)
        @lex.look(*args)
      end
    end

  end
end

lib/jsduck/css_lexer.rb

deleted100644 → 0
+0 −201
Original line number Diff line number Diff line
require 'strscan'

module JsDuck

  # Tokenizes CSS or SCSS code into lexical tokens.
  #
  # Each token is a hash with :type and :value fields.
  # Types and example values for them are as follows:
  #
  # - :number      -- "25.8"
  # - :percentage  -- "25%"
  # - :dimension   -- "2em"
  # - :string      -- '"Hello world"'
  # - :ident       -- "foo-bar"
  # - :at_keyword  -- "@mixin"
  # - :hash        -- "#00FF66"
  # - :var         -- "$foo"
  # - :operator    -- "/"
  # - :delim       -- "{"
  # - :doc_comment -- " My comment " (the surrounding /** and */ are stripped)
  #
  # Notice that doc-comments are recognized as tokens while normal
  # comments are ignored just as whitespace.
  #
  class CssLexer
    # Initializes lexer with input string.
    def initialize(input)
      @input = StringScanner.new(input)
      # Lookahead buffer of already tokenized, but not yet consumed tokens.
      @buffer = []
    end

    # Tests if given pattern matches the tokens that follow at current
    # position.
    #
    # Takes list of strings and symbols.  Symbols are compared to
    # token type, while strings to token value.  For example:
    #
    #     look(:ident, ":", :dimension)
    #
    # Returns false when there are fewer tokens left than requested.
    def look(*tokens)
      buffer_tokens(tokens.length)
      tokens.each_with_index.all? do |expected, i|
        tok = @buffer[i]
        if !tok
          false
        elsif expected.instance_of?(Symbol)
          tok[:type] == expected
        else
          tok[:value] == expected
        end
      end
    end

    # Returns the value of next token, moving the current token cursor
    # also to next token.
    #
    # When full=true, returns full token as hash like so:
    #
    #     {:type => :ident, :value => "foo"}
    #
    # For doc-comments the full token also contains the field :linenr,
    # pointing to the line where the doc-comment began.
    #
    # Must only be called while #empty? is false; with no tokens
    # left there is nothing to shift from the buffer and the call
    # fails with NoMethodError.
    def next(full=false)
      buffer_tokens(1)
      tok = @buffer.shift
      # advance the scanpointer to the position after this token
      @input.pos = tok[:pos]
      full ? tok : tok[:value]
    end

    # True when no more tokens.
    def empty?
      buffer_tokens(1)
      return !@buffer.first
    end

    # Ensures next n tokens are read in buffer
    #
    # At the end of buffering the initial position scanpointer is
    # restored.  Only the #next method will advance the scanpointer in
    # a way that's visible outside this class.
    def buffer_tokens(n)
      prev_pos = @input.pos
      # Continue tokenizing from where the last buffered token ended.
      @input.pos = @buffer.last[:pos] if @buffer.last
      while @buffer.length < n
        tok = next_token
        # nil means end of input - no point in asking for more.
        break unless tok
        # remember scanpointer position after each token
        tok[:pos] = @input.pos
        @buffer << tok
      end
      @input.pos = prev_pos
    end

    # Parses out next token from input stream.
    #
    # Whitespace and non-doc comments are skipped.  Returns nil
    # when the end of input is reached.
    def next_token
      while !@input.eos? do
        skip_white
        if @input.check(IDENT)
          return {
            :type => :ident,
            :value => @input.scan(IDENT)
          }
        elsif @input.check(/'/)
          # The \\? before \Z lets an unterminated string that ends
          # with a lone backslash still be consumed; otherwise the
          # scan would fail and the lexer would never advance.
          return {
            :type => :string,
            :value => @input.scan(/'([^'\\]|\\.)*('|\\?\Z)/m)
          }
        elsif @input.check(/"/)
          return {
            :type => :string,
            :value => @input.scan(/"([^"\\]|\\.)*("|\\?\Z)/m)
          }
        elsif @input.check(/\//)
          # Several things begin with slash:
          # - doc-comments, normal comments, division-operators
          if @input.check(/\/\*\*[^\/]/)
            # Line number has to be taken before the comment is consumed.
            linenr = current_linenr
            return {
              :type => :doc_comment,
              :linenr => linenr,
              :value => @input.scan_until(/\*\/|\Z/).sub(/\A\/\*\*/, "").sub(/\*\/\Z/, "")
            }
          elsif @input.check(/\/\*/)
            # skip multiline comment
            @input.scan_until(/\*\/|\Z/)
          elsif @input.check(/\/\//)
            # skip line comment
            @input.scan_until(/\n|\Z/)
          else
            return {
              :type => :operator,
              :value => @input.scan(/\//)
            }
          end
        elsif @input.check(NUM)
          nr = @input.scan(NUM)
          if @input.check(/%/)
            return {
              :type => :percentage,
              :value => nr + @input.scan(/%/)
            }
          elsif @input.check(IDENT)
            return {
              :type => :dimension,
              :value => nr + @input.scan(IDENT)
            }
          else
            return {
              :type => :number,
              :value => nr
            }
          end
        elsif @input.check(/@/)
          return maybe(:at_keyword, /@/, IDENT)
        elsif @input.check(/#/)
          return maybe(:hash, /#/, NAME)
        elsif @input.check(/\$/)
          return maybe(:var, /\$/, IDENT)
        elsif @input.check(/./)
          return {
            :type => :delim,
            :value => @input.scan(/./)
          }
        end
      end
    end

    # Returns token of given type when both regexes match.
    # Otherwise returns :delim token with value of first regex match.
    # First regex must always match.
    def maybe(token_type, before_re, after_re)
      before = @input.scan(before_re)
      if @input.check(after_re)
        return {
          :type => token_type,
          :value => before + @input.scan(after_re)
        }
      else
        return {
          :type => :delim,
          :value => before
        }
      end
    end

    # Skips over any whitespace at current position.
    def skip_white
      @input.scan(/\s+/)
    end

    # Simplified token syntax based on:
    # http://www.w3.org/TR/CSS21/syndata.html
    IDENT = /-?[_a-z][_a-z0-9-]*/i
    NAME = /[_a-z0-9-]+/i
    NUM = /[0-9]*\.[0-9]+|[0-9]+/

    private

    # Calculates the line number (starting with 1) of the current
    # scanpointer position.
    #
    # The scanpointer is a byte offset, so the consumed part of the
    # input has to be cut out by bytes.  Slicing by character index
    # overshoots when multibyte characters precede the position.
    def current_linenr
      source = @input.string
      consumed = if source.respond_to?(:byteslice)
        source.byteslice(0, @input.pos)
      else
        # Strings without #byteslice are indexed by bytes already.
        source[0...@input.pos]
      end
      consumed.count("\n") + 1
    end

  end

end

lib/jsduck/css_parser.rb

deleted100644 → 0
+0 −119
Original line number Diff line number Diff line
require 'jsduck/css_lexer'

module JsDuck

  # Parses CSS/SCSS source, collecting every doc-comment together
  # with a description of the code that follows it.
  class CssParser
    # Creates parser for the given CSS/SCSS source string.
    # The options argument is accepted but not used here.
    def initialize(input, options = {})
      @lex = CssLexer.new(input)
      @docs = []
    end

    # Parses the whole CSS block and returns same kind of structure
    # that JavaScript parser does.
    #
    # Returns array of hashes, one per doc-comment, with fields
    # :comment, :linenr, :code and :type.  Tokens that aren't part
    # of a doc-comment or the code block following it are skipped.
    def parse
      while !@lex.empty? do
        if look(:doc_comment)
          comment = @lex.next(true)
          @docs << {
            :comment => comment[:value],
            :linenr => comment[:linenr],
            :code => code_block,
            :type => :doc_comment,
          }
        else
          @lex.next
        end
      end
      @docs
    end

    # <code-block> := <mixin-declaration> | <var-declaration> | <property>
    def code_block
      if look("@mixin")
        mixin_declaration
      elsif look(:var, ":")
        var_declaration
      else
        # Default to property like in JsParser.
        {:tagname => :property}
      end
    end

    # <mixin-declaration> := "@mixin" <ident>
    #
    # The :name is nil when no identifier follows "@mixin".
    def mixin_declaration
      match("@mixin")
      return {
        :tagname => :css_mixin,
        :name => look(:ident) ? match(:ident) : nil,
      }
    end

    # <var-declaration> := <var> ":" <css-value>
    def var_declaration
      name = match(:var)
      match(":")
      value_list = css_value
      return {
        :tagname => :css_var,
        :name => name,
        :default => value_list.map {|v| v[:value] }.join(" "),
        :type => value_type(value_list),
      }
    end

    # <css-value> := ...anything up to... [ ";" | "}" | "!default" ]
    #
    # Also stops at the end of input, so that a declaration without
    # a terminator doesn't read past the last token.
    def css_value
      val = []
      while !@lex.empty? && !look(";") && !look("}") && !look("!", "default")
        val << @lex.next(true)
      end
      val
    end

    # Determines type of CSS value, based on its first token.
    #
    # Returns the type name as string, or nil when the value is
    # empty or its type isn't recognized.
    def value_type(val)
      first = val[0]
      # An empty value (like in "$foo: ;") has no type.
      return nil unless first

      case first[:type]
      when :number
        "number"
      when :dimension
        "length"
      when :percentage
        "percentage"
      when :string
        "string"
      when :hash
        "color"
      when :ident
        case first[:value]
        when "true", "false"
          "boolean"
        when "rgb", "rgba", "hsl", "hsla", "transparent",
          "black", "silver", "gray", "white", "maroon",
          "red", "purple", "fuchsia", "green", "lime", "olive",
          "yellow", "navy", "blue", "teal", "aqua", "orange"
          "color"
        end
      end
    end

    # Matches all arguments, returns the value of last match
    # When the whole sequence doesn't match, raises RuntimeError.
    def match(*args)
      if look(*args)
        last = nil
        args.length.times { last = @lex.next }
        last
      else
        # raise (not throw) - this is an error, not a non-local jump.
        raise "Expected: " + args.join(", ")
      end
    end

    # Shorthand for lookahead on the lexer.
    def look(*args)
      @lex.look(*args)
    end
  end

end
+2 −2
Original line number Diff line number Diff line
require 'jsduck/js_parser'
require 'jsduck/css_parser'
require 'jsduck/css/parser'
require 'jsduck/doc_parser'
require 'jsduck/merger'
require 'jsduck/ast'
@@ -40,7 +40,7 @@ module JsDuck
      # Parses the file depending on filename as JS or CSS
      def parse_js_or_css(contents, filename, options)
        if filename =~ /\.s?css$/
          docs = CssParser.new(contents, options).parse
          docs = Css::Parser.new(contents, options).parse
        else
          docs = JsParser.new(contents, options).parse
          docs = Ast.new(docs).detect_all!
Loading