diff --git a/.github/workflows/submit.yml b/.github/workflows/submit.yml index 7cfaf37d336c043a446ac4eaa6e1ad20e361c37d..99041df6e3b79cc23067a48ca0586ce98e73f2e0 100644 --- a/.github/workflows/submit.yml +++ b/.github/workflows/submit.yml @@ -10,7 +10,7 @@ on: platforms: description: "Platform(s) to execute on" required: true - default: "Linux additional (hotspot only), Linux x64, Linux x86, Windows x64, macOS x64" + default: "Linux additional (hotspot only), Linux x64, Linux x86, Windows aarch64, Windows x64, macOS x64" jobs: prerequisites: @@ -22,6 +22,7 @@ jobs: platform_linux_additional: ${{ steps.check_platforms.outputs.platform_linux_additional }} platform_linux_x64: ${{ steps.check_platforms.outputs.platform_linux_x64 }} platform_linux_x86: ${{ steps.check_platforms.outputs.platform_linux_x86 }} + platform_windows_aarch64: ${{ steps.check_platforms.outputs.platform_windows_aarch64 }} platform_windows_x64: ${{ steps.check_platforms.outputs.platform_windows_x64 }} platform_macos_x64: ${{ steps.check_platforms.outputs.platform_macos_x64 }} platform_macos_aarch64: ${{ steps.check_platforms.outputs.platform_macos_aarch64 }} @@ -38,6 +39,7 @@ jobs: echo "::set-output name=platform_linux_additional::${{ contains(github.event.inputs.platforms, 'linux additional (hotspot only)') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'linux additional (hotspot only)'))) }}" echo "::set-output name=platform_linux_x64::${{ contains(github.event.inputs.platforms, 'linux x64') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'linux x64'))) }}" echo "::set-output name=platform_linux_x86::${{ contains(github.event.inputs.platforms, 'linux x86') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'linux x86'))) }}" + echo "::set-output name=platform_windows_aarch64::${{ contains(github.event.inputs.platforms, 'windows aarch64') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'windows aarch64'))) }}" echo "::set-output name=platform_windows_x64::${{ contains(github.event.inputs.platforms, 'windows x64') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'windows x64'))) }}" echo "::set-output name=platform_macos_x64::${{ contains(github.event.inputs.platforms, 'macos x64') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'macos x64'))) }}" echo "::set-output name=platform_macos_aarch64::${{ contains(github.event.inputs.platforms, 'macos aarch64') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'macos aarch64'))) }}" @@ -172,7 +174,7 @@ jobs: - name: Install dependencies run: | sudo apt-get update - sudo apt-get install gcc-10=10.2.0-5ubuntu1~20.04 g++-10=10.2.0-5ubuntu1~20.04 libxrandr-dev libxtst-dev libcups2-dev libasound2-dev + sudo apt-get install gcc-10=10.3.0-1ubuntu1~20.04 g++-10=10.3.0-1ubuntu1~20.04 libxrandr-dev libxtst-dev libcups2-dev libasound2-dev sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 - name: Configure @@ -487,12 +489,12 @@ jobs: - name: Install native host dependencies run: | - sudo apt-get install gcc-10=10.2.0-5ubuntu1~20.04 
g++-10=10.2.0-5ubuntu1~20.04 libxrandr-dev libxtst-dev libcups2-dev libasound2-dev + sudo apt-get install gcc-10=10.3.0-1ubuntu1~20.04 g++-10=10.3.0-1ubuntu1~20.04 libxrandr-dev libxtst-dev libcups2-dev libasound2-dev sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 if: matrix.debian-arch == '' - name: Install cross-compilation host dependencies - run: sudo apt-get install gcc-10-${{ matrix.gnu-arch }}-linux-gnu${{ matrix.gnu-flavor}}=10.2.0-5ubuntu1~20.04cross1 g++-10-${{ matrix.gnu-arch }}-linux-gnu${{ matrix.gnu-flavor}}=10.2.0-5ubuntu1~20.04cross1 + run: sudo apt-get install gcc-10-${{ matrix.gnu-arch }}-linux-gnu${{ matrix.gnu-flavor}}=10.3.0-1ubuntu1~20.04cross1 g++-10-${{ matrix.gnu-arch }}-linux-gnu${{ matrix.gnu-flavor}}=10.3.0-1ubuntu1~20.04cross1 if: matrix.debian-arch != '' - name: Cache sysroot @@ -847,6 +849,94 @@ jobs: path: ~/linux-x86${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip continue-on-error: true + windows_aarch64_build: + name: Windows aarch64 + runs-on: "windows-2019" + needs: prerequisites + if: needs.prerequisites.outputs.should_run != 'false' && needs.prerequisites.outputs.platform_windows_aarch64 != 'false' + + strategy: + fail-fast: false + matrix: + flavor: + - build debug + include: + - flavor: build debug + flags: --enable-debug + artifact: -debug + + env: + JDK_VERSION: "${{ fromJson(needs.prerequisites.outputs.dependencies).DEFAULT_VERSION_FEATURE }}" + BOOT_JDK_VERSION: "${{ fromJson(needs.prerequisites.outputs.dependencies).BOOT_JDK_VERSION }}" + BOOT_JDK_FILENAME: "${{ fromJson(needs.prerequisites.outputs.dependencies).WINDOWS_X64_BOOT_JDK_FILENAME }}" + BOOT_JDK_URL: "${{ fromJson(needs.prerequisites.outputs.dependencies).WINDOWS_X64_BOOT_JDK_URL }}" + BOOT_JDK_SHA256: "${{ fromJson(needs.prerequisites.outputs.dependencies).WINDOWS_X64_BOOT_JDK_SHA256 }}" + + steps: + - name: Restore cygwin packages from cache + id: cygwin + uses: actions/cache@v2 + with: + path: ~/cygwin/packages + key: cygwin-packages-${{ runner.os }}-v1 + + - name: Install cygwin + run: | + New-Item -Force -ItemType directory -Path "$HOME\cygwin" + & curl -L "https://www.cygwin.com/setup-x86_64.exe" -o "$HOME/cygwin/setup-x86_64.exe" + Start-Process -FilePath "$HOME\cygwin\setup-x86_64.exe" -ArgumentList "--quiet-mode --packages autoconf,make,zip,unzip --root $HOME\cygwin\cygwin64 --local-package-dir $HOME\cygwin\packages --site http://mirrors.kernel.org/sourceware/cygwin --no-desktop --no-shortcuts --no-startmenu --no-admin" -Wait -NoNewWindow + + - name: Checkout the source + uses: actions/checkout@v2 + with: + path: jdk + + - name: Restore boot JDK from cache + id: bootjdk + uses: actions/cache@v2 + with: + path: ~/bootjdk/${{ env.BOOT_JDK_VERSION }} + key: bootjdk-${{ runner.os }}-${{ env.BOOT_JDK_VERSION }}-${{ env.BOOT_JDK_SHA256 }}-v1 + + - name: Download boot JDK + run: | + mkdir -p "$HOME\bootjdk\$env:BOOT_JDK_VERSION" + & curl -L "$env:BOOT_JDK_URL" -o "$HOME/bootjdk/$env:BOOT_JDK_FILENAME" + $FileHash = Get-FileHash -Algorithm SHA256 "$HOME/bootjdk/$env:BOOT_JDK_FILENAME" + $FileHash.Hash -eq $env:BOOT_JDK_SHA256 + & tar -xf "$HOME/bootjdk/$env:BOOT_JDK_FILENAME" -C "$HOME/bootjdk/$env:BOOT_JDK_VERSION" + Get-ChildItem "$HOME\bootjdk\$env:BOOT_JDK_VERSION\*\*" | Move-Item -Destination "$HOME\bootjdk\$env:BOOT_JDK_VERSION" + if: steps.bootjdk.outputs.cache-hit != 'true' + + - name: Ensure a specific version of MSVC is installed + run: > + Start-Process -FilePath 'C:\Program Files (x86)\Microsoft 
Visual Studio\Installer\vs_installer.exe' -Wait -NoNewWindow -ArgumentList + 'modify --installPath "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise" --quiet + --add Microsoft.VisualStudio.Component.VC.14.29.arm64' + + - name: Configure + run: > + $env:Path = "$HOME\cygwin\cygwin64\bin;$HOME\cygwin\cygwin64\bin;$env:Path" ; + $env:Path = $env:Path -split ";" -match "C:\\Windows|PowerShell|cygwin" -join ";" ; + $env:BOOT_JDK = cygpath "$HOME/bootjdk/$env:BOOT_JDK_VERSION" ; + & bash configure + --with-conf-name=windows-aarch64 + --with-msvc-toolset-version=14.29 + --openjdk-target=aarch64-unknown-cygwin + ${{ matrix.flags }} + --with-version-opt="$env:GITHUB_ACTOR-$env:GITHUB_SHA" + --with-version-build=0 + --with-boot-jdk="$env:BOOT_JDK" + --with-default-make-target="hotspot" + working-directory: jdk + + - name: Build + run: | + $env:Path = "$HOME\cygwin\cygwin64\bin;$HOME\cygwin\cygwin64\bin;$env:Path" ; + $env:Path = $env:Path -split ";" -match "C:\\Windows|PowerShell|cygwin" -join ";" ; + & make CONF_NAME=windows-aarch64 + working-directory: jdk + windows_x64_build: name: Windows x64 runs-on: "windows-2019" @@ -1571,6 +1661,7 @@ jobs: needs: - prerequisites - linux_additional_build + - windows_aarch64_build - linux_x64_test - linux_x86_test - windows_x64_test diff --git a/.gitignore b/.gitignore index cf21c8919cd2620fdeef92562bf15bd3b38e227e..6787b23253522efe42871e4732d1bc1925d3aff0 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,5 @@ NashornProfile.txt **/JTreport/** **/JTwork/** /src/utils/LogCompilation/target/ +/.project/ +/.settings/ diff --git a/bin/idea.sh b/bin/idea.sh index 49c6ee45e3b469be62774607518288bacc5a32c3..6359a77b6db55f02664f581bce1ca968d931aa35 100644 --- a/bin/idea.sh +++ b/bin/idea.sh @@ -25,7 +25,7 @@ # Shell script for generating an IDEA project from a given list of modules usage() { - echo "usage: $0 [-h|--help] [-v|--verbose] [-o|--output ] [modules]+" + echo "usage: $0 [-h|--help] [-v|--verbose] [-o|--output ] [-c|--conf ] [modules]+" exit 1 } @@ -37,6 +37,7 @@ cd $TOP; IDEA_OUTPUT=$TOP/.idea VERBOSE="false" +CONF_ARG= while [ $# -gt 0 ] do case $1 in @@ -52,6 +53,10 @@ do IDEA_OUTPUT=$2/.idea shift ;; + -c | --conf ) + CONF_ARG="CONF_NAME=$2" + shift + ;; -*) # bad option usage @@ -64,6 +69,9 @@ do shift done +if [ -e $IDEA_OUTPUT ] ; then + rm -r $IDEA_OUTPUT +fi mkdir -p $IDEA_OUTPUT || exit 1 cd $IDEA_OUTPUT; IDEA_OUTPUT=`pwd` @@ -91,7 +99,7 @@ if [ "$VERBOSE" = "true" ] ; then echo "idea template dir: $IDEA_TEMPLATE" fi -cd $TOP ; make -f "$IDEA_MAKE/idea.gmk" -I $MAKE_DIR/.. idea MAKEOVERRIDES= OUT=$IDEA_OUTPUT/env.cfg MODULES="$*" || exit 1 +cd $TOP ; make -f "$IDEA_MAKE/idea.gmk" -I $MAKE_DIR/.. idea MAKEOVERRIDES= OUT=$IDEA_OUTPUT/env.cfg MODULES="$*" $CONF_ARG || exit 1 cd $SCRIPT_DIR . 
$IDEA_OUTPUT/env.cfg @@ -148,14 +156,14 @@ add_replacement "###MODULE_NAMES###" "$MODULE_NAMES" add_replacement "###VCS_TYPE###" "$VCS_TYPE" SPEC_DIR=`dirname $SPEC` if [ "x$CYGPATH" != "x" ]; then - add_replacement "###BUILD_DIR###" "`cygpath -am $SPEC_DIR`" - add_replacement "###IMAGES_DIR###" "`cygpath -am $SPEC_DIR`/images/jdk" - add_replacement "###ROOT_DIR###" "`cygpath -am $TOPLEVEL_DIR`" - add_replacement "###IDEA_DIR###" "`cygpath -am $IDEA_OUTPUT`" + add_replacement "###BUILD_DIR###" "`$CYGPATH -am $SPEC_DIR`" + add_replacement "###IMAGES_DIR###" "`$CYGPATH -am $SPEC_DIR`/images/jdk" + add_replacement "###ROOT_DIR###" "`$CYGPATH -am $TOPLEVEL_DIR`" + add_replacement "###IDEA_DIR###" "`$CYGPATH -am $IDEA_OUTPUT`" if [ "x$JT_HOME" = "x" ]; then add_replacement "###JTREG_HOME###" "" else - add_replacement "###JTREG_HOME###" "`cygpath -am $JT_HOME`" + add_replacement "###JTREG_HOME###" "`$CYGPATH -am $JT_HOME`" fi elif [ "x$WSL_DISTRO_NAME" != "x" ]; then add_replacement "###BUILD_DIR###" "`wslpath -am $SPEC_DIR`" @@ -180,7 +188,7 @@ SOURCE_POSTFIX="\" isTestSource=\"false\" />" for root in $MODULE_ROOTS; do if [ "x$CYGPATH" != "x" ]; then - root=`cygpath -am $root` + root=`$CYGPATH -am $root` elif [ "x$WSL_DISTRO_NAME" != "x" ]; then root=`wslpath -am $root` fi @@ -219,26 +227,33 @@ fi CP=$ANT_HOME/lib/ant.jar rm -rf $CLASSES; mkdir $CLASSES -if [ "x$CYGPATH" != "x" ] ; then ## CYGPATH may be set in env.cfg - JAVAC_SOURCE_FILE=`cygpath -am $IDEA_OUTPUT/src/idea/IdeaLoggerWrapper.java` - JAVAC_SOURCE_PATH=`cygpath -am $IDEA_OUTPUT/src` - JAVAC_CLASSES=`cygpath -am $CLASSES` - JAVAC_CP=`cygpath -am $CP` +# If we have a Windows boot JDK, we need a .exe suffix +if [ -e "$BOOT_JDK/bin/java.exe" ] ; then + JAVAC=javac.exe +else JAVAC=javac -elif [ "x$WSL_DISTRO_NAME" != "x" ]; then +fi + +# If we are on WSL, the boot JDK might be either Windows or Linux, +# and we need to use realpath instead of CYGPATH to make javac work on both. +# We need to handle this case first since CYGPATH might be set on WSL. +if [ "x$WSL_DISTRO_NAME" != "x" ]; then JAVAC_SOURCE_FILE=`realpath --relative-to=./ $IDEA_OUTPUT/src/idea/IdeaLoggerWrapper.java` JAVAC_SOURCE_PATH=`realpath --relative-to=./ $IDEA_OUTPUT/src` JAVAC_CLASSES=`realpath --relative-to=./ $CLASSES` ANT_TEMP=`mktemp -d -p ./` cp $ANT_HOME/lib/ant.jar $ANT_TEMP/ant.jar JAVAC_CP=$ANT_TEMP/ant.jar - JAVAC=javac.exe +elif [ "x$CYGPATH" != "x" ] ; then ## CYGPATH may be set in env.cfg + JAVAC_SOURCE_FILE=`$CYGPATH -am $IDEA_OUTPUT/src/idea/IdeaLoggerWrapper.java` + JAVAC_SOURCE_PATH=`$CYGPATH -am $IDEA_OUTPUT/src` + JAVAC_CLASSES=`$CYGPATH -am $CLASSES` + JAVAC_CP=`$CYGPATH -am $CP` else JAVAC_SOURCE_FILE=$IDEA_OUTPUT/src/idea/IdeaLoggerWrapper.java JAVAC_SOURCE_PATH=$IDEA_OUTPUT/src JAVAC_CLASSES=$CLASSES JAVAC_CP=$CP - JAVAC=javac fi $BOOT_JDK/bin/$JAVAC -d $JAVAC_CLASSES -sourcepath $JAVAC_SOURCE_PATH -cp $JAVAC_CP $JAVAC_SOURCE_FILE diff --git a/doc/building.html b/doc/building.html index 71c3710c9ae93513ea541000b3e6368c641bfd85..8d43c95bee59c91a71dece0d095299ed8cdb9540 100644 --- a/doc/building.html +++ b/doc/building.html @@ -488,7 +488,7 @@
[doc/building.html hunks: only rendered text survives here; the raw HTML markup was lost in extraction. doc/building.html is generated from doc/building.md, and these hunks mirror the doc/building.md changes below: the two "Testing the JDK" references now link to both the html and markdown versions of the testing guide, and the Adoption Group's jtreg download link is updated.]

    diff --git a/doc/building.md b/doc/building.md index 2f9a0026e28920a0a0007fb80f5aa0f50261c6e8..4c8cf213465422e04e9d72f397e659491d62de0c 100644 --- a/doc/building.md +++ b/doc/building.md @@ -818,7 +818,7 @@ configuration, as opposed to the "configure time" configuration. #### Test Make Control Variables These make control variables only make sense when running tests. Please see -[Testing the JDK](testing.html) for details. +**Testing the JDK** ([html](testing.html), [markdown](testing.md)) for details. * `TEST` * `TEST_JOBS` @@ -848,7 +848,7 @@ containing `lib/jtreg.jar` etc. The [Adoption Group](https://wiki.openjdk.java.net/display/Adoption) provides recent builds of jtreg [here]( -https://ci.adoptopenjdk.net/view/Dependencies/job/jtreg/lastSuccessfulBuild/artifact). +https://ci.adoptopenjdk.net/view/Dependencies/job/dependency_pipeline/lastSuccessfulBuild/artifact/jtreg/). Download the latest `.tar.gz` file, unpack it, and point `--with-jtreg` to the `jtreg` directory that you just unpacked. @@ -865,8 +865,8 @@ To execute the most basic tests (tier 1), use: make run-test-tier1 ``` -For more details on how to run tests, please see the [Testing -the JDK](testing.html) document. +For more details on how to run tests, please see **Testing the JDK** +([html](testing.html), [markdown](testing.md)). ## Cross-compiling diff --git a/doc/hotspot-style.html b/doc/hotspot-style.html index b0c22caf00fdee1248718d150781b9f9594c7041..fe72cbbdf80c1cca160571c8e4b9cd51e122c9f1 100644 --- a/doc/hotspot-style.html +++ b/doc/hotspot-style.html @@ -50,6 +50,7 @@
[doc/hotspot-style.html hunks: only rendered text survives here; the raw HTML markup was lost in extraction. doc/hotspot-style.html is generated from doc/hotspot-style.md, and these hunks mirror the doc/hotspot-style.md changes below: a new "Local Function Objects" section (with a matching table-of-contents entry) permits lambda expressions under restrictions (downward use only, prefer a [&] capture list, no mutable lambdas, no capture initializers, no bind expressions, keep lambdas simple and name non-trivial ones); the "Generic lambdas" and "Lambda init captures" not-yet-permitted bullets become a cross-reference to that section; the Chromium style-guide link is updated; the n2657 "local and unnamed types as template parameters" entry moves into the new section; and "Lambdas" is removed from the undecided-features list.]
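To make the new lambda guidance concrete, here is a minimal sketch of the RAII use case the section describes: a small scope-guard utility driven by a named, `[&]`-capturing, downward-only lambda. This is a hypothetical illustration, not part of this patch and not HotSpot's actual supporting utility code; the `ScopeGuard`, `Resource`, `acquire`, `release`, and `do_work` names are all invented.

```
// Minimal scope-guard utility: runs the supplied function object when the
// guard goes out of scope. The lambda is only passed downward; it is held
// in a stack object that cannot outlive the enclosing scope.
template <typename F>
class ScopeGuard {
  F _f;
 public:
  explicit ScopeGuard(F f) : _f(f) {}
  ~ScopeGuard() { _f(); }
};

// Invented stand-ins so the sketch is self-contained.
struct Resource { bool held; };
static void acquire(Resource& r) { r.held = true; }
static void release(Resource& r) { r.held = false; }
static bool do_work(Resource& r) { return r.held; }

bool example(Resource& r) {
  acquire(r);
  // Named lambda with the preferred [&] capture list and an elided
  // parameter list; release(r) runs on every exit path.
  auto cleanup = [&] { release(r); };
  ScopeGuard<decltype(cleanup)> guard(cleanup);
  if (!do_work(r)) {
    return false;  // the guard releases r here too
  }
  return true;
}
```

Per the guidance below, the lambda stays non-`mutable`, is given a name because it carries cleanup semantics, and never escapes the enclosing scope, so its by-reference captures remain valid for the guard's entire lifetime.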

  • diff --git a/doc/hotspot-style.md b/doc/hotspot-style.md index e631033c78ffe18ed73c67a27eaf234dc40339f7..6d167cad9d6c20a2c7e6713aca40994de6da9a3f 100644 --- a/doc/hotspot-style.md +++ b/doc/hotspot-style.md @@ -409,7 +409,7 @@ Similar discussions for some other projects: * [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html) — Currently (2020) targeting C++17. -* [C++11 and C++14 use in Chromium](https://chromium-cpp.appspot.com) — +* [C++11 and C++14 use in Chromium](https://chromium.googlesource.com/chromium/src/+/main/styleguide/c++/c++11.md) — Categorizes features as allowed, banned, or to be discussed. * [llvm Coding Standards](https://llvm.org/docs/CodingStandards.html) — @@ -596,9 +596,7 @@ use can make code much harder to understand. Only use if the function body has a very small number of `return` statements, and generally relatively little other code. -* Generic lambdas. Lambdas are not (yet) permitted. - -* Lambda init captures. Lambdas are not (yet) permitted. +* Also see [lambda expressions](#lambdaexpressions). ### Expression SFINAE @@ -703,6 +701,273 @@ Some relevant sections from cppreference.com: Although related, the use of `std::initializer_list` remains forbidden, as part of the avoidance of the C++ Standard Library in HotSpot code. +### Local Function Objects + +* Local function objects, including lambda expressions, may be used. +* Lambda expressions must only be used as a downward value. +* Prefer `[&]` as the capture list of a lambda expression. +* Return type deduction for lambda expressions is permitted, and indeed encouraged. +* An empty parameter list for a lambda expression may be elided. +* A lambda expression must not be `mutable`. +* Generic lambda expressions are permitted. +* Lambda expressions should be relatively simple. +* Anonymous lambda expressions should not overly clutter the enclosing expression. +* An anonymous lambda expression must not be directly invoked. +* Bind expressions are forbidden. + +Single-use function objects can be defined locally within a function, +directly at the point of use. This is an alternative to having a function +or function object class defined at class or namespace scope. + +This usage was somewhat limited by C++03, which does not permit such a class +to be used as a template parameter. That restriction was removed by C++11 +([n2657]). Use of this feature is permitted. + +Many HotSpot protocols involve "function-like" objects that involve some +named member function rather than a call operator. For example, a function +that performs some action on all threads might be written as + +``` +void do_something() { + struct DoSomething : public ThreadClosure { + virtual void do_thread(Thread* t) { + ... do something with t ... + } + } closure; + Threads::threads_do(&closure); +} +``` + +HotSpot code has historically usually placed the DoSomething class at +namespace (or sometimes class) scope. This separates the function's code +from its use, often to the detriment of readability. It requires giving the +class a globally unique name (if at namespace scope). It also loses the +information that the class is intended for use in exactly one place, and +does not have any subclasses. (However, the latter can now be indicated by +declaring it `final`.) 
Often, for simplicity, a local class will skip +things like access control and accessor functions, giving the enclosing +function direct access to the implementation and eliminating some +boilerplate that might be provided if the class is in some outer (more +accessible) scope. On the other hand, if there is a lot of surrounding code +in the function body or the local class is of significant size, defining it +locally can increase clutter and reduce readability. + + +C++11 added _lambda expressions_ as a new way to write a function object. +Simple lambda expressions can be significantly more concise than a function +object, eliminating a lot of boilerplate. On the other hand, a complex +lambda expression may not provide much, if any, readability benefit compared +to an ordinary function object. Also, while a lambda can encapsulate a call +to a "function-like" object, it cannot be used in place of such. + +A common use for local functions is as one-use [RAII] objects. The amount +of boilerplate for a function object class (local or not) makes such usage +somewhat clumsy and verbose. But with the help of a small amount of +supporting utility code, lambdas work particularly well for this use case. + +Another use for local functions is [partial application][PARTIALAPP]. Again +here, lambdas are typically much simpler and less verbose than function +object classes. + +Because of these benefits, lambda expressions are permitted in HotSpot code, +with some restrictions and usage guidance. An anonymous lambda is one which +is passed directly as an argument. A named lambda is the value of a +variable, which is its name. + +Lambda expressions should only be passed downward. In particular, a lambda +should not be returned from a function or stored in a global variable, +whether directly or as the value of a member of some other object. Lambda +capture is syntactically subtle (by design), and propagating a lambda in +such ways can easily pass references to captured values to places where they +are no longer valid. In particular, members of the enclosing `this` object +are effectively captured by reference, even if the default capture is +by-value. For such use-cases a function object class should be used to +make the desired value capturing and propagation explicit. + +Limiting the capture list to `[&]` (implicitly capture by reference) is a +simplifying restriction that still provides good support for HotSpot usage, +while reducing the cases a reader must recognize and understand. + +* Many common lambda uses require reference capture. Not permitting it +would substantially reduce the utility of lambdas. + +* Referential transparency. Implicit reference capture makes variable +references in the lambda body have the same meaning they would have in the +enclosing code. There isn't a semantic barrier across which the meaning of +a variable changes. + +* Explicit reference capture introduces significant clutter, especially when +lambda expressions are relatively small and simple, as they should be in +HotSpot code. + +* There are a number of reasons why by-value capture might be used, but for +the most part they don't apply to HotSpot code, given other usage restrictions. + + * A primary use-case for by-value capture is to support escaping uses, + where values captured by-reference might become invalid. That use-case + doesn't apply if only downward lambdas are used. + + * By-value capture can also make a lambda-local copy for mutation, which + requires making the lambda `mutable`; see below.
+ + * By-value capture might be viewed as an optimization, avoiding any + overhead for reference capture of cheap to copy values. But the + compiler can often eliminate any such overhead. + + * By-value capture by a non-`mutable` lambda makes the captured values + const, preventing any modification by the lambda and making the captured + value unaffected by modifications to the outer variable. But this only + applies to captured auto variables, not member variables, and is + inconsistent with referential transparency. + +* Non-capturing lambdas (with an empty capture list - `[]`) have limited +utility. There are cases where no captures are required (pure functions, +for example), but if the function is small and simple then that's obvious +anyway. + +* Capture initializers (a C++14 feature - [N3648]) are not permitted. +Capture initializers inherently increase the complexity of the capture list, +and provide little benefit over an additional in-scope local variable. + +The use of `mutable` lambda expressions is forbidden because there don't +seem to be many, if any, good use-cases for them in HotSpot. A lambda +expression needs to be mutable in order to modify a by-value captured value. +But with only downward lambdas, such usage seems likely to be rare and +complicated. It is better to use a function object class in any such cases +that arise, rather than requiring all HotSpot developers to understand this +relatively obscure feature. + +While it is possible to directly invoke an anonymous lambda expression, that +feature should not be used, as such a form can be confusing to readers. +Instead, name the lambda and call it by name. + +Some reasons to prefer a named lambda instead of an anonymous lambda are + +* The body contains non-trivial control flow or declarations or other nested +constructs. + +* Its role in an argument list is hard to guess without examining the +function declaration. Give it a name that indicates its purpose. + +* It has an unusual capture list. + +* It has a complex explicit return type or parameter types. + +Lambda expressions, and particularly anonymous lambda expressions, should be +simple and compact. One-liners are good. Anonymous lambdas should usually +be limited to a couple lines of body code. More complex lambdas should be +named. A named lambda should not clutter the enclosing function and make it +long and complex; do continue to break up large functions via the use of +separate helper functions. + +An anonymous lambda expression should either be a one-liner in a one-line +expression, or isolated in its own set of lines. Don't place part of a +lambda expression on the same line as other arguments to a function. The +body of a multi-line lambda argument should be indented from the start of +the capture list, as if that were the start of an ordinary function +definition. The body of a multi-line named lambda should be indented one +step from the variable's indentation. + +Some examples: + +1. `foo([&] { ++counter; });` +2. `foo(x, [&] { ++counter; });` +3. `foo([&] { if (predicate) ++counter; });` +4. `foo([&] { auto tmp = process(x); tmp.f(); return tmp.g(); })` +5. Separate one-line lambda from other arguments: + + ``` + foo(c.begin(), c.end(), + [&] (const X& x) { do_something(x); return x.value(); }); + ``` +6. Indentation for multi-line lambda: + + ``` + c.do_entries([&] (const X& x) { + do_something(x, a); + do_something1(x, b); + do_something2(x, c); + }); + ``` +7.
Separate multi-line lambda from other arguments: + + ``` + foo(c.begin(), c.end(), + [&] (const X& x) { + do_something(x, a); + do_something1(x, b); + do_something2(x, c); + }); + ``` +8. Multi-line named lambda: + + ``` + auto do_entry = [&] (const X& x) { + do_something(x, a); + do_something1(x, b); + do_something2(x, c); + }; + ``` + +Item 4, and especially items 6 and 7, are pushing the simplicity limits for +anonymous lambdas. Item 6 might be better written using a named lambda: +``` +c.do_entries(do_entry); +``` + +Note that C++11 also added _bind expressions_ as a way to write a function +object for partial application, using `std::bind` and related facilities +from the Standard Library. `std::bind` generalizes and replaces some of the +binders from C++03. Bind expressions are not permitted in HotSpot code. +They don't provide enough benefit over lambdas or local function classes in +the cases where bind expressions are applicable to warrant the introduction +of yet another mechanism in this space into HotSpot code. + +References: + +* Local and unnamed types as template parameters ([n2657]) +* New wording for C++0x lambdas ([n2927]) +* Generalized lambda capture (init-capture) ([N3648]) +* Generic (polymorphic) lambda expressions ([N3649]) + +[n2657]: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2657.htm +[n2927]: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2009/n2927.pdf +[N3648]: https://isocpp.org/files/papers/N3648.html +[N3649]: https://isocpp.org/files/papers/N3649.html + +References from C++17 + +* Wording for constexpr lambda ([p0170r1]) +* Lambda capture of *this by Value ([p0018r3]) + +[p0170r1]: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0170r1.pdf +[p0018r3]: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0018r3.html + +References from C++20 + +* Allow lambda capture [=, this] ([p0409r2]) +* Familiar template syntax for generic lambdas ([p0428r2]) +* Simplifying implicit lambda capture ([p0588r1]) +* Default constructible and assignable stateless lambdas ([p0624r2]) +* Lambdas in unevaluated contexts ([p0315r4]) +* Allow pack expansion in lambda init-capture ([p0780r2]) ([p2095r0]) +* Deprecate implicit capture of this via [=] ([p0806r2]) + +[p0409r2]: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0409r2.html +[p0428r2]: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0428r2.pdf +[p0588r1]: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0588r1.html +[p0624r2]: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0624r2.pdf +[p0315r4]: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0315r4.pdf +[p0780r2]: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0780r2.html +[p2095r0]: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2095r0.html +[p0806r2]: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0806r2.html + +References from C++23 + +* Make () more optional for lambdas ([p1102r2]) + +[p1102r2]: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p1102r2.html + ### Additional Permitted Features * `constexpr` @@ -757,9 +1022,6 @@ part of the avoidance of the C++ Standard Library in HotSpot code. 
([n3206](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2010/n3206.htm)), ([n3272](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3272.htm)) -* Local and unnamed types as template parameters -([n2657](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2657.htm)) - * Range-based `for` loops ([n2930](http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2009/n2930.html)) ([range-for](https://en.cppreference.com/w/cpp/language/range-for)) @@ -837,9 +1099,6 @@ features that have not yet been discussed. * Rvalue references and move semantics -* Lambdas - - [ADL]: https://en.cppreference.com/w/cpp/language/adl "Argument Dependent Lookup" @@ -854,3 +1113,6 @@ features that have not yet been discussed. [SFINAE]: https://en.cppreference.com/w/cpp/language/sfinae "Substitution Failure Is Not An Error" + +[PARTIALAPP]: https://en.wikipedia.org/wiki/Partial_application + "Partial Application" diff --git a/doc/testing.html b/doc/testing.html index 49227421dcf68e2a7a35799fad5877a22c10de4a..1146400df805f5d5cc84536ea96b8b484c7dcf09 100644 --- a/doc/testing.html +++ b/doc/testing.html @@ -27,6 +27,7 @@
[doc/testing.html hunks: only rendered text survives here; the raw HTML markup was lost in extraction. doc/testing.html is generated from doc/testing.md, and these hunks mirror the doc/testing.md changes below: a new "Common Test Groups" section (with a matching table-of-contents entry) describes component-specific and tiered test groups (tier1 through tier4), and the non-US locale workaround note now covers macOS as well as Windows.]
      diff --git a/doc/testing.md b/doc/testing.md index 0d09491be6e62a7076ded423872f75b24c006858..5dde4d11804d67a4450c7c2a1beb9227ccb8a221 100644 --- a/doc/testing.md +++ b/doc/testing.md @@ -64,6 +64,52 @@ jtreg:$(TOPDIR)/test/nashorn:tier1 jtreg:$(TOPDIR)/test/jaxp:tier1`. You can always submit a list of fully qualified test descriptors in the `TEST` variable if you want to shortcut the parser. +### Common Test Groups + +Ideally, all tests are run for every change but this may not be practical due to the limited +testing resources, the scope of the change, etc. + +The source tree currently defines a few common test groups in the relevant `TEST.groups` +files. There are test groups that cover a specific component, for example `hotspot_gc`. +It is a good idea to look into `TEST.groups` files to get a sense what tests are relevant +to a particular JDK component. + +Component-specific tests may miss some unintended consequences of a change, so other +tests should also be run. Again, it might be impractical to run all tests, and therefore +_tiered_ test groups exist. Tiered test groups are not component-specific, but rather cover +the significant parts of the entire JDK. + +Multiple tiers allow balancing test coverage and testing costs. Lower test tiers are supposed to +contain the simpler, quicker and more stable tests. Higher tiers are supposed to contain +progressively more thorough, slower, and sometimes less stable tests, or the tests that require +special configuration. + +Contributors are expected to run the tests for the areas that are changed, and the first N tiers +they can afford to run, but at least tier1. + +A brief description of the tiered test groups: + +- `tier1`: This is the lowest test tier. Multiple developers run these tests every day. +Because of the widespread use, the tests in `tier1` are carefully selected and optimized to run +fast, and to run in the most stable manner. The test failures in `tier1` are usually followed up +on quickly, either with fixes, or adding relevant tests to problem list. GitHub Actions workflows, +if enabled, run `tier1` tests. + +- `tier2`: This test group covers even more ground. These contain, among other things, +tests that either run for too long to be at `tier1`, or may require special configuration, +or tests that are less stable, or cover the broader range of non-core JVM and JDK features/components +(for example, XML). + +- `tier3`: This test group includes more stressful tests, the tests for corner cases +not covered by previous tiers, plus the tests that require GUIs. As such, this suite +should either be run with low concurrency (`TEST_JOBS=1`), or without headful tests +(`JTREG_KEYWORDS=\!headful`), or both. + +- `tier4`: This test group includes every other test not covered by previous tiers. It includes, +for example, `vmTestbase` suites for Hotspot, which run for many hours even on large +machines. It also runs GUI tests, so the same `TEST_JOBS` and `JTREG_KEYWORDS` caveats +apply. + ### JTReg JTReg tests can be selected either by picking a JTReg test group, or a selection @@ -460,7 +506,7 @@ $ make test TEST="jtreg:test/hotspot/jtreg/containers/docker" \ If your locale is non-US, some tests are likely to fail. To work around this you can set the locale to US. On Unix platforms simply setting `LANG="en_US"` -in the environment before running tests should work. On Windows, setting +in the environment before running tests should work. 
On Windows or MacOS, setting `JTREG="VM_OPTIONS=-Duser.language=en -Duser.country=US"` helps for most, but not all test cases. diff --git a/make/CreateJmods.gmk b/make/CreateJmods.gmk index 01ce2cf48ea84b84e47370b16f380f53840761eb..d9e5f415fe787c5d0b7721464f15a339b6d15ead 100644 --- a/make/CreateJmods.gmk +++ b/make/CreateJmods.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2014, 2020, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2014, 2021, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -196,6 +196,11 @@ else # not java.base endif endif +# Set main class of jdk.httpserver module +ifeq ($(MODULE), jdk.httpserver) + JMOD_FLAGS += --main-class sun.net.httpserver.simpleserver.Main +endif + # Changes to the jmod tool itself should also trigger a rebuild of all jmods. # The variable JMOD_CMD could contain an environment variable assignment before # the actual command. Filter that out using wildcard before adding to DEPS. @@ -213,12 +218,12 @@ endif ifeq ($(call isTargetOs, windows), true) ifeq ($(SHIP_DEBUG_SYMBOLS), ) - JMOD_FLAGS += --exclude '**{_the.*,_*.marker,*.diz,*.pdb,*.map}' + JMOD_FLAGS += --exclude '**{_the.*,_*.marker*,*.diz,*.pdb,*.map}' else - JMOD_FLAGS += --exclude '**{_the.*,_*.marker,*.diz,*.map}' + JMOD_FLAGS += --exclude '**{_the.*,_*.marker*,*.diz,*.map}' endif else - JMOD_FLAGS += --exclude '**{_the.*,_*.marker,*.diz,*.debuginfo,*.dSYM/**,*.dSYM}' + JMOD_FLAGS += --exclude '**{_the.*,_*.marker*,*.diz,*.debuginfo,*.dSYM/**,*.dSYM}' endif # Create jmods in the support dir and then move them into place to keep the diff --git a/make/GenerateLinkOptData.gmk b/make/GenerateLinkOptData.gmk index 0de28d643fc5fcbbab1d228448c754e2100fe891..5dd766c8c07dfd4650c912d69077d8e78614721e 100644 --- a/make/GenerateLinkOptData.gmk +++ b/make/GenerateLinkOptData.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -88,7 +88,10 @@ $(CLASSLIST_FILE): $(INTERIM_IMAGE_DIR)/bin/java$(EXECUTABLE_SUFFIX) $(CLASSLIST $(CAT) $(LINK_OPT_DIR)/stderr $(JLI_TRACE_FILE) ; \ exit $$exitcode \ ) - $(GREP) -v HelloClasslist $@.raw.2 > $@ + $(GREP) -v HelloClasslist $@.raw.2 > $@.raw.3 + $(FIXPATH) $(INTERIM_IMAGE_DIR)/bin/java \ + -cp $(SUPPORT_OUTPUTDIR)/classlist.jar \ + build.tools.classlist.SortClasslist $@.raw.3 > $@ # The jli trace is created by the same recipe as classlist. By declaring these # dependencies, make will correctly rebuild both jli trace and classlist diff --git a/make/Hsdis.gmk b/make/Hsdis.gmk new file mode 100644 index 0000000000000000000000000000000000000000..2253da906797293b3089c450688a76760c15df0d --- /dev/null +++ b/make/Hsdis.gmk @@ -0,0 +1,118 @@ +# +# Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. Oracle designates this +# particular file as subject to the "Classpath" exception as provided +# by Oracle in the LICENSE file that accompanied this code. 
diff --git a/make/CreateJmods.gmk b/make/CreateJmods.gmk index 01ce2cf48ea84b84e47370b16f380f53840761eb..d9e5f415fe787c5d0b7721464f15a339b6d15ead 100644 --- a/make/CreateJmods.gmk +++ b/make/CreateJmods.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2014, 2020, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2014, 2021, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -196,6 +196,11 @@ else # not java.base endif endif +# Set main class of jdk.httpserver module +ifeq ($(MODULE), jdk.httpserver) + JMOD_FLAGS += --main-class sun.net.httpserver.simpleserver.Main +endif + # Changes to the jmod tool itself should also trigger a rebuild of all jmods. # The variable JMOD_CMD could contain an environment variable assignment before # the actual command. Filter that out using wildcard before adding to DEPS. @@ -213,12 +218,12 @@ endif ifeq ($(call isTargetOs, windows), true) ifeq ($(SHIP_DEBUG_SYMBOLS), ) - JMOD_FLAGS += --exclude '**{_the.*,_*.marker,*.diz,*.pdb,*.map}' + JMOD_FLAGS += --exclude '**{_the.*,_*.marker*,*.diz,*.pdb,*.map}' else - JMOD_FLAGS += --exclude '**{_the.*,_*.marker,*.diz,*.map}' + JMOD_FLAGS += --exclude '**{_the.*,_*.marker*,*.diz,*.map}' endif else - JMOD_FLAGS += --exclude '**{_the.*,_*.marker,*.diz,*.debuginfo,*.dSYM/**,*.dSYM}' + JMOD_FLAGS += --exclude '**{_the.*,_*.marker*,*.diz,*.debuginfo,*.dSYM/**,*.dSYM}' endif # Create jmods in the support dir and then move them into place to keep the diff --git a/make/GenerateLinkOptData.gmk b/make/GenerateLinkOptData.gmk index 0de28d643fc5fcbbab1d228448c754e2100fe891..5dd766c8c07dfd4650c912d69077d8e78614721e 100644 --- a/make/GenerateLinkOptData.gmk +++ b/make/GenerateLinkOptData.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -88,7 +88,10 @@ $(CLASSLIST_FILE): $(INTERIM_IMAGE_DIR)/bin/java$(EXECUTABLE_SUFFIX) $(CLASSLIST $(CAT) $(LINK_OPT_DIR)/stderr $(JLI_TRACE_FILE) ; \ exit $$exitcode \ ) - $(GREP) -v HelloClasslist $@.raw.2 > $@ + $(GREP) -v HelloClasslist $@.raw.2 > $@.raw.3 + $(FIXPATH) $(INTERIM_IMAGE_DIR)/bin/java \ + -cp $(SUPPORT_OUTPUTDIR)/classlist.jar \ + build.tools.classlist.SortClasslist $@.raw.3 > $@ # The jli trace is created by the same recipe as classlist. By declaring these # dependencies, make will correctly rebuild both jli trace and classlist diff --git a/make/Hsdis.gmk b/make/Hsdis.gmk new file mode 100644 index 0000000000000000000000000000000000000000..2253da906797293b3089c450688a76760c15df0d --- /dev/null +++ b/make/Hsdis.gmk @@ -0,0 +1,118 @@ +# +# Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. Oracle designates this +# particular file as subject to the "Classpath" exception as provided +# by Oracle in the LICENSE file that accompanied this code. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# + +default: all + +include $(SPEC) +include MakeBase.gmk +include JdkNativeCompilation.gmk + +################################################################################ +# This makefile compiles and installs the hsdis library +# +################################################################################ + +HSDIS_OUTPUT_DIR := $(SUPPORT_OUTPUTDIR)/hsdis + +ifeq ($(call isTargetOs, windows), true) + INSTALLED_HSDIS_DIR := $(JDK_OUTPUTDIR)/bin + + # On Windows, we need to "fake" a completely different toolchain using gcc + # instead of the normal Microsoft toolchain. This is quite hacky... + + MINGW_BASE := x86_64-w64-mingw32 + + $(eval $(call DefineNativeToolchain, TOOLCHAIN_MINGW, \ + CC := $(MINGW_BASE)-gcc, \ + LD := $(MINGW_BASE)-ld, \ + OBJCOPY := $(MINGW_BASE)-objcopy, \ + RC := $(RC), \ + SYSROOT_CFLAGS := --sysroot=/usr/$(MINGW_BASE)/sys-root, \ + SYSROOT_LDFLAGS := --sysroot=/usr/$(MINGW_BASE)/sys-root, \ + )) + + TOOLCHAIN_TYPE := gcc + OPENJDK_TARGET_OS := linux + CC_OUT_OPTION := -o$(SPACE) + LD_OUT_OPTION := -o$(SPACE) + GENDEPS_FLAGS := -MMD -MF + CFLAGS_DEBUG_SYMBOLS := -g + DISABLED_WARNINGS := + DISABLE_WARNING_PREFIX := -Wno- + CFLAGS_WARNINGS_ARE_ERRORS := -Werror + SHARED_LIBRARY_FLAGS := -shared + + HSDIS_TOOLCHAIN := TOOLCHAIN_MINGW + HSDIS_TOOLCHAIN_CFLAGS := + HSDIS_TOOLCHAIN_LDFLAGS := -L/usr/lib/gcc/$(MINGW_BASE)/9.2.0 \ + -L/usr/$(MINGW_BASE)/sys-root/mingw/lib + MINGW_DLLCRT := /usr/$(MINGW_BASE)/sys-root/mingw/lib/dllcrt2.o + HSDIS_TOOLCHAIN_LIBS := $(MINGW_DLLCRT) -lmingw32 -lgcc -lgcc_eh -lmoldname \ + -lmingwex -lmsvcrt -lpthread -ladvapi32 -lshell32 -luser32 -lkernel32 +else + INSTALLED_HSDIS_DIR := $(JDK_OUTPUTDIR)/lib + + HSDIS_TOOLCHAIN := TOOLCHAIN_DEFAULT + HSDIS_TOOLCHAIN_CFLAGS := $(CFLAGS_JDKLIB) + HSDIS_TOOLCHAIN_LDFLAGS := $(LDFLAGS_JDKLIB) + HSDIS_TOOLCHAIN_LIBS := -ldl +endif + + +$(eval $(call SetupJdkLibrary, BUILD_HSDIS, \ + NAME := hsdis, \ + SRC := $(TOPDIR)/src/utils/hsdis, \ + TOOLCHAIN := $(HSDIS_TOOLCHAIN), \ + OUTPUT_DIR := $(HSDIS_OUTPUT_DIR), \ + OBJECT_DIR := $(HSDIS_OUTPUT_DIR), \ + DISABLED_WARNINGS_gcc := undef format-nonliteral sign-compare, \ + DISABLED_WARNINGS_clang := undef format-nonliteral, \ + CFLAGS := $(HSDIS_TOOLCHAIN_CFLAGS) $(HSDIS_CFLAGS), \ + LDFLAGS := $(HSDIS_TOOLCHAIN_LDFLAGS) $(SHARED_LIBRARY_FLAGS), \ + LIBS := $(HSDIS_LIBS) $(HSDIS_TOOLCHAIN_LIBS), \ +)) + +build: $(BUILD_HSDIS) + +TARGETS += build + +INSTALLED_HSDIS_NAME := hsdis-$(OPENJDK_TARGET_CPU_LEGACY_LIB)$(SHARED_LIBRARY_SUFFIX) + +INSTALLED_HSDIS := $(INSTALLED_HSDIS_DIR)/$(INSTALLED_HSDIS_NAME) + +$(INSTALLED_HSDIS): $(BUILD_HSDIS_TARGET) + $(call LogWarn, NOTE: The resulting build might not be redistributable. Seek legal advice before distributing.)
+ $(install-file) + + +install: $(INSTALLED_HSDIS) + +TARGETS += install + +################################################################################ + +all: $(TARGETS) + +.PHONY: all default build install diff --git a/make/Main.gmk b/make/Main.gmk index e34766f0611e5d71abe9203ee7a948d0b433b266..75eee65ba84afddd1f787ddbac7d13c2fd205fd8 100644 --- a/make/Main.gmk +++ b/make/Main.gmk @@ -526,6 +526,18 @@ $(eval $(call SetupTarget, update-x11wrappers, \ DEPS := java.base-copy buildtools-jdk, \ )) +ifneq ($(HSDIS_BACKEND), none) + $(eval $(call SetupTarget, build-hsdis, \ + MAKEFILE := Hsdis, \ + TARGET := build, \ + )) + + $(eval $(call SetupTarget, install-hsdis, \ + MAKEFILE := Hsdis, \ + TARGET := install, \ + )) +endif + ################################################################################ # Cross compilation support @@ -857,6 +869,9 @@ else # virtual target. jdk.jdwp.agent-libs: jdk.jdwp.agent-gensrc + # jdk.jfr-gendata uses TOOL_JFR_GEN from buildtools-hotspot + jdk.jfr-gendata: buildtools-hotspot + # The swing beans need to have java base properly generated to avoid errors # in javadoc. The X11 wrappers need the java.base include files to have been # copied and processed. diff --git a/make/RunTests.gmk b/make/RunTests.gmk index f1da577de6a4b3ea71a77527ea68d37b129b6819..a2c8ea8101eac3fbdca236ec0a3f087acae94cc0 100644 --- a/make/RunTests.gmk +++ b/make/RunTests.gmk @@ -200,9 +200,10 @@ $(eval $(call SetTestOpt,FAILURE_HANDLER_TIMEOUT,JTREG)) $(eval $(call ParseKeywordVariable, JTREG, \ SINGLE_KEYWORDS := JOBS TIMEOUT_FACTOR FAILURE_HANDLER_TIMEOUT \ TEST_MODE ASSERT VERBOSE RETAIN MAX_MEM RUN_PROBLEM_LISTS \ - RETRY_COUNT MAX_OUTPUT, \ + RETRY_COUNT MAX_OUTPUT $(CUSTOM_JTREG_SINGLE_KEYWORDS), \ STRING_KEYWORDS := OPTIONS JAVA_OPTIONS VM_OPTIONS KEYWORDS \ - EXTRA_PROBLEM_LISTS LAUNCHER_OPTIONS, \ + EXTRA_PROBLEM_LISTS LAUNCHER_OPTIONS \ + $(CUSTOM_JTREG_STRING_KEYWORDS), \ )) ifneq ($(JTREG), ) @@ -832,6 +833,8 @@ define SetupRunJtregTestBody endif endif + $$(eval $$(call SetupRunJtregTestCustom, $1)) + clean-workdir-$1: $$(RM) -r $$($1_TEST_SUPPORT_DIR) diff --git a/make/ToolsJdk.gmk b/make/ToolsJdk.gmk index 99e8bd9727c61321f2971c425dc1eeb551675dc1..395a78602f626a83e7ecdd2fb45e980e6caf7119 100644 --- a/make/ToolsJdk.gmk +++ b/make/ToolsJdk.gmk @@ -75,6 +75,8 @@ TOOL_MAKEJAVASECURITY = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_clas build.tools.makejavasecurity.MakeJavaSecurity TOOL_GENERATECACERTS = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \ + -Dkeystore.pkcs12.certProtectionAlgorithm=NONE \ + -Dkeystore.pkcs12.macAlgorithm=NONE \ build.tools.generatecacerts.GenerateCacerts TOOL_GENERATEEMOJIDATA = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \ diff --git a/make/autoconf/boot-jdk.m4 b/make/autoconf/boot-jdk.m4 index 1d82f7c79b94323736d75df3623b325ea46c9128..9c0f9018bfcc10e4b47e96f17edf39edcc284dd6 100644 --- a/make/autoconf/boot-jdk.m4 +++ b/make/autoconf/boot-jdk.m4 @@ -298,6 +298,28 @@ AC_DEFUN([BOOTJDK_CHECK_TOOL_IN_BOOTJDK], ]) ]) +# Setup CLASSPATH environment variable +AC_DEFUN([BOOTJDK_SETUP_CLASSPATH], +[ + AC_ARG_WITH([classpath], [AS_HELP_STRING([--with-classpath], + [Optional classpath to set as CLASSPATH to all Java invocations @<:@none@:>@])]) + + if test "x$CLASSPATH" != x; then + AC_MSG_WARN([CLASSPATH is set in the environment. This will be ignored. 
Use --with-classpath instead.]) + fi + + CLASSPATH= + + if test "x$with_classpath" != x && test "x$with_classpath" != xyes && + test "x$with_classpath" != xno ; then + CLASSPATH="$with_classpath" + AC_MSG_CHECKING([for classpath to use for all Java invocations]) + AC_MSG_RESULT([$CLASSPATH]) + fi + + AC_SUBST(CLASSPATH) +]) + ############################################################################### # # We need a Boot JDK to bootstrap the build. @@ -394,6 +416,8 @@ AC_DEFUN_ONCE([BOOTJDK_SETUP_BOOT_JDK], BOOTJDK_USE_LOCAL_CDS=false AC_MSG_RESULT([no, -XX:SharedArchiveFile not supported]) fi + + BOOTJDK_SETUP_CLASSPATH ]) AC_DEFUN_ONCE([BOOTJDK_SETUP_BOOT_JDK_ARGUMENTS],
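The new `--with-classpath` handling above means an inherited `CLASSPATH` no longer leaks into the many `java` invocations made during the build; the variable is cleared unless one is explicitly requested. A sketch of the intended usage (the jar path below is purely illustrative):

    # Any CLASSPATH from the environment is ignored, with a warning.
    $ bash configure --with-classpath=/opt/extras/tools.jar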
diff --git a/make/autoconf/build-aux/config.guess b/make/autoconf/build-aux/config.guess index 3e10c6e91103ed0ea5c5bf2e66ea854475b075a1..d589529f35aad914aac30789012a5beb3b47a8bb 100644 --- a/make/autoconf/build-aux/config.guess +++ b/make/autoconf/build-aux/config.guess @@ -102,6 +102,15 @@ if [ "x$OUT" = x ]; then fi fi +# Test and fix LoongArch64. +if [ "x$OUT" = x ]; then + if [ `uname -s` = Linux ]; then + if [ `uname -m` = loongarch64 ]; then + OUT=loongarch64-unknown-linux-gnu + fi + fi +fi + # Test and fix cpu on macos-aarch64, uname -p reports arm, buildsys expects aarch64 echo $OUT | grep arm-apple-darwin > /dev/null 2> /dev/null if test $? != 0; then diff --git a/make/autoconf/configure.ac b/make/autoconf/configure.ac index 0faec69738e185af9d454b48d37ae798fef548d7..29ed3f206aa46722b6b50bbb8d3ed047c764a214 100644 --- a/make/autoconf/configure.ac +++ b/make/autoconf/configure.ac @@ -249,6 +249,7 @@ JDKOPT_EXCLUDE_TRANSLATIONS JDKOPT_ENABLE_DISABLE_MANPAGES JDKOPT_ENABLE_DISABLE_CDS_ARCHIVE JDKOPT_ENABLE_DISABLE_COMPATIBLE_CDS_ALIGNMENT +JDKOPT_SETUP_HSDIS ############################################################################### # diff --git a/make/autoconf/flags-cflags.m4 b/make/autoconf/flags-cflags.m4 index 5eed1138f1f205874e21b92050634fbfcfefd0c7..ba4cb3e9ac7314ef19cc399e13f7b42fd5f836bb 100644 --- a/make/autoconf/flags-cflags.m4 +++ b/make/autoconf/flags-cflags.m4 @@ -137,7 +137,7 @@ AC_DEFUN([FLAGS_SETUP_WARNINGS], DISABLED_WARNINGS="4800" if test "x$TOOLCHAIN_VERSION" = x2017; then # VS2017 incorrectly triggers this warning for constexpr - DISABLED_WARNINGS+=" 4307" + DISABLED_WARNINGS="$DISABLED_WARNINGS 4307" fi ;; diff --git a/make/autoconf/flags-ldflags.m4 b/make/autoconf/flags-ldflags.m4 index 23bb33e878d17d2b8072189c1c6d4b17097598e7..e9d4557f8665ed879976b5ba04791ec106a54b13 100644 --- a/make/autoconf/flags-ldflags.m4 +++ b/make/autoconf/flags-ldflags.m4 @@ -32,16 +32,10 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS], # Setup the target toolchain + # The target dir matches the name of the VM variant + TARGET_JVM_VARIANT_PATH=$JVM_VARIANT_MAIN + # On some platforms (mac) the linker warns about non existing -L dirs. - # For any of the variants server, client or minimal, the dir matches the - # variant name. The "main" variant should be used for linking. For the - # rest, the dir is just server. - if HOTSPOT_CHECK_JVM_VARIANT(server) || HOTSPOT_CHECK_JVM_VARIANT(client) \ - || HOTSPOT_CHECK_JVM_VARIANT(minimal); then - TARGET_JVM_VARIANT_PATH=$JVM_VARIANT_MAIN - else - TARGET_JVM_VARIANT_PATH=server - fi FLAGS_SETUP_LDFLAGS_CPU_DEP([TARGET]) # Setup the build toolchain diff --git a/make/autoconf/flags-other.m4 b/make/autoconf/flags-other.m4 index 14bb3f5b52fc64302224849fb9cdde79e611dbb2..eaed7515bf224aab0f8d6d8292a1265de4611b81 100644 --- a/make/autoconf/flags-other.m4 +++ b/make/autoconf/flags-other.m4 @@ -89,11 +89,12 @@ AC_DEFUN([FLAGS_SETUP_ASFLAGS], # Fix linker warning. # Code taken from make/autoconf/flags-cflags.m4 and adapted. - JVM_BASIC_ASFLAGS+="-DMAC_OS_X_VERSION_MIN_REQUIRED=$MACOSX_VERSION_MIN_NODOTS \ + JVM_BASIC_ASFLAGS="$JVM_BASIC_ASFLAGS \ + -DMAC_OS_X_VERSION_MIN_REQUIRED=$MACOSX_VERSION_MIN_NODOTS \ -mmacosx-version-min=$MACOSX_VERSION_MIN" if test -n "$MACOSX_VERSION_MAX"; then - JVM_BASIC_ASFLAGS+="$OS_CFLAGS \ + JVM_BASIC_ASFLAGS="$JVM_BASIC_ASFLAGS $OS_CFLAGS \ -DMAC_OS_X_VERSION_MAX_ALLOWED=$MACOSX_VERSION_MAX_NODOTS" fi fi diff --git a/make/autoconf/help.m4 b/make/autoconf/help.m4 index 7de6398bbd6e0d82bbb7fe56ed8237a1b2c45549..f36aa2819dfcc943e80509ec32113bf14eb7908a 100644 --- a/make/autoconf/help.m4 +++ b/make/autoconf/help.m4 @@ -1,5 +1,5 @@ # -# Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -42,21 +42,21 @@ AC_DEFUN([HELP_MSG_MISSING_DEPENDENCY], PKGHANDLER_COMMAND= case $PKGHANDLER in - apt-get) + *apt-get) apt_help $MISSING_DEPENDENCY ;; - yum) + *yum) yum_help $MISSING_DEPENDENCY ;; - brew) + *brew) brew_help $MISSING_DEPENDENCY ;; - port) + *port) port_help $MISSING_DEPENDENCY ;; - pkgutil) + *pkgutil) pkgutil_help $MISSING_DEPENDENCY ;; - pkgadd) + *pkgadd) pkgadd_help $MISSING_DEPENDENCY ;; - zypper) + *zypper) zypper_help $MISSING_DEPENDENCY ;; - pacman) + *pacman) pacman_help $MISSING_DEPENDENCY ;; esac @@ -80,6 +80,14 @@ cygwin_help() { PKGHANDLER_COMMAND="( cd && cmd /c setup -q -P make )" HELP_MSG="You might be able to fix this by running '$PKGHANDLER_COMMAND'." ;; + i686-w64-mingw32-gcc) + PKGHANDLER_COMMAND="( cd && cmd /c setup -q -P gcc-core i686-w64-mingw32-gcc-core mingw64-i686-glib2.0 )" + HELP_MSG="You might be able to fix this by running '$PKGHANDLER_COMMAND'." + ;; + x86_64-w64-mingw32-gcc) + PKGHANDLER_COMMAND="( cd && cmd /c setup -q -P gcc-core x86_64-w64-mingw32-gcc-core mingw64-x86_64-glib2.0 )" + HELP_MSG="You might be able to fix this by running '$PKGHANDLER_COMMAND'."
+ ;; esac } @@ -274,7 +282,7 @@ AC_DEFUN_ONCE([HELP_PRINT_SUMMARY_AND_WARNINGS], printf "\n" printf "Build performance summary:\n" - printf "* Cores to use: $JOBS\n" + printf "* Build jobs: $JOBS\n" printf "* Memory limit: $MEMORY_SIZE MB\n" if test "x$CCACHE_STATUS" != "x"; then printf "* ccache status: $CCACHE_STATUS\n" diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 index 43653c115be41a894b57f3c4a6130d89fe2c0462..1cac6bb00c666e814d2e8cfd557661d0327d1d06 100644 --- a/make/autoconf/hotspot.m4 +++ b/make/autoconf/hotspot.m4 @@ -83,19 +83,10 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_VARIANTS], AC_MSG_ERROR([Cannot continue]) fi - # All "special" variants share the same output directory ("server") - VALID_MULTIPLE_JVM_VARIANTS="server client minimal" - UTIL_GET_NON_MATCHING_VALUES(INVALID_MULTIPLE_VARIANTS, $JVM_VARIANTS, \ - $VALID_MULTIPLE_JVM_VARIANTS) - if test "x$INVALID_MULTIPLE_VARIANTS" != x && \ - test "x$BUILDING_MULTIPLE_JVM_VARIANTS" = xtrue; then - AC_MSG_ERROR([You can only build multiple variants using these variants: '$VALID_MULTIPLE_JVM_VARIANTS']) - fi - # The "main" variant is the one used by other libs to link against during the # build. if test "x$BUILDING_MULTIPLE_JVM_VARIANTS" = "xtrue"; then - MAIN_VARIANT_PRIO_ORDER="server client minimal" + MAIN_VARIANT_PRIO_ORDER="server client minimal zero" for variant in $MAIN_VARIANT_PRIO_ORDER; do if HOTSPOT_CHECK_JVM_VARIANT($variant); then JVM_VARIANT_MAIN="$variant" diff --git a/make/autoconf/jdk-options.m4 b/make/autoconf/jdk-options.m4 index 299f76bd1e63c480bf6396b24967f78d9be982d1..c937101c2c71af49c93377beb8cf4f7e35e50d6a 100644 --- a/make/autoconf/jdk-options.m4 +++ b/make/autoconf/jdk-options.m4 @@ -704,3 +704,145 @@ AC_DEFUN_ONCE([JDKOPT_SETUP_REPRODUCIBLE_BUILD], AC_SUBST(SOURCE_DATE) AC_SUBST(ENABLE_REPRODUCIBLE_BUILD) ]) + +################################################################################ +# +# Helper function to build binutils from source. +# +AC_DEFUN([JDKOPT_BUILD_BINUTILS], +[ + BINUTILS_SRC="$with_binutils_src" + UTIL_FIXUP_PATH(BINUTILS_SRC) + + if ! test -d $BINUTILS_SRC; then + AC_MSG_ERROR([--with-binutils-src is not pointing to a directory]) + fi + if ! test -x $BINUTILS_SRC/configure; then + AC_MSG_ERROR([--with-binutils-src does not look like a binutils source directory]) + fi + + if test -e $BINUTILS_SRC/bfd/libbfd.a && \ + test -e $BINUTILS_SRC/opcodes/libopcodes.a && \ + test -e $BINUTILS_SRC/libiberty/libiberty.a && \ + test -e $BINUTILS_SRC/zlib/libz.a; then + AC_MSG_NOTICE([Found binutils binaries in binutils source directory -- not building]) + else + # On Windows, we cannot build with the normal Microsoft CL, but must instead use + # a separate mingw toolchain. + if test "x$OPENJDK_BUILD_OS" = xwindows; then + if test "x$OPENJDK_TARGET_CPU" = "xx86"; then + target_base="i686-w64-mingw32" + else + target_base="$OPENJDK_TARGET_CPU-w64-mingw32" + fi + binutils_cc="$target_base-gcc" + binutils_target="--host=$target_base --target=$target_base" + # Somehow the uint typedef is not included when building with mingw + binutils_cflags="-Duint=unsigned" + compiler_version=`$binutils_cc --version 2>&1` + if ! [ [[ "$compiler_version" =~ GCC ]] ]; then + AC_MSG_NOTICE([Could not find correct mingw compiler $binutils_cc.]) + HELP_MSG_MISSING_DEPENDENCY([$binutils_cc]) + AC_MSG_ERROR([Cannot continue. 
$HELP_MSG]) + else + AC_MSG_NOTICE([Using compiler $binutils_cc with version $compiler_version]) + fi + elif test "x$OPENJDK_BUILD_OS" = xmacosx; then + if test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then + binutils_target="--enable-targets=aarch64-darwin" + else + binutils_target="" + fi + else + binutils_cc="$CC $SYSROOT_CFLAGS" + binutils_target="" + fi + binutils_cflags="$binutils_cflags $MACHINE_FLAG $JVM_PICFLAG $C_O_FLAG_NORM" + + AC_MSG_NOTICE([Running binutils configure]) + AC_MSG_NOTICE([configure command line: ./configure --disable-nls CFLAGS="$binutils_cflags" CC="$binutils_cc" $binutils_target]) + saved_dir=`pwd` + cd "$BINUTILS_SRC" + ./configure --disable-nls CFLAGS="$binutils_cflags" CC="$binutils_cc" $binutils_target + if test $? -ne 0 || ! test -e $BINUTILS_SRC/Makefile; then + AC_MSG_NOTICE([Automatic building of binutils failed on configure. Try building it manually]) + AC_MSG_ERROR([Cannot continue]) + fi + AC_MSG_NOTICE([Running binutils make]) + $MAKE all-opcodes + if test $? -ne 0; then + AC_MSG_NOTICE([Automatic building of binutils failed on make. Try building it manually]) + AC_MSG_ERROR([Cannot continue]) + fi + cd $saved_dir + AC_MSG_NOTICE([Building of binutils done]) + fi + + BINUTILS_DIR="$BINUTILS_SRC" +]) + +################################################################################ +# +# Determine if hsdis should be built, and if so, with which backend. +# +AC_DEFUN_ONCE([JDKOPT_SETUP_HSDIS], +[ + AC_ARG_WITH([hsdis], [AS_HELP_STRING([--with-hsdis], + [what hsdis backend to use ('none', 'binutils') @<:@none@:>@])]) + + AC_ARG_WITH([binutils], [AS_HELP_STRING([--with-binutils], + [where to find the binutils files needed for hsdis/binutils])]) + + AC_ARG_WITH([binutils-src], [AS_HELP_STRING([--with-binutils-src], + [where to find the binutils source for building])]) + + AC_MSG_CHECKING([what hsdis backend to use]) + + if test "x$with_hsdis" = xyes; then + AC_MSG_ERROR([--with-hsdis must have a value]) + elif test "x$with_hsdis" = xnone || test "x$with_hsdis" = xno || test "x$with_hsdis" = x; then + HSDIS_BACKEND=none + AC_MSG_RESULT(['none', hsdis will not be built]) + elif test "x$with_hsdis" = xbinutils; then + HSDIS_BACKEND=binutils + AC_MSG_RESULT(['binutils']) + + # We need the binutils static libs and includes. + if test "x$with_binutils_src" != x; then + # Try building the source first. If it succeeds, it sets $BINUTILS_DIR. 
+ JDKOPT_BUILD_BINUTILS + fi + + if test "x$with_binutils" != x; then + BINUTILS_DIR="$with_binutils" + fi + + AC_MSG_CHECKING([for binutils to use with hsdis]) + if test "x$BINUTILS_DIR" != x; then + if test -e $BINUTILS_DIR/bfd/libbfd.a && \ + test -e $BINUTILS_DIR/opcodes/libopcodes.a && \ + test -e $BINUTILS_DIR/libiberty/libiberty.a; then + AC_MSG_RESULT([$BINUTILS_DIR]) + HSDIS_CFLAGS="-I$BINUTILS_DIR/include -I$BINUTILS_DIR/bfd -DLIBARCH_$OPENJDK_TARGET_CPU_LEGACY_LIB" + HSDIS_LIBS="$BINUTILS_DIR/bfd/libbfd.a $BINUTILS_DIR/opcodes/libopcodes.a $BINUTILS_DIR/libiberty/libiberty.a $BINUTILS_DIR/zlib/libz.a" + else + AC_MSG_RESULT([invalid]) + AC_MSG_ERROR([$BINUTILS_DIR does not contain a proper binutils installation]) + fi + else + AC_MSG_RESULT([missing]) + AC_MSG_NOTICE([--with-hsdis=binutils requires specifying a binutils installation.]) + AC_MSG_NOTICE([Download binutils from https://www.gnu.org/software/binutils and unpack it,]) + AC_MSG_NOTICE([and point --with-binutils-src to the resulting directory, or use]) + AC_MSG_NOTICE([--with-binutils to point to a pre-built binutils installation.]) + AC_MSG_ERROR([Cannot continue]) + fi + else + AC_MSG_RESULT([invalid]) + AC_MSG_ERROR([Incorrect hsdis backend "$with_hsdis"]) + fi + + AC_SUBST(HSDIS_BACKEND) + AC_SUBST(HSDIS_CFLAGS) + AC_SUBST(HSDIS_LIBS) +])
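End to end, the configure logic above combines with the `build-hsdis` and `install-hsdis` targets added to make/Main.gmk and with make/Hsdis.gmk. A sketch of the resulting workflow, assuming a binutils source tree unpacked under /opt/binutils-2.37 (the version and path are illustrative):

    # Configure with the binutils backend; JDKOPT_BUILD_BINUTILS builds
    # libbfd/libopcodes from the given source tree if not already built.
    $ bash configure --with-hsdis=binutils --with-binutils-src=/opt/binutils-2.37

    # Build the disassembler library, then copy it into the JDK image
    # (bin/ on Windows, lib/ elsewhere) as hsdis-<cpu><library suffix>.
    $ make build-hsdis
    $ make install-hsdis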
diff --git a/make/autoconf/jdk-version.m4 b/make/autoconf/jdk-version.m4 index 092e7a6f490a0e350128d517f88c7db88bea9d18..60405d67bcb078779b7abacd6d69393fbd1dcb75 100644 --- a/make/autoconf/jdk-version.m4 +++ b/make/autoconf/jdk-version.m4 @@ -1,5 +1,5 @@ # -# Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -525,7 +525,7 @@ AC_DEFUN_ONCE([JDKVER_SETUP_JDK_VERSION_NUMBERS], MACOSX_BUNDLE_BUILD_VERSION="$VERSION_BUILD" # If VERSION_OPT consists of only numbers and periods, add it. if [ [[ $VERSION_OPT =~ ^[0-9\.]+$ ]] ]; then - MACOSX_BUNDLE_BUILD_VERSION+=".$VERSION_OPT" + MACOSX_BUNDLE_BUILD_VERSION="$MACOSX_BUNDLE_BUILD_VERSION.$VERSION_OPT" fi fi AC_SUBST(MACOSX_BUNDLE_BUILD_VERSION) diff --git a/make/autoconf/jvm-features.m4 b/make/autoconf/jvm-features.m4 index a4d0bf62ec2c235b5fac891d8c65ce871309dcec..1f76c323129fd366a358b72f1e67c6a0a63b7568 100644 --- a/make/autoconf/jvm-features.m4 +++ b/make/autoconf/jvm-features.m4 @@ -357,6 +357,13 @@ AC_DEFUN_ONCE([JVM_FEATURES_CHECK_ZGC], AC_MSG_RESULT([no, $OPENJDK_TARGET_OS-$OPENJDK_TARGET_CPU]) AVAILABLE=false fi + elif test "x$OPENJDK_TARGET_CPU" = "xppc64le"; then + if test "x$OPENJDK_TARGET_OS" = "xlinux"; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no, $OPENJDK_TARGET_OS-$OPENJDK_TARGET_CPU]) + AVAILABLE=false + fi else AC_MSG_RESULT([no, $OPENJDK_TARGET_OS-$OPENJDK_TARGET_CPU]) AVAILABLE=false diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4 index a65d91ee974bff4248cd39cce662159f23f84720..8e4012910d890774acb9a2769c147e1b14f9951e 100644 --- a/make/autoconf/libraries.m4 +++ b/make/autoconf/libraries.m4 @@ -43,11 +43,9 @@ AC_DEFUN_ONCE([LIB_DETERMINE_DEPENDENCIES], if test "x$OPENJDK_TARGET_OS" = xwindows || test "x$OPENJDK_TARGET_OS" = xmacosx; then # No X11 support on windows or macosx NEEDS_LIB_X11=false - elif test "x$ENABLE_HEADLESS_ONLY" = xtrue; then - # No X11 support needed when building headless only - NEEDS_LIB_X11=false else - # All other instances need X11 + # All other instances need X11; even when building headless only, libawt still + # needs the X11 headers. NEEDS_LIB_X11=true fi diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 index 2dd13d0d5e2078dfa309476fd0a12cb2fe41d41f..205d64f566d93a9a9c9d0b9191372e3aabb05143 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 @@ -72,6 +72,12 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_CPU], VAR_CPU_BITS=64 VAR_CPU_ENDIAN=little ;; + loongarch64) + VAR_CPU=loongarch64 + VAR_CPU_ARCH=loongarch + VAR_CPU_BITS=64 + VAR_CPU_ENDIAN=little + ;; m68k) VAR_CPU=m68k VAR_CPU_ARCH=m68k diff --git a/make/autoconf/spec.gmk.in b/make/autoconf/spec.gmk.in index 29445c8c24f4e938aa09e4ceacbe81c3eb33660b..72be922f1036f0b31ad6fc4ccd347f1e5eb69871 100644 --- a/make/autoconf/spec.gmk.in +++ b/make/autoconf/spec.gmk.in @@ -54,6 +54,9 @@ MAKE := @MAKE@ # Make sure all shell commands are executed with the C locale export LC_ALL := C +# Make sure we override any local CLASSPATH variable +export CLASSPATH := @CLASSPATH@ + # The default make arguments MAKE_ARGS = $(MAKE_LOG_FLAGS) -r -R -I $(TOPDIR)/make/common SPEC=$(SPEC) \ MAKE_LOG_FLAGS="$(MAKE_LOG_FLAGS)" $(MAKE_LOG_VARS) @@ -356,6 +359,10 @@ ENABLE_COMPATIBLE_CDS_ALIGNMENT := @ENABLE_COMPATIBLE_CDS_ALIGNMENT@ ALLOW_ABSOLUTE_PATHS_IN_OUTPUT := @ALLOW_ABSOLUTE_PATHS_IN_OUTPUT@ +HSDIS_BACKEND := @HSDIS_BACKEND@ +HSDIS_CFLAGS := @HSDIS_CFLAGS@ +HSDIS_LIBS := @HSDIS_LIBS@ + # The boot jdk to use. This is overridden in bootcycle-spec.gmk. Make sure to keep # it in sync.
BOOT_JDK:=@BOOT_JDK@ diff --git a/make/autoconf/toolchain.m4 b/make/autoconf/toolchain.m4 index 7889588809587c77cdad6bcc42e628beb9687e3e..9666d9a6d9dd6c0d360fc89f5a89a25d7e011e93 100644 --- a/make/autoconf/toolchain.m4 +++ b/make/autoconf/toolchain.m4 @@ -51,7 +51,7 @@ TOOLCHAIN_DESCRIPTION_xlc="IBM XL C/C++" # Minimum supported versions, empty means unspecified TOOLCHAIN_MINIMUM_VERSION_clang="3.5" -TOOLCHAIN_MINIMUM_VERSION_gcc="5.0" +TOOLCHAIN_MINIMUM_VERSION_gcc="6.0" TOOLCHAIN_MINIMUM_VERSION_microsoft="19.10.0.0" # VS2017 TOOLCHAIN_MINIMUM_VERSION_xlc="" @@ -221,6 +221,12 @@ AC_DEFUN_ONCE([TOOLCHAIN_DETERMINE_TOOLCHAIN_TYPE], AC_ARG_WITH(toolchain-type, [AS_HELP_STRING([--with-toolchain-type], [the toolchain type (or family) to use, use '--help' to show possible values @<:@platform dependent@:>@])]) + # Linux x86_64 needs higher binutils after 8265783 + # (this really is a dependency on as version, but we take ld as a check for a general binutils version) + if test "x$OPENJDK_TARGET_CPU" = "xx86_64"; then + TOOLCHAIN_MINIMUM_LD_VERSION_gcc="2.25" + fi + # Use indirect variable referencing toolchain_var_name=VALID_TOOLCHAINS_$OPENJDK_BUILD_OS VALID_TOOLCHAINS=${!toolchain_var_name} @@ -677,9 +683,10 @@ AC_DEFUN_ONCE([TOOLCHAIN_DETECT_TOOLCHAIN_CORE], TOOLCHAIN_PREPARE_FOR_LD_VERSION_COMPARISONS if test "x$TOOLCHAIN_MINIMUM_LD_VERSION" != x; then + AC_MSG_NOTICE([comparing linker version to minimum version $TOOLCHAIN_MINIMUM_LD_VERSION]) TOOLCHAIN_CHECK_LINKER_VERSION(VERSION: $TOOLCHAIN_MINIMUM_LD_VERSION, IF_OLDER_THAN: [ - AC_MSG_WARN([You are using a linker older than $TOOLCHAIN_MINIMUM_LD_VERSION. This is not a supported configuration.]) + AC_MSG_ERROR([You are using a linker older than $TOOLCHAIN_MINIMUM_LD_VERSION. This is not a supported configuration.]) ] ) fi diff --git a/make/common/ProcessMarkdown.gmk b/make/common/ProcessMarkdown.gmk index 46cdbba0f85204956c0dff16a9d2117d16ae0338..fff54d1623899636e7a9c9974b75acdd5d4faa69 100644 --- a/make/common/ProcessMarkdown.gmk +++ b/make/common/ProcessMarkdown.gmk @@ -103,7 +103,7 @@ define ProcessMarkdown $$(call LogInfo, Post-processing markdown file $2) $$(call MakeDir, $$(SUPPORT_OUTPUTDIR)/markdown $$($1_$2_TARGET_DIR)) $$(call ExecuteWithLog, $$(SUPPORT_OUTPUTDIR)/markdown/$$($1_$2_MARKER)_post, \ - ( $$($1_POST_PROCESS) < $$($1_$2_PANDOC_OUTPUT) > $$($1_$2_OUTPUT_FILE) ) ) + ( $$($1_POST_PROCESS) $$($1_$2_PANDOC_OUTPUT) > $$($1_$2_OUTPUT_FILE) ) ) endif $1 += $$($1_$2_OUTPUT_FILE) diff --git a/make/conf/javadoc.conf b/make/conf/javadoc.conf index 6c92e40329afaec04f9a7f303f3b877ef04ba407..f13d882b9618568cbfe03d60f092d16209e17be1 100644 --- a/make/conf/javadoc.conf +++ b/make/conf/javadoc.conf @@ -1,4 +1,4 @@ -# Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it @@ -23,9 +23,9 @@ # # URLs -JAVADOC_BASE_URL=https://docs.oracle.com/pls/topic/lookup?ctx=javase$(VERSION_NUMBER)&id=homepage +JAVADOC_BASE_URL=https://docs.oracle.com/pls/topic/lookup?ctx=javase$(VERSION_FEATURE)&id=homepage BUG_SUBMIT_URL=https://bugreport.java.com/bugreport/ COPYRIGHT_URL=legal/copyright.html -LICENSE_URL=https://www.oracle.com/java/javase/terms/license/java$(VERSION_NUMBER)speclicense.html +LICENSE_URL=https://www.oracle.com/java/javase/terms/license/java$(VERSION_FEATURE)speclicense.html REDISTRIBUTION_URL=https://www.oracle.com/technetwork/java/redist-137594.html OTHER_JDK_VERSIONS_URL=https://docs.oracle.com/en/java/javase/index.html diff --git a/make/conf/jib-profiles.js b/make/conf/jib-profiles.js index 0140dee7d5cb6ba84277cb7956797ea15dc48c69..20fd9a92effd08f72ef632a305055c3b3a1aeae8 100644 --- a/make/conf/jib-profiles.js +++ b/make/conf/jib-profiles.js @@ -249,7 +249,7 @@ var getJibProfilesCommon = function (input, data) { dependencies: ["boot_jdk", "gnumake", "jtreg", "jib", "autoconf", "jmh", "jcov"], default_make_targets: ["product-bundles", "test-bundles", "static-libs-bundles"], configure_args: concat("--enable-jtreg-failure-handler", - "--with-exclude-translations=de,es,fr,it,ko,pt_BR,sv,ca,tr,cs,sk,ja_JP_A,ja_JP_HA,ja_JP_HI,ja_JP_I,zh_TW,zh_HK", + "--with-exclude-translations=es,fr,it,ko,pt_BR,sv,ca,tr,cs,sk,ja_JP_A,ja_JP_HA,ja_JP_HI,ja_JP_I,zh_TW,zh_HK", "--disable-manpages", "--disable-jvm-feature-shenandoahgc", versionArgs(input, common)) @@ -394,13 +394,8 @@ var getJibProfilesCommon = function (input, data) { }; }; - if (input.build_os == 'macosx' && input.build_cpu == 'aarch64') { - common.boot_jdk_version = "17"; - common.boot_jdk_build_number = "24"; - } else { - common.boot_jdk_version = "16"; - common.boot_jdk_build_number = "36"; - } + common.boot_jdk_version = "17"; + common.boot_jdk_build_number = "35"; common.boot_jdk_home = input.get("boot_jdk", "install_path") + "/jdk-" + common.boot_jdk_version + (input.build_os == "macosx" ? ".jdk/Contents/Home" : ""); @@ -638,7 +633,7 @@ var getJibProfilesProfiles = function (input, common, data) { // Bootcycle profiles runs the build with itself as the boot jdk. This can // be done in two ways. Either using the builtin bootcycle target in the // build system. Or by supplying the main jdk build as bootjdk to configure. - [ "linux-x64", "macosx-x64", "windows-x64" ] + [ "linux-x64", "macosx-x64", "windows-x64", "linux-aarch64" ] .forEach(function (name) { var bootcycleName = name + "-bootcycle"; var bootcyclePrebuiltName = name + "-bootcycle-prebuilt"; @@ -1073,11 +1068,7 @@ var getJibProfilesDependencies = function (input, common) { } var boot_jdk_os = input.build_os; if (input.build_os == "macosx") { - if (input.build_cpu == "aarch64") { - boot_jdk_os = "macos"; - } else { - boot_jdk_os = "osx"; - } + boot_jdk_os = "macos"; } var boot_jdk_platform = boot_jdk_os + "-" + input.build_cpu; var boot_jdk_ext = (input.build_os == "windows" ? 
".zip" : ".tar.gz") @@ -1141,9 +1132,9 @@ var getJibProfilesDependencies = function (input, common) { jtreg: { server: "jpg", product: "jtreg", - version: "6", + version: "6.1", build_number: "1", - file: "bundles/jtreg-6+1.zip", + file: "bundles/jtreg-6.1+1.zip", environment_name: "JT_HOME", environment_path: input.get("jtreg", "home_path") + "/bin", configure_args: "--with-jtreg=" + input.get("jtreg", "home_path"), @@ -1164,7 +1155,7 @@ var getJibProfilesDependencies = function (input, common) { // build_number: "b07", // file: "bundles/jcov-3_0.zip", organization: common.organization, - revision: "3.0-7-jdk-asm+1.0", + revision: "3.0-9-jdk-asm+1.0", ext: "zip", environment_name: "JCOV_HOME", }, diff --git a/make/conf/test-dependencies b/make/conf/test-dependencies index 4619ab744471c1805bfc79c1c964f7c5389f165f..1e53b8f10f1435e961aebad20dbfe08cd9a30682 100644 --- a/make/conf/test-dependencies +++ b/make/conf/test-dependencies @@ -25,19 +25,19 @@ # Versions and download locations for dependencies used by pre-submit testing. -BOOT_JDK_VERSION=16 -JTREG_VERSION=6 +BOOT_JDK_VERSION=17 +JTREG_VERSION=6.1 JTREG_BUILD=1 GTEST_VERSION=1.8.1 -LINUX_X64_BOOT_JDK_FILENAME=openjdk-16_linux-x64_bin.tar.gz -LINUX_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk16/7863447f0ab643c585b9bdebf67c69db/36/GPL/openjdk-16_linux-x64_bin.tar.gz -LINUX_X64_BOOT_JDK_SHA256=e952958f16797ad7dc7cd8b724edd69ec7e0e0434537d80d6b5165193e33b931 +LINUX_X64_BOOT_JDK_FILENAME=openjdk-17_linux-x64_bin.tar.gz +LINUX_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk17/0d483333a00540d886896bac774ff48b/35/GPL/openjdk-17_linux-x64_bin.tar.gz +LINUX_X64_BOOT_JDK_SHA256=aef49cc7aa606de2044302e757fa94c8e144818e93487081c4fd319ca858134b -WINDOWS_X64_BOOT_JDK_FILENAME=openjdk-16_windows-x64_bin.zip -WINDOWS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk16/7863447f0ab643c585b9bdebf67c69db/36/GPL/openjdk-16_windows-x64_bin.zip -WINDOWS_X64_BOOT_JDK_SHA256=a78bdeaad186297601edac6772d931224d7af6f682a43372e693c37020bd37d6 +WINDOWS_X64_BOOT_JDK_FILENAME=openjdk-17_windows-x64_bin.zip +WINDOWS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk17/0d483333a00540d886896bac774ff48b/35/GPL/openjdk-17_windows-x64_bin.zip +WINDOWS_X64_BOOT_JDK_SHA256=e88b0df00021c9d266bb435c9a95fdc67d1948cce4518daf85c234907bd393c5 -MACOS_X64_BOOT_JDK_FILENAME=openjdk-16_osx-x64_bin.tar.gz -MACOS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk16/7863447f0ab643c585b9bdebf67c69db/36/GPL/openjdk-16_osx-x64_bin.tar.gz -MACOS_X64_BOOT_JDK_SHA256=16f3e39a31e86f3f51b0b4035a37494a47ed3c4ead760eafc6afd7afdf2ad9f2 +MACOS_X64_BOOT_JDK_FILENAME=openjdk-17_macos-x64_bin.tar.gz +MACOS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk17/0d483333a00540d886896bac774ff48b/35/GPL/openjdk-17_macos-x64_bin.tar.gz +MACOS_X64_BOOT_JDK_SHA256=18e11cf9bbc6f584031e801b11ae05a233c32086f8e1b84eb8a1e9bb8e1f5d90 diff --git a/make/conf/version-numbers.conf b/make/conf/version-numbers.conf index 1a5484311827dca122ddc78b93e63ee3caba2542..2d12d668a71051fad460bebc8a7a6027a2706e9f 100644 --- a/make/conf/version-numbers.conf +++ b/make/conf/version-numbers.conf @@ -37,6 +37,6 @@ DEFAULT_VERSION_DATE=2022-03-15 DEFAULT_VERSION_CLASSFILE_MAJOR=62 # "`$EXPR $DEFAULT_VERSION_FEATURE + 44`" DEFAULT_VERSION_CLASSFILE_MINOR=0 DEFAULT_VERSION_DOCS_API_SINCE=11 -DEFAULT_ACCEPTABLE_BOOT_VERSIONS="16 17 18" +DEFAULT_ACCEPTABLE_BOOT_VERSIONS="17 18" DEFAULT_JDK_SOURCE_TARGET_VERSION=18 DEFAULT_PROMOTED_VERSION_PRE=ea diff --git a/make/data/cacerts/globalsignr2ca 
b/make/data/cacerts/globalsignr2ca deleted file mode 100644 index 746d1fab985190af92c00223bbae1cae4ee4d4b8..0000000000000000000000000000000000000000 --- a/make/data/cacerts/globalsignr2ca +++ /dev/null @@ -1,29 +0,0 @@ -Owner: CN=GlobalSign, O=GlobalSign, OU=GlobalSign Root CA - R2 -Issuer: CN=GlobalSign, O=GlobalSign, OU=GlobalSign Root CA - R2 -Serial number: 400000000010f8626e60d -Valid from: Fri Dec 15 08:00:00 GMT 2006 until: Wed Dec 15 08:00:00 GMT 2021 -Signature algorithm name: SHA1withRSA -Subject Public Key Algorithm: 2048-bit RSA key -Version: 3 ------BEGIN CERTIFICATE----- -MIIDujCCAqKgAwIBAgILBAAAAAABD4Ym5g0wDQYJKoZIhvcNAQEFBQAwTDEgMB4G -A1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjIxEzARBgNVBAoTCkdsb2JhbFNp -Z24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMDYxMjE1MDgwMDAwWhcNMjExMjE1 -MDgwMDAwWjBMMSAwHgYDVQQLExdHbG9iYWxTaWduIFJvb3QgQ0EgLSBSMjETMBEG -A1UEChMKR2xvYmFsU2lnbjETMBEGA1UEAxMKR2xvYmFsU2lnbjCCASIwDQYJKoZI -hvcNAQEBBQADggEPADCCAQoCggEBAKbPJA6+Lm8omUVCxKs+IVSbC9N/hHD6ErPL -v4dfxn+G07IwXNb9rfF73OX4YJYJkhD10FPe+3t+c4isUoh7SqbKSaZeqKeMWhG8 -eoLrvozps6yWJQeXSpkqBy+0Hne/ig+1AnwblrjFuTosvNYSuetZfeLQBoZfXklq -tTleiDTsvHgMCJiEbKjNS7SgfQx5TfC4LcshytVsW33hoCmEofnTlEnLJGKRILzd -C9XZzPnqJworc5HGnRusyMvo4KD0L5CLTfuwNhv2GXqF4G3yYROIXJ/gkwpRl4pa -zq+r1feqCapgvdzZX99yqWATXgAByUr6P6TqBwMhAo6CygPCm48CAwEAAaOBnDCB -mTAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUm+IH -V2ccHsBqBt5ZtJot39wZhi4wNgYDVR0fBC8wLTAroCmgJ4YlaHR0cDovL2NybC5n -bG9iYWxzaWduLm5ldC9yb290LXIyLmNybDAfBgNVHSMEGDAWgBSb4gdXZxwewGoG -3lm0mi3f3BmGLjANBgkqhkiG9w0BAQUFAAOCAQEAmYFThxxol4aR7OBKuEQLq4Gs -J0/WwbgcQ3izDJr86iw8bmEbTUsp9Z8FHSbBuOmDAGJFtqkIk7mpM0sYmsL4h4hO -291xNBrBVNpGP+DTKqttVCL1OmLNIG+6KYnX3ZHu01yiPqFbQfXf5WRDLenVOavS -ot+3i9DAgBkcRcAtjOj4LaR0VknFBbVPFd5uRHg5h6h+u/N5GJG79G+dwfCMNYxd -AfvDbbnvRG15RjF+Cv6pgsH/76tuIMRQyV+dTZsXjAzlAcmgQWpzU/qlULRuJQ/7 -TBj0/VLZjmmx6BEP3ojY+x1J96relc8geMJgEtslQIxq/H5COEBkEveegeGTLg== ------END CERTIFICATE----- diff --git a/make/data/cacerts/identrustdstx3 b/make/data/cacerts/identrustdstx3 deleted file mode 100644 index 87a0d0c4f60f0f423a1d44070c9aea19c6796198..0000000000000000000000000000000000000000 --- a/make/data/cacerts/identrustdstx3 +++ /dev/null @@ -1,27 +0,0 @@ -Owner: CN=DST Root CA X3, O=Digital Signature Trust Co. -Issuer: CN=DST Root CA X3, O=Digital Signature Trust Co. 
-Serial number: 44afb080d6a327ba893039862ef8406b -Valid from: Sat Sep 30 21:12:19 GMT 2000 until: Thu Sep 30 14:01:15 GMT 2021 -Signature algorithm name: SHA1withRSA -Subject Public Key Algorithm: 2048-bit RSA key -Version: 3 ------BEGIN CERTIFICATE----- -MIIDSjCCAjKgAwIBAgIQRK+wgNajJ7qJMDmGLvhAazANBgkqhkiG9w0BAQUFADA/ -MSQwIgYDVQQKExtEaWdpdGFsIFNpZ25hdHVyZSBUcnVzdCBDby4xFzAVBgNVBAMT -DkRTVCBSb290IENBIFgzMB4XDTAwMDkzMDIxMTIxOVoXDTIxMDkzMDE0MDExNVow -PzEkMCIGA1UEChMbRGlnaXRhbCBTaWduYXR1cmUgVHJ1c3QgQ28uMRcwFQYDVQQD -Ew5EU1QgUm9vdCBDQSBYMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEB -AN+v6ZdQCINXtMxiZfaQguzH0yxrMMpb7NnDfcdAwRgUi+DoM3ZJKuM/IUmTrE4O -rz5Iy2Xu/NMhD2XSKtkyj4zl93ewEnu1lcCJo6m67XMuegwGMoOifooUMM0RoOEq -OLl5CjH9UL2AZd+3UWODyOKIYepLYYHsUmu5ouJLGiifSKOeDNoJjj4XLh7dIN9b -xiqKqy69cK3FCxolkHRyxXtqqzTWMIn/5WgTe1QLyNau7Fqckh49ZLOMxt+/yUFw -7BZy1SbsOFU5Q9D8/RhcQPGX69Wam40dutolucbY38EVAjqr2m7xPi71XAicPNaD -aeQQmxkqtilX4+U9m5/wAl0CAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNV -HQ8BAf8EBAMCAQYwHQYDVR0OBBYEFMSnsaR7LHH62+FLkHX/xBVghYkQMA0GCSqG -SIb3DQEBBQUAA4IBAQCjGiybFwBcqR7uKGY3Or+Dxz9LwwmglSBd49lZRNI+DT69 -ikugdB/OEIKcdBodfpga3csTS7MgROSR6cz8faXbauX+5v3gTt23ADq1cEmv8uXr -AvHRAosZy5Q6XkjEGB5YGV8eAlrwDPGxrancWYaLbumR9YbK+rlmM6pZW87ipxZz -R8srzJmwN0jP41ZL9c8PDHIyh8bwRLtTcm1D9SZImlJnt1ir/md2cXjbDaJWFBM5 -JDGFoqgCWjBH4d1QB7wCCZAA62RjYJsWvIjJEubSfZGL+T0yjWW06XyxV3bqxbYo -Ob8VZRzI9neWagqNdwvYkQsEjgfbKbYK7p2CNTUQ ------END CERTIFICATE----- diff --git a/make/data/charsetmapping/charsets b/make/data/charsetmapping/charsets index a06e4038d697f2420532e51f3965221453d27049..5932645bfbdc0bde6d155574a2797db7a9131beb 100644 --- a/make/data/charsetmapping/charsets +++ b/make/data/charsetmapping/charsets @@ -1,5 +1,5 @@ # -# Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -49,7 +49,6 @@ charset US-ASCII US_ASCII alias IBM367 alias cp367 alias csASCII - alias default # Other aliases alias 646 # Solaris POSIX locale alias iso_646.irv:1983 diff --git a/make/data/currency/CurrencyData.properties b/make/data/currency/CurrencyData.properties index a4ad6d613b85b3dcadcf6aeb4a206db0be4094d7..236e544feaf65543bac0ca05d69e55946dd31996 100644 --- a/make/data/currency/CurrencyData.properties +++ b/make/data/currency/CurrencyData.properties @@ -1,5 +1,5 @@ # -# Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -32,7 +32,7 @@ formatVersion=3 # Version of the currency code information in this class. # It is a serial number that accompanies with each amendment. -dataVersion=169 +dataVersion=170 # List of all valid ISO 4217 currency codes. # To ensure compatibility, do not remove codes. 
@@ -54,7 +54,7 @@ all=ADP020-AED784-AFA004-AFN971-ALL008-AMD051-ANG532-AOA973-ARS032-ATS040-AUD036 SBD090-SCR690-SDD736-SDG938-SEK752-SGD702-SHP654-SIT705-SKK703-SLL694-SOS706-\ SRD968-SRG740-SSP728-STD678-STN930-SVC222-SYP760-SZL748-THB764-TJS972-TMM795-TMT934-TND788-TOP776-\ TPE626-TRL792-TRY949-TTD780-TWD901-TZS834-UAH980-UGX800-USD840-USN997-USS998-UYI940-\ - UYU858-UZS860-VEB862-VEF937-VES928-VND704-VUV548-WST882-XAF950-XAG961-XAU959-XBA955-\ + UYU858-UZS860-VEB862-VED926-VEF937-VES928-VND704-VUV548-WST882-XAF950-XAG961-XAU959-XBA955-\ XBB956-XBC957-XBD958-XCD951-XDR960-XFO000-XFU000-XOF952-XPD964-XPF953-\ XPT962-XSU994-XTS963-XUA965-XXX999-YER886-YUM891-ZAR710-ZMK894-ZMW967-ZWD716-ZWL932-\ ZWN942-ZWR935 @@ -317,6 +317,8 @@ KI=AUD KP=KPW # KOREA (THE REPUBLIC OF) KR=KRW +# KOSOVO - Not in ISO 3166/4217 +XK=EUR # KUWAIT KW=KWD # KYRGYZSTAN diff --git a/make/data/hotspot-symbols/symbols-unix b/make/data/hotspot-symbols/symbols-unix index a34732739d35b9b64433cee4feff724c77af5017..d735f61b3a456bc9878949861043ebb995eceed7 100644 --- a/make/data/hotspot-symbols/symbols-unix +++ b/make/data/hotspot-symbols/symbols-unix @@ -183,6 +183,7 @@ JVM_ReferenceRefersTo JVM_RegisterLambdaProxyClassForArchiving JVM_RegisterSignal JVM_ReleaseUTF +JVM_ReportFinalizationComplete JVM_ResumeThread JVM_SetArrayElement JVM_SetClassSigners diff --git a/make/data/jdwp/jdwp.spec b/make/data/jdwp/jdwp.spec index b55a286aaa1ed47ab34ceddd838d91ebb3006853..51fe13cd9ef7f075763e5a2b82fe373284997e25 100644 --- a/make/data/jdwp/jdwp.spec +++ b/make/data/jdwp/jdwp.spec @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -134,9 +134,9 @@ JDWP "Java(tm) Debug Wire Protocol" "
        " "
      • All event requests are cancelled. " "
      • All threads suspended by the thread-level " - "resume command " + "suspend command " "or the VM-level " - "resume command " + "suspend command " "are resumed as many times as necessary for them to run. " "
      • Garbage collection is re-enabled in all cases where it was " "disabled " diff --git a/make/data/symbols/java.base-H.sym.txt b/make/data/symbols/java.base-H.sym.txt index 753fd5505bc8a19df2b8b41b65f7ab0fad5931b2..585d05063b31cc9d6102b42e7b45ca6fda279123 100644 --- a/make/data/symbols/java.base-H.sym.txt +++ b/make/data/symbols/java.base-H.sym.txt @@ -114,8 +114,8 @@ header extends java/lang/Object implements java/lang/annotation/Annotation flags class name java/lang/System -method name setSecurityManager descriptor (Ljava/lang/SecurityManager;)V -method name getSecurityManager descriptor ()Ljava/lang/SecurityManager; -method name setSecurityManager descriptor (Ljava/lang/SecurityManager;)V flags 9 deprecated true runtimeAnnotations @Ljava/lang/Deprecated;(forRemoval=Ztrue,since="17") method name getSecurityManager descriptor ()Ljava/lang/SecurityManager; flags 9 deprecated true runtimeAnnotations @Ljava/lang/Deprecated;(forRemoval=Ztrue,since="17") +method name setSecurityManager descriptor (Ljava/lang/SecurityManager;)V flags 9 deprecated true runtimeAnnotations @Ljava/lang/Deprecated;(forRemoval=Ztrue,since="17")@Ljdk/internal/reflect/CallerSensitive; class name java/lang/Thread -method name checkAccess descriptor ()V @@ -209,7 +209,8 @@ header extends java/lang/RuntimeException flags 21 class name java/lang/runtime/SwitchBootstraps header extends java/lang/Object flags 21 classAnnotations @Ljdk/internal/javac/PreviewFeature;(feature=eLjdk/internal/javac/PreviewFeature$Feature;SWITCH_PATTERN_MATCHING;) innerclass innerClass java/lang/invoke/MethodHandles$Lookup outerClass java/lang/invoke/MethodHandles innerClassName Lookup flags 19 -method name typeSwitch descriptor (Ljava/lang/invoke/MethodHandles$Lookup;Ljava/lang/String;Ljava/lang/invoke/MethodType;[Ljava/lang/Object;)Ljava/lang/invoke/CallSite; thrownTypes java/lang/Throwable flags 89 +method name typeSwitch descriptor (Ljava/lang/invoke/MethodHandles$Lookup;Ljava/lang/String;Ljava/lang/invoke/MethodType;[Ljava/lang/Object;)Ljava/lang/invoke/CallSite; flags 89 +method name enumSwitch descriptor (Ljava/lang/invoke/MethodHandles$Lookup;Ljava/lang/String;Ljava/lang/invoke/MethodType;[Ljava/lang/Object;)Ljava/lang/invoke/CallSite; flags 89 class name java/net/DatagramSocket -method name setDatagramSocketImplFactory descriptor (Ljava/net/DatagramSocketImplFactory;)V @@ -454,8 +455,7 @@ innerclass innerClass java/io/ObjectInputStream$GetField outerClass java/io/Obje innerclass innerClass java/io/ObjectOutputStream$PutField outerClass java/io/ObjectOutputStream innerClassName PutField flags 409 class name java/util/SplittableRandom -header extends jdk/internal/util/random/RandomSupport$AbstractSplittableGenerator flags 31 runtimeAnnotations @Ljdk/internal/util/random/RandomSupport$RandomGeneratorProperties;(name="SplittableRandom",i=I64,j=I0,k=I0,equidistribution=I1) -innerclass innerClass jdk/internal/util/random/RandomSupport$AbstractSplittableGenerator outerClass jdk/internal/util/random/RandomSupport innerClassName AbstractSplittableGenerator flags 409 +header extends java/lang/Object implements java/util/random/RandomGenerator,java/util/random/RandomGenerator$SplittableGenerator flags 31 runtimeAnnotations @Ljdk/internal/util/random/RandomSupport$RandomGeneratorProperties;(name="SplittableRandom",i=I64,j=I0,k=I0,equidistribution=I1) innerclass innerClass java/util/random/RandomGenerator$SplittableGenerator outerClass java/util/random/RandomGenerator innerClassName SplittableGenerator flags 609 -method name nextInt descriptor 
(I)I -method name nextInt descriptor (II)I diff --git a/make/data/symbols/jdk.incubator.foreign-H.sym.txt b/make/data/symbols/jdk.incubator.foreign-H.sym.txt index c33a11c68120b8ec7bede21ece7751e4775e1e09..376ba519f57b5fd270bb534406f45aa1feb316e7 100644 --- a/make/data/symbols/jdk.incubator.foreign-H.sym.txt +++ b/make/data/symbols/jdk.incubator.foreign-H.sym.txt @@ -39,6 +39,7 @@ innerclass innerClass jdk/incubator/foreign/CLinker$VaList$Builder outerClass jd -method name toCString descriptor (Ljava/lang/String;Ljava/nio/charset/Charset;Ljdk/incubator/foreign/NativeScope;)Ljdk/incubator/foreign/MemorySegment; -method name toJavaStringRestricted descriptor (Ljdk/incubator/foreign/MemoryAddress;)Ljava/lang/String; -method name toJavaStringRestricted descriptor (Ljdk/incubator/foreign/MemoryAddress;Ljava/nio/charset/Charset;)Ljava/lang/String; +-method name toJavaString descriptor (Ljdk/incubator/foreign/MemorySegment;Ljava/nio/charset/Charset;)Ljava/lang/String; -method name allocateMemoryRestricted descriptor (J)Ljdk/incubator/foreign/MemoryAddress; -method name freeMemoryRestricted descriptor (Ljdk/incubator/foreign/MemoryAddress;)V method name getInstance descriptor ()Ljdk/incubator/foreign/CLinker; flags 9 runtimeAnnotations @Ljdk/internal/reflect/CallerSensitive; @@ -47,10 +48,7 @@ method name downcallHandle descriptor (Ljava/lang/invoke/MethodType;Ljdk/incubat method name upcallStub descriptor (Ljava/lang/invoke/MethodHandle;Ljdk/incubator/foreign/FunctionDescriptor;Ljdk/incubator/foreign/ResourceScope;)Ljdk/incubator/foreign/MemoryAddress; flags 401 method name toCString descriptor (Ljava/lang/String;Ljdk/incubator/foreign/SegmentAllocator;)Ljdk/incubator/foreign/MemorySegment; flags 9 method name toCString descriptor (Ljava/lang/String;Ljdk/incubator/foreign/ResourceScope;)Ljdk/incubator/foreign/MemorySegment; flags 9 -method name toCString descriptor (Ljava/lang/String;Ljava/nio/charset/Charset;Ljdk/incubator/foreign/SegmentAllocator;)Ljdk/incubator/foreign/MemorySegment; flags 9 -method name toCString descriptor (Ljava/lang/String;Ljava/nio/charset/Charset;Ljdk/incubator/foreign/ResourceScope;)Ljdk/incubator/foreign/MemorySegment; flags 9 method name toJavaString descriptor (Ljdk/incubator/foreign/MemoryAddress;)Ljava/lang/String; flags 9 runtimeAnnotations @Ljdk/internal/reflect/CallerSensitive; -method name toJavaString descriptor (Ljdk/incubator/foreign/MemoryAddress;Ljava/nio/charset/Charset;)Ljava/lang/String; flags 9 runtimeAnnotations @Ljdk/internal/reflect/CallerSensitive; method name allocateMemory descriptor (J)Ljdk/incubator/foreign/MemoryAddress; flags 9 runtimeAnnotations @Ljdk/internal/reflect/CallerSensitive; method name freeMemory descriptor (Ljdk/incubator/foreign/MemoryAddress;)V flags 9 runtimeAnnotations @Ljdk/internal/reflect/CallerSensitive; method name systemLookup descriptor ()Ljdk/incubator/foreign/SymbolLookup; flags 9 runtimeAnnotations @Ljdk/internal/reflect/CallerSensitive; diff --git a/make/data/symbols/jdk.javadoc-H.sym.txt b/make/data/symbols/jdk.javadoc-H.sym.txt index 4681a5674de45de9bf5b6b6c8d9522c77eff0612..b2ab2561f5c5a004ce9efdf761741c23bd46b948 100644 --- a/make/data/symbols/jdk.javadoc-H.sym.txt +++ b/make/data/symbols/jdk.javadoc-H.sym.txt @@ -29,9 +29,9 @@ class name jdk/javadoc/doclet/Reporter header extends java/lang/Object flags 601 innerclass innerClass javax/tools/Diagnostic$Kind outerClass javax/tools/Diagnostic innerClassName Kind flags 4019 -method name print descriptor 
(Ljavax/tools/Diagnostic$Kind;Ljavax/tools/FileObject;IIILjava/lang/String;)V flags 401 method name getStandardWriter descriptor ()Ljava/io/PrintWriter; flags 1 method name getDiagnosticWriter descriptor ()Ljava/io/PrintWriter; flags 1 +method name print descriptor (Ljavax/tools/Diagnostic$Kind;Ljavax/tools/FileObject;IIILjava/lang/String;)V flags 1 class name jdk/javadoc/doclet/StandardDoclet header extends java/lang/Object implements jdk/javadoc/doclet/Doclet flags 21 diff --git a/make/data/tzdata/VERSION b/make/data/tzdata/VERSION index 71632a7bb61312e54d153f4879953b81c3083305..b5c971d897937678e10650d567dc9926e84e2e5a 100644 --- a/make/data/tzdata/VERSION +++ b/make/data/tzdata/VERSION @@ -21,4 +21,4 @@ # or visit www.oracle.com if you need additional information or have any # questions. # -tzdata2021a +tzdata2021c diff --git a/make/data/tzdata/africa b/make/data/tzdata/africa index 5de2e5f4ab1b0b97a6b029bd0e38733e8d1a41e0..0f367713ea900d457b14a23963854bc94c1c1414 100644 --- a/make/data/tzdata/africa +++ b/make/data/tzdata/africa @@ -53,9 +53,6 @@ # Milne J. Civil time. Geogr J. 1899 Feb;13(2):173-94. # https://www.jstor.org/stable/1774359 # -# A reliable and entertaining source about time zones is -# Derek Howse, Greenwich time and longitude, Philip Wilson Publishers (1997). -# # European-style abbreviations are commonly used along the Mediterranean. # For sub-Saharan Africa abbreviations were less standardized. # Previous editions of this database used WAT, CAT, SAT, and EAT @@ -176,8 +173,9 @@ Zone Africa/Ndjamena 1:00:12 - LMT 1912 # N'Djamena # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Africa/Abidjan -0:16:08 - LMT 1912 0:00 - GMT +Link Africa/Abidjan Africa/Accra # Ghana Link Africa/Abidjan Africa/Bamako # Mali -Link Africa/Abidjan Africa/Banjul # Gambia +Link Africa/Abidjan Africa/Banjul # The Gambia Link Africa/Abidjan Africa/Conakry # Guinea Link Africa/Abidjan Africa/Dakar # Senegal Link Africa/Abidjan Africa/Freetown # Sierra Leone @@ -404,93 +402,8 @@ Zone Africa/Cairo 2:05:09 - LMT 1900 Oct # Gabon # See Africa/Lagos. -# Gambia -# See Africa/Abidjan. - +# The Gambia # Ghana - -# From P Chan (2020-11-20): -# Interpretation Amendment Ordinance, 1915 (No.24 of 1915) [1915-11-02] -# Ordinances of the Gold Coast, Ashanti, Northern Territories 1915, p 69-71 -# https://books.google.com/books?id=ErA-AQAAIAAJ&pg=PA70 -# This Ordinance added "'Time' shall mean Greenwich Mean Time" to the -# Interpretation Ordinance, 1876. -# -# Determination of the Time Ordinance, 1919 (No. 18 of 1919) [1919-11-24] -# Ordinances of the Gold Coast, Ashanti, Northern Territories 1919, p 75-76 -# https://books.google.com/books?id=MbA-AQAAIAAJ&pg=PA75 -# This Ordinance removed the previous definition of time and introduced DST. -# -# Time Determination Ordinance (Cap. 214) -# The Laws of the Gold Coast (including Togoland Under British Mandate) -# Vol. II (1937), p 2328 -# https://books.google.com/books?id=Z7M-AQAAIAAJ&pg=PA2328 -# Revised edition of the 1919 Ordinance. -# -# Time Determination (Amendment) Ordinance, 1940 (No. 9 of 1940) [1940-04-06] -# Annual Volume of the Laws of the Gold Coast: -# Containing All Legislation Enacted During Year 1940, p 22 -# https://books.google.com/books?id=1ao-AQAAIAAJ&pg=PA22 -# This Ordinance changed the forward transition from September to May. -# -# Defence (Time Determination Ordinance Amendment) Regulations, 1942 -# (Regulations No. 
6 of 1942) [1942-01-31, commenced on 1942-02-08] -# Annual Volume of the Laws of the Gold Coast: -# Containing All Legislation Enacted During Year 1942, p 48 -# https://books.google.com/books?id=Das-AQAAIAAJ&pg=PA48 -# These regulations advanced the [standard] time by thirty minutes. -# -# Defence (Time Determination Ordinance Amendment (No.2)) Regulations, -# 1942 (Regulations No. 28 of 1942) [1942-04-25] -# Annual Volume of the Laws of the Gold Coast: -# Containing All Legislation Enacted During Year 1942, p 87 -# https://books.google.com/books?id=Das-AQAAIAAJ&pg=PA87 -# These regulations abolished DST and changed the time to GMT+0:30. -# -# Defence (Revocation) (No.4) Regulations, 1945 (Regulations No. 45 of -# 1945) [1945-10-24, commenced on 1946-01-06] -# Annual Volume of the Laws of the Gold Coast: -# Containing All Legislation Enacted During Year 1945, p 256 -# https://books.google.com/books?id=9as-AQAAIAAJ&pg=PA256 -# These regulations revoked the previous two sets of Regulations. -# -# Time Determination (Amendment) Ordinance, 1945 (No. 18 of 1945) [1946-01-06] -# Annual Volume of the Laws of the Gold Coast: -# Containing All Legislation Enacted During Year 1945, p 69 -# https://books.google.com/books?id=9as-AQAAIAAJ&pg=PA69 -# This Ordinance abolished DST. -# -# Time Determination (Amendment) Ordinance, 1950 (No. 26 of 1950) [1950-07-22] -# Annual Volume of the Laws of the Gold Coast: -# Containing All Legislation Enacted During Year 1950, p 35 -# https://books.google.com/books?id=e60-AQAAIAAJ&pg=PA35 -# This Ordinance restored DST but with thirty minutes offset. -# -# Time Determination Ordinance (Cap. 264) -# The Laws of the Gold Coast, Vol. V (1954), p 380 -# https://books.google.com/books?id=Mqc-AQAAIAAJ&pg=PA380 -# Revised edition of the Time Determination Ordinance. -# -# Time Determination (Amendment) Ordinance, 1956 (No. 21 of 1956) [1956-08-29] -# Annual Volume of the Ordinances of the Gold Coast Enacted During the -# Year 1956, p 83 -# https://books.google.com/books?id=VLE-AQAAIAAJ&pg=PA83 -# This Ordinance abolished DST. - -# Rule NAME FROM TO - IN ON AT SAVE LETTER/S -Rule Ghana 1919 only - Nov 24 0:00 0:20 +0020 -Rule Ghana 1920 1942 - Jan 1 2:00 0 GMT -Rule Ghana 1920 1939 - Sep 1 2:00 0:20 +0020 -Rule Ghana 1940 1941 - May 1 2:00 0:20 +0020 -Rule Ghana 1950 1955 - Sep 1 2:00 0:30 +0030 -Rule Ghana 1951 1956 - Jan 1 2:00 0 GMT - -# Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Africa/Accra -0:00:52 - LMT 1915 Nov 2 - 0:00 Ghana %s 1942 Feb 8 - 0:30 - +0030 1946 Jan 6 - 0:00 Ghana %s - # Guinea # See Africa/Abidjan. @@ -755,7 +668,7 @@ Zone Indian/Mauritius 3:50:00 - LMT 1907 # Port Louis # See Africa/Nairobi. # Morocco -# See the 'europe' file for Spanish Morocco (Africa/Ceuta). +# See Africa/Ceuta for Spanish Morocco. # From Alex Krivenyshev (2008-05-09): # Here is an article that Morocco plan to introduce Daylight Saving Time between @@ -1405,23 +1318,21 @@ Zone Africa/Lagos 0:13:35 - LMT 1905 Jul 1 0:13:35 - LMT 1914 Jan 1 0:30 - +0030 1919 Sep 1 1:00 - WAT -Link Africa/Lagos Africa/Bangui # Central African Republic -Link Africa/Lagos Africa/Brazzaville # Rep. of the Congo -Link Africa/Lagos Africa/Douala # Cameroon -Link Africa/Lagos Africa/Kinshasa # Dem. Rep. 
of the Congo (west) -Link Africa/Lagos Africa/Libreville # Gabon -Link Africa/Lagos Africa/Luanda # Angola -Link Africa/Lagos Africa/Malabo # Equatorial Guinea -Link Africa/Lagos Africa/Niamey # Niger -Link Africa/Lagos Africa/Porto-Novo # Benin +Link Africa/Lagos Africa/Bangui # Central African Republic +Link Africa/Lagos Africa/Brazzaville # Rep. of the Congo +Link Africa/Lagos Africa/Douala # Cameroon +Link Africa/Lagos Africa/Kinshasa # Dem. Rep. of the Congo (west) +Link Africa/Lagos Africa/Libreville # Gabon +Link Africa/Lagos Africa/Luanda # Angola +Link Africa/Lagos Africa/Malabo # Equatorial Guinea +Link Africa/Lagos Africa/Niamey # Niger +Link Africa/Lagos Africa/Porto-Novo # Benin # Réunion # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Indian/Reunion 3:41:52 - LMT 1911 Jun # Saint-Denis 4:00 - +04 # -# Crozet Islands also observes Réunion time; see the 'antarctica' file. -# # Scattered Islands (Îles Éparses) administered from Réunion are as follows. # The following information about them is taken from # Îles Éparses (, 1997-07-22, @@ -1513,8 +1424,8 @@ Rule SA 1943 1944 - Mar Sun>=15 2:00 0 - Zone Africa/Johannesburg 1:52:00 - LMT 1892 Feb 8 1:30 - SAST 1903 Mar 2:00 SA SAST -Link Africa/Johannesburg Africa/Maseru # Lesotho -Link Africa/Johannesburg Africa/Mbabane # Eswatini +Link Africa/Johannesburg Africa/Maseru # Lesotho +Link Africa/Johannesburg Africa/Mbabane # Eswatini # # Marion and Prince Edward Is # scientific station since 1947 @@ -1550,12 +1461,13 @@ Zone Africa/Khartoum 2:10:08 - LMT 1931 3:00 - EAT 2017 Nov 1 2:00 - CAT +# South Sudan + # From Steffen Thorsen (2021-01-18): # "South Sudan will change its time zone by setting the clock back 1 # hour on February 1, 2021...." # from https://eyeradio.org/south-sudan-adopts-new-time-zone-makuei/ -# South Sudan # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Africa/Juba 2:06:28 - LMT 1931 2:00 Sudan CA%sT 2000 Jan 15 12:00 @@ -1660,7 +1572,7 @@ Rule Tunisia 2005 only - Sep 30 1:00s 0 - Rule Tunisia 2006 2008 - Mar lastSun 2:00s 1:00 S Rule Tunisia 2006 2008 - Oct lastSun 2:00s 0 - -# See Europe/Paris for PMT-related transitions. +# See Europe/Paris commentary for PMT-related transitions. # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Africa/Tunis 0:40:44 - LMT 1881 May 12 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time diff --git a/make/data/tzdata/antarctica b/make/data/tzdata/antarctica index 509fadc29a9644cb9d2d714e54588382a1ee3b16..13f024ef9bc4c6544b8fa11e5a1fb8ab80cf14fa 100644 --- a/make/data/tzdata/antarctica +++ b/make/data/tzdata/antarctica @@ -171,7 +171,7 @@ Zone Antarctica/Mawson 0 - -00 1954 Feb 13 # # Alfred Faure, Possession Island, Crozet Islands, -462551+0515152, since 1964; # sealing & whaling stations operated variously 1802/1911+; -# see Indian/Reunion. +# see Asia/Dubai. # # Martin-de-Viviès, Amsterdam Island, -374105+0773155, since 1950 # Port-aux-Français, Kerguelen Islands, -492110+0701303, since 1951; @@ -185,17 +185,7 @@ Zone Indian/Kerguelen 0 - -00 1950 # Port-aux-Français 5:00 - +05 # # year-round base in the main continent -# Dumont d'Urville, Île des Pétrels, -6640+14001, since 1956-11 -# (2005-12-05) -# -# Another base at Port-Martin, 50km east, began operation in 1947. -# It was destroyed by fire on 1952-01-14. -# -# Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Antarctica/DumontDUrville 0 - -00 1947 - 10:00 - +10 1952 Jan 14 - 0 - -00 1956 Nov - 10:00 - +10 +# Dumont d'Urville - see Pacific/Port_Moresby. 
# France & Italy - year-round base # Concordia, -750600+1232000, since 2005 @@ -211,20 +201,7 @@ Zone Antarctica/DumontDUrville 0 - -00 1947 # Zuchelli, Terra Nova Bay, -744140+1640647, since 1986 # Japan - year-round bases -# Syowa (also known as Showa), -690022+0393524, since 1957 -# -# From Hideyuki Suzuki (1999-02-06): -# In all Japanese stations, +0300 is used as the standard time. -# -# Syowa station, which is the first antarctic station of Japan, -# was established on 1957-01-29. Since Syowa station is still the main -# station of Japan, it's appropriate for the principal location. -# Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Antarctica/Syowa 0 - -00 1957 Jan 29 - 3:00 - +03 -# See: -# NIPR Antarctic Research Activities (1999-08-17) -# http://www.nipr.ac.jp/english/ara01.html +# See Asia/Riyadh. # S Korea - year-round base # Jang Bogo, Terra Nova Bay, -743700+1641205 since 2014 diff --git a/make/data/tzdata/asia b/make/data/tzdata/asia index 143d8e8fdc3876ef3346a6a18ea5026c1e171a88..a5cee81a42ee963fe7f1e8d0fef8a67afb828adc 100644 --- a/make/data/tzdata/asia +++ b/make/data/tzdata/asia @@ -57,9 +57,6 @@ # Byalokoz EL. New Counting of Time in Russia since July 1, 1919. # (See the 'europe' file for a fuller citation.) # -# A reliable and entertaining source about time zones is -# Derek Howse, Greenwich time and longitude, Philip Wilson Publishers (1997). -# # The following alphabetic abbreviations appear in these tables # (corrections are welcome): # std dst @@ -2257,6 +2254,14 @@ Zone Asia/Tokyo 9:18:59 - LMT 1887 Dec 31 15:00u # From Paul Eggert (2013-12-11): # As Steffen suggested, consider the past 21-month experiment to be DST. +# From Steffen Thorsen (2021-09-24): +# The Jordanian Government announced yesterday that they will start DST +# in February instead of March: +# https://petra.gov.jo/Include/InnerPage.jsp?ID=37683&lang=en&name=en_news (English) +# https://petra.gov.jo/Include/InnerPage.jsp?ID=189969&lang=ar&name=news (Arabic) +# From the Arabic version, it seems to say it would be at midnight +# (assume 24:00) on the last Thursday in February, starting from 2022. + # Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Jordan 1973 only - Jun 6 0:00 1:00 S Rule Jordan 1973 1975 - Oct 1 0:00 0 - @@ -2287,8 +2292,9 @@ Rule Jordan 2004 only - Oct 15 0:00s 0 - Rule Jordan 2005 only - Sep lastFri 0:00s 0 - Rule Jordan 2006 2011 - Oct lastFri 0:00s 0 - Rule Jordan 2013 only - Dec 20 0:00 0 - -Rule Jordan 2014 max - Mar lastThu 24:00 1:00 S +Rule Jordan 2014 2021 - Mar lastThu 24:00 1:00 S Rule Jordan 2014 max - Oct lastFri 0:00s 0 - +Rule Jordan 2022 max - Feb lastThu 24:00 1:00 S # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Asia/Amman 2:23:44 - LMT 1931 2:00 Jordan EE%sT @@ -2763,7 +2769,8 @@ Rule NBorneo 1935 1941 - Dec 14 0:00 0 - # # peninsular Malaysia # taken from Mok Ly Yng (2003-10-30) -# http://www.math.nus.edu.sg/aslaksen/teaching/timezone.html +# https://web.archive.org/web/20190822231045/http://www.math.nus.edu.sg/~mathelmr/teaching/timezone.html +# This agrees with Singapore since 1905-06-01. # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Asia/Kuala_Lumpur 6:46:46 - LMT 1901 Jan 1 6:55:25 - SMT 1905 Jun 1 # Singapore M.T. @@ -3523,6 +3530,12 @@ Zone Asia/Hebron 2:20:23 - LMT 1900 Oct # influence of the sources. There is no current abbreviation for DST, # so use "PDT", the usual American style. 
+# From P Chan (2021-05-10): +# Here's a fairly comprehensive article in Japanese: +# https://wiki.suikawiki.org/n/Philippine%20Time +# From Paul Eggert (2021-05-10): +# The info in the Japanese table has not been absorbed (yet) below. + # Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Phil 1936 only - Nov 1 0:00 1:00 D Rule Phil 1937 only - Feb 1 0:00 0 S @@ -3589,12 +3602,13 @@ Link Asia/Qatar Asia/Bahrain # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Asia/Riyadh 3:06:52 - LMT 1947 Mar 14 3:00 - +03 +Link Asia/Riyadh Antarctica/Syowa Link Asia/Riyadh Asia/Aden # Yemen Link Asia/Riyadh Asia/Kuwait # Singapore # taken from Mok Ly Yng (2003-10-30) -# http://www.math.nus.edu.sg/aslaksen/teaching/timezone.html +# https://web.archive.org/web/20190822231045/http://www.math.nus.edu.sg/~mathelmr/teaching/timezone.html # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Asia/Singapore 6:55:25 - LMT 1901 Jan 1 6:55:25 - SMT 1905 Jun 1 # Singapore M.T. diff --git a/make/data/tzdata/australasia b/make/data/tzdata/australasia index e28538e0c84e032d8911c0c526deaf081d5b3847..af72f11e5aede93d24f6b62b72de744abab66fd0 100644 --- a/make/data/tzdata/australasia +++ b/make/data/tzdata/australasia @@ -487,7 +487,7 @@ Link Pacific/Guam Pacific/Saipan # N Mariana Is # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Pacific/Tarawa 11:32:04 - LMT 1901 # Bairiki 12:00 - +12 -Zone Pacific/Enderbury -11:24:20 - LMT 1901 +Zone Pacific/Kanton 0 - -00 1937 Aug 31 -12:00 - -12 1979 Oct -11:00 - -11 1994 Dec 31 13:00 - +13 @@ -620,13 +620,46 @@ Link Pacific/Auckland Antarctica/McMurdo # was probably like Pacific/Auckland # Cook Is -# From Shanks & Pottenger: +# +# From Alexander Krivenyshev (2021-03-24): +# In 1899 the Cook Islands celebrated Christmas twice to correct the calendar. +# According to the old books, missionaries were unaware of +# the International Date line, when they came from Sydney. +# Thus the Cook Islands were one day ahead.... +# http://nzetc.victoria.ac.nz/tm/scholarly/tei-KloDisc-t1-body-d18.html +# ... Appendix to the Journals of the House of Representatives, 1900 +# https://atojs.natlib.govt.nz/cgi-bin/atojs?a=d&d=AJHR1900-I.2.1.2.3 +# (page 20) +# +# From Michael Deckers (2021-03-24): +# ... in the Cook Island Act of 1915-10-11, online at +# http://www.paclii.org/ck/legis/ck-nz_act/cia1915132/ +# "651. The hour of the day shall in each of the islands included in the +# Cook Islands be determined in accordance with the meridian of that island." +# so that local (mean?) time was still used in Rarotonga (and Niue) in 1915. +# This was changed in the Cook Island Amendment Act of 1952-10-16 ... +# http://www.paclii.org/ck/legis/ck-nz_act/ciaa1952212/ +# "651 (1) The hour of the day in each of the islands included in the Cook +# Islands, other than Niue, shall be determined as if each island were +# situated on the meridian one hundred and fifty-seven degrees thirty minutes +# West of Greenwich. (2) The hour of the day in the Island of Niue shall be +# determined as if that island were situated on the meridian one hundred and +# seventy degrees West of Greenwich." +# This act does not state when it takes effect, so one has to assume it +# applies since 1952-10-16. But there is the possibility that the act just +# legalized prior existing practice, as we had seen with the Guernsey law of +# 1913-06-18 for the switch in 1909-04-19. +# +# From Paul Eggert (2021-03-24): +# Transitions after 1952 are from Shanks & Pottenger. 
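
The 1899-12-26 correction described above repeats a calendar day: local clock time is untouched while the LMT offset swings by exactly 24 hours, which is why the Rarotonga zone lines below pair 13:20:56 with -10:39:04. A quick arithmetic check; DateLineMath is an illustrative name, not part of the patch:

    import java.time.Duration;

    public class DateLineMath {
        public static void main(String[] args) {
            // Avarua's LMT east of the date line...
            Duration east = Duration.ofHours(13).plusMinutes(20).plusSeconds(56);
            // ...minus one full day gives its LMT west of the date line.
            System.out.println(east.minusHours(24)); // PT-10H-39M-4S, i.e. -10:39:04
        }
    }
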
+# # Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Cook 1978 only - Nov 12 0:00 0:30 - Rule Cook 1979 1991 - Mar Sun>=1 0:00 0 - Rule Cook 1979 1990 - Oct lastSun 0:00 0:30 - # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Pacific/Rarotonga -10:39:04 - LMT 1901 # Avarua +Zone Pacific/Rarotonga 13:20:56 - LMT 1899 Dec 26 # Avarua + -10:39:04 - LMT 1952 Oct 16 -10:30 - -1030 1978 Nov 12 -10:00 Cook -10/-0930 @@ -634,10 +667,18 @@ Zone Pacific/Rarotonga -10:39:04 - LMT 1901 # Avarua # Niue +# See Pacific/Raratonga comments for 1952 transition. +# +# From Tim Parenti (2021-09-13): +# Consecutive contemporaneous editions of The Air Almanac listed -11:20 for +# Niue as of Apr 1964 but -11 as of Aug 1964: +# Apr 1964: https://books.google.com/books?id=_1So677Y5vUC&pg=SL1-PA23 +# Aug 1964: https://books.google.com/books?id=MbJloqd-zyUC&pg=SL1-PA23 +# Without greater specificity, guess 1964-07-01 for this transition. + # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Pacific/Niue -11:19:40 - LMT 1901 # Alofi - -11:20 - -1120 1951 - -11:30 - -1130 1978 Oct 1 +Zone Pacific/Niue -11:19:40 - LMT 1952 Oct 16 # Alofi + -11:20 - -1120 1964 Jul -11:00 - -11 # Norfolk @@ -661,6 +702,7 @@ Zone Pacific/Palau -15:02:04 - LMT 1844 Dec 31 # Koror Zone Pacific/Port_Moresby 9:48:40 - LMT 1880 9:48:32 - PMMT 1895 # Port Moresby Mean Time 10:00 - +10 +Link Pacific/Port_Moresby Antarctica/DumontDUrville # # From Paul Eggert (2014-10-13): # Base the Bougainville entry on the Arawa-Kieta region, which appears to have @@ -765,13 +807,17 @@ Link Pacific/Pago_Pago Pacific/Midway # in US minor outlying islands # From Paul Eggert (2014-07-08): # That web page currently lists transitions for 2012/3 and 2013/4. # Assume the pattern instituted in 2012 will continue indefinitely. +# +# From Geoffrey D. Bennett (2021-09-20): +# https://www.mcil.gov.ws/storage/2021/09/MCIL-Scan_20210920_120553.pdf +# DST has been cancelled for this year. # Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule WS 2010 only - Sep lastSun 0:00 1 - Rule WS 2011 only - Apr Sat>=1 4:00 0 - Rule WS 2011 only - Sep lastSat 3:00 1 - -Rule WS 2012 max - Apr Sun>=1 4:00 0 - -Rule WS 2012 max - Sep lastSun 3:00 1 - +Rule WS 2012 2021 - Apr Sun>=1 4:00 0 - +Rule WS 2012 2020 - Sep lastSun 3:00 1 - # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Pacific/Apia 12:33:04 - LMT 1892 Jul 5 -11:26:56 - LMT 1911 @@ -818,8 +864,8 @@ Rule Tonga 2001 2002 - Jan lastSun 2:00 0 - Rule Tonga 2016 only - Nov Sun>=1 2:00 1:00 - Rule Tonga 2017 only - Jan Sun>=15 3:00 0 - # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Pacific/Tongatapu 12:19:20 - LMT 1901 - 12:20 - +1220 1941 +Zone Pacific/Tongatapu 12:19:12 - LMT 1945 Sep 10 + 12:20 - +1220 1961 13:00 - +13 1999 13:00 Tonga +13/+14 @@ -1761,6 +1807,23 @@ Zone Pacific/Wallis 12:15:20 - LMT 1901 # One source for this is page 202 of: Bartky IR. One Time Fits All: # The Campaigns for Global Uniformity (2007). +# Kanton + +# From Paul Eggert (2021-05-27): +# Kiribati's +13 timezone is represented by Kanton, its only populated +# island. (It was formerly spelled "Canton", but Gilbertese lacks "C".) +# Kanton was settled on 1937-08-31 by two British radio operators +# ; +# Americans came the next year and built an airfield, partly to +# establish airline service and perhaps partly anticipating the +# next war. Aside from the war, the airfield was used by commercial +# airlines until long-range jets became standard; although currently +# for emergency use only, China says it is considering rebuilding the +# airfield for high-end niche tourism. 
Kanton has about two dozen +# people, caretakers who rotate in from the rest of Kiribati in 2-5 +# year shifts, and who use some of the leftover structures +# . + # Kwajalein # From an AP article (1993-08-22): @@ -2044,6 +2107,17 @@ Zone Pacific/Wallis 12:15:20 - LMT 1901 # Tonga +# From Paul Eggert (2021-03-04): +# In 1943 "The standard time kept is 12 hrs. 19 min. 12 sec. fast +# on Greenwich mean time." according to the Admiralty's Hydrographic +# Dept., Pacific Islands Pilot, Vol. II, 7th ed., 1943, p 360. + +# From Michael Deckers (2021-03-03): +# [Ian R Bartky: "One Time Fits All: The Campaigns for Global Uniformity". +# Stanford University Press. 2007. p. 255]: +# On 10 September 1945 Tonga adopted a standard time 12 hours, +# 20 minutes in advance of Greenwich. + # From Paul Eggert (1996-01-22): # Today's _Wall Street Journal_ (p 1) reports that "Tonga has been plotting # to sneak ahead of [New Zealanders] by introducing daylight-saving time." @@ -2072,9 +2146,26 @@ Zone Pacific/Wallis 12:15:20 - LMT 1901 # The Crown Prince, presented an unanswerable argument: "Remember that # on the World Day of Prayer, you would be the first people on Earth # to say your prayers in the morning." - -# From Paul Eggert (2006-03-22): -# Shanks & Pottenger say the transition was on 1968-10-01; go with Mundell. +# +# From Tim Parenti (2021-09-13), per Paul Eggert (2006-03-22) and Michael +# Deckers (2021-03-03): +# Mundell places the transition from +12:20 to +13 in 1941, while Shanks & +# Pottenger say the transition was on 1968-10-01. +# +# The Air Almanac published contemporaneous tables of standard times, +# which listed +12:20 as of Nov 1960 and +13 as of Mar 1961: +# Nov 1960: https://books.google.com/books?id=bVgtWM6kPZUC&pg=SL1-PA19 +# Mar 1961: https://books.google.com/books?id=W2nItAul4g0C&pg=SL1-PA19 +# (Thanks to P Chan for pointing us toward these sources.) +# This agrees with Bartky, who writes that "since 1961 [Tonga's] official time +# has been thirteen hours in advance of Greenwich time" (p. 202) and further +# writes in an endnote that this was because "the legislation was amended" on +# 1960-10-19. (p. 255) +# +# Without greater specificity, presume that Bartky and the Air Almanac point to +# a 1961-01-01 transition, as Tāufaʻāhau Tupou IV was still Crown Prince in +# 1961 and this still jives with the gist of Mundell's telling, and go with +# this over Shanks & Pottenger. # From Eric Ulevik (1999-05-03): # Tonga's director of tourism, who is also secretary of the National Millennium diff --git a/make/data/tzdata/backward b/make/data/tzdata/backward index 48482b74d301a0401a7545177afec1869e33801a..59c125623e2a15af64134b2f356a5751dca2764e 100644 --- a/make/data/tzdata/backward +++ b/make/data/tzdata/backward @@ -26,8 +26,10 @@ # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. -# This file provides links between current names for timezones -# and their old names. Many names changed in late 1993. +# This file provides links from old or merged timezone names to current ones. +# Many names changed in late 1993. Several of these names are +# also present in the file 'backzone', which has data important only +# for pre-1970 timestamps and so is out of scope for tzdb proper. 
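
Old names kept in 'backward' stay resolvable from java.time: ZoneId.of() preserves the requested ID while serving the target zone's rules, as with the Pacific/Enderbury entry in the Link table that follows. A hedged sketch, assuming a JDK carrying this tzdata; OldNames is an illustrative name:

    import java.time.ZoneId;

    public class OldNames {
        public static void main(String[] args) {
            ZoneId old = ZoneId.of("Pacific/Enderbury");  // linked to Pacific/Kanton below
            System.out.println(old.getId());              // the old spelling is preserved
            System.out.println(old.getRules().equals(
                    ZoneId.of("Pacific/Kanton").getRules())); // expect true: same rules
        }
    }
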
# Link TARGET LINK-NAME Link Africa/Nairobi Africa/Asmera @@ -36,7 +38,7 @@ Link America/Argentina/Catamarca America/Argentina/ComodRivadavia Link America/Adak America/Atka Link America/Argentina/Buenos_Aires America/Buenos_Aires Link America/Argentina/Catamarca America/Catamarca -Link America/Atikokan America/Coral_Harbour +Link America/Panama America/Coral_Harbour Link America/Argentina/Cordoba America/Cordoba Link America/Tijuana America/Ensenada Link America/Indiana/Indianapolis America/Fort_Wayne @@ -51,7 +53,7 @@ Link America/Rio_Branco America/Porto_Acre Link America/Argentina/Cordoba America/Rosario Link America/Tijuana America/Santa_Isabel Link America/Denver America/Shiprock -Link America/Port_of_Spain America/Virgin +Link America/Puerto_Rico America/Virgin Link Pacific/Auckland Antarctica/South_Pole Link Asia/Ashgabat Asia/Ashkhabad Link Asia/Kolkata Asia/Calcutta @@ -126,6 +128,7 @@ Link Pacific/Auckland NZ Link Pacific/Chatham NZ-CHAT Link America/Denver Navajo Link Asia/Shanghai PRC +Link Pacific/Kanton Pacific/Enderbury Link Pacific/Honolulu Pacific/Johnston Link Pacific/Pohnpei Pacific/Ponape Link Pacific/Pago_Pago Pacific/Samoa diff --git a/make/data/tzdata/europe b/make/data/tzdata/europe index eb9056e92d58437f39dd03bde54608dfff292a7c..87f9a19f7acbad9586b3053d25803db33ef5a31e 100644 --- a/make/data/tzdata/europe +++ b/make/data/tzdata/europe @@ -91,7 +91,6 @@ # 0:00 GMT BST BDST Greenwich, British Summer # 0:00 GMT IST Greenwich, Irish Summer # 0:00 WET WEST WEMT Western Europe -# 0:19:32.13 AMT* NST* Amsterdam, Netherlands Summer (1835-1937) # 1:00 BST British Standard (1968-1971) # 1:00 IST GMT Irish Standard (1968-) with winter DST # 1:00 CET CEST CEMT Central Europe @@ -1823,6 +1822,10 @@ Zone Europe/Rome 0:49:56 - LMT 1866 Dec 12 1:00 Italy CE%sT 1980 1:00 EU CE%sT +# Kosovo +# See Europe/Belgrade. + + Link Europe/Rome Europe/Vatican Link Europe/Rome Europe/San_Marino @@ -2173,6 +2176,10 @@ Zone Europe/Monaco 0:29:32 - LMT 1892 Jun 1 # The data entries before 1945 are taken from # https://www.staff.science.uu.nl/~gent0113/wettijd/wettijd.htm +# From Paul Eggert (2021-05-09): +# I invented the abbreviations AMT for Amsterdam Mean Time and NST for +# Netherlands Summer Time, used in the Netherlands from 1835 to 1937. + # Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Neth 1916 only - May 1 0:00 1:00 NST # Netherlands Summer Time Rule Neth 1916 only - Oct 1 0:00 0 AMT # Amsterdam Mean Time @@ -2399,12 +2406,10 @@ Rule Port 1943 1945 - Aug Sat>=25 22:00s 1:00 S Rule Port 1944 1945 - Apr Sat>=21 22:00s 2:00 M Rule Port 1946 only - Apr Sat>=1 23:00s 1:00 S Rule Port 1946 only - Oct Sat>=1 23:00s 0 - -Rule Port 1947 1949 - Apr Sun>=1 2:00s 1:00 S -Rule Port 1947 1949 - Oct Sun>=1 2:00s 0 - -# Shanks & Pottenger say DST was observed in 1950; go with Whitman. +# Whitman says DST was not observed in 1950; go with Shanks & Pottenger. # Whitman gives Oct lastSun for 1952 on; go with Shanks & Pottenger. -Rule Port 1951 1965 - Apr Sun>=1 2:00s 1:00 S -Rule Port 1951 1965 - Oct Sun>=1 2:00s 0 - +Rule Port 1947 1965 - Apr Sun>=1 2:00s 1:00 S +Rule Port 1947 1965 - Oct Sun>=1 2:00s 0 - Rule Port 1977 only - Mar 27 0:00s 1:00 S Rule Port 1977 only - Sep 25 0:00s 0 - Rule Port 1978 1979 - Apr Sun>=1 0:00s 1:00 S @@ -3706,6 +3711,9 @@ Zone Atlantic/Canary -1:01:36 - LMT 1922 Mar # Las Palmas de Gran C. # # Source: The newspaper "Dagens Nyheter", 1916-10-01, page 7 upper left. 
+# An extra-special abbreviation style is SET for Swedish Time (svensk +# normaltid) 1879-1899, 3° west of the Stockholm Observatory. + # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Europe/Stockholm 1:12:12 - LMT 1879 Jan 1 1:00:14 - SET 1900 Jan 1 # Swedish Time diff --git a/make/data/tzdata/leapseconds b/make/data/tzdata/leapseconds index 6f1941601d3b97dd4061ec9f0168088c38758bd0..cc514561ff177924dd0741b5b5f9f37c3b8526d2 100644 --- a/make/data/tzdata/leapseconds +++ b/make/data/tzdata/leapseconds @@ -95,11 +95,11 @@ Leap 2016 Dec 31 23:59:60 + S # Any additional leap seconds will come after this. # This Expires line is commented out for now, # so that pre-2020a zic implementations do not reject this file. -#Expires 2021 Dec 28 00:00:00 +#Expires 2022 Jun 28 00:00:00 # POSIX timestamps for the data in this file: #updated 1467936000 (2016-07-08 00:00:00 UTC) -#expires 1640649600 (2021-12-28 00:00:00 UTC) +#expires 1656374400 (2022-06-28 00:00:00 UTC) -# Updated through IERS Bulletin C61 -# File expires on: 28 December 2021 +# Updated through IERS Bulletin C62 +# File expires on: 28 June 2022 diff --git a/make/data/tzdata/northamerica b/make/data/tzdata/northamerica index 610c606c01a10b1cb4ec9afebb5406dfd9e8c1f0..ddd4929b1d4c6912d42ced6ed9ccfdbde9f9a83b 100644 --- a/make/data/tzdata/northamerica +++ b/make/data/tzdata/northamerica @@ -752,7 +752,11 @@ Zone America/Adak 12:13:22 - LMT 1867 Oct 19 12:44:35 -11:00 US B%sT 1983 Oct 30 2:00 -10:00 US AH%sT 1983 Nov 30 -10:00 US H%sT -# The following switches don't quite make our 1970 cutoff. +# The following switches don't make our 1970 cutoff. +# +# Kiska observed Tokyo date and time during Japanese occupation from +# 1942-06-06 to 1943-07-29, and similarly for Attu from 1942-06-07 to +# 1943-05-29 (all dates American). Both islands are now uninhabited. # # Shanks writes that part of southwest Alaska (e.g. Aniak) # switched from -11:00 to -10:00 on 1968-09-22 at 02:00, @@ -848,6 +852,8 @@ Zone America/Phoenix -7:28:18 - LMT 1883 Nov 18 11:31:42 -7:00 - MST 1967 -7:00 US M%sT 1968 Mar 21 -7:00 - MST +Link America/Phoenix America/Creston + # From Arthur David Olson (1988-02-13): # A writer from the Inter Tribal Council of Arizona, Inc., # notes in private correspondence dated 1987-12-28 that "Presently, only the @@ -1616,24 +1622,7 @@ Zone America/Moncton -4:19:08 - LMT 1883 Dec 9 # From Paul Eggert (2020-01-10): # See America/Toronto for most of Quebec, including Montreal. # See America/Halifax for the Îles de la Madeleine and the Listuguj reserve. -# -# Matthews and Vincent (1998) also write that Quebec east of the -63 -# meridian is supposed to observe AST, but residents as far east as -# Natashquan use EST/EDT, and residents east of Natashquan use AST. -# The Quebec department of justice writes in -# "The situation in Minganie and Basse-Côte-Nord" -# https://www.justice.gouv.qc.ca/en/department/ministre/functions-and-responsabilities/legal-time-in-quebec/the-situation-in-minganie-and-basse-cote-nord/ -# that the coastal strip from just east of Natashquan to Blanc-Sablon -# observes Atlantic standard time all year round. -# This common practice was codified into law as of 2007; see Legal Time Act, -# CQLR c T-5.1 . -# For lack of better info, guess this practice began around 1970, contra to -# Shanks & Pottenger who have this region observing AST/ADT. - -# Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone America/Blanc-Sablon -3:48:28 - LMT 1884 - -4:00 Canada A%sT 1970 - -4:00 - AST +# See America/Puerto_Rico for east of Natashquan. 
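
The leapseconds hunk above only refreshes the expiry stamps, and the POSIX value can be cross-checked directly, since 1656374400 seconds after the epoch is indeed 2022-06-28T00:00:00Z. An illustrative one-liner (EpochCheck is not part of the patch):

    import java.time.Instant;

    public class EpochCheck {
        public static void main(String[] args) {
            // The '#expires' comment pairs this POSIX stamp with 2022-06-28 00:00 UTC.
            System.out.println(Instant.ofEpochSecond(1656374400L)); // 2022-06-28T00:00:00Z
        }
    }
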
# Ontario @@ -1672,54 +1661,6 @@ Zone America/Blanc-Sablon -3:48:28 - LMT 1884 # time became a comic failure in Orillia. Toronto Star 2017-07-08. # https://www.thestar.com/news/insight/2017/07/08/bold-attempt-at-daylight-saving-time-became-a-comic-failure-in-orillia.html -# From Paul Eggert (1997-10-17): -# Mark Brader writes that an article in the 1997-10-14 Toronto Star -# says that Atikokan, Ontario currently does not observe DST, -# but will vote on 11-10 whether to use EST/EDT. -# He also writes that the Ontario Time Act (1990, Chapter T.9) -# http://www.gov.on.ca/MBS/english/publications/statregs/conttext.html -# says that Ontario east of 90W uses EST/EDT, and west of 90W uses CST/CDT. -# Officially Atikokan is therefore on CST/CDT, and most likely this report -# concerns a non-official time observed as a matter of local practice. -# -# From Paul Eggert (2000-10-02): -# Matthews and Vincent (1998) write that Atikokan, Pickle Lake, and -# New Osnaburgh observe CST all year, that Big Trout Lake observes -# CST/CDT, and that Upsala and Shebandowan observe EST/EDT, all in -# violation of the official Ontario rules. -# -# From Paul Eggert (2006-07-09): -# Chris Walton (2006-07-06) mentioned an article by Stephanie MacLellan in the -# 2005-07-21 Chronicle-Journal, which said: -# -# The clocks in Atikokan stay set on standard time year-round. -# This means they spend about half the time on central time and -# the other half on eastern time. -# -# For the most part, the system works, Mayor Dennis Brown said. -# -# "The majority of businesses in Atikokan deal more with Eastern -# Canada, but there are some that deal with Western Canada," he -# said. "I don't see any changes happening here." -# -# Walton also writes "Supposedly Pickle Lake and Mishkeegogamang -# [New Osnaburgh] follow the same practice." - -# From Garry McKinnon (2006-07-14) via Chris Walton: -# I chatted with a member of my board who has an outstanding memory -# and a long history in Atikokan (and in the telecom industry) and he -# can say for certain that Atikokan has been practicing the current -# time keeping since 1952, at least. - -# From Paul Eggert (2006-07-17): -# Shanks & Pottenger say that Atikokan has agreed with Rainy River -# ever since standard time was introduced, but the information from -# McKinnon sounds more authoritative. For now, assume that Atikokan -# switched to EST immediately after WWII era daylight saving time -# ended. This matches the old (less-populous) America/Coral_Harbour -# entry since our cutoff date of 1970, so we can move -# America/Coral_Harbour to the 'backward' file. - # From Mark Brader (2010-03-06): # # Currently the database has: @@ -1850,6 +1791,7 @@ Zone America/Toronto -5:17:32 - LMT 1895 -5:00 Canada E%sT 1946 -5:00 Toronto E%sT 1974 -5:00 Canada E%sT +Link America/Toronto America/Nassau Zone America/Thunder_Bay -5:57:00 - LMT 1895 -6:00 - CST 1910 -5:00 - EST 1942 @@ -1865,11 +1807,7 @@ Zone America/Rainy_River -6:18:16 - LMT 1895 -6:00 Canada C%sT 1940 Sep 29 -6:00 1:00 CDT 1942 Feb 9 2:00s -6:00 Canada C%sT -Zone America/Atikokan -6:06:28 - LMT 1895 - -6:00 Canada C%sT 1940 Sep 29 - -6:00 1:00 CDT 1942 Feb 9 2:00s - -6:00 Canada C%sT 1945 Sep 30 2:00 - -5:00 - EST +# For Atikokan see America/Panama. # Manitoba @@ -2060,60 +1998,6 @@ Zone America/Edmonton -7:33:52 - LMT 1906 Sep # Shanks & Pottenger write that since 1970 most of this region has # been like Vancouver. # Dawson Creek uses MST. Much of east BC is like Edmonton. 
-# Matthews and Vincent (1998) write that Creston is like Dawson Creek. - -# It seems though that (re: Creston) is not entirely correct: - -# From Chris Walton (2011-12-01): -# There are two areas within the Canadian province of British Columbia -# that do not currently observe daylight saving: -# a) The Creston Valley (includes the town of Creston and surrounding area) -# b) The eastern half of the Peace River Regional District -# (includes the cities of Dawson Creek and Fort St. John) - -# Earlier this year I stumbled across a detailed article about the time -# keeping history of Creston; it was written by Tammy Hardwick who is the -# manager of the Creston & District Museum. The article was written in May 2009. -# http://www.ilovecreston.com/?p=articles&t=spec&ar=260 -# According to the article, Creston has not changed its clocks since June 1918. -# i.e. Creston has been stuck on UT-7 for 93 years. -# Dawson Creek, on the other hand, changed its clocks as recently as April 1972. - -# Unfortunately the exact date for the time change in June 1918 remains -# unknown and will be difficult to ascertain. I e-mailed Tammy a few months -# ago to ask if Sunday June 2 was a reasonable guess. She said it was just -# as plausible as any other date (in June). She also said that after writing -# the article she had discovered another time change in 1916; this is the -# subject of another article which she wrote in October 2010. -# http://www.creston.museum.bc.ca/index.php?module=comments&uop=view_comment&cm+id=56 - -# Here is a summary of the three clock change events in Creston's history: -# 1. 1884 or 1885: adoption of Mountain Standard Time (GMT-7) -# Exact date unknown -# 2. Oct 1916: switch to Pacific Standard Time (GMT-8) -# Exact date in October unknown; Sunday October 1 is a reasonable guess. -# 3. June 1918: switch to Pacific Daylight Time (GMT-7) -# Exact date in June unknown; Sunday June 2 is a reasonable guess. -# note 1: -# On Oct 27/1918 when daylight saving ended in the rest of Canada, -# Creston did not change its clocks. -# note 2: -# During WWII when the Federal Government legislated a mandatory clock change, -# Creston did not oblige. -# note 3: -# There is no guarantee that Creston will remain on Mountain Standard Time -# (UTC-7) forever. -# The subject was debated at least once this year by the town Council. -# http://www.bclocalnews.com/kootenay_rockies/crestonvalleyadvance/news/116760809.html - -# During a period WWII, summer time (Daylight saying) was mandatory in Canada. -# In Creston, that was handled by shifting the area to PST (-8:00) then applying -# summer time to cause the offset to be -7:00, the same as it had been before -# the change. It can be argued that the timezone abbreviation during this -# period should be PDT rather than MST, but that doesn't seem important enough -# (to anyone) to further complicate the rules. - -# The transition dates (and times) are guesses. # From Matt Johnson (2015-09-21): # Fort Nelson, BC, Canada will cancel DST this year. So while previously they @@ -2167,10 +2051,7 @@ Zone America/Fort_Nelson -8:10:47 - LMT 1884 -8:00 Vanc P%sT 1987 -8:00 Canada P%sT 2015 Mar 8 2:00 -7:00 - MST -Zone America/Creston -7:46:04 - LMT 1884 - -7:00 - MST 1916 Oct 1 - -8:00 - PST 1918 Jun 2 - -7:00 - MST +# For Creston see America/Phoenix. # Northwest Territories, Nunavut, Yukon @@ -2952,64 +2833,61 @@ Zone America/Tijuana -7:48:04 - LMT 1922 Jan 1 0:11:56 # Anguilla # Antigua and Barbuda -# See America/Port_of_Spain. +# See America/Puerto_Rico. 
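
Several Caribbean zones in this patch collapse into Links on America/Puerto_Rico (the full Link block appears further below); since the 1970 cutoff they have all kept AST with no DST, which is easy to spot-check. A sketch assuming a JDK with this tzdata; CaribbeanCheck is an illustrative name:

    import java.time.Instant;
    import java.time.ZoneId;

    public class CaribbeanCheck {
        public static void main(String[] args) {
            var rules = ZoneId.of("America/Port_of_Spain").getRules();
            // AST (-04:00) at the 1970 cutoff and today, with no transitions between.
            System.out.println(rules.getOffset(Instant.parse("1970-01-01T00:00:00Z")));
            System.out.println(rules.getOffset(Instant.now()));
        }
    }
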
-# Bahamas -# -# For 1899 Milne gives -5:09:29.5; round that. -# -# From P Chan (2020-11-27, corrected on 2020-12-02): -# There were two periods of DST observed in 1942-1945: 1942-05-01 -# midnight to 1944-12-31 midnight and 1945-02-01 to 1945-10-17 midnight. -# "midnight" should mean 24:00 from the context. -# -# War Time Order 1942 [1942-05-01] and War Time (No. 2) Order 1942 [1942-09-29] -# Appendix to the Statutes of 7 George VI. and the Year 1942. p 34, 43 -# https://books.google.com/books?id=5rlNAQAAIAAJ&pg=RA3-PA34 -# https://books.google.com/books?id=5rlNAQAAIAAJ&pg=RA3-PA43 -# -# War Time Order 1943 [1943-03-31] and War Time Order 1944 [1943-12-29] -# Appendix to the Statutes of 8 George VI. and the Year 1943. p 9-10, 28-29 -# https://books.google.com/books?id=5rlNAQAAIAAJ&pg=RA4-PA9 -# https://books.google.com/books?id=5rlNAQAAIAAJ&pg=RA4-PA28 -# -# War Time Order 1945 [1945-01-31] and the Order which revoke War Time Order -# 1945 [1945-10-16] Appendix to the Statutes of 9 George VI. and the Year -# 1945. p 160, 247-248 -# https://books.google.com/books?id=5rlNAQAAIAAJ&pg=RA6-PA160 -# https://books.google.com/books?id=5rlNAQAAIAAJ&pg=RA6-PA247 -# -# From Sue Williams (2006-12-07): -# The Bahamas announced about a month ago that they plan to change their DST -# rules to sync with the U.S. starting in 2007.... -# http://www.jonesbahamas.com/?c=45&a=10412 +# The Bahamas +# See America/Toronto. -# Rule NAME FROM TO - IN ON AT SAVE LETTER/S -Rule Bahamas 1942 only - May 1 24:00 1:00 W -Rule Bahamas 1944 only - Dec 31 24:00 0 S -Rule Bahamas 1945 only - Feb 1 0:00 1:00 W -Rule Bahamas 1945 only - Aug 14 23:00u 1:00 P # Peace -Rule Bahamas 1945 only - Oct 17 24:00 0 S -Rule Bahamas 1964 1975 - Oct lastSun 2:00 0 S -Rule Bahamas 1964 1975 - Apr lastSun 2:00 1:00 D -# Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone America/Nassau -5:09:30 - LMT 1912 Mar 2 - -5:00 Bahamas E%sT 1976 - -5:00 US E%sT # Barbados # For 1899 Milne gives -3:58:29.2; round that. +# From P Chan (2020-12-09 and 2020-12-11): +# Standard time of GMT-4 was adopted in 1911. +# Definition of Time Act, 1911 (1911-7) [1911-08-28] +# 1912, Laws of Barbados (5 v.), OCLC Number: 919801291, Vol. 4, Image No. 522 +# 1944, Laws of Barbados (5 v.), OCLC Number: 84548697, Vol. 4, Image No. 122 +# http://llmc.com/browse.aspx?type=2&coll=85&div=297 +# +# DST was observed in 1942-44. +# Defence (Daylight Saving) Regulations, 1942, 1942-04-13 +# Defence (Daylight Saving) (Repeal) Regulations, 1942, 1942-08-22 +# Defence (Daylight Saving) Regulations, 1943, 1943-04-16 +# Defence (Daylight Saving) (Repeal) Regulations, 1943, 1943-09-01 +# Defence (Daylight Saving) Regulations, 1944, 1944-03-21 +# [Defence (Daylight Saving) (Amendment) Regulations 1944, 1944-03-28] +# Defence (Daylight Saving) (Repeal) Regulations, 1944, 1944-08-30 +# +# 1914-, Subsidiary Legis., Annual Vols. OCLC Number: 226290591 +# 1942: Image Nos. 527-528, 555-556 +# 1943: Image Nos. 178-179, 198 +# 1944: Image Nos. 113-115, 129 +# http://llmc.com/titledescfull.aspx?type=2&coll=85&div=297&set=98437 +# +# From Tim Parenti (2021-02-20): +# The transitions below are derived from P Chan's sources, except that the 1977 +# through 1980 transitions are from Shanks & Pottenger since we have no better +# data there. Of particular note, the 1944 DST regulation only advanced the +# time to "exactly three and a half hours later than Greenwich mean time", as +# opposed to "three hours" in the 1942 and 1943 regulations. 
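
The 1944 half-hour advance noted above is what produces the 0:30 SAVE and the AST/-0330 FORMAT in the Barbados lines just below; the offset arithmetic can be spelled out in a few lines (HalfHourDst is an illustrative name, not part of the patch):

    import java.time.ZoneOffset;

    public class HalfHourDst {
        public static void main(String[] args) {
            ZoneOffset standard = ZoneOffset.ofHoursMinutes(-4, 0);
            // Adding the 1944 half-hour SAVE to -4:00 standard time yields -3:30,
            // which the zone line renders literally as "-0330".
            ZoneOffset dst = ZoneOffset.ofTotalSeconds(standard.getTotalSeconds() + 30 * 60);
            System.out.println(dst); // -03:30
        }
    }
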
+ # Rule NAME FROM TO - IN ON AT SAVE LETTER/S +Rule Barb 1942 only - Apr 19 5:00u 1:00 D +Rule Barb 1942 only - Aug 31 6:00u 0 S +Rule Barb 1943 only - May 2 5:00u 1:00 D +Rule Barb 1943 only - Sep 5 6:00u 0 S +Rule Barb 1944 only - Apr 10 5:00u 0:30 - +Rule Barb 1944 only - Sep 10 6:00u 0 S Rule Barb 1977 only - Jun 12 2:00 1:00 D Rule Barb 1977 1978 - Oct Sun>=1 2:00 0 S Rule Barb 1978 1980 - Apr Sun>=15 2:00 1:00 D Rule Barb 1979 only - Sep 30 2:00 0 S Rule Barb 1980 only - Sep 25 2:00 0 S # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone America/Barbados -3:58:29 - LMT 1924 # Bridgetown - -3:58:29 - BMT 1932 # Bridgetown Mean Time +Zone America/Barbados -3:58:29 - LMT 1911 Aug 28 # Bridgetown + -4:00 Barb A%sT 1944 + -4:00 Barb AST/-0330 1945 -4:00 Barb A%sT # Belize @@ -3171,6 +3049,9 @@ Zone Atlantic/Bermuda -4:19:18 - LMT 1890 # Hamilton -4:00 Canada A%sT 1976 -4:00 US A%sT +# Caribbean Netherlands +# See America/Puerto_Rico. + # Cayman Is # See America/Panama. @@ -3399,7 +3280,7 @@ Zone America/Havana -5:29:28 - LMT 1890 -5:00 Cuba C%sT # Dominica -# See America/Port_of_Spain. +# See America/Puerto_Rico. # Dominican Republic @@ -3451,7 +3332,7 @@ Zone America/El_Salvador -5:56:48 - LMT 1921 # San Salvador # Guadeloupe # St Barthélemy # St Martin (French part) -# See America/Port_of_Spain. +# See America/Puerto_Rico. # Guatemala # @@ -3638,7 +3519,7 @@ Zone America/Martinique -4:04:20 - LMT 1890 # Fort-de-France -4:00 - AST # Montserrat -# See America/Port_of_Spain. +# See America/Puerto_Rico. # Nicaragua # @@ -3710,6 +3591,7 @@ Zone America/Managua -5:45:08 - LMT 1890 Zone America/Panama -5:18:08 - LMT 1890 -5:19:36 - CMT 1908 Apr 22 # Colón Mean Time -5:00 - EST +Link America/Panama America/Atikokan Link America/Panama America/Cayman # Puerto Rico @@ -3719,10 +3601,29 @@ Zone America/Puerto_Rico -4:24:25 - LMT 1899 Mar 28 12:00 # San Juan -4:00 - AST 1942 May 3 -4:00 US A%sT 1946 -4:00 - AST +Link America/Puerto_Rico America/Anguilla +Link America/Puerto_Rico America/Antigua +Link America/Puerto_Rico America/Aruba +Link America/Puerto_Rico America/Curacao +Link America/Puerto_Rico America/Blanc-Sablon # Quebec (Lower North Shore) +Link America/Puerto_Rico America/Dominica +Link America/Puerto_Rico America/Grenada +Link America/Puerto_Rico America/Guadeloupe +Link America/Puerto_Rico America/Kralendijk # Caribbean Netherlands +Link America/Puerto_Rico America/Lower_Princes # Sint Maarten +Link America/Puerto_Rico America/Marigot # St Martin (French part) +Link America/Puerto_Rico America/Montserrat +Link America/Puerto_Rico America/Port_of_Spain # Trinidad & Tobago +Link America/Puerto_Rico America/St_Barthelemy # St Barthélemy +Link America/Puerto_Rico America/St_Kitts # St Kitts & Nevis +Link America/Puerto_Rico America/St_Lucia +Link America/Puerto_Rico America/St_Thomas # Virgin Islands (US) +Link America/Puerto_Rico America/St_Vincent +Link America/Puerto_Rico America/Tortola # Virgin Islands (UK) # St Kitts-Nevis # St Lucia -# See America/Port_of_Spain. +# See America/Puerto_Rico. # St Pierre and Miquelon # There are too many St Pierres elsewhere, so we'll use 'Miquelon'. @@ -3733,7 +3634,10 @@ Zone America/Miquelon -3:44:40 - LMT 1911 May 15 # St Pierre -3:00 Canada -03/-02 # St Vincent and the Grenadines -# See America/Port_of_Spain. +# See America/Puerto_Rico. + +# Sint Maarten +# See America/Puerto_Rico. # Turks and Caicos # @@ -3804,8 +3708,8 @@ Zone America/Grand_Turk -4:44:32 - LMT 1890 -5:00 US E%sT # British Virgin Is -# Virgin Is -# See America/Port_of_Spain. 
+# US Virgin Is +# See America/Puerto_Rico. # Local Variables: diff --git a/make/data/tzdata/southamerica b/make/data/tzdata/southamerica index 566dabfadb46e94ddf3237b88199968ea2288d70..503ed65f58036589e0a49de694077ca769a25a9e 100644 --- a/make/data/tzdata/southamerica +++ b/make/data/tzdata/southamerica @@ -597,7 +597,7 @@ Zone America/Argentina/Ushuaia -4:33:12 - LMT 1894 Oct 31 -3:00 - -03 # Aruba -Link America/Curacao America/Aruba +# See America/Puerto_Rico. # Bolivia # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -1392,35 +1392,14 @@ Zone America/Bogota -4:56:16 - LMT 1884 Mar 13 # no information; probably like America/Bogota # Curaçao - -# Milne gives 4:35:46.9 for Curaçao mean time; round to nearest. -# -# From Paul Eggert (2006-03-22): -# Shanks & Pottenger say that The Bottom and Philipsburg have been at -# -4:00 since standard time was introduced on 1912-03-02; and that -# Kralendijk and Rincon used Kralendijk Mean Time (-4:33:08) from -# 1912-02-02 to 1965-01-01. The former is dubious, since S&P also say -# Saba Island has been like Curaçao. -# This all predates our 1970 cutoff, though. -# -# By July 2007 Curaçao and St Maarten are planned to become -# associated states within the Netherlands, much like Aruba; -# Bonaire, Saba and St Eustatius would become directly part of the -# Netherlands as Kingdom Islands. This won't affect their time zones -# though, as far as we know. +# See America/Puerto_Rico. # -# Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone America/Curacao -4:35:47 - LMT 1912 Feb 12 # Willemstad - -4:30 - -0430 1965 - -4:00 - AST - # From Arthur David Olson (2011-06-15): # use links for places with new iso3166 codes. # The name "Lower Prince's Quarter" is both longer than fourteen characters -# and contains an apostrophe; use "Lower_Princes" below. - -Link America/Curacao America/Lower_Princes # Sint Maarten -Link America/Curacao America/Kralendijk # Caribbean Netherlands +# and contains an apostrophe; use "Lower_Princes".... +# From Paul Eggert (2021-09-29): +# These backward-compatibility links now are in the 'northamerica' file. # Ecuador # @@ -1563,11 +1542,40 @@ Zone America/Cayenne -3:29:20 - LMT 1911 Jul -3:00 - -03 # Guyana + +# From P Chan (2020-11-27): +# https://books.google.com/books?id=5-5CAQAAMAAJ&pg=SA1-PA547 +# The Official Gazette of British Guiana. (New Series.) Vol. XL. July to +# December, 1915, p 1547, lists as several notes: +# "Local Mean Time 3 hours 52 mins. 39 secs. slow of Greenwich Mean Time +# (Georgetown.) From 1st August, 1911, British Guiana Standard Mean Time 4 +# hours slow of Greenwich Mean Time, by notice in Official Gazette on 1st July, +# 1911. From 1st March, 1915, British Guiana Standard Mean Time 3 hours 45 +# mins. 0 secs. slow of Greenwich Mean Time, by notice in Official Gazette on +# 23rd January, 1915." +# +# https://parliament.gov.gy/documents/acts/10923-act_no._27_of_1975_-_interpretation_and_general_clauses_(amendment)_act_1975.pdf +# Interpretation and general clauses (Amendment) Act 1975 (Act No. 27 of 1975) +# [dated 1975-07-31] +# "This Act...shall come into operation on 1st August, 1975." +# "...where any expression of time occurs...the time referred to shall signify +# the standard time of Guyana which shall be three hours behind Greenwich Mean +# Time." +# +# Circular No. 
10/1992 dated 1992-03-20 +# https://dps.gov.gy/wp-content/uploads/2018/12/1992-03-20-Circular-010.pdf +# "...cabinet has decided that with effect from Sunday 29th March, 1992, Guyana +# Standard Time would be re-established at 01:00 hours by adjusting the hands +# of the clock back to 24:00 hours." +# Legislated in the Interpretation and general clauses (Amendment) Act 1992 +# (Act No. 6 of 1992) [passed 1992-03-27, published 1992-04-18] +# https://parliament.gov.gy/documents/acts/5885-6_of_1992_interpretation_and_general_clauses_(amendment)_act_1992.pdf + # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone America/Guyana -3:52:40 - LMT 1915 Mar # Georgetown - -3:45 - -0345 1975 Jul 31 - -3:00 - -03 1991 -# IATA SSIM (1996-06) says -4:00. Assume a 1991 switch. +Zone America/Guyana -3:52:39 - LMT 1911 Aug 1 # Georgetown + -4:00 - -04 1915 Mar 1 + -3:45 - -0345 1975 Aug 1 + -3:00 - -03 1992 Mar 29 1:00 -4:00 - -04 # Paraguay @@ -1708,24 +1716,7 @@ Zone America/Paramaribo -3:40:40 - LMT 1911 -3:00 - -03 # Trinidad and Tobago -# Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone America/Port_of_Spain -4:06:04 - LMT 1912 Mar 2 - -4:00 - AST - -# These all agree with Trinidad and Tobago since 1970. -Link America/Port_of_Spain America/Anguilla -Link America/Port_of_Spain America/Antigua -Link America/Port_of_Spain America/Dominica -Link America/Port_of_Spain America/Grenada -Link America/Port_of_Spain America/Guadeloupe -Link America/Port_of_Spain America/Marigot # St Martin (French part) -Link America/Port_of_Spain America/Montserrat -Link America/Port_of_Spain America/St_Barthelemy # St Barthélemy -Link America/Port_of_Spain America/St_Kitts # St Kitts & Nevis -Link America/Port_of_Spain America/St_Lucia -Link America/Port_of_Spain America/St_Thomas # Virgin Islands (US) -Link America/Port_of_Spain America/St_Vincent -Link America/Port_of_Spain America/Tortola # Virgin Islands (UK) +# See America/Puerto_Rico. # Uruguay # From Paul Eggert (1993-11-18): diff --git a/make/data/tzdata/zone.tab b/make/data/tzdata/zone.tab index 28db0745e08be412d2b4fca9e2d3517907e29f2c..0420a6934c92b2bda79ae9487a9060bce72ac628 100644 --- a/make/data/tzdata/zone.tab +++ b/make/data/tzdata/zone.tab @@ -26,7 +26,7 @@ # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. # -# From Paul Eggert (2018-06-27): +# From Paul Eggert (2021-09-20): # This file is intended as a backward-compatibility aid for older programs. # New programs should use zone1970.tab. This file is like zone1970.tab (see # zone1970.tab's comments), but with the following additional restrictions: @@ -39,6 +39,9 @@ # clocks have agreed since 1970; this is a narrower definition than # that of zone1970.tab. # +# Unlike zone1970.tab, a row's third column can be a Link from +# 'backward' instead of a Zone. +# # This table is intended as an aid for users, to help them select timezones # appropriate for their practical needs. It is not intended to take or # endorse any position on legal or territorial claims. 
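
As the header comments above describe, zone.tab rows are tab-separated: ISO 3166 country code, ISO 6709 coordinates, TZ name, and an optional comment, as in the KI and TO rows updated below. A minimal parsing sketch; ZoneTabRow and the hard-coded sample row are illustrative only:

    public class ZoneTabRow {
        public static void main(String[] args) {
            String line = "KI\t-0247-17143\tPacific/Kanton\tPhoenix Islands";
            if (!line.startsWith("#")) {            // comment lines begin with '#'
                String[] f = line.split("\t");
                System.out.println("country=" + f[0] + " coords=" + f[1] + " tz=" + f[2]
                        + (f.length > 3 ? " comment=" + f[3] : ""));
            }
        }
    }
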
@@ -251,7 +254,7 @@ KE -0117+03649 Africa/Nairobi KG +4254+07436 Asia/Bishkek KH +1133+10455 Asia/Phnom_Penh KI +0125+17300 Pacific/Tarawa Gilbert Islands -KI -0308-17105 Pacific/Enderbury Phoenix Islands +KI -0247-17143 Pacific/Kanton Phoenix Islands KI +0152-15720 Pacific/Kiritimati Line Islands KM -1141+04316 Indian/Comoro KN +1718-06243 America/St_Kitts @@ -414,7 +417,7 @@ TK -0922-17114 Pacific/Fakaofo TL -0833+12535 Asia/Dili TM +3757+05823 Asia/Ashgabat TN +3648+01011 Africa/Tunis -TO -2110-17510 Pacific/Tongatapu +TO -210800-1751200 Pacific/Tongatapu TR +4101+02858 Europe/Istanbul TT +1039-06131 America/Port_of_Spain TV -0831+17913 Pacific/Funafuti diff --git a/make/devkit/createJMHBundle.sh b/make/devkit/createJMHBundle.sh index 9e0b9c06e4f1fc421f124845751215e44b39a5cd..848e8e1e432142f9ad121e28597553cc37382036 100644 --- a/make/devkit/createJMHBundle.sh +++ b/make/devkit/createJMHBundle.sh @@ -1,6 +1,6 @@ #!/bin/bash -e # -# Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -26,7 +26,7 @@ # Create a bundle in the build directory, containing what's needed to # build and run JMH microbenchmarks from the OpenJDK build. -JMH_VERSION=1.28 +JMH_VERSION=1.32 COMMONS_MATH3_VERSION=3.2 JOPT_SIMPLE_VERSION=4.6 diff --git a/make/devkit/createMacosxDevkit.sh b/make/devkit/createMacosxDevkit.sh index cd1058233666b0fdb4055e40c0eaeae2bc47b700..84fbb3a8098cc4870cefb84597a31304351fce80 100644 --- a/make/devkit/createMacosxDevkit.sh +++ b/make/devkit/createMacosxDevkit.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -102,7 +102,7 @@ EXCLUDE_DIRS=" \ " for ex in $EXCLUDE_DIRS; do - EXCLUDE_ARGS+="--exclude=$ex " + EXCLUDE_ARGS="$EXCLUDE_ARGS --exclude=$ex" done echo "Copying Xcode.app..." diff --git a/make/hotspot/HotspotCommon.gmk b/make/hotspot/HotspotCommon.gmk index 239478cae768a98527c4abdcf6de61d2866f4ef3..3aacdf30c4cc69a3142606e506056d26d3a28d37 100644 --- a/make/hotspot/HotspotCommon.gmk +++ b/make/hotspot/HotspotCommon.gmk @@ -34,13 +34,7 @@ JVM_SUPPORT_DIR := $(JVM_VARIANT_OUTPUTDIR)/support DTRACE_SUPPORT_DIR := $(JVM_SUPPORT_DIR)/dtrace LIB_OUTPUTDIR := $(call FindLibDirForModule, java.base) -ifneq ($(filter client minimal, $(JVM_VARIANT)), ) - JVM_VARIANT_SUBDIR := $(JVM_VARIANT) -else - # Use 'server' as default target directory name for all other variants. 
- JVM_VARIANT_SUBDIR := server -endif -JVM_LIB_OUTPUTDIR := $(LIB_OUTPUTDIR)/$(JVM_VARIANT_SUBDIR) +JVM_LIB_OUTPUTDIR := $(LIB_OUTPUTDIR)/$(JVM_VARIANT) ################################################################################ diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk index ba8165c2ff036d979f3dbc8c6a742c2cdcefe0e5..f9f1bb3868879e9fe5a95fa551e142ca7a6f4416 100644 --- a/make/hotspot/gensrc/GensrcAdlc.gmk +++ b/make/hotspot/gensrc/GensrcAdlc.gmk @@ -155,6 +155,7 @@ ifeq ($(call check-jvm-feature, compiler2), true) ifeq ($(call check-jvm-feature, zgc), true) AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/z/z_$(HOTSPOT_TARGET_CPU).ad \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/z/z_$(HOTSPOT_TARGET_CPU_ARCH).ad \ ))) endif diff --git a/make/hotspot/lib/CompileGtest.gmk b/make/hotspot/lib/CompileGtest.gmk index 03c4de783cd9445ac07de15193124167d2604198..cb2bbccc1686aa4a28a8f8557523eee75d0b80ca 100644 --- a/make/hotspot/lib/CompileGtest.gmk +++ b/make/hotspot/lib/CompileGtest.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -101,7 +101,7 @@ $(eval $(call SetupJdkLibrary, BUILD_GTEST_LIBJVM, \ CFLAGS_windows := -EHsc, \ CFLAGS_macosx := -DGTEST_OS_MAC=1, \ DISABLED_WARNINGS_gcc := $(DISABLED_WARNINGS_gcc) \ - undef, \ + undef stringop-overflow, \ DISABLED_WARNINGS_clang := $(DISABLED_WARNINGS_clang) \ undef switch format-nonliteral tautological-undefined-compare \ self-assign-overloaded, \ diff --git a/make/hotspot/lib/JvmFeatures.gmk b/make/hotspot/lib/JvmFeatures.gmk index 1f16d0a9118be175f652b0429195d0806932ceb7..ab555f9d82d4e7cb01e271fa0807c7673a544293 100644 --- a/make/hotspot/lib/JvmFeatures.gmk +++ b/make/hotspot/lib/JvmFeatures.gmk @@ -58,6 +58,10 @@ else JVM_EXCLUDE_PATTERNS += /zero/ endif +ifeq ($(JVM_VARIANT), core) + JVM_CFLAGS_FEATURES += -DVMTYPE=\"Core\" +endif + ifeq ($(JVM_VARIANT), custom) JVM_CFLAGS_FEATURES += -DVMTYPE=\"Custom\" endif @@ -116,8 +120,12 @@ endif ifneq ($(call check-jvm-feature, cds), true) JVM_CFLAGS_FEATURES += -DINCLUDE_CDS=0 JVM_EXCLUDE_FILES += \ + cdsProtectionDomain.cpp \ classLoaderDataShared.cpp \ classLoaderExt.cpp \ + dumpTimeSharedClassInfo.cpp \ + lambdaProxyClassDictionary.cpp \ + runTimeSharedClassInfo.cpp \ systemDictionaryShared.cpp JVM_EXCLUDE_PATTERNS += cds/ endif @@ -183,7 +191,6 @@ ifeq ($(call check-jvm-feature, opt-size), true) assembler.cpp \ barrierSet.cpp \ basicLock.cpp \ - biasedLocking.cpp \ bytecode.cpp \ bytecodeInterpreter.cpp \ c1_Compilation.cpp \ diff --git a/make/hotspot/lib/JvmOverrideFiles.gmk b/make/hotspot/lib/JvmOverrideFiles.gmk index a9f8a0e54edee0f85f9e110ccf78419d6e33207d..d53694fb2dfd4c6bdb040f6b6a9f43555b5e3cc8 100644 --- a/make/hotspot/lib/JvmOverrideFiles.gmk +++ b/make/hotspot/lib/JvmOverrideFiles.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2013, 2021, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it @@ -36,6 +36,7 @@ ifeq ($(TOOLCHAIN_TYPE), gcc) BUILD_LIBJVM_assembler_x86.cpp_CXXFLAGS := -Wno-maybe-uninitialized BUILD_LIBJVM_cardTableBarrierSetAssembler_x86.cpp_CXXFLAGS := -Wno-maybe-uninitialized BUILD_LIBJVM_interp_masm_x86.cpp_CXXFLAGS := -Wno-uninitialized + BUILD_LIBJVM_ad_$(HOTSPOT_TARGET_CPU_ARCH).cpp_CXXFLAGS := -Wno-nonnull ifeq ($(DEBUG_LEVEL), release) # Need extra inlining to collapse shared marking code into the hot marking loop BUILD_LIBJVM_shenandoahMark.cpp_CXXFLAGS := --param inline-unit-growth=1000 diff --git a/make/ide/idea/jdk/template/misc.xml b/make/ide/idea/jdk/template/misc.xml index 669c382327ab340a2e6d890e12353b9ec474ffd0..486001253483a30eb4ccebc24b97d208793cd5d7 100644 --- a/make/ide/idea/jdk/template/misc.xml +++ b/make/ide/idea/jdk/template/misc.xml @@ -12,7 +12,7 @@ - + diff --git a/make/jdk/src/classes/build/tools/classlist/SortClasslist.java b/make/jdk/src/classes/build/tools/classlist/SortClasslist.java new file mode 100644 index 0000000000000000000000000000000000000000..a85025ee85b858e6fe151c21379497174d89f780 --- /dev/null +++ b/make/jdk/src/classes/build/tools/classlist/SortClasslist.java @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * This application is meant to be run to create a classlist file representing + * common use. + * + * The classlist is produced by adding -XX:DumpLoadedClassList=classlist + */ +package build.tools.classlist; + +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.regex.Pattern; +import java.util.regex.Matcher; +import java.util.Scanner; + +/** + * The classlist generated by build.tools.classlist.HelloClasslist + * may have non-deterministic contents, affected by Java thread execution order. + * SortClasslist sorts the file to make the JDK image's contents more deterministic. 
+ */ +public class SortClasslist { + public static void main(String args[]) throws FileNotFoundException { + ArrayList<String> classes = new ArrayList<>(); + ArrayList<String> lambdas = new ArrayList<>(); + + FileInputStream fis = new FileInputStream(args[0]); + Scanner scanner = new Scanner(fis); + Pattern p = Pattern.compile("^(.*)[ ]+id:[ ]+([0-9]+)$"); + while (scanner.hasNextLine()) { + String line = scanner.nextLine(); + Matcher m = p.matcher(line); + if (line.startsWith("#")) { + // Comments -- print them first without sorting. These appear only at the top + // of the file. + System.out.println(line); + } else if (line.startsWith("@")) { + // @lambda-form-invoker, @lambda-proxy, etc. + lambdas.add(line); + } else if (m.find()) { + // We found a pattern like this: + // + // java/lang/Object id: 0 + // + // This is a class used by one of the three builtin class loaders + // (boot/platform/app). Since the default classlist does not support unregistered + // classes, the ID is unused. Let's omit the ID, as it may be non-deterministic. + String className = m.group(1); // matches the (.*) part of the pattern. + classes.add(className); + } else { + // HelloClasslist should not load classes in custom class loaders, or else + // we might end up with output like this: + // + // SomeClass id: 123 super: 0 source: foo.jar + // + // Such classes won't be usable for common applications, so they should + // not be included in the JDK's default classlist. + System.err.println("Unexpected line: " + line); + System.err.println("The default classlist should not contain unregistered classes"); + System.exit(1); + } + } + + Collections.sort(classes); + Collections.sort(lambdas); + + for (String s : classes) { + System.out.println(s); + } + for (String s : lambdas) { + System.out.println(s); + } + } +} diff --git a/make/jdk/src/classes/build/tools/generatecacerts/GenerateCacerts.java b/make/jdk/src/classes/build/tools/generatecacerts/GenerateCacerts.java index aa769b47b78dcc1d483961d219caea809efe6b4a..d7c809090a1b9916bc1c9cae40bca1791fa02927 100644 --- a/make/jdk/src/classes/build/tools/generatecacerts/GenerateCacerts.java +++ b/make/jdk/src/classes/build/tools/generatecacerts/GenerateCacerts.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* * This code is free software; you can redistribute it and/or modify it @@ -25,21 +25,15 @@ package build.tools.generatecacerts; -import java.io.DataOutputStream; import java.io.FileOutputStream; -import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.io.UnsupportedEncodingException; import java.nio.file.Files; import java.nio.file.Path; -import java.security.DigestOutputStream; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; -import java.security.cert.CertificateException; +import java.security.KeyStore; import java.security.cert.CertificateFactory; import java.security.cert.X509Certificate; -import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.stream.Collectors; @@ -51,33 +45,16 @@ import java.util.stream.Collectors; public class GenerateCacerts { public static void main(String[] args) throws Exception { try (FileOutputStream fos = new FileOutputStream(args[1])) { - store(args[0], fos, "changeit".toCharArray()); + store(args[0], fos); } } - // The following code are copied from JavaKeyStore.java. + public static void store(String dir, OutputStream stream) throws Exception { - private static final int MAGIC = 0xfeedfeed; - private static final int VERSION_2 = 0x02; + CertificateFactory cf = CertificateFactory.getInstance("X.509"); - // This method is a simplified version of JavaKeyStore::engineStore. - // A new "dir" argument is added. All cert names in "dir" is collected into - // a sorted array. Each cert is stored with a creation date set to its - // notBefore value. Thus the output is determined as long as the certs - // are the same. - public static void store(String dir, OutputStream stream, char[] password) - throws IOException, NoSuchAlgorithmException, CertificateException - { - byte[] encoded; // the certificate encoding - CertificateFactory cf = CertificateFactory.getInstance("X509"); - - MessageDigest md = getPreKeyedHash(password); - DataOutputStream dos - = new DataOutputStream(new DigestOutputStream(stream, md)); - - dos.writeInt(MAGIC); - // always write the latest version - dos.writeInt(VERSION_2); + KeyStore ks = KeyStore.getInstance("pkcs12"); + ks.load(null, null); // All file names in dir sorted. // README is excluded. Name starting with "." excluded. @@ -88,61 +65,15 @@ public class GenerateCacerts { entries.sort(String::compareTo); - dos.writeInt(entries.size()); - for (String entry : entries) { - String alias = entry + " [jdk]"; X509Certificate cert; try (InputStream fis = Files.newInputStream(Path.of(dir, entry))) { cert = (X509Certificate) cf.generateCertificate(fis); } - - dos.writeInt(2); - - // Write the alias - dos.writeUTF(alias); - - // Write the (entry creation) date, which is notBefore of the cert - dos.writeLong(cert.getNotBefore().getTime()); - - // Write the trusted certificate - encoded = cert.getEncoded(); - dos.writeUTF(cert.getType()); - dos.writeInt(encoded.length); - dos.write(encoded); + ks.setCertificateEntry(alias, cert); } - /* - * Write the keyed hash which is used to detect tampering with - * the keystore (such as deleting or modifying key or - * certificate entries). 
- */ - byte[] digest = md.digest(); - - dos.write(digest); - dos.flush(); - } - - private static MessageDigest getPreKeyedHash(char[] password) - throws NoSuchAlgorithmException, UnsupportedEncodingException - { - - MessageDigest md = MessageDigest.getInstance("SHA"); - byte[] passwdBytes = convertToBytes(password); - md.update(passwdBytes); - Arrays.fill(passwdBytes, (byte) 0x00); - md.update("Mighty Aphrodite".getBytes("UTF8")); - return md; - } - - private static byte[] convertToBytes(char[] password) { - int i, j; - byte[] passwdBytes = new byte[password.length * 2]; - for (i=0, j=0; i<password.length; i++) { - passwdBytes[j++] = (byte)(password[i] >> 8); - passwdBytes[j++] = (byte)password[i]; - } - return passwdBytes; + ks.store(stream, null); } } diff --git a/make/jdk/src/classes/build/tools/generatelsrequivmaps/EquivMapsGenerator.java b/make/jdk/src/classes/build/tools/generatelsrequivmaps/EquivMapsGenerator.java index 7d1b7b261d587f67e6d88b3aff5d4b463ac94e5e..af2844b29e7efd5950923da616d686cda039347a 100644 --- a/make/jdk/src/classes/build/tools/generatelsrequivmaps/EquivMapsGenerator.java +++ b/make/jdk/src/classes/build/tools/generatelsrequivmaps/EquivMapsGenerator.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,7 +27,6 @@ package build.tools.generatelsrequivmaps; import java.io.BufferedWriter; import java.io.IOException; -import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Paths; import java.time.ZoneId; @@ -79,8 +78,7 @@ public class EquivMapsGenerator { String preferred = null; String prefix = null; - for (String line : Files.readAllLines(Paths.get(filename), - Charset.forName("UTF-8"))) { + for (String line : Files.readAllLines(Paths.get(filename))) { line = line.toLowerCase(Locale.ROOT); int index = line.indexOf(' ') + 1; if (line.startsWith("file-date:")) { diff --git a/make/jdk/src/classes/build/tools/jigsaw/GenGraphs.java b/make/jdk/src/classes/build/tools/jigsaw/GenGraphs.java index 16cea17bbc858142cbe331b31d6bae006e30e8d3..3bfd1f7764d88da1f83dbdc74f37b7192830a805 100644 --- a/make/jdk/src/classes/build/tools/jigsaw/GenGraphs.java +++ b/make/jdk/src/classes/build/tools/jigsaw/GenGraphs.java @@ -36,6 +36,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -115,102 +116,112 @@ public class GenGraphs { /** * Custom dot file attributes.
*/ - static class ModuleGraphAttributes implements ModuleDotGraph.Attributes { - static Map<String, String> DEFAULT_ATTRIBUTES = Map.of( - "ranksep", "0.6", - "fontsize", "12", - "fontcolor", BLACK, - "fontname", "DejaVuSans", - "arrowsize", "1", - "arrowwidth", "2", - "arrowcolor", DARK_GRAY, - // custom - "requiresMandatedColor", LIGHT_GRAY, - "javaSubgraphColor", ORANGE, - "jdkSubgraphColor", BLUE - ); - - final Map<String, Integer> weights = new HashMap<>(); - final List<Set<String>> ranks = new ArrayList<>(); - final Map<String, String> attrs; - ModuleGraphAttributes(Map<String, String> attrs) { - int h = 1000; - weight("java.se", "java.sql.rowset", h * 10); - weight("java.sql.rowset", "java.sql", h * 10); - weight("java.sql", "java.xml", h * 10); - weight("java.xml", "java.base", h * 10); - - ranks.add(Set.of("java.logging", "java.scripting", "java.xml")); - ranks.add(Set.of("java.sql")); - ranks.add(Set.of("java.transaction.xa")); - ranks.add(Set.of("java.compiler", "java.instrument")); - ranks.add(Set.of("java.desktop", "java.management")); - - this.attrs = attrs; - } + static class ModuleGraphAttributes extends ModuleDotGraph.DotGraphAttributes { + final Properties attrs; + final Map<String, Integer> weights; ModuleGraphAttributes() { - this(DEFAULT_ATTRIBUTES); - } + this(new Properties()); + }; ModuleGraphAttributes(Properties props) { - this(toAttributes(props)); + this.attrs = props; + this.weights = initWeights(props); + } + + @Override + public double nodeSep() { + String v = attrs.getProperty("nodesep"); + return v != null ? Double.valueOf(v) : super.nodeSep(); } @Override public double rankSep() { - return Double.valueOf(attrs.get("ranksep")); + String v = attrs.getProperty("ranksep"); + return v != null ? Double.valueOf(v) : super.rankSep(); } @Override public int fontSize() { - return Integer.valueOf(attrs.get("fontsize")); + String v = attrs.getProperty("fontsize"); + return v != null ? Integer.valueOf(v) : super.fontSize(); } @Override public String fontName() { - return attrs.get("fontname"); + String v = attrs.getProperty("fontname"); + return v != null ? v : super.fontName(); } @Override public String fontColor() { - return attrs.get("fontcolor"); + String v = attrs.getProperty("fontcolor"); + return v != null ? v : super.fontColor(); } @Override public int arrowSize() { - return Integer.valueOf(attrs.get("arrowsize")); + String v = attrs.getProperty("arrowsize"); + return v != null ? Integer.valueOf(v) : super.arrowSize(); } @Override public int arrowWidth() { - return Integer.valueOf(attrs.get("arrowwidth")); + String v = attrs.getProperty("arrowwidth"); + return v != null ? Integer.valueOf(v) : super.arrowWidth(); } @Override public String arrowColor() { - return attrs.get("arrowcolor"); + String v = attrs.getProperty("arrowcolor"); + return v != null ? v : super.arrowColor(); } @Override public List<Set<String>> ranks() { - return ranks; + return attrs.stringPropertyNames().stream() + .filter(k -> k.startsWith("ranks.")) + .sorted() + .map(k -> Arrays.stream(attrs.getProperty(k).split(",")) + .collect(Collectors.toSet())) + .toList(); } @Override public String requiresMandatedColor() { - return attrs.get("requiresMandatedColor"); + String v = attrs.getProperty("requiresMandatedColor"); + return v != null ? v : super.requiresMandatedColor(); } @Override public String javaSubgraphColor() { - return attrs.get("javaSubgraphColor"); + String v = attrs.getProperty("javaSubgraphColor"); + return v != null ?
v : super.javaSubgraphColor(); } @Override public String jdkSubgraphColor() { - return attrs.get("jdkSubgraphColor"); + String v = attrs.getProperty("jdkSubgraphColor"); + return v != null ? v : super.jdkSubgraphColor(); + } + + @Override + public String nodeMargin() { + String v = attrs.getProperty("node-margin"); + return v != null ? v : super.nodeMargin(); } + @Override + public String requiresStyle() { + String v = attrs.getProperty("requiresStyle"); + return v != null ? v : super.requiresStyle(); + }; + + @Override + public String requiresTransitiveStyle() { + String v = attrs.getProperty("requiresTransitiveStyle"); + return v != null ? v : super.requiresTransitiveStyle(); + }; + @Override public int weightOf(String s, String t) { int w = weights.getOrDefault(s + ":" + t, 1); @@ -221,14 +232,25 @@ public class GenGraphs { return 1; } - public void weight(String s, String t, int w) { - weights.put(s + ":" + t, w); - } - - static Map<String, String> toAttributes(Properties props) { - return DEFAULT_ATTRIBUTES.keySet().stream() - .collect(Collectors.toMap(Function.identity(), - k -> props.getProperty(k, DEFAULT_ATTRIBUTES.get(k)))); + /* + * Create a map of <mn>:<dep> with a weight trying to line up + * the modules in the weights property in the specified order. + */ + public static Map<String, Integer> initWeights(Properties props) { + String[] modules = props.getProperty("weights", "").split(","); + int len = modules.length; + if (len == 0) return Map.of(); + + Map<String, Integer> weights = new HashMap<>(); + String mn = modules[0]; + int w = 10000; + for (int i = 1; i < len; i++) { + String dep = modules[i]; + weights.put(mn + ":" + dep, w); + mn = dep; + } + weights.put(mn + ":java.base", w); + return weights; } }
diff --git a/make/jdk/src/classes/build/tools/jigsaw/javadoc-graphs.properties b/make/jdk/src/classes/build/tools/jigsaw/javadoc-graphs.properties index 75348c3f7be07dab1d70a4e8d5240572de8958bd..c6d2a67a635aafb6319b72be4142b332465369a1 100644 --- a/make/jdk/src/classes/build/tools/jigsaw/javadoc-graphs.properties +++ b/make/jdk/src/classes/build/tools/jigsaw/javadoc-graphs.properties @@ -1,2 +1,35 @@ +# Configuration file for build.tools.jigsaw.GenGraphs + +nodesep=.5 +node-margin=.2,.2 +ranksep=0.6 +fontsize=12 +fontcolor=#000000 +fontname=DejaVuSans +arrowsize=1 +arrowwidth=2 + +# requires edge: gray arrowcolor=#999999 -requiresMandatedColor=#999999 + +# requires mandated java.base edge: light gray +requiresMandatedColor=#dddddd + +requiresTransitiveStyle= +requiresStyle=dashed + +# java.* modules: orange +javaSubgraphColor=#e76f00 + +# jdk.* modules: blue +jdkSubgraphColor=#437291 + +# configure the group of modules in the same rank +ranks.1=java.logging,java.scripting,java.xml +ranks.2=java.sql +ranks.4=java.compiler,java.instrument +ranks.5=java.desktop,java.management + +# configure the edges A -> B -> C .... with the same weight +# that should get these modules lined in a straight line +weights=java.se,java.sql.rowset,java.sql,java.xml
diff --git a/make/langtools/src/classes/build/tools/symbolgenerator/CreateSymbols.java b/make/langtools/src/classes/build/tools/symbolgenerator/CreateSymbols.java index 74a817e990b1816793bf971d5cc602bb3a87765f..1f439e1c29e081bd23174c1a44f6cebb10c56841 100644 --- a/make/langtools/src/classes/build/tools/symbolgenerator/CreateSymbols.java +++ b/make/langtools/src/classes/build/tools/symbolgenerator/CreateSymbols.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -123,6 +123,7 @@ import com.sun.tools.classfile.InnerClasses_attribute; import com.sun.tools.classfile.InnerClasses_attribute.Info; import com.sun.tools.classfile.Method; import com.sun.tools.classfile.MethodParameters_attribute; +import com.sun.tools.classfile.ModuleMainClass_attribute; import com.sun.tools.classfile.ModuleResolution_attribute; import com.sun.tools.classfile.ModuleTarget_attribute; import com.sun.tools.classfile.Module_attribute; @@ -229,6 +230,7 @@ public class CreateSymbols { : null, Paths.get(ctDescriptionFile)); + stripNonExistentAnnotations(data); splitHeaders(data.classes); Map<String, Map<Character, String>> package2Version2Module = new HashMap<>(); @@ -301,6 +303,50 @@ } } + private static final String PREVIEW_FEATURE_ANNOTATION_OLD = + "Ljdk/internal/PreviewFeature;"; + private static final String PREVIEW_FEATURE_ANNOTATION_NEW = + "Ljdk/internal/javac/PreviewFeature;"; + private static final String PREVIEW_FEATURE_ANNOTATION_INTERNAL = + "Ljdk/internal/PreviewFeature+Annotation;"; + private static final String VALUE_BASED_ANNOTATION = + "Ljdk/internal/ValueBased;"; + private static final String VALUE_BASED_ANNOTATION_INTERNAL = + "Ljdk/internal/ValueBased+Annotation;"; + public static final Set<String> HARDCODED_ANNOTATIONS = new HashSet<>( + List.of("Ljdk/Profile+Annotation;", + "Lsun/Proprietary+Annotation;", + PREVIEW_FEATURE_ANNOTATION_OLD, + PREVIEW_FEATURE_ANNOTATION_NEW, + VALUE_BASED_ANNOTATION)); + + private void stripNonExistentAnnotations(LoadDescriptions data) { + Set<String> allClasses = data.classes.name2Class.keySet(); + data.modules.values().forEach(mod -> { + stripNonExistentAnnotations(allClasses, mod.header); + }); + data.classes.classes.forEach(clazz -> { + stripNonExistentAnnotations(allClasses, clazz.header); + stripNonExistentAnnotations(allClasses, clazz.fields); + stripNonExistentAnnotations(allClasses, clazz.methods); + }); + } + + private void stripNonExistentAnnotations(Set<String> allClasses, Iterable<? extends FeatureDescription> descs) { + descs.forEach(d -> stripNonExistentAnnotations(allClasses, d)); + } + + private void stripNonExistentAnnotations(Set<String> allClasses, FeatureDescription d) { + stripNonExistentAnnotations(allClasses, d.classAnnotations); + stripNonExistentAnnotations(allClasses, d.runtimeAnnotations); + } + + private void stripNonExistentAnnotations(Set<String> allClasses, List<AnnotationDescription> annotations) { + if (annotations != null) + annotations.removeIf(ann -> !HARDCODED_ANNOTATIONS.contains(ann.annotationType) && + !allClasses.contains(ann.annotationType.substring(1, ann.annotationType.length() - 1))); + } + private ZipEntry createZipEntry(String name, long timestamp) { ZipEntry ze = new ZipEntry(name); @@ -883,6 +929,12 @@ public class CreateSymbols { attributes.put(Attribute.ModuleTarget, new ModuleTarget_attribute(attrIdx, targetIdx)); } + if (header.moduleMainClass != null) { + int attrIdx = addString(cp, Attribute.ModuleMainClass); + int targetIdx = addString(cp, header.moduleMainClass); + attributes.put(Attribute.ModuleMainClass, + new ModuleMainClass_attribute(attrIdx, targetIdx)); + } int attrIdx = addString(cp, Attribute.Module); attributes.put(Attribute.Module, new Module_attribute(attrIdx, @@ -1140,17 +1192,16 @@ values.put("reflective", essentialAPI != null && !essentialAPI); } + if (VALUE_BASED_ANNOTATION.equals(annotationType)) { + //the non-public ValueBased annotation will not be available in ct.sym, +
//replace with purely synthetic javac-internal annotation: + annotationType = VALUE_BASED_ANNOTATION_INTERNAL; + } + return new Annotation(null, addString(constantPool, annotationType), createElementPairs(constantPool, values)); } - //where: - private static final String PREVIEW_FEATURE_ANNOTATION_OLD = - "Ljdk/internal/PreviewFeature;"; - private static final String PREVIEW_FEATURE_ANNOTATION_NEW = - "Ljdk/internal/javac/PreviewFeature;"; - private static final String PREVIEW_FEATURE_ANNOTATION_INTERNAL = - "Ljdk/internal/PreviewFeature+Annotation;"; private element_value_pair[] createElementPairs(List<CPInfo> constantPool, Map<String, Object> annotationAttributes) { element_value_pair[] pairs = new element_value_pair[annotationAttributes.size()]; @@ -2250,6 +2301,13 @@ chd.isSealed = true; break; } + case Attribute.ModuleMainClass: { + ModuleMainClass_attribute moduleMainClass = (ModuleMainClass_attribute) attr; + assert feature instanceof ModuleHeaderDescription; + ModuleHeaderDescription mhd = (ModuleHeaderDescription) feature; + mhd.moduleMainClass = moduleMainClass.getMainClassName(cf.constant_pool); + break; + } default: throw new IllegalStateException("Unhandled attribute: " + attrName); @@ -2687,6 +2745,7 @@ List<ProvidesDescription> provides = new ArrayList<>(); Integer moduleResolution; String moduleTarget; + String moduleMainClass; @Override public int hashCode() { @@ -2699,6 +2758,7 @@ hash = 83 * hash + Objects.hashCode(this.provides); hash = 83 * hash + Objects.hashCode(this.moduleResolution); hash = 83 * hash + Objects.hashCode(this.moduleTarget); + hash = 83 * hash + Objects.hashCode(this.moduleMainClass); return hash; } @@ -2737,6 +2797,10 @@ other.moduleResolution)) { return false; } + if (!Objects.equals(this.moduleMainClass, + other.moduleMainClass)) { + return false; + } return true; } @@ -2774,6 +2838,8 @@ output.append(" resolution " + quote(Integer.toHexString(moduleResolution), true)); + if (moduleMainClass != null) + output.append(" moduleMainClass " + quote(moduleMainClass, true)); writeAttributes(output); output.append("\n"); writeInnerClasses(output, baselineVersion, version); @@ -2818,6 +2884,8 @@ moduleResolution = Integer.parseInt(resolutionFlags, 16); } + moduleMainClass = reader.attributes.get("moduleMainClass"); + readAttributes(reader); reader.moveNext(); readInnerClasses(reader);
diff --git a/make/modules/java.base/Copy.gmk b/make/modules/java.base/Copy.gmk index dfa2a54f63af341a1e145b4569dedf2d9c0f747c..d61a274317296b60c32e75458a9b921a8a78ef34 100644 --- a/make/modules/java.base/Copy.gmk +++ b/make/modules/java.base/Copy.gmk @@ -95,16 +95,10 @@ ifeq ($(call And, $(call isTargetOs, windows) $(call isTargetCpu, x86)), true) endif DEFAULT_CFG_VARIANT ?= server -# Any variant other than server, client or minimal is represented as server in -# the cfg file. -VALID_CFG_VARIANTS := server client minimal -CFG_VARIANTS := $(filter $(VALID_CFG_VARIANTS), $(JVM_VARIANTS)) \ - $(if $(filter-out $(VALID_CFG_VARIANTS), $(JVM_VARIANTS)), server) - # Change the order to put the default variant first if present.
ORDERED_CFG_VARIANTS := \ - $(if $(filter $(DEFAULT_CFG_VARIANT), $(CFG_VARIANTS)), $(DEFAULT_CFG_VARIANT)) \ - $(filter-out $(DEFAULT_CFG_VARIANT), $(CFG_VARIANTS)) + $(if $(filter $(DEFAULT_CFG_VARIANT), $(JVM_VARIANTS)), $(DEFAULT_CFG_VARIANT)) \ + $(filter-out $(DEFAULT_CFG_VARIANT), $(JVM_VARIANTS)) JVMCFG := $(LIB_DST_DIR)/jvm.cfg diff --git a/make/modules/java.base/Lib.gmk b/make/modules/java.base/Lib.gmk index 5658ff342e53c1eb71f95a93db6cf4745fd58554..eee488607f2f691ce00256b49d9d27014222583e 100644 --- a/make/modules/java.base/Lib.gmk +++ b/make/modules/java.base/Lib.gmk @@ -156,11 +156,8 @@ ifeq ($(call isTargetOsType, unix), true) TARGETS += $(LIB_OUTPUTDIR)/$1/$(call SHARED_LIBRARY,jsig) endef - # The subdir is the same as the variant for client and minimal, for all - # others it's server. - VARIANT_SUBDIRS := $(filter client minimal, $(JVM_VARIANTS)) \ - $(if $(filter-out client minimal, $(JVM_VARIANTS)), server) - $(foreach v, $(VARIANT_SUBDIRS), $(eval $(call CreateSymlinks,$v))) + # The subdir is the same as the variant + $(foreach v, $(JVM_VARIANTS), $(eval $(call CreateSymlinks,$v))) endif ############################################################################ diff --git a/make/modules/java.desktop/Java.gmk b/make/modules/java.desktop/Java.gmk index 4b1c14a1133ff903174d280ae862f9d1a38336c7..e9f0d1fa3184ef08fa19f64851534655cbdddb73 100644 --- a/make/modules/java.desktop/Java.gmk +++ b/make/modules/java.desktop/Java.gmk @@ -23,7 +23,7 @@ # questions. # -DOCLINT += -Xdoclint:all/protected,-reference \ +DOCLINT += -Xdoclint:all/protected,-reference,-missing \ '-Xdoclint/package:java.*,javax.*' COPY += .gif .png .wav .txt .xml .css .pf CLEAN += iio-plugin.properties cursors.properties diff --git a/make/modules/java.desktop/lib/Awt2dLibraries.gmk b/make/modules/java.desktop/lib/Awt2dLibraries.gmk index 4d0c0c00dbf0ba0890a9f48ecc7c260e40b5c77d..ef7eadae20672fe31d0e94be54b46807322fe862 100644 --- a/make/modules/java.desktop/lib/Awt2dLibraries.gmk +++ b/make/modules/java.desktop/lib/Awt2dLibraries.gmk @@ -435,7 +435,7 @@ endif ifeq ($(USE_EXTERNAL_HARFBUZZ), true) LIBFONTMANAGER_EXTRA_SRC = - BUILD_LIBFONTMANAGER_FONTLIB += $(LIBHARFBUZZ_LIBS) + BUILD_LIBFONTMANAGER_FONTLIB += $(HARFBUZZ_LIBS) else LIBFONTMANAGER_EXTRA_SRC = libharfbuzz diff --git a/make/modules/jdk.httpserver/Gensrc.gmk b/make/modules/jdk.httpserver/Gensrc.gmk new file mode 100644 index 0000000000000000000000000000000000000000..6e90917db337f1b297ad9389b2dbd43ff482cd86 --- /dev/null +++ b/make/modules/jdk.httpserver/Gensrc.gmk @@ -0,0 +1,41 @@ +# +# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. Oracle designates this +# particular file as subject to the "Classpath" exception as provided +# by Oracle in the LICENSE file that accompanied this code. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). 
+# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# + +include GensrcCommonJdk.gmk +include GensrcProperties.gmk +include Modules.gmk + +################################################################################ + +# Use wildcard so as to avoid getting non-existing directories back +SIMPLESERVER_RESOURCES_DIRS := $(wildcard $(addsuffix /sun/net/httpserver/simpleserver/resources, \ + $(call FindModuleSrcDirs, jdk.httpserver))) + +$(eval $(call SetupCompileProperties, SIMPLESERVER_PROPERTIES, \ + SRC_DIRS := $(SIMPLESERVER_RESOURCES_DIRS), \ + CLASS := ListResourceBundle, \ +)) + +TARGETS += $(SIMPLESERVER_PROPERTIES) diff --git a/make/modules/jdk.javadoc/Gendata.gmk b/make/modules/jdk.javadoc/Gendata.gmk index 78432cd6a68a9f62215d8d3bd83792da00db7abe..c648df6e032f679ca85e4ace30d9163c9cf8feab 100644 --- a/make/modules/jdk.javadoc/Gendata.gmk +++ b/make/modules/jdk.javadoc/Gendata.gmk @@ -61,36 +61,54 @@ $(eval $(call SetupJavaCompilation, COMPILE_CREATE_SYMBOLS, \ $(COMPILECREATESYMBOLS_ADD_EXPORTS), \ )) -$(SUPPORT_OUTPUTDIR)/javadoc-symbols/symbols: \ +GENERATE_SYMBOLS_FROM_JDK_VERSION := 11 +JDK_JAVADOC_DIR := $(JDK_OUTPUTDIR)/modules/jdk.javadoc +ELEMENT_LISTS_PKG := jdk/javadoc/internal/doclets/toolkit/resources/releases +ELEMENT_LISTS_DIR := $(JDK_JAVADOC_DIR)/$(ELEMENT_LISTS_PKG) + +$(JDK_JAVADOC_DIR)/_element_lists.marker: \ $(COMPILE_CREATE_SYMBOLS) \ $(wildcard $(TOPDIR)/make/data/symbols/*) \ $(MODULE_INFOS) - $(RM) -r $(@D) - $(MKDIR) -p $(@D) - $(ECHO) Creating javadoc element list - $(JAVA_SMALL) $(INTERIM_LANGTOOLS_ARGS) \ - $(COMPILECREATESYMBOLS_ADD_EXPORTS) \ - -classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols_javadoc \ - build.tools.symbolgenerator.CreateSymbols \ - build-javadoc-data \ - $(CT_DATA_DESCRIPTION) \ - $(JDK_OUTPUTDIR)/modules/jdk.javadoc/jdk/javadoc/internal/doclets/toolkit/resources/releases \ - 11 - $(JAVA_SMALL) $(INTERIM_LANGTOOLS_ARGS) \ - $(COMPILECREATESYMBOLS_ADD_EXPORTS) \ - -classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols_javadoc \ - build.tools.symbolgenerator.JavadocElementList \ - $(JDK_OUTPUTDIR)/modules/jdk.javadoc/jdk/javadoc/internal/doclets/toolkit/resources/releases/element-list-$(JDK_SOURCE_TARGET_VERSION).txt \ - $(JAVADOC_MODULESOURCEPATH) \ - $(JAVADOC_MODULES) + $(call MakeTargetDir) + $(call LogInfo, Creating javadoc element lists) + $(RM) -r $(ELEMENT_LISTS_DIR) + # Generate element-list files for JDK 11 to current-1 + $(call ExecuteWithLog, $@_historic, \ + $(JAVA_SMALL) $(INTERIM_LANGTOOLS_ARGS) \ + $(COMPILECREATESYMBOLS_ADD_EXPORTS) \ + -classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols_javadoc \ + build.tools.symbolgenerator.CreateSymbols \ + build-javadoc-data \ + $(CT_DATA_DESCRIPTION) \ + $(ELEMENT_LISTS_DIR) \ + $(GENERATE_SYMBOLS_FROM_JDK_VERSION) \ + ) + # Generate element-list file for the current JDK version + $(call ExecuteWithLog, $@_current, \ + $(JAVA_SMALL) $(INTERIM_LANGTOOLS_ARGS) \ + $(COMPILECREATESYMBOLS_ADD_EXPORTS) \ + -classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols_javadoc \ + build.tools.symbolgenerator.JavadocElementList \ + $(ELEMENT_LISTS_DIR)/element-list-$(JDK_SOURCE_TARGET_VERSION).txt \ + $(JAVADOC_MODULESOURCEPATH) \ + $(JAVADOC_MODULES) \ + ) $(TOUCH) $@ -# 
Copy ct.sym to the modules libs dir -$(eval $(call SetupCopyFiles, COPY_TO_LIBS, \ - FILES := $(SUPPORT_OUTPUTDIR)/javadoc-symbols/*.txt, \ - DEST := $(JDK_OUTPUTDIR)/modules/jdk.javadoc/jdk/javadoc/internal/doclets/toolkit/resources/releases, \ -)) +################################################################################ +# Copy element-lists to interim langtools -TARGETS += $(SUPPORT_OUTPUTDIR)/javadoc-symbols/symbols +INTERIM_JDK_JAVADOC_DIR := $(BUILDTOOLS_OUTPUTDIR)/interim_langtools_modules/jdk.javadoc.interim +INTERIM_ELEMENT_LISTS_DIR := $(INTERIM_JDK_JAVADOC_DIR)/$(ELEMENT_LISTS_PKG) + +$(INTERIM_JDK_JAVADOC_DIR)/_element_lists.marker: $(JDK_JAVADOC_DIR)/_element_lists.marker + $(call MakeDir, $(INTERIM_ELEMENT_LISTS_DIR)) + $(RM) -r $(INTERIM_ELEMENT_LISTS_DIR)/* + $(CP) -R $(ELEMENT_LISTS_DIR)/* $(INTERIM_ELEMENT_LISTS_DIR)/ + $(TOUCH) $@ ################################################################################ + +TARGETS += $(JDK_JAVADOC_DIR)/_element_lists.marker \ + $(INTERIM_JDK_JAVADOC_DIR)/_element_lists.marker diff --git a/make/scripts/compare.sh b/make/scripts/compare.sh index 42886573f2c0fcc16dfe715255e22f23e296a7d2..cc05476c997e5090c860882fe72e241b1bf89531 100644 --- a/make/scripts/compare.sh +++ b/make/scripts/compare.sh @@ -356,8 +356,8 @@ compare_general_files() { " $CAT $OTHER_DIR/$f | eval "$SVG_FILTER" > $OTHER_FILE $CAT $THIS_DIR/$f | eval "$SVG_FILTER" > $THIS_FILE - elif [[ "$f" = *"/lib/classlist" ]] || [ "$SUFFIX" = "jar_contents" ]; then - # The classlist files may have some lines in random order + elif [ "$SUFFIX" = "jar_contents" ]; then + # The jar_contents files may have some lines in random order OTHER_FILE=$WORK_DIR/$f.other THIS_FILE=$WORK_DIR/$f.this $MKDIR -p $(dirname $OTHER_FILE) $(dirname $THIS_FILE) diff --git a/make/test/BuildTestLib.gmk b/make/test/BuildTestLib.gmk index dff446eed3b3c3cca8b99b60628c551898da2459..f677d255dda579611bef9f91eece0eff9e133a26 100644 --- a/make/test/BuildTestLib.gmk +++ b/make/test/BuildTestLib.gmk @@ -36,9 +36,10 @@ TEST_LIB_SUPPORT := $(SUPPORT_OUTPUTDIR)/test/lib $(eval $(call SetupJavaCompilation, BUILD_WB_JAR, \ TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK_UPGRADED), \ - SRC := $(TEST_LIB_SOURCE_DIR)/sun, \ + SRC := $(TEST_LIB_SOURCE_DIR)/sun $(TEST_LIB_SOURCE_DIR)/jdk/test/whitebox/parser, \ BIN := $(TEST_LIB_SUPPORT)/wb_classes, \ JAR := $(TEST_LIB_SUPPORT)/wb.jar, \ + DISABLED_WARNINGS := deprecation removal, \ )) TARGETS += $(BUILD_WB_JAR) @@ -50,7 +51,7 @@ $(eval $(call SetupJavaCompilation, BUILD_TEST_LIB_JAR, \ BIN := $(TEST_LIB_SUPPORT)/test-lib_classes, \ HEADERS := $(TEST_LIB_SUPPORT)/test-lib_headers, \ JAR := $(TEST_LIB_SUPPORT)/test-lib.jar, \ - DISABLED_WARNINGS := try deprecation rawtypes unchecked serial cast, \ + DISABLED_WARNINGS := try deprecation rawtypes unchecked serial cast removal, \ )) TARGETS += $(BUILD_TEST_LIB_JAR) diff --git a/make/test/JtregNativeHotspot.gmk b/make/test/JtregNativeHotspot.gmk index cfddb6fe0d96227ea81570dfea6cf86a383b2924..58390110251d4e4884e3b3c164901a7b0b0fb23d 100644 --- a/make/test/JtregNativeHotspot.gmk +++ b/make/test/JtregNativeHotspot.gmk @@ -863,7 +863,7 @@ ifeq ($(call isTargetOs, linux), true) BUILD_HOTSPOT_JTREG_EXECUTABLES_LIBS_exeFPRegs := -ldl BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libAsyncGetCallTraceTest := -ldl else - BUILD_HOTSPOT_JTREG_EXCLUDE += libtest-rw.c libtest-rwx.c libTestJNI.c \ + BUILD_HOTSPOT_JTREG_EXCLUDE += libtest-rw.c libtest-rwx.c \ exeinvoke.c exestack-gap.c exestack-tls.c libAsyncGetCallTraceTest.cpp endif @@ -871,7 
+871,7 @@ BUILD_HOTSPOT_JTREG_EXECUTABLES_LIBS_exesigtest := -ljvm ifeq ($(call isTargetOs, windows), true) BUILD_HOTSPOT_JTREG_EXECUTABLES_CFLAGS_exeFPRegs := -MT - BUILD_HOTSPOT_JTREG_EXCLUDE += exesigtest.c libterminatedThread.c + BUILD_HOTSPOT_JTREG_EXCLUDE += exesigtest.c libterminatedThread.c libTestJNI.c BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libatExit := jvm.lib else BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libbootclssearch_agent += -lpthread diff --git a/make/test/JtregNativeJdk.gmk b/make/test/JtregNativeJdk.gmk index 3342710bcd28be7489aa2bef3279c8d1256445dd..8ed5cbd2a58b884712f046e921da34d46becf6a0 100644 --- a/make/test/JtregNativeJdk.gmk +++ b/make/test/JtregNativeJdk.gmk @@ -53,6 +53,8 @@ BUILD_JDK_JTREG_EXECUTABLES_CFLAGS_exeJliLaunchTest := \ -I$(TOPDIR)/src/java.base/$(OPENJDK_TARGET_OS_TYPE)/native/libjli \ -I$(TOPDIR)/src/java.base/$(OPENJDK_TARGET_OS)/native/libjli +BUILD_JDK_JTREG_LIBRARIES_LDFLAGS_libAsyncStackWalk := $(LIBCXX) + # Platform specific setup ifeq ($(call isTargetOs, windows), true) BUILD_JDK_JTREG_EXCLUDE += libDirectIO.c libInheritedChannel.c exelauncher.c @@ -63,6 +65,7 @@ ifeq ($(call isTargetOs, windows), true) BUILD_JDK_JTREG_EXECUTABLES_LIBS_exeJliLaunchTest := $(WIN_LIB_JLI) BUILD_JDK_JTREG_EXECUTABLES_LIBS_exeCallerAccessTest := jvm.lib BUILD_JDK_JTREG_EXECUTABLES_LIBS_exerevokeall := advapi32.lib + BUILD_JDK_JTREG_LIBRARIES_CFLAGS_libAsyncStackWalk := /EHsc else BUILD_JDK_JTREG_LIBRARIES_LIBS_libstringPlatformChars := -ljava BUILD_JDK_JTREG_LIBRARIES_LIBS_libDirectIO := -ljava diff --git a/src/demo/share/README b/src/demo/share/README index 7936fb3893ed6ebe767ff8c9f0e196a409bb0727..e3e30cbec3ca489175a98aec059ace4e38c176f2 100644 --- a/src/demo/share/README +++ b/src/demo/share/README @@ -4,12 +4,3 @@ deliberately simplified. Additional steps required for a production-quality application, such as security checks, input validation, and proper error handling, might not be present in the sample code. - -In some cases, the default security settings may block an execution -of demo applets in a browser. To adjust the security settings, please -refer to the following resource: - -http://java.com/en/download/help/java_blocked.xml - -Some demo applets need to be accessed through the HTTP or HTTPS -protocols to enable access to the required resources. diff --git a/src/demo/share/jfc/Font2DTest/Font2DTest.html b/src/demo/share/jfc/Font2DTest/Font2DTest.html deleted file mode 100644 index 4c94fc02b70c7933807c1a2d103b5dbdb0828847..0000000000000000000000000000000000000000 --- a/src/demo/share/jfc/Font2DTest/Font2DTest.html +++ /dev/null @@ -1,49 +0,0 @@ - - - - -Font2DTest Demo - - - - - -
-Font2DTest
-
-An encompassing font/glyph demo application.
-Source code is in
-Font2DTest.java,
-Font2DTestApplet.java,
-RangeMenu.java and
-FontPanel.java.
-You can run this program either as an applet or as an application.
-Detailed information about the program can be found in
-README.txt
-
-To run it as an application,
-execute the Font2DTest class.
-For example:
-
-% java -jar Font2DTest.jar
-
-Note: If AWTPermission's showWindowWithoutWarningBanner permission is not given,
-the zoom feature will not look as good, as characters may be hidden by the warning banner.
-
        - - - - diff --git a/src/demo/share/jfc/Font2DTest/Font2DTest.java b/src/demo/share/jfc/Font2DTest/Font2DTest.java index 0fb8251b7189f76199663db3af70e17e6efdb7eb..e237ee680ce29045b86b39f96297d371f8557a8b 100644 --- a/src/demo/share/jfc/Font2DTest/Font2DTest.java +++ b/src/demo/share/jfc/Font2DTest/Font2DTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -72,6 +72,8 @@ import java.util.BitSet; import javax.swing.*; import javax.swing.event.*; +import static java.nio.charset.StandardCharsets.UTF_16; + /** * Font2DTest.java * @@ -133,7 +135,7 @@ public final class Font2DTest extends JPanel private static boolean canDisplayCheck = true; /// Initialize GUI variables and its layouts - public Font2DTest( JFrame f, boolean isApplet ) { + public Font2DTest( JFrame f) { parent = f; rm = new RangeMenu( this, parent ); @@ -165,8 +167,8 @@ public final class Font2DTest extends JPanel contrastSlider.setPaintLabels(true); contrastSlider.addChangeListener(this); setupPanel(); - setupMenu( isApplet ); - setupDialog( isApplet ); + setupMenu(); + setupDialog(); if(canDisplayCheck) { fireRangeChanged(); @@ -256,7 +258,7 @@ public final class Font2DTest extends JPanel } /// Sets up menu entries - private void setupMenu( boolean isApplet ) { + private void setupMenu() { JMenu fileMenu = new JMenu( "File" ); JMenu optionMenu = new JMenu( "Option" ); @@ -268,11 +270,7 @@ public final class Font2DTest extends JPanel fileMenu.add( new MenuItemV2( "Page Setup...", this )); fileMenu.add( new MenuItemV2( "Print...", this )); fileMenu.addSeparator(); - if ( !isApplet ) - fileMenu.add( new MenuItemV2( "Exit", this )); - else - fileMenu.add( new MenuItemV2( "Close", this )); - + fileMenu.add( new MenuItemV2( "Exit", this )); displayGridCBMI = new CheckboxMenuItemV2( "Display Grid", true, this ); force16ColsCBMI = new CheckboxMenuItemV2( "Force 16 Columns", false, this ); showFontInfoCBMI = new CheckboxMenuItemV2( "Display Font Info", false, this ); @@ -326,11 +324,8 @@ public final class Font2DTest extends JPanel } /// Sets up the all dialogs used in Font2DTest... - private void setupDialog( boolean isApplet ) { - if (!isApplet) - filePromptDialog = new JFileChooser( ); - else - filePromptDialog = null; + private void setupDialog() { + filePromptDialog = new JFileChooser(); /// Prepare user text dialog... 
userTextDialog = new JDialog( parent, "User Text", false ); @@ -432,8 +427,6 @@ public final class Font2DTest extends JPanel /// Changes the message on the status bar public void fireChangeStatus( String message, boolean error ) { - /// If this is not ran as an applet, use own status bar, - /// Otherwise, use the appletviewer/browser's status bar statusBar.setText( message ); if ( error ) fp.showingError = true; @@ -598,7 +591,7 @@ public final class Font2DTest extends JPanel if (numBytes >= 2 && (( byteData[0] == (byte) 0xFF && byteData[1] == (byte) 0xFE ) || ( byteData[0] == (byte) 0xFE && byteData[1] == (byte) 0xFF ))) - fileText = new String( byteData, "UTF-16" ); + fileText = new String(byteData, UTF_16); /// Otherwise, use system default encoding else fileText = new String( byteData ); @@ -656,7 +649,7 @@ public final class Font2DTest extends JPanel showFontInfoCBMI.getState() + "\n" + rm.getSelectedItem() + "\n" + range[0] + "\n" + range[1] + "\n" + curOptions + tFileName); - byte[] toBeWritten = completeOptions.getBytes( "UTF-16" ); + byte[] toBeWritten = completeOptions.getBytes(UTF_16); bos.write( toBeWritten, 0, toBeWritten.length ); bos.close(); } @@ -721,7 +714,7 @@ public final class Font2DTest extends JPanel (byteData[0] != (byte) 0xFE || byteData[1] != (byte) 0xFF) ) throw new Exception( "Not a Font2DTest options file" ); - String options = new String( byteData, "UTF-16" ); + String options = new String(byteData, UTF_16); StringTokenizer perLine = new StringTokenizer( options, "\n" ); String title = perLine.nextToken(); if ( !title.equals( "Font2DTest Option File" )) @@ -1030,7 +1023,7 @@ public final class Font2DTest extends JPanel UIManager.put("swing.boldMetal", Boolean.FALSE); final JFrame f = new JFrame( "Font2DTest" ); - final Font2DTest f2dt = new Font2DTest( f, false ); + final Font2DTest f2dt = new Font2DTest( f); f.addWindowListener( new WindowAdapter() { public void windowOpening( WindowEvent e ) { f2dt.repaint(); } public void windowClosing( WindowEvent e ) { System.exit(0); } diff --git a/src/demo/share/jfc/Font2DTest/FontPanel.java b/src/demo/share/jfc/Font2DTest/FontPanel.java index e0a4aca2950cf665702cbfcca825e5a5a40c6ff6..d52f572e090e603813b426a3474733160145b3b8 100644 --- a/src/demo/share/jfc/Font2DTest/FontPanel.java +++ b/src/demo/share/jfc/Font2DTest/FontPanel.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -80,6 +80,7 @@ import javax.imageio.*; import javax.swing.*; import static java.awt.RenderingHints.*; +import static java.nio.charset.StandardCharsets.ISO_8859_1; /** * FontPanel.java @@ -643,7 +644,7 @@ public final class FontPanel extends JPanel implements AdjustmentListener { break; case DRAW_BYTES: try { - byte[] lineBytes = line.getBytes( "ISO-8859-1" ); + byte[] lineBytes = line.getBytes(ISO_8859_1); g2.drawBytes( lineBytes, 0, lineBytes.length, 0, 0 ); } catch ( Exception e ) { diff --git a/src/demo/share/jfc/Font2DTest/README.txt b/src/demo/share/jfc/Font2DTest/README.txt index 1eb959d79ed1aee77c8128f6ee277bda79ed4dd2..60f8c013aabdfd35b06d3ee0f225115c5b77a9f9 100644 --- a/src/demo/share/jfc/Font2DTest/README.txt +++ b/src/demo/share/jfc/Font2DTest/README.txt @@ -4,35 +4,17 @@ Font2DTest To run Font2DTest: % java -jar Font2DTest.jar - or -% appletviewer Font2DTest.html -These instructions assume that the 1.7 versions of the java -and appletviewer commands are in your path. If they aren't, -then you should either specify the complete path to the commands +These instructions assume that the java command is in your path. +If they aren't, then you should either specify the complete path to the commands or update your PATH environment variable as described in the installation instructions for the Java(TM) SE Development Kit. -To view Font2DTest within a web browser with Java Plugin, -load Font2DTest.html. - If you wish to modify any of the source code, you may want to extract the contents of the Font2DTest.jar file by executing this command: % jar -xvf Font2DTest.jar -NOTE: - -When Font2DTest is ran as an applet, the browser plugin/viewer needs -following permissions given in order to run properly: - -AWTPermission "showWindowWithoutWarningBanner" -RuntimePermission "queuePrintJob" - -The program will run without these properties set, -but some of its features will be limited. -To enable all features, please add these permissions. - ----------------------------------------------------------------------- Introduction ----------------------------------------------------------------------- @@ -129,23 +111,3 @@ that are within the selected range. Third option, "Print all text..." is similar, and it will print all lines of text that user has put in. ==================================================================== - -Known Problems: - -- When a PostScript font is used, the characters may extend beyond the -enclosing grid or zoom rectangle. This is due to the problem with -FontMetrics.getMaxAscent() and getMaxDescent() functions; the functions -do not always return the right values for PostScript fonts. - -- There are still some bugs around the error handling. -Most of these problems will usually get fixed when some parameters -are changed, or the screen is refreshed. - -- Many fonts on Solaris fails to retrieve outlines properly, -and as the result, they do not align within the grid properly. -These are mainly F3 and fonts that was returned by X server. - -- When showWindowWithoutWarningBanner AWTPermission is not given, -the "zoom" window will look really bad because of the -Applet warning label tacked at the bottom of the zoom window. -To remove this, follow the "NOTE:" instruction at the top. 
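Aside on the charset changes in the Font2DTest, FontPanel, and DemoModule hunks: they replace charset-name strings such as "UTF-16" and "ISO-8859-1" with java.nio.charset.StandardCharsets constants. A minimal stand-alone sketch of the difference (a hypothetical example, not part of the patch): the String overloads that take a charset *name* do a runtime lookup and declare the checked UnsupportedEncodingException, while the Charset-based overloads do neither.

    import static java.nio.charset.StandardCharsets.UTF_16;

    public class CharsetExample {
        public static void main(String[] args) throws Exception {
            // Name-based API: runtime charset lookup, declares UnsupportedEncodingException
            byte[] data = "hello".getBytes("UTF-16");
            String viaName = new String(data, "UTF-16");
            // Constant-based API: no checked exception, no per-call name lookup
            String viaConstant = new String(data, UTF_16);
            System.out.println(viaName.equals(viaConstant)); // prints true
        }
    }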
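The J2Ddemo and SwingSet2 hunks that follow replace broad wildcard imports (javax.swing.*, java.awt.*, java.util.*, java.applet.*, ...) with explicit single-type imports. One reason this matters, sketched as a hypothetical example (not part of the patch): two wildcard-imported packages can export the same simple name, making unqualified references ambiguous. Dropping the wildcards also makes the removal of the java.applet dependency visible at a glance.

    import java.awt.*;   // contains java.awt.List
    import java.util.*;  // contains java.util.List

    public class WildcardClash {
        // List l;  // would not compile: "reference to List is ambiguous"
        java.util.List<String> l = new ArrayList<>(); // must qualify, or import explicitly
    }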
diff --git a/src/demo/share/jfc/J2Ddemo/java2d/J2Ddemo.java b/src/demo/share/jfc/J2Ddemo/java2d/J2Ddemo.java index d8396ee5ec85e3688ff2d550cff3376a90731492..5d90756f9d6b13b5ad5940f1a8a3cd505ec94c2f 100644 --- a/src/demo/share/jfc/J2Ddemo/java2d/J2Ddemo.java +++ b/src/demo/share/jfc/J2Ddemo/java2d/J2Ddemo.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2007, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -109,7 +109,6 @@ public class J2Ddemo extends JPanel implements ItemListener, ActionListener, Dem { "Paths", "Append", "CurveQuadTo", "FillStroke", "WindingRule" }, { "Transforms", "Rotate", "SelectTx", "TransformAnim" } }; - private final boolean demoIsInApplet; private JCheckBoxMenuItem controlsCB; private JMenuItem runMI, cloneMI, fileMI, backgMI; // private JMenuItem ccthreadMI, verboseMI; @@ -122,8 +121,7 @@ public class J2Ddemo extends JPanel implements ItemListener, ActionListener, Dem /** * Construct the J2D Demo. */ - public J2Ddemo(boolean demoIsInApplet, DemoProgress progress, RunWindowSettings runWndSetts) { - this.demoIsInApplet = demoIsInApplet; + public J2Ddemo(DemoProgress progress, RunWindowSettings runWndSetts) { this.runWndSetts = runWndSetts; setLayout(new BorderLayout()); @@ -171,11 +169,9 @@ public class J2Ddemo extends JPanel implements ItemListener, ActionListener, Dem JPopupMenu.setDefaultLightWeightPopupEnabled(false); JMenuBar menuBar = new JMenuBar(); - if (!demoIsInApplet) { - JMenu file = menuBar.add(new JMenu("File")); - fileMI = file.add(new JMenuItem("Exit")); - fileMI.addActionListener(this); - } + JMenu file = menuBar.add(new JMenu("File")); + fileMI = file.add(new JMenuItem("Exit")); + fileMI.addActionListener(this); JMenu options = menuBar.add(new JMenu("Options")); @@ -239,11 +235,7 @@ public class J2Ddemo extends JPanel implements ItemListener, ActionListener, Dem rf.addWindowListener(l); rf.getContentPane().add("Center", runwindow); rf.pack(); - if (!demoIsInApplet) { - rf.setSize(new Dimension(200, 125)); - } else { - rf.setSize(new Dimension(200, 150)); - } + rf.setSize(new Dimension(200, 125)); rf.setVisible(true); } @@ -611,7 +603,7 @@ public class J2Ddemo extends JPanel implements ItemListener, ActionListener, Dem frame.setVisible(true); - J2Ddemo demo = new J2Ddemo(false, demoProgress, runWndSetts); + J2Ddemo demo = new J2Ddemo(demoProgress, runWndSetts); demoOneInstArr[0] = demo; frame.getContentPane().removeAll(); diff --git a/src/demo/share/jfc/SwingSet2/ButtonDemo.java b/src/demo/share/jfc/SwingSet2/ButtonDemo.java index c5e8c7aaea0770e1fd70e4ad034ac7ee14b5938c..ad45b6d20820bfa152f36c9e00a5b989c96b2496 100644 --- a/src/demo/share/jfc/SwingSet2/ButtonDemo.java +++ b/src/demo/share/jfc/SwingSet2/ButtonDemo.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,22 +30,30 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ - -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.text.*; -import javax.swing.border.*; -import javax.swing.colorchooser.*; -import javax.swing.filechooser.*; -import javax.accessibility.*; - -import java.awt.*; -import java.awt.event.*; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import javax.swing.AbstractButton; +import javax.swing.Box; +import javax.swing.BoxLayout; +import javax.swing.ButtonGroup; +import javax.swing.JButton; +import javax.swing.JCheckBox; +import javax.swing.JLabel; +import javax.swing.JPanel; +import javax.swing.JRadioButton; +import javax.swing.JTabbedPane; +import javax.swing.JToggleButton; +import javax.swing.SingleSelectionModel; +import javax.swing.border.CompoundBorder; +import javax.swing.border.EmptyBorder; +import javax.swing.border.TitledBorder; +import javax.swing.event.ChangeEvent; +import javax.swing.event.ChangeListener; + +import java.awt.Component; +import java.awt.Dimension; +import java.awt.Insets; +import java.awt.event.ItemEvent; +import java.awt.event.ItemListener; +import java.util.Vector; /** * JButton, JRadioButton, JToggleButton, JCheckBox Demos diff --git a/src/demo/share/jfc/SwingSet2/ColorChooserDemo.java b/src/demo/share/jfc/SwingSet2/ColorChooserDemo.java index d87c108054ed954df5a768cf5909882ea0ffb27e..aa2418b128b2df2228e1710c7e79e5e049828850 100644 --- a/src/demo/share/jfc/SwingSet2/ColorChooserDemo.java +++ b/src/demo/share/jfc/SwingSet2/ColorChooserDemo.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,22 +30,20 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.text.*; -import javax.swing.border.*; -import javax.swing.colorchooser.*; -import javax.swing.filechooser.*; -import javax.accessibility.*; - -import java.awt.*; -import java.awt.event.*; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import javax.swing.Box; +import javax.swing.BoxLayout; +import javax.swing.Icon; +import javax.swing.JButton; +import javax.swing.JColorChooser; +import javax.swing.JDialog; +import javax.swing.JPanel; + +import java.awt.Color; +import java.awt.Component; +import java.awt.Dimension; +import java.awt.Graphics; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; /** * JColorChooserDemo diff --git a/src/demo/share/jfc/SwingSet2/ComboBoxDemo.java b/src/demo/share/jfc/SwingSet2/ComboBoxDemo.java index d32147e571351c22a3963ed6ee1a961c7dd6ad07..75e8311cd75de3dafe001d1eaabee1713719489b 100644 --- a/src/demo/share/jfc/SwingSet2/ComboBoxDemo.java +++ b/src/demo/share/jfc/SwingSet2/ComboBoxDemo.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,22 +30,23 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ - -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.text.*; -import javax.swing.border.*; -import javax.swing.colorchooser.*; -import javax.swing.filechooser.*; -import javax.accessibility.*; - -import java.awt.*; -import java.awt.event.*; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import javax.accessibility.AccessibleRelation; +import javax.swing.Box; +import javax.swing.BoxLayout; +import javax.swing.Icon; +import javax.swing.ImageIcon; +import javax.swing.JComboBox; +import javax.swing.JLabel; +import javax.swing.JPanel; +import javax.swing.border.BevelBorder; + +import java.awt.BorderLayout; +import java.awt.Component; +import java.awt.Dimension; +import java.awt.Graphics; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.util.Hashtable; /** * JComboBox Demo diff --git a/src/demo/share/jfc/SwingSet2/DemoModule.java b/src/demo/share/jfc/SwingSet2/DemoModule.java index a726cc257cba2ecbac1ed9252cd7ce7da328d9c6..9aa17e76e80b46946a48ada87257ecb2d2b6bd85 100644 --- a/src/demo/share/jfc/SwingSet2/DemoModule.java +++ b/src/demo/share/jfc/SwingSet2/DemoModule.java @@ -1,6 +1,5 @@ /* - * - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,21 +29,25 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.text.*; -import javax.swing.border.*; -import javax.swing.colorchooser.*; -import javax.swing.filechooser.*; -import javax.accessibility.*; - -import java.awt.*; -import java.awt.event.*; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import javax.swing.BoxLayout; +import javax.swing.Icon; +import javax.swing.ImageIcon; +import javax.swing.JFrame; +import javax.swing.JPanel; +import javax.swing.UIManager; +import javax.swing.border.Border; +import javax.swing.border.CompoundBorder; +import javax.swing.border.EmptyBorder; +import javax.swing.border.SoftBevelBorder; + +import java.awt.BorderLayout; +import java.awt.Dimension; +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.URL; + +import static java.nio.charset.StandardCharsets.UTF_8; /** * A generic SwingSet2 demo module @@ -155,7 +158,7 @@ public class DemoModule extends JFrame { try { url = getClass().getResource(filename); is = url.openStream(); - isr = new InputStreamReader(is, "UTF-8"); + isr = new InputStreamReader(is, UTF_8); BufferedReader reader = new BufferedReader(isr); // Read one line at a time, htmlize using super-spiffy diff --git a/src/demo/share/jfc/SwingSet2/FileChooserDemo.java b/src/demo/share/jfc/SwingSet2/FileChooserDemo.java index f85100065c5bbdb706b35819546592b6a1d1041b..85192d74bc587ffaca8a13b8cebd87082daf4f14 100644 --- a/src/demo/share/jfc/SwingSet2/FileChooserDemo.java +++ b/src/demo/share/jfc/SwingSet2/FileChooserDemo.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,22 +30,37 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.text.*; -import javax.swing.border.*; -import javax.swing.colorchooser.*; -import javax.swing.filechooser.*; -import javax.accessibility.*; - -import java.awt.*; -import java.awt.event.*; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import javax.swing.AbstractAction; +import javax.swing.Action; +import javax.swing.Box; +import javax.swing.BoxLayout; +import javax.swing.Icon; +import javax.swing.ImageIcon; +import javax.swing.JButton; +import javax.swing.JComponent; +import javax.swing.JDialog; +import javax.swing.JFileChooser; +import javax.swing.JLabel; +import javax.swing.JOptionPane; +import javax.swing.JPanel; +import javax.swing.JScrollPane; +import javax.swing.SwingUtilities; +import javax.swing.WindowConstants; +import javax.swing.border.BevelBorder; +import javax.swing.filechooser.FileNameExtensionFilter; + +import java.awt.BorderLayout; +import java.awt.Color; +import java.awt.Component; +import java.awt.Dimension; +import java.awt.Frame; +import java.awt.Graphics; +import java.awt.Image; +import java.awt.Insets; +import java.awt.event.ActionEvent; +import java.beans.PropertyChangeEvent; +import java.beans.PropertyChangeListener; +import java.io.File; /** * JFileChooserDemo diff --git a/src/demo/share/jfc/SwingSet2/HtmlDemo.java b/src/demo/share/jfc/SwingSet2/HtmlDemo.java index e19a965e208826b063b242ee7848dfdef3f1a625..0b95ce61d6b75e185495c19645215c4cfd140030 100644 --- a/src/demo/share/jfc/SwingSet2/HtmlDemo.java +++ b/src/demo/share/jfc/SwingSet2/HtmlDemo.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,23 +30,19 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +import javax.swing.JEditorPane; +import javax.swing.JScrollPane; +import javax.swing.JViewport; +import javax.swing.event.HyperlinkEvent.EventType; +import javax.swing.event.HyperlinkEvent; +import javax.swing.event.HyperlinkListener; +import javax.swing.text.html.HTMLDocument; +import javax.swing.text.html.HTMLFrameHyperlinkEvent; -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.text.*; -import javax.swing.text.html.*; -import javax.swing.border.*; -import javax.swing.colorchooser.*; -import javax.swing.filechooser.*; -import javax.accessibility.*; - -import java.awt.*; -import java.awt.event.*; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import java.awt.BorderLayout; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; /** * Html Demo diff --git a/src/demo/share/jfc/SwingSet2/InternalFrameDemo.java b/src/demo/share/jfc/SwingSet2/InternalFrameDemo.java index a02ec0b919733f7d2408b8256fa4cd8950e48e89..0bf009ae634c42dc37502fb8d5d0c204577ee51b 100644 --- a/src/demo/share/jfc/SwingSet2/InternalFrameDemo.java +++ b/src/demo/share/jfc/SwingSet2/InternalFrameDemo.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2007, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,22 +30,26 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.text.*; -import javax.swing.border.*; -import javax.swing.colorchooser.*; -import javax.swing.filechooser.*; -import javax.accessibility.*; - -import java.awt.*; -import java.awt.event.*; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import javax.swing.AbstractAction; +import javax.swing.Box; +import javax.swing.BoxLayout; +import javax.swing.Icon; +import javax.swing.ImageIcon; +import javax.swing.JButton; +import javax.swing.JCheckBox; +import javax.swing.JDesktopPane; +import javax.swing.JInternalFrame; +import javax.swing.JLabel; +import javax.swing.JPanel; +import javax.swing.JScrollPane; +import javax.swing.JTextField; + +import java.awt.BorderLayout; +import java.awt.Color; +import java.awt.Dimension; +import java.awt.GridLayout; +import java.awt.Insets; +import java.awt.event.ActionEvent; /** * Internal Frames Demo diff --git a/src/demo/share/jfc/SwingSet2/ListDemo.java b/src/demo/share/jfc/SwingSet2/ListDemo.java index 0352bc3ab71630a1978114c613ef096893c958d6..994059c0c74fc0dfb66ccd5aa55651fdfb3c88b1 100644 --- a/src/demo/share/jfc/SwingSet2/ListDemo.java +++ b/src/demo/share/jfc/SwingSet2/ListDemo.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2007, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,22 +30,29 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.text.*; -import javax.swing.border.*; -import javax.swing.colorchooser.*; -import javax.swing.filechooser.*; -import javax.accessibility.*; - -import java.awt.*; -import java.awt.event.*; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import javax.swing.AbstractAction; +import javax.swing.AbstractListModel; +import javax.swing.Action; +import javax.swing.Box; +import javax.swing.BoxLayout; +import javax.swing.DefaultListCellRenderer; +import javax.swing.ImageIcon; +import javax.swing.JCheckBox; +import javax.swing.JComponent; +import javax.swing.JLabel; +import javax.swing.JList; +import javax.swing.JPanel; +import javax.swing.JScrollPane; + +import java.awt.BorderLayout; +import java.awt.Component; +import java.awt.Insets; +import java.awt.Rectangle; +import java.awt.event.ActionEvent; +import java.awt.event.FocusAdapter; +import java.awt.event.FocusEvent; +import java.awt.event.FocusListener; +import java.util.Vector; /** * List Demo. This demo shows that it is not diff --git a/src/demo/share/jfc/SwingSet2/OptionPaneDemo.java b/src/demo/share/jfc/SwingSet2/OptionPaneDemo.java index c60d4b8f159947b32f06fe02b085c73535449f02..8814a499722a049fb111937a9c3acfcb9a6cedbc 100644 --- a/src/demo/share/jfc/SwingSet2/OptionPaneDemo.java +++ b/src/demo/share/jfc/SwingSet2/OptionPaneDemo.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,22 +30,19 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.text.*; -import javax.swing.border.*; -import javax.swing.colorchooser.*; -import javax.swing.filechooser.*; -import javax.accessibility.*; - -import java.awt.*; -import java.awt.event.*; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import javax.swing.AbstractAction; +import javax.swing.Action; +import javax.swing.Box; +import javax.swing.BoxLayout; +import javax.swing.JButton; +import javax.swing.JComboBox; +import javax.swing.JOptionPane; +import javax.swing.JPanel; +import javax.swing.JTextField; + +import java.awt.Dimension; +import java.awt.event.ActionEvent; +import java.net.URL; /** * JOptionPaneDemo diff --git a/src/demo/share/jfc/SwingSet2/ProgressBarDemo.java b/src/demo/share/jfc/SwingSet2/ProgressBarDemo.java index c5568a33ed3118212541989a2cf094d6394d8f2d..47f03708f3a69c54695ed70f9229c57a0ed21b10 100644 --- a/src/demo/share/jfc/SwingSet2/ProgressBarDemo.java +++ b/src/demo/share/jfc/SwingSet2/ProgressBarDemo.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,22 +30,19 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.text.*; -import javax.swing.border.*; -import javax.swing.colorchooser.*; -import javax.swing.filechooser.*; -import javax.accessibility.*; - -import java.awt.*; -import java.awt.event.*; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import javax.swing.AbstractAction; +import javax.swing.Action; +import javax.swing.JButton; +import javax.swing.JPanel; +import javax.swing.JProgressBar; +import javax.swing.JScrollPane; +import javax.swing.JTextArea; +import javax.swing.border.BevelBorder; +import javax.swing.border.SoftBevelBorder; + +import java.awt.BorderLayout; +import java.awt.Dimension; +import java.awt.event.ActionEvent; /** * JProgressBar Demo diff --git a/src/demo/share/jfc/SwingSet2/README.txt b/src/demo/share/jfc/SwingSet2/README.txt index a715d29f705cbc74ae2da1dddd69f6cc8dacfcfc..e762eccb5fac6eda93a3831d68bb436c83f3a0ab 100644 --- a/src/demo/share/jfc/SwingSet2/README.txt +++ b/src/demo/share/jfc/SwingSet2/README.txt @@ -10,12 +10,6 @@ TO RUN SWINGSET2 AS AN APPLICATION java -jar SwingSet2.jar -============================= -TO RUN SWINGSET2 AS AN APPLET -============================= - - appletviewer SwingSet2.html - ========================= TO MODIFY/BUILD SWINGSET2 ========================= @@ -40,8 +34,8 @@ documentation) or you can specify the splash screen image on the command line: java -splash:resources/images/splash.png SwingSet2 -Note: These instructions assume that this installation's versions of the java, -appletviewer, and javac commands are in your path. 
If they aren't, then you should +Note: These instructions assume that this installation's versions of the java +and javac commands are in your path. If they aren't, then you should either specify the complete path to the commands or update your PATH environment variable as described in the installation instructions for the Java(TM) SE Development Kit. diff --git a/src/demo/share/jfc/SwingSet2/ScrollPaneDemo.java b/src/demo/share/jfc/SwingSet2/ScrollPaneDemo.java index e4a45e2cca92ee6a49ab2f9c823976197c8b2c66..d0a5e74464597e7d5d1061c4d0a817e9d2e55199 100644 --- a/src/demo/share/jfc/SwingSet2/ScrollPaneDemo.java +++ b/src/demo/share/jfc/SwingSet2/ScrollPaneDemo.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,22 +30,14 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +import javax.swing.Icon; +import javax.swing.ImageIcon; +import javax.swing.JLabel; +import javax.swing.JPanel; +import javax.swing.JScrollBar; +import javax.swing.JScrollPane; -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.text.*; -import javax.swing.border.*; -import javax.swing.colorchooser.*; -import javax.swing.filechooser.*; -import javax.accessibility.*; - -import java.awt.*; -import java.awt.event.*; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import java.awt.BorderLayout; /** * Scroll Pane Demo diff --git a/src/demo/share/jfc/SwingSet2/SliderDemo.java b/src/demo/share/jfc/SwingSet2/SliderDemo.java index a71a4aaf404eb6c381234e739eaf2be11a7d0e4b..c583b7c1b9d00ebe7e883c5bc4c312c6793563cd 100644 --- a/src/demo/share/jfc/SwingSet2/SliderDemo.java +++ b/src/demo/share/jfc/SwingSet2/SliderDemo.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,22 +30,21 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ - -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.text.*; -import javax.swing.border.*; -import javax.swing.colorchooser.*; -import javax.swing.filechooser.*; -import javax.accessibility.*; - -import java.awt.*; -import java.awt.event.*; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import javax.swing.BoundedRangeModel; +import javax.swing.Box; +import javax.swing.BoxLayout; +import javax.swing.DefaultBoundedRangeModel; +import javax.swing.JLabel; +import javax.swing.JPanel; +import javax.swing.JSlider; +import javax.swing.border.BevelBorder; +import javax.swing.border.TitledBorder; +import javax.swing.event.ChangeEvent; +import javax.swing.event.ChangeListener; + +import java.awt.BorderLayout; +import java.awt.GridLayout; +import java.util.Dictionary; /** * JSlider Demo diff --git a/src/demo/share/jfc/SwingSet2/SplitPaneDemo.java b/src/demo/share/jfc/SwingSet2/SplitPaneDemo.java index d39ee89aec1536b399e8dbfcccf35bfa7f8c05c9..870bf639d6df04adeb18f3601ea4a2b39b612204 100644 --- a/src/demo/share/jfc/SwingSet2/SplitPaneDemo.java +++ b/src/demo/share/jfc/SwingSet2/SplitPaneDemo.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,23 +30,26 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.text.*; -import javax.swing.table.*; -import javax.swing.border.*; -import javax.swing.colorchooser.*; -import javax.swing.filechooser.*; -import javax.accessibility.*; - -import java.awt.*; -import java.awt.event.*; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import javax.swing.Box; +import javax.swing.BoxLayout; +import javax.swing.ButtonGroup; +import javax.swing.JCheckBox; +import javax.swing.JLabel; +import javax.swing.JOptionPane; +import javax.swing.JPanel; +import javax.swing.JRadioButton; +import javax.swing.JSplitPane; +import javax.swing.JTextField; +import javax.swing.event.ChangeEvent; +import javax.swing.event.ChangeListener; + +import java.awt.BorderLayout; +import java.awt.Color; +import java.awt.Dimension; +import java.awt.FlowLayout; +import java.awt.GridLayout; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; /** * Split Pane demo diff --git a/src/demo/share/jfc/SwingSet2/SwingSet2.html b/src/demo/share/jfc/SwingSet2/SwingSet2.html deleted file mode 100644 index 3bf8221224bb22287677f1a9df5be88dd336be59..0000000000000000000000000000000000000000 --- a/src/demo/share/jfc/SwingSet2/SwingSet2.html +++ /dev/null @@ -1,14 +0,0 @@ - - - - SwingSet demo - - - -

-        SwingSet demo

        - - - - diff --git a/src/demo/share/jfc/SwingSet2/TabbedPaneDemo.java b/src/demo/share/jfc/SwingSet2/TabbedPaneDemo.java index 894e4451ec3e8b87ea16f6f104ab52099a513370..0be83b3b77a55d3f8f2775b334cc2b8e4c6a4b7d 100644 --- a/src/demo/share/jfc/SwingSet2/TabbedPaneDemo.java +++ b/src/demo/share/jfc/SwingSet2/TabbedPaneDemo.java @@ -1,6 +1,5 @@ /* - * - * Copyright (c) 2007, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,22 +29,23 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.text.*; -import javax.swing.border.*; -import javax.swing.colorchooser.*; -import javax.swing.filechooser.*; -import javax.accessibility.*; - -import java.awt.*; -import java.awt.event.*; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import javax.swing.ButtonGroup; +import javax.swing.ImageIcon; +import javax.swing.JComponent; +import javax.swing.JLabel; +import javax.swing.JPanel; +import javax.swing.JRadioButton; +import javax.swing.JTabbedPane; +import javax.swing.SingleSelectionModel; +import javax.swing.event.ChangeEvent; +import javax.swing.event.ChangeListener; + +import java.awt.BorderLayout; +import java.awt.Color; +import java.awt.Graphics; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.util.Random; /** * JTabbedPane Demo diff --git a/src/demo/share/jfc/SwingSet2/TableDemo.java b/src/demo/share/jfc/SwingSet2/TableDemo.java index 48199c564a8507e7bc25e5b0a2668c8d5ff7f06c..350fb6efd675e2221eec0fa97a6cbab5cc89b9e4 100644 --- a/src/demo/share/jfc/SwingSet2/TableDemo.java +++ b/src/demo/share/jfc/SwingSet2/TableDemo.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,24 +30,51 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ - -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.text.*; -import javax.swing.table.*; -import javax.swing.border.*; -import javax.swing.colorchooser.*; -import javax.swing.filechooser.*; -import javax.accessibility.*; - -import java.awt.*; -import java.awt.event.*; +import javax.accessibility.Accessible; +import javax.accessibility.AccessibleContext; +import javax.accessibility.AccessibleRelation; +import javax.accessibility.AccessibleRelationSet; +import javax.swing.AbstractAction; +import javax.swing.BorderFactory; +import javax.swing.Box; +import javax.swing.BoxLayout; +import javax.swing.DefaultCellEditor; +import javax.swing.ImageIcon; +import javax.swing.JButton; +import javax.swing.JCheckBox; +import javax.swing.JComboBox; +import javax.swing.JComponent; +import javax.swing.JLabel; +import javax.swing.JOptionPane; +import javax.swing.JPanel; +import javax.swing.JScrollPane; +import javax.swing.JSlider; +import javax.swing.JTable; +import javax.swing.JTextField; +import javax.swing.KeyStroke; +import javax.swing.border.TitledBorder; +import javax.swing.event.ChangeEvent; +import javax.swing.event.ChangeListener; +import javax.swing.table.AbstractTableModel; +import javax.swing.table.DefaultTableCellRenderer; +import javax.swing.table.TableColumn; +import javax.swing.table.TableModel; +import javax.swing.table.TableRowSorter; + +import java.awt.BorderLayout; +import java.awt.Color; +import java.awt.Component; +import java.awt.Container; +import java.awt.Dimension; +import java.awt.GridLayout; +import java.awt.Insets; +import java.awt.LayoutManager; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.awt.event.ItemEvent; +import java.awt.event.ItemListener; import java.awt.print.PrinterException; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import java.util.Vector; import java.text.MessageFormat; @@ -549,7 +576,10 @@ public class TableDemo extends DemoModule { public int getRowCount() { return data.length;} public Object getValueAt(int row, int col) {return data[row][col];} public String getColumnName(int column) {return names[column];} - public Class getColumnClass(int c) {return getValueAt(0, c).getClass();} + public Class getColumnClass(int c) { + Object obj = getValueAt(0, c); + return obj != null ? obj.getClass() : Object.class; + } public boolean isCellEditable(int row, int col) {return col != 5;} public void setValueAt(Object aValue, int row, int column) { data[row][column] = aValue; } }; diff --git a/src/demo/share/jfc/SwingSet2/ToolTipDemo.java b/src/demo/share/jfc/SwingSet2/ToolTipDemo.java index c91af2d3bbe3a68d9d6e69c5e84623df24292a85..760a81418664a8998c189c5027749eff82f866a6 100644 --- a/src/demo/share/jfc/SwingSet2/ToolTipDemo.java +++ b/src/demo/share/jfc/SwingSet2/ToolTipDemo.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,22 +30,15 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +import javax.swing.Box; +import javax.swing.BoxLayout; +import javax.swing.JLabel; +import javax.swing.JPanel; -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.text.*; -import javax.swing.border.*; -import javax.swing.colorchooser.*; -import javax.swing.filechooser.*; -import javax.accessibility.*; - -import java.awt.*; -import java.awt.event.*; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import java.awt.Color; +import java.awt.Dimension; +import java.awt.Point; +import java.awt.Polygon; /** * ToolTip Demo diff --git a/src/demo/share/jfc/SwingSet2/TreeDemo.java b/src/demo/share/jfc/SwingSet2/TreeDemo.java index 57f2f25e14b02be092f34077acf9f823bc2f987f..b41ee3b5c5ad75cd4fbce8d8495c9c482b4dcd78 100644 --- a/src/demo/share/jfc/SwingSet2/TreeDemo.java +++ b/src/demo/share/jfc/SwingSet2/TreeDemo.java @@ -1,6 +1,5 @@ /* - * - * Copyright (c) 2007, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,19 +29,19 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +import javax.swing.JScrollPane; +import javax.swing.JTree; +import javax.swing.tree.DefaultMutableTreeNode; -import javax.swing.*; -import javax.swing.event.*; -import javax.swing.tree.*; -import javax.accessibility.*; +import java.awt.BorderLayout; +import java.awt.Insets; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.URL; -import java.awt.*; -import java.awt.event.*; -import java.beans.*; -import java.util.*; -import java.io.*; -import java.applet.*; -import java.net.*; +import static java.nio.charset.StandardCharsets.UTF_8; /** * JTree Demo @@ -84,7 +83,7 @@ public class TreeDemo extends DemoModule { try { // convert url to buffered string InputStream is = url.openStream(); - InputStreamReader isr = new InputStreamReader(is, "UTF-8"); + InputStreamReader isr = new InputStreamReader(is, UTF_8); BufferedReader reader = new BufferedReader(isr); // read one line at a time, put into tree diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index e0f067a23c8626150d621e991a6e357b112ca9b7..52d624a2de6394a146c07a14949d55638ba4b496 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -1295,7 +1295,7 @@ public: }; }; - bool is_CAS(int opcode, bool maybe_volatile); + bool is_CAS(int opcode, bool maybe_volatile); // predicates controlling emit of ldr/ldar and associated dmb @@ -1902,7 +1902,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { __ bind(L_skip_barrier); } - if (C->max_vector_size() >= 16) { + if (C->max_vector_size() > 0) { __ reinitialize_ptrue(); } @@ -2161,7 +2161,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo as_FloatRegister(Matcher::_regEncode[src_lo])); } } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy - if (cbuf) { + if (is64) { __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo])); } else { @@ -2388,7 +2388,7 @@ const bool Matcher::match_rule_supported(int opcode) { // Identify extra cases that we might want to provide match rules for vector nodes and // other intrinsics guarded with vector length (vlen) and 
element type (bt). const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { + if (!match_rule_supported(opcode)) { return false; } int bit_size = vlen * type2aelembytes(bt) * 8; @@ -2396,10 +2396,18 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType return false; } if (UseSVE > 0) { - return op_sve_supported(opcode); + return op_sve_supported(opcode, vlen, bt); } else { // NEON // Special cases switch (opcode) { + case Op_VectorMaskCmp: + // We don't have VectorReinterpret support for a bit_size less than 64 + // for now, even for byte type. To be refined with full VectorCast support. + case Op_VectorReinterpret: + if (vlen < 2 || bit_size < 64) { + return false; + } + break; case Op_MulAddVS2VI: if (bit_size < 128) { return false; } break; @@ -2413,11 +2421,31 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType return false; } break; + // Some types of VectorCast are not implemented for now. + case Op_VectorCastI2X: + if (bt == T_BYTE) { + return false; + } + break; + case Op_VectorCastS2X: + if (vlen < 4 || bit_size < 64) { + return false; + } + break; + case Op_VectorCastF2X: + case Op_VectorCastD2X: + if (bt == T_INT || bt == T_SHORT || bt == T_BYTE || bt == T_LONG) { + return false; + } + break; + case Op_LoadVectorGather: + case Op_StoreVectorScatter: + return false; default: break; } } - return true; // Per default match rules are supported. + return vector_size_supported(bt, vlen); } const RegMask* Matcher::predicate_reg_mask(void) { @@ -2438,10 +2466,6 @@ OptoRegPair Matcher::vector_return_value(uint ideal_reg) { return OptoRegPair(0, 0); } -const int Matcher::float_pressure(int default_pressure_threshold) { - return default_pressure_threshold; -} - // Is this branch offset short enough that a short branch can be used? // // NOTE: If the platform does not provide any short branch variants, then @@ -2467,24 +2491,20 @@ const int Matcher::vector_width_in_bytes(BasicType bt) { const int Matcher::max_vector_size(const BasicType bt) { return vector_width_in_bytes(bt)/type2aelembytes(bt); } + const int Matcher::min_vector_size(const BasicType bt) { int max_size = max_vector_size(bt); - if ((UseSVE > 0) && (MaxVectorSize >= 16)) { - // Currently vector length less than SVE vector register size is not supported. - return max_size; - } else { // NEON - // Limit the vector size to 8 bytes - int size = 8 / type2aelembytes(bt); - if (bt == T_BYTE) { - // To support vector api shuffle/rearrange. - size = 4; - } else if (bt == T_BOOLEAN) { - // To support vector api load/store mask. - size = 2; - } - if (size < 2) size = 2; - return MIN2(size,max_size); + // Limit the min vector size to 8 bytes. + int size = 8 / type2aelembytes(bt); + if (bt == T_BYTE) { + // To support vector api shuffle/rearrange. + size = 4; + } else if (bt == T_BOOLEAN) { + // To support vector api load/store mask. + size = 2; } + if (size < 2) size = 2; + return MIN2(size, max_size); } // Actual max scalable vector register length. @@ -2494,7 +2514,7 @@ const int Matcher::scalable_vector_reg_size(const BasicType bt) { // Vector ideal reg.
const uint Matcher::vector_ideal_reg(int len) { - if (UseSVE > 0 && 16 <= len && len <= 256) { + if (UseSVE > 0 && 2 <= len && len <= 256) { return Op_VecA; } switch(len) { @@ -2513,7 +2533,7 @@ MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, return NULL; } -bool Matcher::is_generic_reg2reg_move(MachNode* m) { +bool Matcher::is_reg2reg_move(MachNode* m) { ShouldNotReachHere(); // generic vector operands not supported return false; } @@ -2554,6 +2574,39 @@ bool Matcher::is_spillable_arg(int reg) return can_be_java_arg(reg); } +uint Matcher::int_pressure_limit() +{ + // JDK-8183543: When taking the number of available registers as int + // register pressure threshold, the jtreg test: + // test/hotspot/jtreg/compiler/regalloc/TestC2IntPressure.java + // failed due to C2 compilation failure with + // "COMPILE SKIPPED: failed spill-split-recycle sanity check". + // + // A derived pointer is live at CallNode and then is flagged by RA + // as a spilled LRG. Spilling heuristics (Spill-USE) explicitly skip + // derived pointers and finally fail to spill after reaching the maximum + // number of iterations. Lowering the default pressure threshold to + // (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become + // a high register pressure area of the code so that split_DEF can + // generate DefinitionSpillCopy for the derived pointer. + uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1; + if (!PreserveFramePointer) { + // When PreserveFramePointer is off, the frame pointer is allocatable, + // but unlike other SOC registers, it is excluded from + // fatproj's mask because its save type is No-Save. Decrease it by 1 to + // ensure high pressure at fatproj when PreserveFramePointer is off. + // See check_pressure_at_fatproj(). + default_int_pressure_threshold--; + } + return (INTPRESSURE == -1) ? default_int_pressure_threshold : INTPRESSURE; +} + +uint Matcher::float_pressure_limit() +{ + // _FLOAT_REG_mask is generated by adlc from the float_reg register class. + return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE; +} + bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { return false; } @@ -2588,6 +2641,13 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() { bool size_fits_all_mem_uses(AddPNode* addp, int shift) { for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) { Node* u = addp->fast_out(i); + if (u->is_LoadStore()) { + // On AArch64, LoadStoreNodes (i.e. compare and swap + // instructions) only take register indirect as an operand, so + // any attempt to use an AddPNode as an input to a LoadStoreNode + // must fail. + return false; + } if (u->is_Mem()) { int opsize = u->as_Mem()->memory_size(); assert(opsize > 0, "unexpected memory operand size"); @@ -3659,7 +3719,7 @@ encode %{ } // Only non-uncommon_trap calls need to reinitialize ptrue.
- if (Compile::current()->max_vector_size() >= 16 && uncommon_trap_request() == 0) { + if (Compile::current()->max_vector_size() > 0 && uncommon_trap_request() == 0) { __ reinitialize_ptrue(); } %} @@ -3671,7 +3731,7 @@ encode %{ if (call == NULL) { ciEnv::current()->record_failure("CodeCache is full"); return; - } else if (Compile::current()->max_vector_size() >= 16) { + } else if (Compile::current()->max_vector_size() > 0) { __ reinitialize_ptrue(); } %} @@ -3709,7 +3769,7 @@ encode %{ __ bind(retaddr); __ add(sp, sp, 2 * wordSize); } - if (Compile::current()->max_vector_size() >= 16) { + if (Compile::current()->max_vector_size() > 0) { __ reinitialize_ptrue(); } %} @@ -3722,7 +3782,7 @@ encode %{ enc_class aarch64_enc_ret() %{ C2_MacroAssembler _masm(&cbuf); #ifdef ASSERT - if (Compile::current()->max_vector_size() >= 16) { + if (Compile::current()->max_vector_size() > 0) { __ verify_ptrue(); } #endif @@ -3767,10 +3827,6 @@ encode %{ __ br(Assembler::NE, cont); } - if (UseBiasedLocking && !UseOptoBiasInlining) { - __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont); - } - // Check for existing monitor __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor); @@ -3841,10 +3897,6 @@ encode %{ assert_different_registers(oop, box, tmp, disp_hdr); - if (UseBiasedLocking && !UseOptoBiasInlining) { - __ biased_locking_exit(oop, tmp, cont); - } - // Find the lock address and load the displaced header from the stack. __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); @@ -4103,6 +4155,16 @@ operand immIExt() interface(CONST_INTER); %} +operand immI_gt_1() +%{ + predicate(n->get_int() > 1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + operand immI_le_4() %{ predicate(n->get_int() <= 4); @@ -5457,6 +5519,15 @@ operand vRegD_V31() interface(REG_INTER); %} +operand pReg() +%{ + constraint(ALLOC_IN_RC(pr_reg)); + match(RegVectMask); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + operand pRegGov() %{ constraint(ALLOC_IN_RC(gov_pr)); @@ -8866,11 +8937,6 @@ instruct storePConditional(memory8 heap_top_ptr, iRegP oldval, iRegP newval, rFl ins_pipe(pipe_serial); %} - -// storeLConditional is used by PhaseMacroExpand::expand_lock_node -// when attempting to rebias a lock towards the current thread. We -// must use the acquire form of cmpxchg in order to guarantee acquire -// semantics in this case. 
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{ match(Set cr (StoreLConditional mem (Binary oldval newval))); @@ -10827,7 +10893,6 @@ instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) instruct mnegI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI0 zero) %{ match(Set dst (MulI (SubI zero src1) src2)); - match(Set dst (MulI src1 (SubI zero src2))); ins_cost(INSN_COST * 3); format %{ "mneg $dst, $src1, $src2" %} @@ -10879,7 +10944,6 @@ instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{ instruct mnegL(iRegLNoSp dst, iRegL src1, iRegL src2, immL0 zero) %{ match(Set dst (MulL (SubL zero src1) src2)); - match(Set dst (MulL src1 (SubL zero src2))); ins_cost(INSN_COST * 5); format %{ "mneg $dst, $src1, $src2" %} @@ -10929,7 +10993,6 @@ instruct smsubL(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegLNoSp src3) instruct smnegL(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2, immL0 zero) %{ match(Set dst (MulL (SubL zero (ConvI2L src1)) (ConvI2L src2))); - match(Set dst (MulL (ConvI2L src1) (SubL zero (ConvI2L src2)))); ins_cost(INSN_COST * 3); format %{ "smnegl $dst, $src1, $src2" %} @@ -14933,12 +14996,12 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag ins_pipe(pipe_class_memory); %} -instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr) +instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr) %{ predicate((uint64_t)n->in(2)->get_long() < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); match(Set dummy (ClearArray cnt base)); - effect(USE_KILL base); + effect(TEMP temp, USE_KILL base, KILL cr); ins_cost(4 * INSN_COST); format %{ "ClearArray $cnt, $base" %} @@ -16627,11 +16690,11 @@ instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, %} instruct string_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, - iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, - iRegINoSp tmp3, rFlagsReg cr) + iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, + iRegINoSp tmp3, rFlagsReg cr) %{ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); - predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); + predicate((UseSVE == 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); @@ -16650,7 +16713,7 @@ instruct stringL_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, iRegINoSp tmp3, rFlagsReg cr) %{ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); - predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); + predicate((UseSVE == 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); @@ -16658,8 +16721,8 @@ instruct stringL_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, ins_encode %{ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, - $result$$Register, $tmp1$$Register, $tmp2$$Register, - $tmp3$$Register); + $result$$Register, $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register); %} ins_pipe(pipe_class_memory); %} @@ -16798,6 +16861,7 @@ instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len, vRegD_V2 Vtmp3, vRegD_V3 Vtmp4, iRegI_R0 result, rFlagsReg cr) %{ + predicate(!((EncodeISOArrayNode*)n)->is_ascii()); match(Set result (EncodeISOArray src (Binary dst len))); 
effect(USE_KILL src, USE_KILL dst, USE_KILL len, KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr); diff --git a/src/hotspot/cpu/aarch64/aarch64_neon.ad b/src/hotspot/cpu/aarch64/aarch64_neon.ad index 1beac317c57a5b3f5ff9ff6df9be06e8272986d0..bb617aeb379bead5d09ee8993782048e2a8d3896 100644 --- a/src/hotspot/cpu/aarch64/aarch64_neon.ad +++ b/src/hotspot/cpu/aarch64/aarch64_neon.ad @@ -33,7 +33,7 @@ // Load Vector (16 bits) instruct loadV2(vecD dst, vmem2 mem) %{ - predicate(n->as_LoadVector()->memory_size() == 2); + predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 2); match(Set dst (LoadVector mem)); ins_cost(4 * INSN_COST); format %{ "ldrh $dst,$mem\t# vector (16 bits)" %} @@ -44,7 +44,7 @@ instruct loadV2(vecD dst, vmem2 mem) // Load Vector (32 bits) instruct loadV4(vecD dst, vmem4 mem) %{ - predicate(n->as_LoadVector()->memory_size() == 4); + predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 4); match(Set dst (LoadVector mem)); ins_cost(4 * INSN_COST); format %{ "ldrs $dst,$mem\t# vector (32 bits)" %} @@ -55,7 +55,7 @@ instruct loadV4(vecD dst, vmem4 mem) // Load Vector (64 bits) instruct loadV8(vecD dst, vmem8 mem) %{ - predicate(n->as_LoadVector()->memory_size() == 8); + predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 8); match(Set dst (LoadVector mem)); ins_cost(4 * INSN_COST); format %{ "ldrd $dst,$mem\t# vector (64 bits)" %} @@ -1850,7 +1850,7 @@ instruct vcmpD(vecD dst, vecD src1, vecD src2, immI cond) format %{ "vcmpD $dst, $src1, $src2\t# vector compare " %} ins_cost(INSN_COST); ins_encode %{ - BasicType bt = vector_element_basic_type(this); + BasicType bt = Matcher::vector_element_basic_type(this); assert(type2aelembytes(bt) != 8, "not supported"); __ neon_compare(as_FloatRegister($dst$$reg), bt, as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), (int)$cond$$constant, /*isQ*/ false); @@ -1865,7 +1865,7 @@ instruct vcmpX(vecX dst, vecX src1, vecX src2, immI cond) format %{ "vcmpX $dst, $src1, $src2\t# vector compare " %} ins_cost(INSN_COST); ins_encode %{ - BasicType bt = vector_element_basic_type(this); + BasicType bt = Matcher::vector_element_basic_type(this); __ neon_compare(as_FloatRegister($dst$$reg), bt, as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), (int)$cond$$constant, /*isQ*/ true); %} @@ -2473,9 +2473,10 @@ instruct vmaskcastX(vecX dst) instruct loadcon8B(vecD dst, immI0 src) %{ - predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 || - n->as_Vector()->length() == 8) && - n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + predicate(UseSVE == 0 && + (n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8) && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorLoadConst src)); ins_cost(INSN_COST); format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %} @@ -2488,7 +2489,7 @@ instruct loadcon8B(vecD dst, immI0 src) instruct loadcon16B(vecX dst, immI0 src) %{ - predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + predicate(UseSVE == 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorLoadConst src)); ins_cost(INSN_COST); format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %} @@ -2945,8 +2946,8 @@ instruct vabd2D(vecX dst, vecX src1, vecX src2) instruct replicate8B(vecD dst, iRegIorL2I src) %{ - predicate(n->as_Vector()->length() == 4 || - n->as_Vector()->length() 
== 8); + predicate(UseSVE == 0 && (n->as_Vector()->length() == 8 || + n->as_Vector()->length() == 4)); match(Set dst (ReplicateB src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (8B)" %} @@ -2970,8 +2971,8 @@ instruct replicate16B(vecX dst, iRegIorL2I src) instruct replicate8B_imm(vecD dst, immI con) %{ - predicate(n->as_Vector()->length() == 4 || - n->as_Vector()->length() == 8); + predicate(UseSVE == 0 && (n->as_Vector()->length() == 8 || + n->as_Vector()->length() == 4)); match(Set dst (ReplicateB con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector (8B)" %} @@ -2995,8 +2996,8 @@ instruct replicate16B_imm(vecX dst, immI con) instruct replicate4S(vecD dst, iRegIorL2I src) %{ - predicate(n->as_Vector()->length() == 2 || - n->as_Vector()->length() == 4); + predicate(UseSVE == 0 && (n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 2)); match(Set dst (ReplicateS src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (4S)" %} @@ -3020,8 +3021,8 @@ instruct replicate8S(vecX dst, iRegIorL2I src) instruct replicate4S_imm(vecD dst, immI con) %{ - predicate(n->as_Vector()->length() == 2 || - n->as_Vector()->length() == 4); + predicate(UseSVE == 0 && (n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 2)); match(Set dst (ReplicateS con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector (4H)" %} @@ -3045,7 +3046,7 @@ instruct replicate8S_imm(vecX dst, immI con) instruct replicate2I(vecD dst, iRegIorL2I src) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseSVE == 0 && n->as_Vector()->length() == 2); match(Set dst (ReplicateI src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (2I)" %} @@ -3069,7 +3070,7 @@ instruct replicate4I(vecX dst, iRegIorL2I src) instruct replicate2I_imm(vecD dst, immI con) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseSVE == 0 && n->as_Vector()->length() == 2); match(Set dst (ReplicateI con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector (2I)" %} @@ -3119,7 +3120,7 @@ instruct replicate2L_zero(vecX dst, immI0 zero) instruct replicate2F(vecD dst, vRegF src) %{ - predicate(n->as_Vector()->length() == 2); + predicate(UseSVE == 0 && n->as_Vector()->length() == 2); match(Set dst (ReplicateF src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (2F)" %} @@ -4249,8 +4250,8 @@ instruct vxor16B(vecX dst, vecX src1, vecX src2) // ------------------------------ Shift --------------------------------------- instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{ - predicate(n->as_Vector()->length_in_bytes() == 4 || - n->as_Vector()->length_in_bytes() == 8); + predicate(UseSVE == 0 && (n->as_Vector()->length_in_bytes() == 4 || + n->as_Vector()->length_in_bytes() == 8)); match(Set dst (LShiftCntV cnt)); match(Set dst (RShiftCntV cnt)); format %{ "dup $dst, $cnt\t# shift count vector (8B)" %} @@ -4261,7 +4262,7 @@ instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{ %} instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{ - predicate(n->as_Vector()->length_in_bytes() == 16); + predicate(UseSVE == 0 && (n->as_Vector()->length_in_bytes() == 16)); match(Set dst (LShiftCntV cnt)); match(Set dst (RShiftCntV cnt)); format %{ "dup $dst, $cnt\t# shift count vector (16B)" %} @@ -5296,3 +5297,172 @@ instruct vpopcount2I(vecD dst, vecD src) %{ %} ins_pipe(pipe_class_default); %} + +// vector mask reductions + +instruct vmask_truecount8B(iRegINoSp dst, vecD src, vecD tmp) %{ + predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN); + match(Set dst 
(VectorMaskTrueCount src)); + effect(TEMP tmp); + ins_cost(2 * INSN_COST); + format %{ "addv $tmp, $src\n\t" + "umov $dst, $tmp, B, 0\t# vector (8B)" %} + ins_encode %{ + // Input "src" is a vector of boolean represented as bytes with + // 0x00/0x01 as element values. + __ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($src$$reg)); + __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_truecount16B(iRegINoSp dst, vecX src, vecX tmp) %{ + predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN); + match(Set dst (VectorMaskTrueCount src)); + effect(TEMP tmp); + ins_cost(2 * INSN_COST); + format %{ "addv $tmp, $src\n\t" + "umov $dst, $tmp, B, 0\t# vector (16B)" %} + ins_encode %{ + // Input "src" is a vector of boolean represented as bytes with + // 0x00/0x01 as element values. + __ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($src$$reg)); + __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_firsttrue_LT8B(iRegINoSp dst, vecD src, rFlagsReg cr) %{ + predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN && + n->in(1)->bottom_type()->is_vect()->length() < 8); + match(Set dst (VectorMaskFirstTrue src)); + effect(KILL cr); + ins_cost(7 * INSN_COST); + format %{ "vmask_firsttrue $dst, $src\t# vector (4I/4S/2I)" %} + ins_encode %{ + // Returns the index of the first active lane of the + // vector mask, or VLENGTH if no lane is active. + // + // Input "src" is a vector of boolean represented as + // bytes with 0x00/0x01 as element values. + // + // Computed by reversing the bits and counting the leading + // zero bytes. + __ fmovd($dst$$Register, as_FloatRegister($src$$reg)); + __ rbit($dst$$Register, $dst$$Register); + __ clz($dst$$Register, $dst$$Register); + __ lsrw($dst$$Register, $dst$$Register, 3); + __ movw(rscratch1, Matcher::vector_length(this, $src)); + __ cmpw($dst$$Register, rscratch1); + __ cselw($dst$$Register, rscratch1, $dst$$Register, Assembler::GE); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_firsttrue8B(iRegINoSp dst, vecD src) %{ + predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN && + n->in(1)->bottom_type()->is_vect()->length() == 8); + match(Set dst (VectorMaskFirstTrue src)); + ins_cost(4 * INSN_COST); + format %{ "vmask_firsttrue $dst, $src\t# vector (8B)" %} + ins_encode %{ + // Returns the index of the first active lane of the + // vector mask, or VLENGTH if no lane is active. + // + // Input "src" is a vector of boolean represented as + // bytes with 0x00/0x01 as element values. + // + // Computed by reversing the bits and counting the leading + // zero bytes. + __ fmovd($dst$$Register, as_FloatRegister($src$$reg)); + __ rbit($dst$$Register, $dst$$Register); + __ clz($dst$$Register, $dst$$Register); + __ lsrw($dst$$Register, $dst$$Register, 3); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_firsttrue16B(iRegINoSp dst, vecX src) %{ + predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN); + match(Set dst (VectorMaskFirstTrue src)); + ins_cost(6 * INSN_COST); + format %{ "vmask_firsttrue $dst, $src\t# vector (16B)" %} + ins_encode %{ + // Returns the index of the first active lane of the + // vector mask, or 16 (VLENGTH) if no lane is active. + // + // Input "src" is a vector of boolean represented as + // bytes with 0x00/0x01 as element values. 
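+    //
+    // For example, if lane 9 is the only active lane, the lower 64 bits
+    // are all zero, so the result comes from the upper half: rscratch1 is
+    // set to 8, rbit/clz count 8 leading zero bits (one zero byte), and
+    // the final index is 8 + (8 >> 3) == 9.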
+ + Label FIRST_TRUE_INDEX; + + // Try to compute the result from the lower 64 bits. + __ fmovd($dst$$Register, as_FloatRegister($src$$reg)); + __ movw(rscratch1, zr); + __ cbnz($dst$$Register, FIRST_TRUE_INDEX); + + // Compute the result from the higher 64 bits. + __ fmovhid($dst$$Register, as_FloatRegister($src$$reg)); + __ movw(rscratch1, 8); + + // Reverse the bits and count the leading zero bytes. + __ bind(FIRST_TRUE_INDEX); + __ rbit($dst$$Register, $dst$$Register); + __ clz($dst$$Register, $dst$$Register); + __ addw($dst$$Register, rscratch1, $dst$$Register, Assembler::LSR, 3); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_lasttrue8B(iRegINoSp dst, vecD src) %{ + predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN); + match(Set dst (VectorMaskLastTrue src)); + ins_cost(4 * INSN_COST); + format %{ "vmask_lasttrue $dst, $src\t# vector (8B)" %} + ins_encode %{ + // Returns the index of the last active lane of the + // vector mask, or -1 if no lane is active. + // + // Input "src" is a vector of boolean represented as + // bytes with 0x00/0x01 as element values. + // + // Computed by counting the leading zero bytes and + // subtracting the count from 7 (VLENGTH - 1). + __ fmovd($dst$$Register, as_FloatRegister($src$$reg)); + __ clz($dst$$Register, $dst$$Register); + __ movw(rscratch1, 7); + __ subw($dst$$Register, rscratch1, $dst$$Register, Assembler::LSR, 3); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_lasttrue16B(iRegINoSp dst, vecX src) %{ + predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN); + match(Set dst (VectorMaskLastTrue src)); + ins_cost(5 * INSN_COST); + format %{ "vmask_lasttrue $dst, $src\t# vector (16B)" %} + ins_encode %{ + // Returns the index of the last active lane of the + // vector mask, or -1 if no lane is active. + // + // Input "src" is a vector of boolean represented as + // bytes with 0x00/0x01 as element values. + + Label LAST_TRUE_INDEX; + + // Try to compute the result from the higher 64 bits. + __ fmovhid($dst$$Register, as_FloatRegister($src$$reg)); + __ movw(rscratch1, 16 - 1); + __ cbnz($dst$$Register, LAST_TRUE_INDEX); + + // Compute the result from the lower 64 bits. + __ fmovd($dst$$Register, as_FloatRegister($src$$reg)); + __ movw(rscratch1, 8 - 1); + + // Count the leading zero bytes and subtract the count from 15 (VLENGTH - 1).
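+    // For example, if lane 14 is the last active lane, the upper half is
+    // non-zero, so rscratch1 is 15; clz of the upper half returns 15,
+    // 15 >> 3 == 1 zero byte, and 15 - 1 == 14.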
+ __ bind(LAST_TRUE_INDEX); + __ clz($dst$$Register, $dst$$Register); + __ subw($dst$$Register, rscratch1, $dst$$Register, Assembler::LSR, 3); + %} + ins_pipe(pipe_slow); +%} diff --git a/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 index 306cd1b56ced9b7459669d8491ac15f6894d6a42..a0f980e44cb11880c8369e75b05457546132962c 100644 --- a/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 @@ -69,9 +69,9 @@ instruct $3V$4`'(vec$5 $7, vmem$4 mem) ins_pipe(v$3`_reg_mem'ifelse(eval($4 * 8), 128, 128, 64)); %}')dnl dnl $1 $2 $3 $4 $5 $6 $7 $8 -VLoadStore(ldrh, H, load, 2, D, 16, dst, ) -VLoadStore(ldrs, S, load, 4, D, 32, dst, ) -VLoadStore(ldrd, D, load, 8, D, 64, dst, ) +VLoadStore(ldrh, H, load, 2, D, 16, dst, UseSVE == 0 && ) +VLoadStore(ldrs, S, load, 4, D, 32, dst, UseSVE == 0 && ) +VLoadStore(ldrd, D, load, 8, D, 64, dst, UseSVE == 0 && ) VLoadStore(ldrq, Q, load, 16, X, 128, dst, UseSVE == 0 && ) VLoadStore(strh, H, store, 2, D, 16, src, ) VLoadStore(strs, S, store, 4, D, 32, src, ) @@ -872,7 +872,7 @@ instruct vcmpD(vecD dst, vecD src1, vecD src2, immI cond) format %{ "vcmpD $dst, $src1, $src2\t# vector compare " %} ins_cost(INSN_COST); ins_encode %{ - BasicType bt = vector_element_basic_type(this); + BasicType bt = Matcher::vector_element_basic_type(this); assert(type2aelembytes(bt) != 8, "not supported"); __ neon_compare(as_FloatRegister($dst$$reg), bt, as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), (int)$cond$$constant, /*isQ*/ false); @@ -887,7 +887,7 @@ instruct vcmpX(vecX dst, vecX src1, vecX src2, immI cond) format %{ "vcmpX $dst, $src1, $src2\t# vector compare " %} ins_cost(INSN_COST); ins_encode %{ - BasicType bt = vector_element_basic_type(this); + BasicType bt = Matcher::vector_element_basic_type(this); __ neon_compare(as_FloatRegister($dst$$reg), bt, as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), (int)$cond$$constant, /*isQ*/ true); %} @@ -1196,10 +1196,11 @@ dnl //-------------------------------- LOAD_IOTA_INDICES---------------------------------- dnl define(`PREDICATE', `ifelse($1, 8, -`predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 || - n->as_Vector()->length() == 8) && - n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);', -`predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);')')dnl +`predicate(UseSVE == 0 && + (n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8) && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);', +`predicate(UseSVE == 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);')')dnl dnl define(`VECTOR_LOAD_CON', ` instruct loadcon$1B`'(vec$2 dst, immI0 src) @@ -1466,9 +1467,10 @@ dnl define(`VREPLICATE', ` instruct replicate$3$4$5`'(vec$6 dst, $7 ifelse($7, immI0, zero, $7, immI, con, src)) %{ - predicate(ifelse($8, UseSVE == 0 && , $8, - $8, , , $8` - ')n->as_Vector()->length() == $3); + predicate(UseSVE == 0 && ifelse($8, `', + n->as_Vector()->length() == $3, + (n->as_Vector()->length() == $3 ||` + 'n->as_Vector()->length() == $8))); match(Set dst (Replicate`'ifelse($7, immI0, I, $4) ifelse($7, immI0, zero, $7, immI, con, $7, zero, I, src))); ins_cost(INSN_COST); format %{ "$1 $dst, $ifelse($7, immI0, zero, $7, immI, con, src)`\t# vector ('ifelse($4$7, SimmI, $3H, $2, eor, 4I, $3$4)`)"' %} @@ -1494,24 +1496,24 @@ instruct replicate$3$4$5`'(vec$6 dst, $7 
ifelse($7, immI0, zero, $7, immI, con, $7, iRegL, vdup_reg_reg, $4, F, vdup_reg_freg, vdup_reg_dreg)`'ifelse($6, X, 128, 64)); %}')dnl -dnl $1 $2 $3 $4 $5 $6 $7 $8 $9 -VREPLICATE(dup, dup, 8, B, , D, iRegIorL2I, n->as_Vector()->length() == 4 ||, B) -VREPLICATE(dup, dup, 16, B, , X, iRegIorL2I, UseSVE == 0 && , B) -VREPLICATE(movi, mov, 8, B, _imm, D, immI, n->as_Vector()->length() == 4 ||, B) -VREPLICATE(movi, mov, 16, B, _imm, X, immI, UseSVE == 0 && , B) -VREPLICATE(dup, dup, 4, S, , D, iRegIorL2I, n->as_Vector()->length() == 2 ||, H) -VREPLICATE(dup, dup, 8, S, , X, iRegIorL2I, UseSVE == 0 && , H) -VREPLICATE(movi, mov, 4, S, _imm, D, immI, n->as_Vector()->length() == 2 ||, H) -VREPLICATE(movi, mov, 8, S, _imm, X, immI, UseSVE == 0 && , H) -VREPLICATE(dup, dup, 2, I, , D, iRegIorL2I, , S) -VREPLICATE(dup, dup, 4, I, , X, iRegIorL2I, UseSVE == 0 && , S) -VREPLICATE(movi, mov, 2, I, _imm, D, immI, , S) -VREPLICATE(movi, mov, 4, I, _imm, X, immI, UseSVE == 0 && , S) -VREPLICATE(dup, dup, 2, L, , X, iRegL, UseSVE == 0 && , D) -VREPLICATE(movi, eor, 2, L, _zero, X, immI0, UseSVE == 0 && , D) -VREPLICATE(dup, dup, 2, F, , D, vRegF, , S) -VREPLICATE(dup, dup, 4, F, , X, vRegF, UseSVE == 0 && , S) -VREPLICATE(dup, dup, 2, D, , X, vRegD, UseSVE == 0 && , D) +dnl $1 $2 $3 $4 $5 $6 $7 $8 $9 +VREPLICATE(dup, dup, 8, B, , D, iRegIorL2I, 4, B) +VREPLICATE(dup, dup, 16, B, , X, iRegIorL2I, , B) +VREPLICATE(movi, mov, 8, B, _imm, D, immI, 4, B) +VREPLICATE(movi, mov, 16, B, _imm, X, immI, , B) +VREPLICATE(dup, dup, 4, S, , D, iRegIorL2I, 2, H) +VREPLICATE(dup, dup, 8, S, , X, iRegIorL2I, , H) +VREPLICATE(movi, mov, 4, S, _imm, D, immI, 2, H) +VREPLICATE(movi, mov, 8, S, _imm, X, immI, , H) +VREPLICATE(dup, dup, 2, I, , D, iRegIorL2I, , S) +VREPLICATE(dup, dup, 4, I, , X, iRegIorL2I, , S) +VREPLICATE(movi, mov, 2, I, _imm, D, immI, , S) +VREPLICATE(movi, mov, 4, I, _imm, X, immI, , S) +VREPLICATE(dup, dup, 2, L, , X, iRegL, , D) +VREPLICATE(movi, eor, 2, L, _zero, X, immI0, , D) +VREPLICATE(dup, dup, 2, F, , D, vRegF, , S) +VREPLICATE(dup, dup, 4, F, , X, vRegF, , S) +VREPLICATE(dup, dup, 2, D, , X, vRegD, , D) dnl // ====================REDUCTION ARITHMETIC==================================== @@ -1884,8 +1886,8 @@ VLOGICAL(xor, eor, xor, Xor, 16, B, X) dnl define(`VSHIFTCNT', ` instruct vshiftcnt$3$4`'(vec$5 dst, iRegIorL2I cnt) %{ - predicate(ifelse($3, 8, n->as_Vector()->length_in_bytes() == 4 ||` - ')n->as_Vector()->length_in_bytes() == $3); + predicate(UseSVE == 0 && (ifelse($3, 8, n->as_Vector()->length_in_bytes() == 4 ||` + ')n->as_Vector()->length_in_bytes() == $3)); match(Set dst (LShiftCntV cnt)); match(Set dst (RShiftCntV cnt)); format %{ "$1 $dst, $cnt\t# shift count vector ($3$4)" %} @@ -2243,3 +2245,151 @@ instruct vpopcount$1$2`'(vec$5 dst, vec$5 src) %{ dnl $1 $2 $3 $4 $5 VPOPCOUNT(4, I, 16, 8, X) VPOPCOUNT(2, I, 8, 4, D) +dnl +dnl VMASK_TRUECOUNT($1, $2 ) +dnl VMASK_TRUECOUNT(suffix, reg) +define(`VMASK_TRUECOUNT', ` +instruct vmask_truecount$1(iRegINoSp dst, $2 src, $2 tmp) %{ + predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN); + match(Set dst (VectorMaskTrueCount src)); + effect(TEMP tmp); + ins_cost(2 * INSN_COST); + format %{ "addv $tmp, $src\n\t" + "umov $dst, $tmp, B, 0\t# vector ($1)" %} + ins_encode %{ + // Input "src" is a vector of boolean represented as bytes with + // 0x00/0x01 as element values. 
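+    // Since each active lane contributes exactly 1 to the sum, the
+    // byte-wise addv total equals the number of true lanes (at most 16,
+    // so it fits in the lowest byte moved out by umov).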
+ __ addv(as_FloatRegister($tmp$$reg), __ T$1, as_FloatRegister($src$$reg)); + __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +dnl +define(`ARGLIST', +`ifelse($1, `_LT8B', `iRegINoSp dst, vecD src, rFlagsReg cr', `iRegINoSp dst, vecD src')') +dnl +dnl VMASK_FIRSTTRUE_D($1, $2, $3, $4 ) +dnl VMASK_FIRSTTRUE_D(suffix, cond, cost, size) +define(`VMASK_FIRSTTRUE_D', ` +instruct vmask_firsttrue$1(ARGLIST($1)) %{ + predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN && + n->in(1)->bottom_type()->is_vect()->length() $2 8); + match(Set dst (VectorMaskFirstTrue src));dnl +ifelse($1, `_LT8B', ` + effect(KILL cr);') + ins_cost($3 * INSN_COST); + format %{ "vmask_firsttrue $dst, $src\t# vector ($4)" %} + ins_encode %{ + // Returns the index of the first active lane of the + // vector mask, or VLENGTH if no lane is active. + // + // Input "src" is a vector of boolean represented as + // bytes with 0x00/0x01 as element values. + // + // Computed by reversing the bits and counting the leading + // zero bytes. + __ fmovd($dst$$Register, as_FloatRegister($src$$reg)); + __ rbit($dst$$Register, $dst$$Register); + __ clz($dst$$Register, $dst$$Register); + __ lsrw($dst$$Register, $dst$$Register, 3);dnl +ifelse(`$1', `_LT8B', ` + __ movw(rscratch1, Matcher::vector_length(this, $src)); + __ cmpw($dst$$Register, rscratch1); + __ cselw($dst$$Register, rscratch1, $dst$$Register, Assembler::GE);') + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +undefine(ARGLIST)dnl +dnl +// vector mask reductions +VMASK_TRUECOUNT(8B, vecD) +VMASK_TRUECOUNT(16B, vecX) +VMASK_FIRSTTRUE_D(_LT8B, <, 7, 4I/4S/2I) +VMASK_FIRSTTRUE_D(8B, ==, 4, 8B) + +instruct vmask_firsttrue16B(iRegINoSp dst, vecX src) %{ + predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN); + match(Set dst (VectorMaskFirstTrue src)); + ins_cost(6 * INSN_COST); + format %{ "vmask_firsttrue $dst, $src\t# vector (16B)" %} + ins_encode %{ + // Returns the index of the first active lane of the + // vector mask, or 16 (VLENGTH) if no lane is active. + // + // Input "src" is a vector of boolean represented as + // bytes with 0x00/0x01 as element values. + + Label FIRST_TRUE_INDEX; + + // Try to compute the result from the lower 64 bits. + __ fmovd($dst$$Register, as_FloatRegister($src$$reg)); + __ movw(rscratch1, zr); + __ cbnz($dst$$Register, FIRST_TRUE_INDEX); + + // Compute the result from the higher 64 bits. + __ fmovhid($dst$$Register, as_FloatRegister($src$$reg)); + __ movw(rscratch1, 8); + + // Reverse the bits and count the leading zero bytes. + __ bind(FIRST_TRUE_INDEX); + __ rbit($dst$$Register, $dst$$Register); + __ clz($dst$$Register, $dst$$Register); + __ addw($dst$$Register, rscratch1, $dst$$Register, Assembler::LSR, 3); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_lasttrue8B(iRegINoSp dst, vecD src) %{ + predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN); + match(Set dst (VectorMaskLastTrue src)); + ins_cost(4 * INSN_COST); + format %{ "vmask_lasttrue $dst, $src\t# vector (8B)" %} + ins_encode %{ + // Returns the index of the last active lane of the + // vector mask, or -1 if no lane is active. + // + // Input "src" is a vector of boolean represented as + // bytes with 0x00/0x01 as element values. + // + // Computed by counting the leading zero bytes and + // subtracting the count from 7 (VLENGTH - 1).
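+    // For example, if lane 5 is the last active lane, clz returns 23
+    // (two whole zero bytes plus seven zero bits), and
+    // 7 - (23 >> 3) == 7 - 2 == 5.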
+ __ fmovd($dst$$Register, as_FloatRegister($src$$reg)); + __ clz($dst$$Register, $dst$$Register); + __ movw(rscratch1, 7); + __ subw($dst$$Register, rscratch1, $dst$$Register, Assembler::LSR, 3); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_lasttrue16B(iRegINoSp dst, vecX src) %{ + predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN); + match(Set dst (VectorMaskLastTrue src)); + ins_cost(5 * INSN_COST); + format %{ "vmask_lasttrue $dst, $src\t# vector (16B)" %} + ins_encode %{ + // Returns the index of the last active lane of the + // vector mask, or -1 if no lane is active. + // + // Input "src" is a vector of boolean represented as + // bytes with 0x00/0x01 as element values. + + Label LAST_TRUE_INDEX; + + // Try to compute the result from the higher 64 bits. + __ fmovhid($dst$$Register, as_FloatRegister($src$$reg)); + __ movw(rscratch1, 16 - 1); + __ cbnz($dst$$Register, LAST_TRUE_INDEX); + + // Compute the result from the lower 64 bits. + __ fmovd($dst$$Register, as_FloatRegister($src$$reg)); + __ movw(rscratch1, 8 - 1); + + // Count the leading zero bytes and subtract the count from 15 (VLENGTH - 1). + __ bind(LAST_TRUE_INDEX); + __ clz($dst$$Register, $dst$$Register); + __ subw($dst$$Register, rscratch1, $dst$$Register, Assembler::LSR, 3); + %} + ins_pipe(pipe_slow); +%} diff --git a/src/hotspot/cpu/aarch64/aarch64_sve.ad b/src/hotspot/cpu/aarch64/aarch64_sve.ad index bcd38acdcb7f2af608425a92a22a4143969b7bd4..1910ef42b255ec689691d8d1fe1006a52a31d369 100644 --- a/src/hotspot/cpu/aarch64/aarch64_sve.ad +++ b/src/hotspot/cpu/aarch64/aarch64_sve.ad @@ -32,6 +32,7 @@ operand vmemA_immIOffset4() %{ + // (esize / msize) = 1 predicate(Address::offset_ok_for_sve_immed(n->get_int(), 4, Matcher::scalable_vector_reg_size(T_BYTE))); match(ConI); @@ -43,6 +44,7 @@ operand vmemA_immLOffset4() %{ + // (esize / msize) = 1 predicate(Address::offset_ok_for_sve_immed(n->get_long(), 4, Matcher::scalable_vector_reg_size(T_BYTE))); match(ConL); @@ -57,7 +59,7 @@ operand vmemA_indOffI4(iRegP reg, vmemA_immIOffset4 off) constraint(ALLOC_IN_RC(ptr_reg)); match(AddP reg off); op_cost(0); - format %{ "[$reg, $off, MUL VL]" %} + format %{ "[$reg, $off]" %} interface(MEMORY_INTER) %{ base($reg); index(0xffffffff); @@ -71,7 +73,7 @@ operand vmemA_indOffL4(iRegP reg, vmemA_immLOffset4 off) constraint(ALLOC_IN_RC(ptr_reg)); match(AddP reg off); op_cost(0); - format %{ "[$reg, $off, MUL VL]" %} + format %{ "[$reg, $off]" %} interface(MEMORY_INTER) %{ base($reg); index(0xffffffff); @@ -80,133 +82,81 @@ operand vmemA_indOffL4(iRegP reg, vmemA_immLOffset4 off) %} %} +// The indOff of vmemA is valid only when the vector element (load to/store from) +// size equals the memory element (load from/store to) size.
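+// For example, loading int lanes from an int array has (esize / msize) = 1
+// and may use the immediate-offset forms; a widening access, where the
+// memory element is smaller than the vector element, must use the plain
+// indirect form instead.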
opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4); source_hpp %{ - bool op_sve_supported(int opcode); + bool op_sve_supported(int opcode, int vlen, BasicType bt); %} source %{ - static inline BasicType vector_element_basic_type(const MachNode* n) { - const TypeVect* vt = n->bottom_type()->is_vect(); - return vt->element_basic_type(); - } - - static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) { - int def_idx = use->operand_index(opnd); - Node* def = use->in(def_idx); - const TypeVect* vt = def->bottom_type()->is_vect(); - return vt->element_basic_type(); - } - - static Assembler::SIMD_RegVariant elemBytes_to_regVariant(int esize) { - switch(esize) { - case 1: - return Assembler::B; - case 2: - return Assembler::H; - case 4: - return Assembler::S; - case 8: - return Assembler::D; - default: - assert(false, "unsupported"); - ShouldNotReachHere(); - } - return Assembler::INVALID; - } - - static Assembler::SIMD_RegVariant elemType_to_regVariant(BasicType bt) { - return elemBytes_to_regVariant(type2aelembytes(bt)); - } typedef void (C2_MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T, PRegister Pg, const Address &adr); // Predicated load/store, with optional ptrue to all elements of given predicate register. - static void loadStoreA_predicate(C2_MacroAssembler masm, bool is_store, - FloatRegister reg, PRegister pg, BasicType bt, - int opcode, Register base, int index, int size, int disp) { + static void loadStoreA_predicated(C2_MacroAssembler masm, bool is_store, FloatRegister reg, + PRegister pg, BasicType mem_elem_bt, BasicType vector_elem_bt, + int opcode, Register base, int index, int size, int disp) { sve_mem_insn_predicate insn; - Assembler::SIMD_RegVariant type; - int esize = type2aelembytes(bt); + int mesize = type2aelembytes(mem_elem_bt); if (index == -1) { assert(size == 0, "unsupported address mode: scale size = %d", size); - switch(esize) { + switch(mesize) { case 1: insn = is_store ? &C2_MacroAssembler::sve_st1b : &C2_MacroAssembler::sve_ld1b; - type = Assembler::B; break; case 2: insn = is_store ? &C2_MacroAssembler::sve_st1h : &C2_MacroAssembler::sve_ld1h; - type = Assembler::H; break; case 4: insn = is_store ? &C2_MacroAssembler::sve_st1w : &C2_MacroAssembler::sve_ld1w; - type = Assembler::S; break; case 8: insn = is_store ? 
&C2_MacroAssembler::sve_st1d : &C2_MacroAssembler::sve_ld1d; - type = Assembler::D; break; default: assert(false, "unsupported"); ShouldNotReachHere(); } - (masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE))); + int imm4 = disp / mesize / Matcher::scalable_vector_reg_size(vector_elem_bt); + (masm.*insn)(reg, Assembler::elemType_to_regVariant(vector_elem_bt), pg, Address(base, imm4)); } else { assert(false, "unimplemented"); ShouldNotReachHere(); } } - bool op_sve_supported(int opcode) { + bool op_sve_supported(int opcode, int vlen, BasicType bt) { + int length_in_bytes = vlen * type2aelembytes(bt); switch (opcode) { case Op_MulAddVS2VI: - // No multiply reduction instructions + // No multiply reduction instructions case Op_MulReductionVD: case Op_MulReductionVF: case Op_MulReductionVI: case Op_MulReductionVL: - // Others - case Op_Extract: - case Op_ExtractB: + // Others case Op_ExtractC: - case Op_ExtractD: - case Op_ExtractF: - case Op_ExtractI: - case Op_ExtractL: - case Op_ExtractS: case Op_ExtractUB: + return false; // Vector API specific - case Op_AndReductionV: - case Op_OrReductionV: - case Op_XorReductionV: - case Op_MaxReductionV: - case Op_MinReductionV: - case Op_LoadVectorGather: - case Op_StoreVectorScatter: - case Op_VectorBlend: - case Op_VectorCast: - case Op_VectorCastB2X: - case Op_VectorCastD2X: - case Op_VectorCastF2X: - case Op_VectorCastI2X: - case Op_VectorCastL2X: - case Op_VectorCastS2X: - case Op_VectorInsert: - case Op_VectorLoadConst: - case Op_VectorLoadMask: case Op_VectorLoadShuffle: - case Op_VectorMaskCmp: case Op_VectorRearrange: - case Op_VectorReinterpret: - case Op_VectorStoreMask: - case Op_VectorTest: - return false; + if (vlen < 4 || length_in_bytes > MaxVectorSize) { + return false; + } else { + return true; + } + case Op_LoadVector: + case Op_StoreVector: + return Matcher::vector_size_supported(bt, vlen); default: - return true; + break; } + // By default, we only support vector operations with at least 8 bytes and 2 elements.
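+  // For example, with MaxVectorSize == 32: 2 x int (8 bytes) passes this
+  // check, while 2 x byte (2 bytes) is too short and 8 x long (64 bytes)
+  // exceeds MaxVectorSize.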
+ return 8 <= length_in_bytes && length_in_bytes <= MaxVectorSize && vlen >= 2; } %} @@ -219,31 +169,203 @@ definitions %{ // vector load/store -// Use predicated vector load/store +// Unpredicated vector load/store instruct loadV(vReg dst, vmemA mem) %{ - predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16); + predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16 && + n->as_LoadVector()->memory_size() == MaxVectorSize); match(Set dst (LoadVector mem)); - ins_cost(SVE_COST); - format %{ "sve_ldr $dst, $mem\t # vector (sve)" %} + ins_cost(4 * SVE_COST); + format %{ "sve_ldr $dst, $mem\t# vector (sve)" %} ins_encode %{ FloatRegister dst_reg = as_FloatRegister($dst$$reg); - loadStoreA_predicate(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue, - vector_element_basic_type(this), $mem->opcode(), - as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + BasicType bt = Matcher::vector_element_basic_type(this); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue, + bt, bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} ins_pipe(pipe_slow); %} instruct storeV(vReg src, vmemA mem) %{ - predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16); + predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16 && + n->as_StoreVector()->memory_size() == MaxVectorSize); match(Set mem (StoreVector mem src)); - ins_cost(SVE_COST); - format %{ "sve_str $mem, $src\t # vector (sve)" %} + ins_cost(4 * SVE_COST); + format %{ "sve_str $mem, $src\t# vector (sve)" %} ins_encode %{ FloatRegister src_reg = as_FloatRegister($src$$reg); - loadStoreA_predicate(C2_MacroAssembler(&cbuf), true, src_reg, ptrue, - vector_element_basic_type(this, $src), $mem->opcode(), - as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + BasicType bt = Matcher::vector_element_basic_type(this, $src); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, src_reg, ptrue, + bt, bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} + +// Load Vector (16 bits) +instruct loadV2_vreg(vReg dst, vmem2 mem) +%{ + predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() == 2); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrh $dst,$mem\t# vector (16 bits)" %} + ins_encode( aarch64_enc_ldrvH(dst, mem) ); + ins_pipe(vload_reg_mem64); +%} + +// Store Vector (16 bits) +instruct storeV2_vreg(vReg src, vmem2 mem) +%{ + predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() == 2); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strh $mem,$src\t# vector (16 bits)" %} + ins_encode( aarch64_enc_strvH(src, mem) ); + ins_pipe(vstore_reg_mem64); +%} + +// Load Vector (32 bits) +instruct loadV4_vreg(vReg dst, vmem4 mem) +%{ + predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() == 4); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrs $dst,$mem\t# vector (32 bits)" %} + ins_encode( aarch64_enc_ldrvS(dst, mem) ); + ins_pipe(vload_reg_mem64); +%} + +// Store Vector (32 bits) +instruct storeV4_vreg(vReg src, vmem4 mem) +%{ + predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() == 4); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strs $mem,$src\t# vector (32 bits)" %} + ins_encode( aarch64_enc_strvS(src, mem) ); + ins_pipe(vstore_reg_mem64); +%} + +// Load Vector (64 bits) +instruct loadV8_vreg(vReg dst, vmem8 mem) +%{ + predicate(UseSVE > 0 && 
n->as_LoadVector()->memory_size() == 8); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrd $dst,$mem\t# vector (64 bits)" %} + ins_encode( aarch64_enc_ldrvD(dst, mem) ); + ins_pipe(vload_reg_mem64); +%} + +// Store Vector (64 bits) +instruct storeV8_vreg(vReg src, vmem8 mem) +%{ + predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strd $mem,$src\t# vector (64 bits)" %} + ins_encode( aarch64_enc_strvD(src, mem) ); + ins_pipe(vstore_reg_mem64); +%} + +// Load Vector (128 bits) +instruct loadV16_vreg(vReg dst, vmem16 mem) +%{ + predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() == 16); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrq $dst,$mem\t# vector (128 bits)" %} + ins_encode( aarch64_enc_ldrvQ(dst, mem) ); + ins_pipe(vload_reg_mem128); +%} + +// Store Vector (128 bits) +instruct storeV16_vreg(vReg src, vmem16 mem) +%{ + predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() == 16); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strq $mem,$src\t# vector (128 bits)" %} + ins_encode( aarch64_enc_strvQ(src, mem) ); + ins_pipe(vstore_reg_mem128); +%} + +// Predicated vector load/store, based on the vector length of the node. +// Only load/store values in the range of the memory_size. This is needed +// when the memory_size is lower than the hardware supported max vector size. +// And this might happen for Vector API mask vector load/store. +instruct loadV_partial(vReg dst, vmemA mem, pRegGov pTmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() > 16 && + n->as_LoadVector()->memory_size() < MaxVectorSize); + match(Set dst (LoadVector mem)); + effect(TEMP pTmp, KILL cr); + ins_cost(6 * SVE_COST); + format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" + "sve_ldr $dst, $pTmp, $mem\t# load vector predicated" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ elemType_to_regVariant(bt), + Matcher::vector_length(this)); + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, + as_PRegister($pTmp$$reg), bt, bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} + +instruct storeV_partial(vReg src, vmemA mem, pRegGov pTmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() > 16 && + n->as_StoreVector()->memory_size() < MaxVectorSize); + match(Set mem (StoreVector mem src)); + effect(TEMP pTmp, KILL cr); + ins_cost(5 * SVE_COST); + format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" + "sve_str $src, $pTmp, $mem\t# store vector predicated" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src); + __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ elemType_to_regVariant(bt), + Matcher::vector_length(this, $src)); + FloatRegister src_reg = as_FloatRegister($src$$reg); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, src_reg, + as_PRegister($pTmp$$reg), bt, bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} + +// vector reinterpret + +instruct reinterpret(vReg dst) %{ + predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() == + n->in(1)->bottom_type()->is_vect()->length_in_bytes()); // src == dst + match(Set dst (VectorReinterpret dst)); + 
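// Note (annotation): sve_whilelo_zr_imm above builds the governing predicate
// with the low vector_length lanes active; e.g. 8 int lanes on an assumed
// 512-bit (16 S-lane) machine activates 8 lanes, so the predicated ld1/st1
// moves exactly 8 * 4 = 32 bytes. The reinterpret rule defined here is free
// by contrast: a same-size VectorReinterpret changes only the type, not the
// bits, hence the empty encoding and zero cost below.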
ins_cost(0); + format %{ "# reinterpret $dst\t# do nothing" %} + ins_encode %{ + // empty + %} + ins_pipe(pipe_class_empty); +%} + +instruct reinterpretResize(vReg dst, vReg src, pRegGov pTmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() != + n->in(1)->bottom_type()->is_vect()->length_in_bytes()); // src != dst + match(Set dst (VectorReinterpret src)); + effect(TEMP_DEF dst, TEMP pTmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "reinterpretResize $dst, $src\t# vector (sve)" %} + ins_encode %{ + uint length_in_bytes_src = Matcher::vector_length_in_bytes(this, $src); + uint length_in_bytes_dst = Matcher::vector_length_in_bytes(this); + uint length_in_bytes_resize = length_in_bytes_src < length_in_bytes_dst ? + length_in_bytes_src : length_in_bytes_dst; + assert(length_in_bytes_src <= MaxVectorSize && length_in_bytes_dst <= MaxVectorSize, + "invalid vector length"); + __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ B, length_in_bytes_resize); + __ sve_dup(as_FloatRegister($dst$$reg), __ B, 0); + __ sve_sel(as_FloatRegister($dst$$reg), __ B, as_PRegister($pTmp$$reg), + as_FloatRegister($src$$reg), as_FloatRegister($dst$$reg)); %} ins_pipe(pipe_slow); %} @@ -251,7 +373,7 @@ instruct storeV(vReg src, vmemA mem) %{ // vector abs instruct vabsB(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 16 && + predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (AbsVB src)); ins_cost(SVE_COST); @@ -264,7 +386,7 @@ instruct vabsB(vReg dst, vReg src) %{ %} instruct vabsS(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 8 && + predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (AbsVS src)); ins_cost(SVE_COST); @@ -277,7 +399,7 @@ instruct vabsS(vReg dst, vReg src) %{ %} instruct vabsI(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 && + predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AbsVI src)); ins_cost(SVE_COST); @@ -290,7 +412,7 @@ instruct vabsI(vReg dst, vReg src) %{ %} instruct vabsL(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 && + predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (AbsVL src)); ins_cost(SVE_COST); @@ -303,7 +425,7 @@ instruct vabsL(vReg dst, vReg src) %{ %} instruct vabsF(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 && + predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (AbsVF src)); ins_cost(SVE_COST); @@ -316,7 +438,7 @@ instruct vabsF(vReg dst, vReg src) %{ %} instruct vabsD(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 && + predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (AbsVD src)); ins_cost(SVE_COST); @@ -331,7 +453,7 @@ instruct vabsD(vReg dst, vReg src) %{ // vector add instruct vaddB(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + predicate(UseSVE > 0); match(Set dst (AddVB src1 src2)); ins_cost(SVE_COST); format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (B)" %} @@ -344,7 +466,7 @@ instruct vaddB(vReg dst, vReg src1, vReg src2) %{ %} instruct vaddS(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + predicate(UseSVE > 0); match(Set dst (AddVS src1 src2)); 
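// Note (annotation, applies to this and the following hunks): the per-size
// lane-count guards (length() >= 16 for B, >= 8 for H, >= 4 for S, >= 2 for D)
// are collapsed to a bare UseSVE > 0. The minimum-size policy now lives in one
// place, op_sve_supported() above, instead of being repeated in every pattern.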
ins_cost(SVE_COST); format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (H)" %} @@ -357,7 +479,7 @@ instruct vaddS(vReg dst, vReg src1, vReg src2) %{ %} instruct vaddI(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + predicate(UseSVE > 0); match(Set dst (AddVI src1 src2)); ins_cost(SVE_COST); format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (S)" %} @@ -370,7 +492,7 @@ instruct vaddI(vReg dst, vReg src1, vReg src2) %{ %} instruct vaddL(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + predicate(UseSVE > 0); match(Set dst (AddVL src1 src2)); ins_cost(SVE_COST); format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (D)" %} @@ -383,7 +505,7 @@ instruct vaddL(vReg dst, vReg src1, vReg src2) %{ %} instruct vaddF(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + predicate(UseSVE > 0); match(Set dst (AddVF src1 src2)); ins_cost(SVE_COST); format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (S)" %} @@ -396,7 +518,7 @@ instruct vaddF(vReg dst, vReg src1, vReg src2) %{ %} instruct vaddD(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + predicate(UseSVE > 0); match(Set dst (AddVD src1 src2)); ins_cost(SVE_COST); format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (D)" %} @@ -411,7 +533,7 @@ instruct vaddD(vReg dst, vReg src1, vReg src2) %{ // vector and instruct vand(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + predicate(UseSVE > 0); match(Set dst (AndV src1 src2)); ins_cost(SVE_COST); format %{ "sve_and $dst, $src1, $src2\t# vector (sve)" %} @@ -426,7 +548,7 @@ instruct vand(vReg dst, vReg src1, vReg src2) %{ // vector or instruct vor(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + predicate(UseSVE > 0); match(Set dst (OrV src1 src2)); ins_cost(SVE_COST); format %{ "sve_orr $dst, $src1, $src2\t# vector (sve)" %} @@ -441,7 +563,7 @@ instruct vor(vReg dst, vReg src1, vReg src2) %{ // vector xor instruct vxor(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + predicate(UseSVE > 0); match(Set dst (XorV src1 src2)); ins_cost(SVE_COST); format %{ "sve_eor $dst, $src1, $src2\t# vector (sve)" %} @@ -456,7 +578,7 @@ instruct vxor(vReg dst, vReg src1, vReg src2) %{ // vector not instruct vnotI(vReg dst, vReg src, immI_M1 m1) %{ - predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + predicate(UseSVE > 0); match(Set dst (XorV src (ReplicateB m1))); match(Set dst (XorV src (ReplicateS m1))); match(Set dst (XorV src (ReplicateI m1))); @@ -470,7 +592,7 @@ instruct vnotI(vReg dst, vReg src, immI_M1 m1) %{ %} instruct vnotL(vReg dst, vReg src, immL_M1 m1) %{ - predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + predicate(UseSVE > 0); match(Set dst (XorV src (ReplicateL m1))); ins_cost(SVE_COST); format %{ "sve_not $dst, $src\t# vector (sve) D" %} @@ -485,7 +607,7 @@ instruct vnotL(vReg dst, vReg src, immL_M1 m1) %{ // vector and_not instruct vand_notI(vReg dst, vReg src1, vReg src2, immI_M1 m1) %{ - predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + predicate(UseSVE > 0); match(Set dst (AndV src1 (XorV src2 (ReplicateB m1)))); match(Set dst (AndV src1 (XorV src2 (ReplicateS m1)))); match(Set dst (AndV src1 (XorV src2 (ReplicateI m1)))); @@ -500,7 +622,7 @@ instruct vand_notI(vReg dst, vReg src1, vReg src2, 
immI_M1 m1) %{ %} instruct vand_notL(vReg dst, vReg src1, vReg src2, immL_M1 m1) %{ - predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + predicate(UseSVE > 0); match(Set dst (AndV src1 (XorV src2 (ReplicateL m1)))); ins_cost(SVE_COST); format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) D" %} @@ -516,7 +638,7 @@ instruct vand_notL(vReg dst, vReg src1, vReg src2, immL_M1 m1) %{ // vector float div instruct vdivF(vReg dst_src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + predicate(UseSVE > 0); match(Set dst_src1 (DivVF dst_src1 src2)); ins_cost(SVE_COST); format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) (S)" %} @@ -528,7 +650,7 @@ instruct vdivF(vReg dst_src1, vReg src2) %{ %} instruct vdivD(vReg dst_src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + predicate(UseSVE > 0); match(Set dst_src1 (DivVD dst_src1 src2)); ins_cost(SVE_COST); format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) (D)" %} @@ -542,13 +664,13 @@ instruct vdivD(vReg dst_src1, vReg src2) %{ // vector min/max instruct vmin(vReg dst_src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + predicate(UseSVE > 0); match(Set dst_src1 (MinV dst_src1 src2)); ins_cost(SVE_COST); format %{ "sve_min $dst_src1, $dst_src1, $src2\t # vector (sve)" %} ins_encode %{ - BasicType bt = vector_element_basic_type(this); - Assembler::SIMD_RegVariant size = elemType_to_regVariant(bt); + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); if (is_floating_point_type(bt)) { __ sve_fmin(as_FloatRegister($dst_src1$$reg), size, ptrue, as_FloatRegister($src2$$reg)); @@ -562,13 +684,13 @@ instruct vmin(vReg dst_src1, vReg src2) %{ %} instruct vmax(vReg dst_src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + predicate(UseSVE > 0); match(Set dst_src1 (MaxV dst_src1 src2)); ins_cost(SVE_COST); format %{ "sve_max $dst_src1, $dst_src1, $src2\t # vector (sve)" %} ins_encode %{ - BasicType bt = vector_element_basic_type(this); - Assembler::SIMD_RegVariant size = elemType_to_regVariant(bt); + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); if (is_floating_point_type(bt)) { __ sve_fmax(as_FloatRegister($dst_src1$$reg), size, ptrue, as_FloatRegister($src2$$reg)); @@ -585,7 +707,7 @@ instruct vmax(vReg dst_src1, vReg src2) %{ // dst_src1 = dst_src1 + src2 * src3 instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4); + predicate(UseFMA && UseSVE > 0); match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); ins_cost(SVE_COST); format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %} @@ -598,7 +720,7 @@ instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ // dst_src1 = dst_src1 + src2 * src3 instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2); + predicate(UseFMA && UseSVE > 0); match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); ins_cost(SVE_COST); format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %} @@ -614,7 +736,7 @@ instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ // dst_src1 = dst_src1 + -src2 * src3 // dst_src1 = dst_src1 + src2 * -src3 instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4); + 
predicate(UseFMA && UseSVE > 0); match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); ins_cost(SVE_COST); @@ -629,7 +751,7 @@ instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ // dst_src1 = dst_src1 + -src2 * src3 // dst_src1 = dst_src1 + src2 * -src3 instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2); + predicate(UseFMA && UseSVE > 0); match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); ins_cost(SVE_COST); @@ -646,7 +768,7 @@ instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ // dst_src1 = -dst_src1 + -src2 * src3 // dst_src1 = -dst_src1 + src2 * -src3 instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4); + predicate(UseFMA && UseSVE > 0); match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); ins_cost(SVE_COST); @@ -661,7 +783,7 @@ instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ // dst_src1 = -dst_src1 + -src2 * src3 // dst_src1 = -dst_src1 + src2 * -src3 instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2); + predicate(UseFMA && UseSVE > 0); match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); ins_cost(SVE_COST); @@ -677,7 +799,7 @@ instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ // dst_src1 = -dst_src1 + src2 * src3 instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4); + predicate(UseFMA && UseSVE > 0); match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); ins_cost(SVE_COST); format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %} @@ -690,7 +812,7 @@ instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ // dst_src1 = -dst_src1 + src2 * src3 instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2); + predicate(UseFMA && UseSVE > 0); match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); ins_cost(SVE_COST); format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %} @@ -706,7 +828,7 @@ instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ // dst_src1 = dst_src1 + src2 * src3 instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + predicate(UseSVE > 0); match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); ins_cost(SVE_COST); format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (B)" %} @@ -720,7 +842,7 @@ instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) // dst_src1 = dst_src1 + src2 * src3 instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + predicate(UseSVE > 0); match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); ins_cost(SVE_COST); format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (H)" %} @@ -734,7 +856,7 @@ instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) // dst_src1 = dst_src1 + src2 * src3 instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + predicate(UseSVE > 0); match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); 
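// Note (annotation): the mla/mls family pattern-matches an add/sub whose
// second input is a multiply and fuses the pair into one predicated
// instruction, e.g. for the int variant here:
//   (AddVI dst_src1 (MulVI src2 src3))  ->  sve_mla  (dst_src1 += src2 * src3)
//   (SubVI dst_src1 (MulVI src2 src3))  ->  sve_mls  (dst_src1 -= src2 * src3)
// saving a temporary register and an instruction per element-width variant.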
ins_cost(SVE_COST); format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (S)" %} @@ -748,7 +870,7 @@ instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) // dst_src1 = dst_src1 + src2 * src3 instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + predicate(UseSVE > 0); match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); ins_cost(SVE_COST); format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (D)" %} @@ -764,7 +886,7 @@ instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) // dst_src1 = dst_src1 - src2 * src3 instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + predicate(UseSVE > 0); match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); ins_cost(SVE_COST); format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (B)" %} @@ -778,7 +900,7 @@ instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) // dst_src1 = dst_src1 - src2 * src3 instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + predicate(UseSVE > 0); match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); ins_cost(SVE_COST); format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (H)" %} @@ -792,7 +914,7 @@ instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) // dst_src1 = dst_src1 - src2 * src3 instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + predicate(UseSVE > 0); match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); ins_cost(SVE_COST); format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (S)" %} @@ -806,7 +928,7 @@ instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) // dst_src1 = dst_src1 - src2 * src3 instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + predicate(UseSVE > 0); match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); ins_cost(SVE_COST); format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (D)" %} @@ -821,7 +943,7 @@ instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) // vector mul instruct vmulB(vReg dst_src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); + predicate(UseSVE > 0); match(Set dst_src1 (MulVB dst_src1 src2)); ins_cost(SVE_COST); format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (B)" %} @@ -833,7 +955,7 @@ instruct vmulB(vReg dst_src1, vReg src2) %{ %} instruct vmulS(vReg dst_src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); + predicate(UseSVE > 0); match(Set dst_src1 (MulVS dst_src1 src2)); ins_cost(SVE_COST); format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (H)" %} @@ -845,7 +967,7 @@ instruct vmulS(vReg dst_src1, vReg src2) %{ %} instruct vmulI(vReg dst_src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + predicate(UseSVE > 0); match(Set dst_src1 (MulVI dst_src1 src2)); ins_cost(SVE_COST); format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %} @@ -857,7 +979,7 @@ instruct vmulI(vReg dst_src1, vReg src2) %{ %} instruct vmulL(vReg dst_src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + predicate(UseSVE > 0); match(Set dst_src1 (MulVL dst_src1 src2)); ins_cost(SVE_COST); format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %} @@ -869,7 +991,7 @@ instruct vmulL(vReg dst_src1, vReg src2) %{ %} instruct vmulF(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + 
predicate(UseSVE > 0); match(Set dst (MulVF src1 src2)); ins_cost(SVE_COST); format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (S)" %} @@ -882,7 +1004,7 @@ instruct vmulF(vReg dst, vReg src1, vReg src2) %{ %} instruct vmulD(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); + predicate(UseSVE > 0); match(Set dst (MulVD src1 src2)); ins_cost(SVE_COST); format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (D)" %} @@ -897,7 +1019,7 @@ instruct vmulD(vReg dst, vReg src1, vReg src2) %{ // vector fneg instruct vnegF(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + predicate(UseSVE > 0); match(Set dst (NegVF src)); ins_cost(SVE_COST); format %{ "sve_fneg $dst, $src\t# vector (sve) (S)" %} @@ -909,7 +1031,7 @@ instruct vnegF(vReg dst, vReg src) %{ %} instruct vnegD(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); + predicate(UseSVE > 0); match(Set dst (NegVD src)); ins_cost(SVE_COST); format %{ "sve_fneg $dst, $src\t# vector (sve) (D)" %} @@ -923,855 +1045,2969 @@ instruct vnegD(vReg dst, vReg src) %{ // popcount vector instruct vpopcountI(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); + predicate(UseSVE > 0); match(Set dst (PopCountVI src)); - format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %} + format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %} ins_encode %{ __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg)); %} ins_pipe(pipe_slow); %} -// vector add reduction +// vector mask compare -instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 && - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); - match(Set dst (AddReductionVI src1 src2)); - effect(TEMP_DEF dst, TEMP tmp); - ins_cost(SVE_COST); - format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (B)\n\t" - "smov $dst, $tmp, B, 0\n\t" - "addw $dst, $dst, $src1\n\t" - "sxtb $dst, $dst\t # add reduction B" %} +instruct vmaskcmp(vReg dst, vReg src1, vReg src2, immI cond, pRegGov pTmp, rFlagsReg cr) %{ + predicate(UseSVE > 0); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + effect(TEMP pTmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_cmp $pTmp, $src1, $src2\n\t" + "sve_cpy $dst, $pTmp, -1\t# vector mask cmp (sve)" %} ins_encode %{ - __ sve_uaddv(as_FloatRegister($tmp$$reg), __ B, - ptrue, as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); - __ addw($dst$$Register, $dst$$Register, $src1$$Register); - __ sxtb($dst$$Register, $dst$$Register); + BasicType bt = Matcher::vector_element_basic_type(this); + __ sve_compare(as_PRegister($pTmp$$reg), bt, ptrue, as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg), (int)$cond$$constant); + __ sve_cpy(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt), + as_PRegister($pTmp$$reg), -1, false); %} ins_pipe(pipe_slow); %} -instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 && - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); - match(Set dst (AddReductionVI src1 src2)); - effect(TEMP_DEF dst, TEMP tmp); - ins_cost(SVE_COST); - format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (H)\n\t" - "smov $dst, $tmp, H, 0\n\t" - "addw $dst, $dst, $src1\n\t" - "sxth $dst, $dst\t # add 
reduction H" %} +// vector blend + +instruct vblend(vReg dst, vReg src1, vReg src2, vReg src3, pRegGov pTmp, rFlagsReg cr) %{ + predicate(UseSVE > 0); + match(Set dst (VectorBlend (Binary src1 src2) src3)); + effect(TEMP pTmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_cmpeq $pTmp, $src3, -1\n\t" + "sve_sel $dst, $pTmp, $src2, $src1\t# vector blend (sve)" %} ins_encode %{ - __ sve_uaddv(as_FloatRegister($tmp$$reg), __ H, - ptrue, as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ H, 0); - __ addw($dst$$Register, $dst$$Register, $src1$$Register); - __ sxth($dst$$Register, $dst$$Register); + Assembler::SIMD_RegVariant size = + __ elemType_to_regVariant(Matcher::vector_element_basic_type(this)); + __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, + ptrue, as_FloatRegister($src3$$reg), -1); + __ sve_sel(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 && - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); - match(Set dst (AddReductionVI src1 src2)); - effect(TEMP_DEF dst, TEMP tmp); - ins_cost(SVE_COST); - format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (S)\n\t" - "umov $dst, $tmp, S, 0\n\t" - "addw $dst, $dst, $src1\t # add reduction S" %} +// vector blend with compare + +instruct vblend_maskcmp(vReg dst, vReg src1, vReg src2, vReg src3, + vReg src4, pRegGov pTmp, immI cond, rFlagsReg cr) %{ + predicate(UseSVE > 0); + match(Set dst (VectorBlend (Binary src1 src2) (VectorMaskCmp (Binary src3 src4) cond))); + effect(TEMP pTmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_cmp $pTmp, $src3, $src4\t# vector cmp (sve)\n\t" + "sve_sel $dst, $pTmp, $src2, $src1\t# vector blend (sve)" %} ins_encode %{ - __ sve_uaddv(as_FloatRegister($tmp$$reg), __ S, - ptrue, as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ S, 0); - __ addw($dst$$Register, $dst$$Register, $src1$$Register); + BasicType bt = Matcher::vector_element_basic_type(this); + __ sve_compare(as_PRegister($pTmp$$reg), bt, ptrue, as_FloatRegister($src3$$reg), + as_FloatRegister($src4$$reg), (int)$cond$$constant); + __ sve_sel(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt), + as_PRegister($pTmp$$reg), as_FloatRegister($src2$$reg), + as_FloatRegister($src1$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 && - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); - match(Set dst (AddReductionVL src1 src2)); - effect(TEMP_DEF dst, TEMP tmp); +// vector load mask + +instruct vloadmaskB(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorLoadMask src)); ins_cost(SVE_COST); - format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (D)\n\t" - "umov $dst, $tmp, D, 0\n\t" - "add $dst, $dst, $src1\t # add reduction D" %} + format %{ "sve_neg $dst, $src\t# vector load mask (B)" %} ins_encode %{ - __ sve_uaddv(as_FloatRegister($tmp$$reg), __ D, - ptrue, as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0); - __ add($dst$$Register, $dst$$Register, $src1$$Register); + __ 
sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, as_FloatRegister($src$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_addF(vRegF src1_dst, vReg src2) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16); - match(Set src1_dst (AddReductionVF src1_dst src2)); - ins_cost(SVE_COST); - format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (S)" %} +instruct vloadmaskS(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorLoadMask src)); + ins_cost(2 * SVE_COST); + format %{ "sve_uunpklo $dst, H, $src\n\t" + "sve_neg $dst, $dst\t# vector load mask (B to H)" %} ins_encode %{ - __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S, - ptrue, as_FloatRegister($src2$$reg)); + __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); + __ sve_neg(as_FloatRegister($dst$$reg), __ H, ptrue, as_FloatRegister($dst$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_addD(vRegD src1_dst, vReg src2) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16); - match(Set src1_dst (AddReductionVD src1_dst src2)); - ins_cost(SVE_COST); - format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (D)" %} +instruct vloadmaskI(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (VectorLoadMask src)); + ins_cost(3 * SVE_COST); + format %{ "sve_uunpklo $dst, H, $src\n\t" + "sve_uunpklo $dst, S, $dst\n\t" + "sve_neg $dst, $dst\t# vector load mask (B to S)" %} ins_encode %{ - __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D, - ptrue, as_FloatRegister($src2$$reg)); + __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); + __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg)); + __ sve_neg(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg)); %} ins_pipe(pipe_slow); %} -// vector max reduction - -instruct reduce_maxF(vRegF dst, vRegF src1, vReg src2) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16); - match(Set dst (MaxReductionV src1 src2)); - ins_cost(INSN_COST); - effect(TEMP_DEF dst); - format %{ "sve_fmaxv $dst, $src2 # vector (sve) (S)\n\t" - "fmaxs $dst, $dst, $src1\t # max reduction F" %} +instruct vloadmaskL(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set dst (VectorLoadMask src)); + ins_cost(4 * SVE_COST); + format %{ "sve_uunpklo $dst, H, $src\n\t" + "sve_uunpklo $dst, S, $dst\n\t" + "sve_uunpklo $dst, D, $dst\n\t" + "sve_neg $dst, $dst\t# vector load mask (B to D)" %} ins_encode %{ - __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S, - ptrue, as_FloatRegister($src2$$reg)); - __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); + __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg)); + __ sve_uunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg)); + __ sve_neg(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_maxD(vRegD dst, 
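// Note (annotation): the VectorLoadMask rules above convert a boolean vector
// (byte lanes holding 0 or 1) into a mask vector (0 or -1): sve_uunpklo
// zero-extends the byte lanes up to the target element size, and sve_neg maps
// 1 to -1, i.e. an all-bits-set "true" lane. Wider element types simply need
// more unpack steps (H, then S, then D).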
vRegD src1, vReg src2) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16); - match(Set dst (MaxReductionV src1 src2)); - ins_cost(INSN_COST); - effect(TEMP_DEF dst); - format %{ "sve_fmaxv $dst, $src2 # vector (sve) (S)\n\t" - "fmaxs $dst, $dst, $src1\t # max reduction D" %} +// vector store mask + +instruct vstoremaskB(vReg dst, vReg src, immI_1 size) %{ + predicate(UseSVE > 0); + match(Set dst (VectorStoreMask src size)); + ins_cost(SVE_COST); + format %{ "sve_neg $dst, $src\t# vector store mask (B)" %} ins_encode %{ - __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D, - ptrue, as_FloatRegister($src2$$reg)); - __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, + as_FloatRegister($src$$reg)); %} ins_pipe(pipe_slow); %} -// vector min reduction - -instruct reduce_minF(vRegF dst, vRegF src1, vReg src2) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16); - match(Set dst (MinReductionV src1 src2)); - ins_cost(INSN_COST); - effect(TEMP_DEF dst); - format %{ "sve_fminv $dst, $src2 # vector (sve) (S)\n\t" - "fmins $dst, $dst, $src1\t # min reduction F" %} +instruct vstoremaskS(vReg dst, vReg src, vReg tmp, immI_2 size) %{ + predicate(UseSVE > 0); + match(Set dst (VectorStoreMask src size)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(3 * SVE_COST); + format %{ "sve_dup $tmp, H, 0\n\t" + "sve_uzp1 $dst, B, $src, $tmp\n\t" + "sve_neg $dst, B, $dst\t# vector store mask (sve) (H to B)" %} ins_encode %{ - __ sve_fminv(as_FloatRegister($dst$$reg), __ S, - ptrue, as_FloatRegister($src2$$reg)); - __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, + as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); + __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, + as_FloatRegister($dst$$reg)); + %} ins_pipe(pipe_slow); %} -instruct reduce_minD(vRegD dst, vRegD src1, vReg src2) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16); - match(Set dst (MinReductionV src1 src2)); - ins_cost(INSN_COST); - effect(TEMP_DEF dst); - format %{ "sve_fminv $dst, $src2 # vector (sve) (S)\n\t" - "fmins $dst, $dst, $src1\t # min reduction D" %} +instruct vstoremaskI(vReg dst, vReg src, vReg tmp, immI_4 size) %{ + predicate(UseSVE > 0); + match(Set dst (VectorStoreMask src size)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(4 * SVE_COST); + format %{ "sve_dup $tmp, S, 0\n\t" + "sve_uzp1 $dst, H, $src, $tmp\n\t" + "sve_uzp1 $dst, B, $dst, $tmp\n\t" + "sve_neg $dst, B, $dst\t# vector store mask (sve) (S to B)" %} ins_encode %{ - __ sve_fminv(as_FloatRegister($dst$$reg), __ D, - ptrue, as_FloatRegister($src2$$reg)); - __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, + as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, + 
as_FloatRegister($dst$$reg)); %} ins_pipe(pipe_slow); %} -// vector Math.rint, floor, ceil - -instruct vroundD(vReg dst, vReg src, immI rmode) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 && - n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); - match(Set dst (RoundDoubleModeV src rmode)); - format %{ "sve_frint $dst, $src, $rmode\t# vector (sve) (D)" %} +instruct vstoremaskL(vReg dst, vReg src, vReg tmp, immI_8 size) %{ + predicate(UseSVE > 0); + match(Set dst (VectorStoreMask src size)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(5 * SVE_COST); + format %{ "sve_dup $tmp, D, 0\n\t" + "sve_uzp1 $dst, S, $src, $tmp\n\t" + "sve_uzp1 $dst, H, $dst, $tmp\n\t" + "sve_uzp1 $dst, B, $dst, $tmp\n\t" + "sve_neg $dst, B, $dst\t# vector store mask (sve) (D to B)" %} ins_encode %{ - switch ($rmode$$constant) { - case RoundDoubleModeNode::rmode_rint: - __ sve_frintn(as_FloatRegister($dst$$reg), __ D, - ptrue, as_FloatRegister($src$$reg)); - break; - case RoundDoubleModeNode::rmode_floor: - __ sve_frintm(as_FloatRegister($dst$$reg), __ D, - ptrue, as_FloatRegister($src$$reg)); - break; - case RoundDoubleModeNode::rmode_ceil: - __ sve_frintp(as_FloatRegister($dst$$reg), __ D, - ptrue, as_FloatRegister($src$$reg)); - break; - } + __ sve_dup(as_FloatRegister($tmp$$reg), __ D, 0); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, + as_FloatRegister($dst$$reg)); %} ins_pipe(pipe_slow); %} -// vector replicate +// load/store mask vector -instruct replicateB(vReg dst, iRegIorL2I src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); - match(Set dst (ReplicateB src)); - ins_cost(SVE_COST); - format %{ "sve_dup $dst, $src\t# vector (sve) (B)" %} +instruct vloadmask_loadV_byte(vReg dst, vmemA mem) %{ + predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() == MaxVectorSize && + type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) == 1); + match(Set dst (VectorLoadMask (LoadVector mem))); + ins_cost(5 * SVE_COST); + format %{ "sve_ld1b $dst, $mem\n\t" + "sve_neg $dst, $dst\t# load vector mask (sve)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($src$$reg)); + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + BasicType to_vect_bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant to_vect_variant = __ elemType_to_regVariant(to_vect_bt); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue, + T_BOOLEAN, to_vect_bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ sve_neg(dst_reg, to_vect_variant, ptrue, dst_reg); %} ins_pipe(pipe_slow); %} -instruct replicateS(vReg dst, iRegIorL2I src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); - match(Set dst (ReplicateS src)); - ins_cost(SVE_COST); - format %{ "sve_dup $dst, $src\t# vector (sve) (H)" %} +instruct vloadmask_loadV_non_byte(vReg dst, indirect mem) %{ + predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() == MaxVectorSize && + type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) > 1); + match(Set dst (VectorLoadMask (LoadVector mem))); + ins_cost(5 * SVE_COST); + format %{ "sve_ld1b $dst, $mem\n\t" + "sve_neg $dst, $dst\t# load vector 
mask (sve)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($src$$reg)); + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + BasicType to_vect_bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant to_vect_variant = __ elemType_to_regVariant(to_vect_bt); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue, + T_BOOLEAN, to_vect_bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ sve_neg(dst_reg, to_vect_variant, ptrue, dst_reg); %} ins_pipe(pipe_slow); %} -instruct replicateI(vReg dst, iRegIorL2I src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); - match(Set dst (ReplicateI src)); - ins_cost(SVE_COST); - format %{ "sve_dup $dst, $src\t# vector (sve) (S)" %} +instruct storeV_vstoremask_byte(vmemA mem, vReg src, vReg tmp, immI_1 esize) %{ + predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() * + n->as_StoreVector()->in(MemNode::ValueIn)->in(2)->get_int() == MaxVectorSize); + match(Set mem (StoreVector mem (VectorStoreMask src esize))); + effect(TEMP tmp); + ins_cost(5 * SVE_COST); + format %{ "sve_neg $tmp, $src\n\t" + "sve_st1b $tmp, $mem\t# store vector mask (sve)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($src$$reg)); + BasicType from_vect_bt = Matcher::vector_element_basic_type(this, $src); + assert(type2aelembytes(from_vect_bt) == (int)$esize$$constant, "unsupported type."); + Assembler::SIMD_RegVariant from_vect_variant = __ elemBytes_to_regVariant($esize$$constant); + __ sve_neg(as_FloatRegister($tmp$$reg), from_vect_variant, ptrue, + as_FloatRegister($src$$reg)); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($tmp$$reg), + ptrue, T_BOOLEAN, from_vect_bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} ins_pipe(pipe_slow); %} -instruct replicateL(vReg dst, iRegL src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); - match(Set dst (ReplicateL src)); - ins_cost(SVE_COST); - format %{ "sve_dup $dst, $src\t# vector (sve) (D)" %} +instruct storeV_vstoremask_non_byte(indirect mem, vReg src, vReg tmp, immI_gt_1 esize) %{ + predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() * + n->as_StoreVector()->in(MemNode::ValueIn)->in(2)->get_int() == MaxVectorSize); + match(Set mem (StoreVector mem (VectorStoreMask src esize))); + effect(TEMP tmp); + ins_cost(5 * SVE_COST); + format %{ "sve_neg $tmp, $src\n\t" + "sve_st1b $tmp, $mem\t# store vector mask (sve)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($src$$reg)); + BasicType from_vect_bt = Matcher::vector_element_basic_type(this, $src); + assert(type2aelembytes(from_vect_bt) == (int)$esize$$constant, "unsupported type."); + Assembler::SIMD_RegVariant from_vect_variant = __ elemBytes_to_regVariant($esize$$constant); + __ sve_neg(as_FloatRegister($tmp$$reg), from_vect_variant, ptrue, + as_FloatRegister($src$$reg)); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($tmp$$reg), + ptrue, T_BOOLEAN, from_vect_bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} ins_pipe(pipe_slow); %} -instruct replicateB_imm8(vReg dst, immI8 con) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); - match(Set dst (ReplicateB con)); +// vector add reduction + +instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == 
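// Note (annotation): this single rule (and its _partial twin below) now
// handles byte, short and int add reductions: sve_uaddv folds the lanes,
// umov extracts lane 0, addw adds the scalar input, and the trailing
// sxtb/sxth restores Java's byte/short wraparound by sign-extending the low
// 8/16 bits of the result -- the "(may extend)" in the format string.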
MaxVectorSize); + match(Set dst (AddReductionVI src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp); ins_cost(SVE_COST); - format %{ "sve_dup $dst, $con\t# vector (sve) (B)" %} + format %{ "sve_reduce_addI $dst, $src1, $src2\t# addB/S/I reduction (sve) (may extend)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($dst$$reg), __ B, $con$$constant); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_uaddv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); + __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); + __ addw($dst$$Register, $dst$$Register, $src1$$Register); + if (bt == T_BYTE) { + __ sxtb($dst$$Register, $dst$$Register); + } else if (bt == T_SHORT) { + __ sxth($dst$$Register, $dst$$Register); + } else { + assert(bt == T_INT, "unsupported type"); + } %} ins_pipe(pipe_slow); %} -instruct replicateS_imm8(vReg dst, immI8_shift8 con) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); - match(Set dst (ReplicateS con)); +instruct reduce_addI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (AddReductionVI src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_dup $dst, $con\t# vector (sve) (H)" %} + format %{ "sve_reduce_addI $dst, $src1, $src2\t# addI reduction partial (sve) (may extend)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($dst$$reg), __ H, $con$$constant); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, + Matcher::vector_length(this, $src2)); + __ sve_uaddv(as_FloatRegister($vtmp$$reg), variant, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); + __ addw($dst$$Register, $dst$$Register, $src1$$Register); + if (bt == T_BYTE) { + __ sxtb($dst$$Register, $dst$$Register); + } else if (bt == T_SHORT) { + __ sxth($dst$$Register, $dst$$Register); + } else { + assert(bt == T_INT, "unsupported type"); + } %} ins_pipe(pipe_slow); %} -instruct replicateI_imm8(vReg dst, immI8_shift8 con) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); - match(Set dst (ReplicateI con)); +instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (AddReductionVL src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp); ins_cost(SVE_COST); - format %{ "sve_dup $dst, $con\t# vector (sve) (S)" %} + format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction (sve)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($dst$$reg), __ S, $con$$constant); + __ sve_uaddv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); + __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); + __ add($dst$$Register, $dst$$Register, $src1$$Register); %} ins_pipe(pipe_slow); %} -instruct replicateL_imm8(vReg dst, immL8_shift8 con) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); - match(Set dst (ReplicateL con)); +instruct reduce_addL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && 
n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (AddReductionVL src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_dup $dst, $con\t# vector (sve) (D)" %} + format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction partial (sve)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($dst$$reg), __ D, $con$$constant); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src2)); + __ sve_uaddv(as_FloatRegister($vtmp$$reg), __ D, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); + __ add($dst$$Register, $dst$$Register, $src1$$Register); %} ins_pipe(pipe_slow); %} -instruct replicateF(vReg dst, vRegF src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); - match(Set dst (ReplicateF src)); + +instruct reduce_addF(vRegF src1_dst, vReg src2) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set src1_dst (AddReductionVF src1_dst src2)); ins_cost(SVE_COST); - format %{ "sve_cpy $dst, $src\t# vector (sve) (S)" %} + format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (S)" %} ins_encode %{ - __ sve_cpy(as_FloatRegister($dst$$reg), __ S, - ptrue, as_FloatRegister($src$$reg)); + __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -instruct replicateD(vReg dst, vRegD src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); - match(Set dst (ReplicateD src)); +instruct reduce_addF_partial(vRegF src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set src1_dst (AddReductionVF src1_dst src2)); ins_cost(SVE_COST); - format %{ "sve_cpy $dst, $src\t# vector (sve) (D)" %} + effect(TEMP ptmp, KILL cr); + format %{ "sve_reduce_addF $src1_dst, $src1_dst, $src2\t# addF reduction partial (sve) (S)" %} ins_encode %{ - __ sve_cpy(as_FloatRegister($dst$$reg), __ D, - ptrue, as_FloatRegister($src$$reg)); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, + Matcher::vector_length(this, $src2)); + __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -// vector shift - -instruct vasrB(vReg dst, vReg shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); - match(Set dst (RShiftVB dst shift)); +instruct reduce_addD(vRegD src1_dst, vReg src2) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set src1_dst (AddReductionVD src1_dst src2)); ins_cost(SVE_COST); - format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (B)" %} + format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (D)" %} ins_encode %{ - __ sve_asr(as_FloatRegister($dst$$reg), __ B, - ptrue, as_FloatRegister($shift$$reg)); + __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -instruct vasrS(vReg dst, vReg shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); - match(Set dst (RShiftVS dst shift)); +instruct reduce_addD_partial(vRegD src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set src1_dst (AddReductionVD src1_dst src2)); ins_cost(SVE_COST); - 
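// Note (annotation): the float/double add reductions use sve_fadda, which
// accumulates the lanes strictly in order; an unordered tree reduction would
// be faster but could round differently, and Java floating-point addition is
// not re-associable. The _partial variants only swap ptrue for a
// whilelo-limited predicate, as with the integer reductions.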
format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (H)" %} + effect(TEMP ptmp, KILL cr); + format %{ "sve_reduce_addD $src1_dst, $src1_dst, $src2\t# addD reduction partial (sve) (D)" %} ins_encode %{ - __ sve_asr(as_FloatRegister($dst$$reg), __ H, - ptrue, as_FloatRegister($shift$$reg)); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src2)); + __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -instruct vasrI(vReg dst, vReg shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); - match(Set dst (RShiftVI dst shift)); +// vector and reduction + +instruct reduce_andI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (AndReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp); ins_cost(SVE_COST); - format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (S)" %} + format %{ "sve_reduce_andI $dst, $src1, $src2\t# andB/S/I reduction (sve) (may extend)" %} ins_encode %{ - __ sve_asr(as_FloatRegister($dst$$reg), __ S, - ptrue, as_FloatRegister($shift$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_andv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); + __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); + __ andw($dst$$Register, $dst$$Register, $src1$$Register); + if (bt == T_BYTE) { + __ sxtb($dst$$Register, $dst$$Register); + } else if (bt == T_SHORT) { + __ sxth($dst$$Register, $dst$$Register); + } else { + assert(bt == T_INT, "unsupported type"); + } %} ins_pipe(pipe_slow); %} -instruct vasrL(vReg dst, vReg shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); - match(Set dst (RShiftVL dst shift)); +instruct reduce_andI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (AndReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (D)" %} + format %{ "sve_reduce_andI $dst, $src1, $src2\t# andI reduction partial (sve) (may extend)" %} ins_encode %{ - __ sve_asr(as_FloatRegister($dst$$reg), __ D, - ptrue, as_FloatRegister($shift$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, + Matcher::vector_length(this, $src2)); + __ sve_andv(as_FloatRegister($vtmp$$reg), variant, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); + __ andw($dst$$Register, $dst$$Register, $src1$$Register); + if (bt == T_BYTE) { + __ sxtb($dst$$Register, $dst$$Register); + } else if (bt == T_SHORT) { + __ sxth($dst$$Register, $dst$$Register); + } else { + assert(bt == T_INT, "unsupported type"); + } %} ins_pipe(pipe_slow); %} -instruct vlslB(vReg dst, vReg shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); - match(Set dst (LShiftVB dst shift)); +instruct 
reduce_andL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (AndReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp); ins_cost(SVE_COST); - format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (B)" %} + format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction (sve)" %} ins_encode %{ - __ sve_lsl(as_FloatRegister($dst$$reg), __ B, - ptrue, as_FloatRegister($shift$$reg)); + __ sve_andv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); + __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); + __ andr($dst$$Register, $dst$$Register, $src1$$Register); %} ins_pipe(pipe_slow); %} -instruct vlslS(vReg dst, vReg shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); - match(Set dst (LShiftVS dst shift)); +instruct reduce_andL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (AndReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (H)" %} + format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction partial (sve)" %} ins_encode %{ - __ sve_lsl(as_FloatRegister($dst$$reg), __ H, - ptrue, as_FloatRegister($shift$$reg)); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src2)); + __ sve_andv(as_FloatRegister($vtmp$$reg), __ D, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); + __ andr($dst$$Register, $dst$$Register, $src1$$Register); %} ins_pipe(pipe_slow); %} -instruct vlslI(vReg dst, vReg shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); - match(Set dst (LShiftVI dst shift)); +// vector or reduction + +instruct reduce_orI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (OrReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp); ins_cost(SVE_COST); - format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (S)" %} + format %{ "sve_reduce_orI $dst, $src1, $src2\t# orB/S/I reduction (sve) (may extend)" %} ins_encode %{ - __ sve_lsl(as_FloatRegister($dst$$reg), __ S, - ptrue, as_FloatRegister($shift$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_orv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); + __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); + __ orrw($dst$$Register, $dst$$Register, $src1$$Register); + if (bt == T_BYTE) { + __ sxtb($dst$$Register, $dst$$Register); + } else if (bt == T_SHORT) { + __ sxth($dst$$Register, $dst$$Register); + } else { + assert(bt == T_INT, "unsupported type"); + } %} ins_pipe(pipe_slow); %} -instruct vlslL(vReg dst, vReg shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); - match(Set dst (LShiftVL dst shift)); +instruct reduce_orI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov ptmp, 
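// Note (annotation): the and/or/xor reductions all share one shape:
// sve_andv/orv/eorv folds the lanes, smov extracts lane 0 sign-extended, a
// scalar andw/orrw/eorw merges src1, and sub-int element types finish with
// sxtb/sxth, mirroring reduce_addI above.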
rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (OrReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (D)" %} + format %{ "sve_reduce_orI $dst, $src1, $src2\t# orI reduction partial (sve) (may extend)" %} ins_encode %{ - __ sve_lsl(as_FloatRegister($dst$$reg), __ D, - ptrue, as_FloatRegister($shift$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, + Matcher::vector_length(this, $src2)); + __ sve_orv(as_FloatRegister($vtmp$$reg), variant, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); + __ orrw($dst$$Register, $dst$$Register, $src1$$Register); + if (bt == T_BYTE) { + __ sxtb($dst$$Register, $dst$$Register); + } else if (bt == T_SHORT) { + __ sxth($dst$$Register, $dst$$Register); + } else { + assert(bt == T_INT, "unsupported type"); + } %} ins_pipe(pipe_slow); %} -instruct vlsrB(vReg dst, vReg shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); - match(Set dst (URShiftVB dst shift)); +instruct reduce_orL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (OrReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp); ins_cost(SVE_COST); - format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (B)" %} + format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction (sve)" %} ins_encode %{ - __ sve_lsr(as_FloatRegister($dst$$reg), __ B, - ptrue, as_FloatRegister($shift$$reg)); + __ sve_orv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); + __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); + __ orr($dst$$Register, $dst$$Register, $src1$$Register); %} ins_pipe(pipe_slow); %} -instruct vlsrS(vReg dst, vReg shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); - match(Set dst (URShiftVS dst shift)); +instruct reduce_orL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (OrReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (H)" %} + format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction partial (sve)" %} ins_encode %{ - __ sve_lsr(as_FloatRegister($dst$$reg), __ H, - ptrue, as_FloatRegister($shift$$reg)); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src2)); + __ sve_orv(as_FloatRegister($vtmp$$reg), __ D, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); + __ orr($dst$$Register, $dst$$Register, $src1$$Register); %} ins_pipe(pipe_slow); %} -instruct vlsrI(vReg dst, vReg shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); - match(Set dst (URShiftVI dst shift)); +// vector xor reduction + +instruct 
reduce_eorI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (XorReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp); ins_cost(SVE_COST); - format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (S)" %} + format %{ "sve_reduce_eorI $dst, $src1, $src2\t# xorB/H/I reduction (sve) (may extend)" %} ins_encode %{ - __ sve_lsr(as_FloatRegister($dst$$reg), __ S, - ptrue, as_FloatRegister($shift$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_eorv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); + __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); + __ eorw($dst$$Register, $dst$$Register, $src1$$Register); + if (bt == T_BYTE) { + __ sxtb($dst$$Register, $dst$$Register); + } else if (bt == T_SHORT) { + __ sxth($dst$$Register, $dst$$Register); + } else { + assert(bt == T_INT, "unsupported type"); + } %} ins_pipe(pipe_slow); %} -instruct vlsrL(vReg dst, vReg shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); - match(Set dst (URShiftVL dst shift)); +instruct reduce_eorI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (XorReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (D)" %} + format %{ "sve_reduce_eorI $dst, $src1, $src2\t# xorI reduction partial (sve) (may extend)" %} ins_encode %{ - __ sve_lsr(as_FloatRegister($dst$$reg), __ D, - ptrue, as_FloatRegister($shift$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, + Matcher::vector_length(this, $src2)); + __ sve_eorv(as_FloatRegister($vtmp$$reg), variant, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); + __ eorw($dst$$Register, $dst$$Register, $src1$$Register); + if (bt == T_BYTE) { + __ sxtb($dst$$Register, $dst$$Register); + } else if (bt == T_SHORT) { + __ sxth($dst$$Register, $dst$$Register); + } else { + assert(bt == T_INT, "unsupported type"); + } %} ins_pipe(pipe_slow); %} -instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); - match(Set dst (RShiftVB src (RShiftCntV shift))); +instruct reduce_eorL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (XorReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp); ins_cost(SVE_COST); - format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (B)" %} + format %{ "sve_reduce_eorL $dst, $src1, $src2\t# xorL reduction (sve)" %} ins_encode %{ - int con = (int)$shift$$constant; - if (con == 0) { - __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - return; - } - if (con >= 8) con = 7; - __ 
sve_asr(as_FloatRegister($dst$$reg), __ B, - as_FloatRegister($src$$reg), con); + __ sve_eorv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); + __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); + __ eor($dst$$Register, $dst$$Register, $src1$$Register); %} ins_pipe(pipe_slow); %} -instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); - match(Set dst (RShiftVS src (RShiftCntV shift))); +instruct reduce_eorL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (XorReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (H)" %} + format %{ "sve_reduce_eorL $dst, $src1, $src2\t# xorL reduction partial (sve)" %} ins_encode %{ - int con = (int)$shift$$constant; - if (con == 0) { - __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - return; - } - if (con >= 16) con = 15; - __ sve_asr(as_FloatRegister($dst$$reg), __ H, - as_FloatRegister($src$$reg), con); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src2)); + __ sve_eorv(as_FloatRegister($vtmp$$reg), __ D, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); + __ eor($dst$$Register, $dst$$Register, $src1$$Register); %} ins_pipe(pipe_slow); %} -instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); - match(Set dst (RShiftVI src (RShiftCntV shift))); + +// vector max reduction + +instruct reduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE || + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT || + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT)); + match(Set dst (MaxReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp); ins_cost(SVE_COST); - format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (S)" %} + format %{ "sve_reduce_maxI $dst, $src1, $src2\t# reduce maxB/S/I (sve)" %} ins_encode %{ - int con = (int)$shift$$constant; - if (con == 0) { - __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - return; - } - __ sve_asr(as_FloatRegister($dst$$reg), __ S, - as_FloatRegister($src$$reg), con); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_smaxv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); + __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); + __ cmpw($dst$$Register, $src1$$Register); + __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::GT); %} ins_pipe(pipe_slow); %} -instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); - match(Set dst (RShiftVL src (RShiftCntV shift))); +instruct reduce_maxI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + 
predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
+            (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
+  match(Set dst (MaxReductionV src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
   ins_cost(SVE_COST);
-  format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (D)" %}
+  format %{ "sve_reduce_maxI $dst, $src1, $src2\t# reduce maxI partial (sve)" %}
   ins_encode %{
-    int con = (int)$shift$$constant;
-    if (con == 0) {
-      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
-                 as_FloatRegister($src$$reg));
-      return;
-    }
-    __ sve_asr(as_FloatRegister($dst$$reg), __ D,
-               as_FloatRegister($src$$reg), con);
+    BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
+                          Matcher::vector_length(this, $src2));
+    __ sve_smaxv(as_FloatRegister($vtmp$$reg), variant,
+                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+    __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
+    __ cmpw($dst$$Register, $src1$$Register);
+    __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::GT);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
-  match(Set dst (URShiftVB src (RShiftCntV shift)));
+instruct reduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
+            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+  match(Set dst (MaxReductionV src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp);
   ins_cost(SVE_COST);
-  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (B)" %}
+  format %{ "sve_reduce_maxL $dst, $src1, $src2\t# reduce maxL (sve)" %}
   ins_encode %{
-    int con = (int)$shift$$constant;
-    if (con == 0) {
-      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
-                 as_FloatRegister($src$$reg));
-      return;
-    }
-    if (con >= 8) {
-      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
-                 as_FloatRegister($src$$reg));
-      return;
-    }
-    __ sve_lsr(as_FloatRegister($dst$$reg), __ B,
-               as_FloatRegister($src$$reg), con);
+    __ sve_smaxv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
+    __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
+    __ cmp($dst$$Register, $src1$$Register);
+    __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::GT);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
-  match(Set dst (URShiftVS src (RShiftCntV shift)));
+instruct reduce_maxL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
+                             pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
+            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+  match(Set dst (MaxReductionV src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
   ins_cost(SVE_COST);
-  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (H)" %}
+  format %{ "sve_reduce_maxL $dst, $src1, $src2\t# reduce maxL partial (sve)" %}
   ins_encode %{
-    int
con = (int)$shift$$constant; - if (con == 0) { - __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - return; - } - if (con >= 16) { - __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - return; - } - __ sve_lsr(as_FloatRegister($dst$$reg), __ H, - as_FloatRegister($src$$reg), con); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src2)); + __ sve_smaxv(as_FloatRegister($vtmp$$reg), __ D, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); + __ cmp($dst$$Register, $src1$$Register); + __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::GT); %} ins_pipe(pipe_slow); %} -instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); - match(Set dst (URShiftVI src (RShiftCntV shift))); - ins_cost(SVE_COST); - format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (S)" %} +instruct reduce_maxF(vRegF dst, vRegF src1, vReg src2) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (MaxReductionV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst); + format %{ "sve_fmaxv $dst, $src2 # vector (sve) (S)\n\t" + "fmaxs $dst, $dst, $src1\t# max reduction F" %} ins_encode %{ - int con = (int)$shift$$constant; - if (con == 0) { - __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - return; - } - __ sve_lsr(as_FloatRegister($dst$$reg), __ S, - as_FloatRegister($src$$reg), con); + __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg)); + __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); %} ins_pipe(pipe_slow); %} -instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); - match(Set dst (URShiftVL src (RShiftCntV shift))); - ins_cost(SVE_COST); - format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %} +instruct reduce_maxF_partial(vRegF dst, vRegF src1, vReg src2, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (MaxReductionV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP ptmp, KILL cr); + format %{ "sve_reduce_maxF $dst, $src1, $src2\t# reduce max S partial (sve)" %} ins_encode %{ - int con = (int)$shift$$constant; - if (con == 0) { - __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - return; - } - __ sve_lsr(as_FloatRegister($dst$$reg), __ D, - as_FloatRegister($src$$reg), con); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, + Matcher::vector_length(this, $src2)); + __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); %} ins_pipe(pipe_slow); %} -instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); - match(Set dst (LShiftVB src (LShiftCntV shift))); - ins_cost(SVE_COST); - format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) 
(B)" %} +instruct reduce_maxD(vRegD dst, vRegD src1, vReg src2) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (MaxReductionV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst); + format %{ "sve_fmaxv $dst, $src2 # vector (sve) (D)\n\t" + "fmaxs $dst, $dst, $src1\t# max reduction D" %} ins_encode %{ - int con = (int)$shift$$constant; - if (con >= 8) { - __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - return; - } - __ sve_lsl(as_FloatRegister($dst$$reg), __ B, - as_FloatRegister($src$$reg), con); + __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg)); + __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); %} ins_pipe(pipe_slow); %} -instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); - match(Set dst (LShiftVS src (LShiftCntV shift))); - ins_cost(SVE_COST); - format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %} +instruct reduce_maxD_partial(vRegD dst, vRegD src1, vReg src2, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (MaxReductionV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP ptmp, KILL cr); + format %{ "sve_reduce_maxD $dst, $src1, $src2\t# reduce max D partial (sve)" %} ins_encode %{ - int con = (int)$shift$$constant; - if (con >= 16) { - __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - return; - } - __ sve_lsl(as_FloatRegister($dst$$reg), __ H, - as_FloatRegister($src$$reg), con); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src2)); + __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); %} ins_pipe(pipe_slow); %} -instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); - match(Set dst (LShiftVI src (LShiftCntV shift))); +// vector min reduction + +instruct reduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE || + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT || + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT)); + match(Set dst (MinReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp); ins_cost(SVE_COST); - format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %} + format %{ "sve_reduce_minI $dst, $src1, $src2\t# reduce minB/S/I (sve)" %} ins_encode %{ - int con = (int)$shift$$constant; - __ sve_lsl(as_FloatRegister($dst$$reg), __ S, - as_FloatRegister($src$$reg), con); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_sminv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); + __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); + __ cmpw($dst$$Register, $src1$$Register); + __ 
cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::LT); %} ins_pipe(pipe_slow); %} -instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); - match(Set dst (LShiftVL src (LShiftCntV shift))); +instruct reduce_minI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE || + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT || + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT)); + match(Set dst (MinReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %} + format %{ "sve_reduce_minI $dst, $src1, $src2\t# reduce minI partial (sve)" %} ins_encode %{ - int con = (int)$shift$$constant; - __ sve_lsl(as_FloatRegister($dst$$reg), __ D, - as_FloatRegister($src$$reg), con); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, + Matcher::vector_length(this, $src2)); + __ sve_sminv(as_FloatRegister($vtmp$$reg), variant, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); + __ cmpw($dst$$Register, $src1$$Register); + __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::LT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MinReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp); + ins_cost(SVE_COST); + format %{ "sve_reduce_minL $dst, $src1, $src2\t# reduce minL partial (sve)" %} + ins_encode %{ + __ sve_sminv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); + __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); + __ cmp($dst$$Register, $src1$$Register); + __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::LT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_minL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MinReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(SVE_COST); + format %{ "sve_reduce_minL $dst, $src1, $src2\t# reduce minL partial (sve)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src2)); + __ sve_sminv(as_FloatRegister($vtmp$$reg), __ D, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); + __ cmp($dst$$Register, $src1$$Register); + __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::LT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_minF(vRegF dst, vRegF src1, vReg src2) %{ + predicate(UseSVE > 0 && 
n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (MinReductionV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst); + format %{ "sve_fminv $dst, $src2 # vector (sve) (S)\n\t" + "fmins $dst, $dst, $src1\t# min reduction F" %} + ins_encode %{ + __ sve_fminv(as_FloatRegister($dst$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg)); + __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_minF_partial(vRegF dst, vRegF src1, vReg src2, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (MinReductionV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP ptmp, KILL cr); + format %{ "sve_reduce_minF $dst, $src1, $src2\t# reduce min S partial (sve)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, + Matcher::vector_length(this, $src2)); + __ sve_fminv(as_FloatRegister($dst$$reg), __ S, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_minD(vRegD dst, vRegD src1, vReg src2) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (MinReductionV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst); + format %{ "sve_fminv $dst, $src2 # vector (sve) (D)\n\t" + "fmins $dst, $dst, $src1\t# min reduction D" %} + ins_encode %{ + __ sve_fminv(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg)); + __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_minD_partial(vRegD dst, vRegD src1, vReg src2, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (MinReductionV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP ptmp, KILL cr); + format %{ "sve_reduce_minD $dst, $src1, $src2\t# reduce min D partial (sve)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src2)); + __ sve_fminv(as_FloatRegister($dst$$reg), __ D, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector Math.rint, floor, ceil + +instruct vroundD(vReg dst, vReg src, immI rmode) %{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (RoundDoubleModeV src rmode)); + format %{ "sve_frint $dst, $src, $rmode\t# vector (sve) (D)" %} + ins_encode %{ + switch ($rmode$$constant) { + case RoundDoubleModeNode::rmode_rint: + __ sve_frintn(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); + break; + case RoundDoubleModeNode::rmode_floor: + __ sve_frintm(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); + break; + case RoundDoubleModeNode::rmode_ceil: + __ 
sve_frintp(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); + break; + } + %} + ins_pipe(pipe_slow); +%} + +// vector replicate + +instruct replicateB(vReg dst, iRegIorL2I src) %{ + predicate(UseSVE > 0); + match(Set dst (ReplicateB src)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $src\t# vector (sve) (B)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct replicateS(vReg dst, iRegIorL2I src) %{ + predicate(UseSVE > 0); + match(Set dst (ReplicateS src)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $src\t# vector (sve) (H)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct replicateI(vReg dst, iRegIorL2I src) %{ + predicate(UseSVE > 0); + match(Set dst (ReplicateI src)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $src\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct replicateL(vReg dst, iRegL src) %{ + predicate(UseSVE > 0); + match(Set dst (ReplicateL src)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $src\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct replicateB_imm8(vReg dst, immI8 con) %{ + predicate(UseSVE > 0); + match(Set dst (ReplicateB con)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $con\t# vector (sve) (B)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ B, $con$$constant); + %} + ins_pipe(pipe_slow); +%} + +instruct replicateS_imm8(vReg dst, immI8_shift8 con) %{ + predicate(UseSVE > 0); + match(Set dst (ReplicateS con)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $con\t# vector (sve) (H)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ H, $con$$constant); + %} + ins_pipe(pipe_slow); +%} + +instruct replicateI_imm8(vReg dst, immI8_shift8 con) %{ + predicate(UseSVE > 0); + match(Set dst (ReplicateI con)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $con\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ S, $con$$constant); + %} + ins_pipe(pipe_slow); +%} + +instruct replicateL_imm8(vReg dst, immL8_shift8 con) %{ + predicate(UseSVE > 0); + match(Set dst (ReplicateL con)); + ins_cost(SVE_COST); + format %{ "sve_dup $dst, $con\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ D, $con$$constant); + %} + ins_pipe(pipe_slow); +%} + +instruct replicateF(vReg dst, vRegF src) %{ + predicate(UseSVE > 0); + match(Set dst (ReplicateF src)); + ins_cost(SVE_COST); + format %{ "sve_cpy $dst, $src\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_cpy(as_FloatRegister($dst$$reg), __ S, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct replicateD(vReg dst, vRegD src) %{ + predicate(UseSVE > 0); + match(Set dst (ReplicateD src)); + ins_cost(SVE_COST); + format %{ "sve_cpy $dst, $src\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_cpy(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector shift + +instruct vasrB(vReg dst, vReg shift) %{ + predicate(UseSVE > 0); + match(Set dst (RShiftVB dst shift)); + ins_cost(SVE_COST); + format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (B)" %} + ins_encode %{ + __ sve_asr(as_FloatRegister($dst$$reg), __ B, + ptrue, 
as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrS(vReg dst, vReg shift) %{ + predicate(UseSVE > 0); + match(Set dst (RShiftVS dst shift)); + ins_cost(SVE_COST); + format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (H)" %} + ins_encode %{ + __ sve_asr(as_FloatRegister($dst$$reg), __ H, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrI(vReg dst, vReg shift) %{ + predicate(UseSVE > 0); + match(Set dst (RShiftVI dst shift)); + ins_cost(SVE_COST); + format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_asr(as_FloatRegister($dst$$reg), __ S, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrL(vReg dst, vReg shift) %{ + predicate(UseSVE > 0); + match(Set dst (RShiftVL dst shift)); + ins_cost(SVE_COST); + format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_asr(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslB(vReg dst, vReg shift) %{ + predicate(UseSVE > 0); + match(Set dst (LShiftVB dst shift)); + ins_cost(SVE_COST); + format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (B)" %} + ins_encode %{ + __ sve_lsl(as_FloatRegister($dst$$reg), __ B, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslS(vReg dst, vReg shift) %{ + predicate(UseSVE > 0); + match(Set dst (LShiftVS dst shift)); + ins_cost(SVE_COST); + format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (H)" %} + ins_encode %{ + __ sve_lsl(as_FloatRegister($dst$$reg), __ H, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslI(vReg dst, vReg shift) %{ + predicate(UseSVE > 0); + match(Set dst (LShiftVI dst shift)); + ins_cost(SVE_COST); + format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_lsl(as_FloatRegister($dst$$reg), __ S, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslL(vReg dst, vReg shift) %{ + predicate(UseSVE > 0); + match(Set dst (LShiftVL dst shift)); + ins_cost(SVE_COST); + format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_lsl(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrB(vReg dst, vReg shift) %{ + predicate(UseSVE > 0); + match(Set dst (URShiftVB dst shift)); + ins_cost(SVE_COST); + format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (B)" %} + ins_encode %{ + __ sve_lsr(as_FloatRegister($dst$$reg), __ B, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrS(vReg dst, vReg shift) %{ + predicate(UseSVE > 0); + match(Set dst (URShiftVS dst shift)); + ins_cost(SVE_COST); + format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (H)" %} + ins_encode %{ + __ sve_lsr(as_FloatRegister($dst$$reg), __ H, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrI(vReg dst, vReg shift) %{ + predicate(UseSVE > 0); + match(Set dst (URShiftVI dst shift)); + ins_cost(SVE_COST); + format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_lsr(as_FloatRegister($dst$$reg), __ S, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrL(vReg dst, vReg shift) %{ + predicate(UseSVE > 0); + match(Set dst (URShiftVL dst shift)); + ins_cost(SVE_COST); + format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) 
(D)" %} + ins_encode %{ + __ sve_lsr(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0); + match(Set dst (RShiftVB src (RShiftCntV shift))); + ins_cost(SVE_COST); + format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (B)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + if (con >= 8) con = 7; + __ sve_asr(as_FloatRegister($dst$$reg), __ B, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0); + match(Set dst (RShiftVS src (RShiftCntV shift))); + ins_cost(SVE_COST); + format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (H)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + if (con >= 16) con = 15; + __ sve_asr(as_FloatRegister($dst$$reg), __ H, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0); + match(Set dst (RShiftVI src (RShiftCntV shift))); + ins_cost(SVE_COST); + format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (S)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + __ sve_asr(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0); + match(Set dst (RShiftVL src (RShiftCntV shift))); + ins_cost(SVE_COST); + format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (D)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + __ sve_asr(as_FloatRegister($dst$$reg), __ D, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0); + match(Set dst (URShiftVB src (RShiftCntV shift))); + ins_cost(SVE_COST); + format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (B)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + if (con >= 8) { + __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + __ sve_lsr(as_FloatRegister($dst$$reg), __ B, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0); + match(Set dst (URShiftVS src (RShiftCntV shift))); + ins_cost(SVE_COST); + format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (H)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + if (con >= 16) { + __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + __ sve_lsr(as_FloatRegister($dst$$reg), __ H, + 
as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0); + match(Set dst (URShiftVI src (RShiftCntV shift))); + ins_cost(SVE_COST); + format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (S)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + __ sve_lsr(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0); + match(Set dst (URShiftVL src (RShiftCntV shift))); + ins_cost(SVE_COST); + format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con == 0) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + __ sve_lsr(as_FloatRegister($dst$$reg), __ D, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0); + match(Set dst (LShiftVB src (LShiftCntV shift))); + ins_cost(SVE_COST); + format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con >= 8) { + __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + __ sve_lsl(as_FloatRegister($dst$$reg), __ B, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0); + match(Set dst (LShiftVS src (LShiftCntV shift))); + ins_cost(SVE_COST); + format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %} + ins_encode %{ + int con = (int)$shift$$constant; + if (con >= 16) { + __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + } + __ sve_lsl(as_FloatRegister($dst$$reg), __ H, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0); + match(Set dst (LShiftVI src (LShiftCntV shift))); + ins_cost(SVE_COST); + format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %} + ins_encode %{ + int con = (int)$shift$$constant; + __ sve_lsl(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ + predicate(UseSVE > 0); + match(Set dst (LShiftVL src (LShiftCntV shift))); + ins_cost(SVE_COST); + format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %} + ins_encode %{ + int con = (int)$shift$$constant; + __ sve_lsl(as_FloatRegister($dst$$reg), __ D, + as_FloatRegister($src$$reg), con); %} ins_pipe(pipe_slow); %} instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 16 && + predicate(UseSVE > 0 && (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE)); match(Set dst (LShiftCntV cnt)); match(Set dst (RShiftCntV cnt)); format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (B)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($cnt$$reg)); + __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($cnt$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ + predicate(UseSVE > 0 && + 
(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || + (n->bottom_type()->is_vect()->element_basic_type() == T_CHAR))); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (H)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($cnt$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ + predicate(UseSVE > 0 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT)); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (S)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($cnt$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ + predicate(UseSVE > 0 && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG)); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (D)" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($cnt$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector sqrt + +instruct vsqrtF(vReg dst, vReg src) %{ + predicate(UseSVE > 0); + match(Set dst (SqrtVF src)); + ins_cost(SVE_COST); + format %{ "sve_fsqrt $dst, $src\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_fsqrt(as_FloatRegister($dst$$reg), __ S, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vsqrtD(vReg dst, vReg src) %{ + predicate(UseSVE > 0); + match(Set dst (SqrtVD src)); + ins_cost(SVE_COST); + format %{ "sve_fsqrt $dst, $src\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_fsqrt(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector sub + +instruct vsubB(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0); + match(Set dst (SubVB src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (B)" %} + ins_encode %{ + __ sve_sub(as_FloatRegister($dst$$reg), __ B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubS(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0); + match(Set dst (SubVS src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (H)" %} + ins_encode %{ + __ sve_sub(as_FloatRegister($dst$$reg), __ H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubI(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0); + match(Set dst (SubVI src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (S)" %} + ins_encode %{ + __ sve_sub(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubL(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0); + match(Set dst (SubVL src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (D)" %} + ins_encode %{ + __ sve_sub(as_FloatRegister($dst$$reg), __ D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubF(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0); + match(Set dst (SubVF src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (S)" %} + ins_encode %{ + __ 
sve_fsub(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubD(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0); + match(Set dst (SubVD src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (D)" %} + ins_encode %{ + __ sve_fsub(as_FloatRegister($dst$$reg), __ D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector mask cast + +instruct vmaskcast(vReg dst) %{ + predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length() && + n->bottom_type()->is_vect()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes()); + match(Set dst (VectorMaskCast dst)); + ins_cost(0); + format %{ "vmaskcast $dst\t# empty (sve)" %} + ins_encode %{ + // empty + %} + ins_pipe(pipe_class_empty); +%} + +// ------------------------------ Vector cast ------------------------------- + +instruct vcvtBtoS(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorCastB2X src)); + ins_cost(SVE_COST); + format %{ "sve_sunpklo $dst, H, $src\t# convert B to S vector" %} + ins_encode %{ + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtBtoI(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorCastB2X src)); + ins_cost(2 * SVE_COST); + format %{ "sve_sunpklo $dst, H, $src\n\t" + "sve_sunpklo $dst, S, $dst\t# convert B to I vector" %} + ins_encode %{ + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtBtoL(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (VectorCastB2X src)); + ins_cost(3 * SVE_COST); + format %{ "sve_sunpklo $dst, H, $src\n\t" + "sve_sunpklo $dst, S, $dst\n\t" + "sve_sunpklo $dst, D, $dst\t# convert B to L vector" %} + ins_encode %{ + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg)); + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtBtoF(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorCastB2X src)); + ins_cost(3 * SVE_COST); + format %{ "sve_sunpklo $dst, H, $src\n\t" + "sve_sunpklo $dst, S, $dst\n\t" + "sve_scvtf $dst, S, $dst, S\t# convert B to F vector" %} + ins_encode %{ + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg)); + __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg), __ S); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtBtoD(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorCastB2X src)); + ins_cost(4 * SVE_COST); + format %{ "sve_sunpklo $dst, H, $src\n\t" + "sve_sunpklo $dst, S, $dst\n\t" + "sve_sunpklo $dst, D, $dst\n\t" + "sve_scvtf $dst, D, $dst, 
D\t# convert B to D vector" %} + ins_encode %{ + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg)); + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg)); + __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtStoB(vReg dst, vReg src, vReg tmp) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorCastS2X src)); + effect(TEMP tmp); + ins_cost(2 * SVE_COST); + format %{ "sve_dup $tmp, B, 0\n\t" + "sve_uzp1 $dst, B, $src, tmp\t# convert S to B vector" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($tmp$$reg), __ B, 0); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtStoI(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorCastS2X src)); + ins_cost(SVE_COST); + format %{ "sve_sunpklo $dst, S, $src\t# convert S to I vector" %} + ins_encode %{ + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtStoL(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (VectorCastS2X src)); + ins_cost(2 * SVE_COST); + format %{ "sve_sunpklo $dst, S, $src\n\t" + "sve_sunpklo $dst, D, $dst\t# convert S to L vector" %} + ins_encode %{ + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg)); + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtStoF(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorCastS2X src)); + ins_cost(2 * SVE_COST); + format %{ "sve_sunpklo $dst, S, $src\n\t" + "sve_scvtf $dst, S, $dst, S\t# convert S to F vector" %} + ins_encode %{ + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg)); + __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg), __ S); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtStoD(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorCastS2X src)); + ins_cost(3 * SVE_COST); + format %{ "sve_sunpklo $dst, S, $src\n\t" + "sve_sunpklo $dst, D, $dst\n\t" + "sve_scvtf $dst, D, $dst, D\t# convert S to D vector" %} + ins_encode %{ + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg)); + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg)); + __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtItoB(vReg dst, vReg src, vReg tmp) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorCastI2X src)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(3 * SVE_COST); + format %{ "sve_dup $tmp, H, 0\n\t" + "sve_uzp1 $dst, H, $src, tmp\n\t" + "sve_uzp1 $dst, B, $dst, tmp\n\t# convert I to B vector" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg), 
as_FloatRegister($tmp$$reg)); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtItoS(vReg dst, vReg src, vReg tmp) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorCastI2X src)); + effect(TEMP tmp); + ins_cost(2 * SVE_COST); + format %{ "sve_dup $tmp, H, 0\n\t" + "sve_uzp1 $dst, H, $src, tmp\t# convert I to S vector" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtItoL(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (VectorCastI2X src)); + ins_cost(SVE_COST); + format %{ "sve_sunpklo $dst, D, $src\t# convert I to L vector" %} + ins_encode %{ + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtItoF(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorCastI2X src)); + ins_cost(SVE_COST); + format %{ "sve_scvtf $dst, S, $src, S\t# convert I to F vector" %} + ins_encode %{ + __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtItoD(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorCastI2X src)); + ins_cost(2 * SVE_COST); + format %{ "sve_sunpklo $dst, D, $src\n\t" + "sve_scvtf $dst, D, $dst, D\t# convert I to D vector" %} + ins_encode %{ + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg)); + __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtLtoB(vReg dst, vReg src, vReg tmp) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorCastL2X src)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(4 * SVE_COST); + format %{ "sve_dup $tmp, S, 0\n\t" + "sve_uzp1 $dst, S, $src, tmp\n\t" + "sve_uzp1 $dst, H, $dst, tmp\n\t" + "sve_uzp1 $dst, B, $dst, tmp\n\t# convert L to B vector" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtLtoS(vReg dst, vReg src, vReg tmp) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorCastL2X src)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(3 * SVE_COST); + format %{ "sve_dup $tmp, S, 0\n\t" + "sve_uzp1 $dst, S, $src, tmp\n\t" + "sve_uzp1 $dst, H, $dst, tmp\n\t# convert L to S vector" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + 
ins_pipe(pipe_slow); +%} + +instruct vcvtLtoI(vReg dst, vReg src, vReg tmp) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorCastL2X src)); + effect(TEMP tmp); + ins_cost(2 * SVE_COST); + format %{ "sve_dup $tmp, S, 0\n\t" + "sve_uzp1 $dst, S, $src, tmp\t# convert L to I vector" %} + ins_encode %{ + __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtLtoF(vReg dst, vReg src, vReg tmp) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorCastL2X src)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(3 * SVE_COST); + format %{ "sve_scvtf $dst, S, $src, D\n\t" + "sve_dup $tmp, S, 0\n\t" + "sve_uzp1 $dst, S, $dst, $tmp\t# convert L to F vector" %} + ins_encode %{ + __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D); + __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtLtoD(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorCastL2X src)); + ins_cost(SVE_COST); + format %{ "sve_scvtf $dst, D, $src, D\t# convert L to D vector" %} + ins_encode %{ + __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtFtoB(vReg dst, vReg src, vReg tmp) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorCastF2X src)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(4 * SVE_COST); + format %{ "sve_fcvtzs $dst, S, $src, S\n\t" + "sve_dup $tmp, H, 0\n\t" + "sve_uzp1 $dst, H, $dst, tmp\n\t" + "sve_uzp1 $dst, B, $dst, tmp\n\t# convert F to B vector" %} + ins_encode %{ + __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S); + __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtFtoS(vReg dst, vReg src, vReg tmp) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorCastF2X src)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(3 * SVE_COST); + format %{ "sve_fcvtzs $dst, S, $src, S\n\t" + "sve_dup $tmp, H, 0\n\t" + "sve_uzp1 $dst, H, $dst, tmp\t# convert F to S vector" %} + ins_encode %{ + __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S); + __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtFtoI(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorCastF2X src)); + ins_cost(SVE_COST); + format %{ "sve_fcvtzs $dst, S, $src, S\t# convert F to I vector" %} + ins_encode %{ + __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S); + %} + ins_pipe(pipe_slow); +%} + 
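Reader's note (not part of the patch): the VectorCast rules in this hunk are the SVE lowering for Vector API lane conversions. Below is a minimal sketch of Java source that would exercise the vcvtFtoI rule above, assuming a JDK with the jdk.incubator.vector module enabled (the class and operator names are from that incubator API; the method and array names are illustrative):

```java
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

public class CastDemo {
    static final VectorSpecies<Float> FSP = FloatVector.SPECIES_PREFERRED;

    // The F2I lane conversion becomes a VectorCastF2X node in C2; with
    // UseSVE > 0, the vcvtFtoI rule emits a single predicated sve_fcvtzs.
    static void floatsToInts(float[] src, int[] dst) {
        int i = 0;
        for (; i < FSP.loopBound(src.length); i += FSP.length()) {
            FloatVector v = FloatVector.fromArray(FSP, src, i);
            IntVector w = (IntVector) v.convert(VectorOperators.F2I, 0);
            w.intoArray(dst, i);
        }
        for (; i < src.length; i++) {   // scalar tail
            dst[i] = (int) src[i];
        }
    }
}
```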
+instruct vcvtFtoL(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (VectorCastF2X src)); + ins_cost(2 * SVE_COST); + format %{ "sve_fcvtzs $dst, S, $src, S\n\t" + "sve_sunpklo $dst, D, $dst\t# convert F to L vector" %} + ins_encode %{ + __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S); + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtFtoD(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorCastF2X src)); + ins_cost(2 * SVE_COST); + format %{ "sve_sunpklo $dst, D, $src\n\t" + "sve_fcvt $dst, D, $dst, S\t# convert F to D vector" %} + ins_encode %{ + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg)); + __ sve_fcvt(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ S); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtDtoB(vReg dst, vReg src, vReg tmp) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorCastD2X src)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(5 * SVE_COST); + format %{ "sve_fcvtzs $dst, D, $src, D\n\t" + "sve_dup $tmp, S, 0\n\t" + "sve_uzp1 $dst, S, $dst, tmp\n\t" + "sve_uzp1 $dst, H, $dst, tmp\n\t" + "sve_uzp1 $dst, B, $dst, tmp\n\t# convert D to B vector" %} + ins_encode %{ + __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D); + __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtDtoS(vReg dst, vReg src, vReg tmp) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorCastD2X src)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(4 * SVE_COST); + format %{ "sve_fcvtzs $dst, D, $src, D\n\t" + "sve_dup $tmp, S, 0\n\t" + "sve_uzp1 $dst, S, $dst, tmp\n\t" + "sve_uzp1 $dst, H, $dst, tmp\n\t# convert D to S vector" %} + ins_encode %{ + __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D); + __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtDtoI(vReg dst, vReg src, vReg tmp) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorCastD2X src)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(3 * SVE_COST); + format %{ "sve_fcvtzs $dst, D, $src, D\n\t" + "sve_dup $tmp, S, 0\n\t" + "sve_uzp1 $dst, S, $dst, tmp\t# convert D to I vector" %} + ins_encode %{ + __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D); + __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtDtoL(vReg dst, 
vReg src) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (VectorCastD2X src)); + ins_cost(SVE_COST); + format %{ "sve_fcvtzs $dst, D, $src, D\t# convert D to L vector" %} + ins_encode %{ + __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtDtoF(vReg dst, vReg src, vReg tmp) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorCastD2X src)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(3 * SVE_COST); + format %{ "sve_fcvt $dst, S, $src, D\n\t" + "sve_dup $tmp, S, 0\n\t" + "sve_uzp1 $dst, S, $dst, $tmp\t# convert D to F vector" %} + ins_encode %{ + __ sve_fcvt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D); + __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); + __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} +// ------------------------------ Vector extract --------------------------------- + +instruct extractB(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr) +%{ + predicate(UseSVE > 0); + match(Set dst (ExtractB src idx)); + effect(TEMP pTmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_extract $dst, B, $pTmp, $src, $idx\n\t" + "sbfmw $dst, $dst, 0U, 7U\t# extract from vector(B)" %} + ins_encode %{ + __ sve_extract(as_Register($dst$$reg), __ B, as_PRegister($pTmp$$reg), + as_FloatRegister($src$$reg), (int)($idx$$constant)); + __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 7U); + %} + ins_pipe(pipe_slow); +%} + +instruct extractS(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr) +%{ + predicate(UseSVE > 0); + match(Set dst (ExtractS src idx)); + effect(TEMP pTmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_extract $dst, H, $pTmp, $src, $idx\n\t" + "sbfmw $dst, $dst, 0U, 15U\t# extract from vector(S)" %} + ins_encode %{ + __ sve_extract(as_Register($dst$$reg), __ H, as_PRegister($pTmp$$reg), + as_FloatRegister($src$$reg), (int)($idx$$constant)); + __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U); + %} + ins_pipe(pipe_slow); +%} + + +instruct extractI(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr) +%{ + predicate(UseSVE > 0); + match(Set dst (ExtractI src idx)); + effect(TEMP pTmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_extract $dst, S, $pTmp, $src, $idx\t# extract from vector(I)" %} + ins_encode %{ + __ sve_extract(as_Register($dst$$reg), __ S, as_PRegister($pTmp$$reg), + as_FloatRegister($src$$reg), (int)($idx$$constant)); %} ins_pipe(pipe_slow); %} -instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 8 && - (n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || - (n->bottom_type()->is_vect()->element_basic_type() == T_CHAR))); - match(Set dst (LShiftCntV cnt)); - match(Set dst (RShiftCntV cnt)); - format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (H)" %} +instruct extractL(iRegLNoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr) +%{ + predicate(UseSVE > 0); + match(Set dst (ExtractL src idx)); + effect(TEMP pTmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_extract $dst, D, $pTmp, $src, $idx\t# extract from vector(L)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($cnt$$reg)); + __ sve_extract(as_Register($dst$$reg), __ D, 
as_PRegister($pTmp$$reg), + as_FloatRegister($src$$reg), (int)($idx$$constant)); %} ins_pipe(pipe_slow); %} -instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 && - (n->bottom_type()->is_vect()->element_basic_type() == T_INT)); - match(Set dst (LShiftCntV cnt)); - match(Set dst (RShiftCntV cnt)); - format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (S)" %} +instruct extractF(vRegF dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr) +%{ + predicate(UseSVE > 0); + match(Set dst (ExtractF src idx)); + effect(TEMP pTmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_extract $dst, S, $pTmp, $src, $idx\t# extract from vector(F)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($cnt$$reg)); + __ sve_extract(as_FloatRegister($dst$$reg), __ S, as_PRegister($pTmp$$reg), + as_FloatRegister($src$$reg), (int)($idx$$constant)); %} ins_pipe(pipe_slow); %} -instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 && - (n->bottom_type()->is_vect()->element_basic_type() == T_LONG)); - match(Set dst (LShiftCntV cnt)); - match(Set dst (RShiftCntV cnt)); - format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (D)" %} +instruct extractD(vRegD dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr) +%{ + predicate(UseSVE > 0); + match(Set dst (ExtractD src idx)); + effect(TEMP pTmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_extract $dst, D, $pTmp, $src, $idx\t# extract from vector(D)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($cnt$$reg)); + __ sve_extract(as_FloatRegister($dst$$reg), __ D, as_PRegister($pTmp$$reg), + as_FloatRegister($src$$reg), (int)($idx$$constant)); %} ins_pipe(pipe_slow); %} -// vector sqrt +// ------------------------------- VectorTest ---------------------------------- -instruct vsqrtF(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); - match(Set dst (SqrtVF src)); +instruct vtest_alltrue(iRegINoSp dst, vReg src1, vReg src2, pReg pTmp, rFlagsReg cr) +%{ + predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); + match(Set dst (VectorTest src1 src2)); + effect(TEMP pTmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_fsqrt $dst, $src\t# vector (sve) (S)" %} + format %{ "sve_cmpeq $pTmp, $src1, 0\n\t" + "csetw $dst, EQ\t# VectorTest (sve) - alltrue" %} ins_encode %{ - __ sve_fsqrt(as_FloatRegister($dst$$reg), __ S, - ptrue, as_FloatRegister($src$$reg)); + // "src2" is not used for sve. 
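+ // With SVE predicated compares, EQ is the "none matched" condition: no + // active lane equals zero, i.e. every lane of the mask is set (alltrue). 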
+ BasicType bt = Matcher::vector_element_basic_type(this, $src1); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, + ptrue, as_FloatRegister($src1$$reg), 0); + __ csetw(as_Register($dst$$reg), Assembler::EQ); %} ins_pipe(pipe_slow); %} -instruct vsqrtD(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); - match(Set dst (SqrtVD src)); +instruct vtest_anytrue(iRegINoSp dst, vReg src1, vReg src2, pReg pTmp, rFlagsReg cr) +%{ + predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); + match(Set dst (VectorTest src1 src2)); + effect(TEMP pTmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_fsqrt $dst, $src\t# vector (sve) (D)" %} + format %{ "sve_cmpeq $pTmp, $src1, -1\n\t" + "csetw $dst, NE\t# VectorTest (sve) - anytrue" %} ins_encode %{ - __ sve_fsqrt(as_FloatRegister($dst$$reg), __ D, - ptrue, as_FloatRegister($src$$reg)); + // "src2" is not used for sve. + BasicType bt = Matcher::vector_element_basic_type(this, $src1); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, + ptrue, as_FloatRegister($src1$$reg), -1); + __ csetw(as_Register($dst$$reg), Assembler::NE); %} ins_pipe(pipe_slow); %} -// vector sub +instruct vtest_alltrue_partial(iRegINoSp dst, vReg src1, vReg src2, pRegGov pTmp, rFlagsReg cr) +%{ + predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); + match(Set dst (VectorTest src1 src2)); + effect(TEMP pTmp, KILL cr); + ins_cost(SVE_COST); + format %{ "vtest_alltrue_partial $dst, $src1, $src2\t# VectorTest partial (sve) - alltrue" %} + ins_encode %{ + // "src2" is not used for sve. + BasicType bt = Matcher::vector_element_basic_type(this, $src1); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), size, + Matcher::vector_length(this, $src1)); + __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, + as_PRegister($pTmp$$reg), as_FloatRegister($src1$$reg), 0); + __ csetw(as_Register($dst$$reg), Assembler::EQ); + %} + ins_pipe(pipe_slow); +%} -instruct vsubB(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); - match(Set dst (SubVB src1 src2)); +instruct vtest_anytrue_partial(iRegINoSp dst, vReg src1, vReg src2, pRegGov pTmp, rFlagsReg cr) +%{ + predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); + match(Set dst (VectorTest src1 src2)); + effect(TEMP pTmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (B)" %} + format %{ "vtest_anytrue_partial $dst, $src1, $src2\t# VectorTest partial (sve) - anytrue" %} ins_encode %{ - __ sve_sub(as_FloatRegister($dst$$reg), __ B, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); + // "src2" is not used for sve. 
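+ // whilelo limits the governing predicate to the node's actual vector + // length; NE ("any matched") is set when at least one such lane is -1. 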
+ BasicType bt = Matcher::vector_element_basic_type(this, $src1); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), size, + Matcher::vector_length(this, $src1)); + __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, + as_PRegister($pTmp$$reg), as_FloatRegister($src1$$reg), -1); + __ csetw(as_Register($dst$$reg), Assembler::NE); %} ins_pipe(pipe_slow); %} -instruct vsubS(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); - match(Set dst (SubVS src1 src2)); +// ------------------------------ Vector insert --------------------------------- + +instruct insertI_small(vReg dst, vReg src, iRegIorL2I val, immI idx, pRegGov pTmp, rFlagsReg cr) +%{ + predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 && + (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE || + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || + n->bottom_type()->is_vect()->element_basic_type() == T_INT)); + match(Set dst (VectorInsert (Binary src val) idx)); + effect(TEMP_DEF dst, TEMP pTmp, KILL cr); + ins_cost(4 * SVE_COST); + format %{ "sve_index $dst, -16, 1\t# (B/S/I)\n\t" + "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" + "sve_orr $dst, $src, $src\n\t" + "sve_cpy $dst, $pTmp, $val\t# insert into vector (B/S/I)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_index(as_FloatRegister($dst$$reg), size, -16, 1); + __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, ptrue, + as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16); + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), as_Register($val$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct insertF_small(vReg dst, vReg src, vRegF val, immI idx, pRegGov pTmp, rFlagsReg cr) +%{ + predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 && + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorInsert (Binary src val) idx)); + effect(TEMP_DEF dst, TEMP pTmp, KILL cr); + ins_cost(4 * SVE_COST); + format %{ "sve_index $dst, S, -16, 1\n\t" + "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" + "sve_orr $dst, $src, $src\n\t" + "sve_cpy $dst, $pTmp, $val\t# insert into vector (F)" %} + ins_encode %{ + __ sve_index(as_FloatRegister($dst$$reg), __ S, -16, 1); + __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ S, ptrue, + as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16); + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + __ sve_cpy(as_FloatRegister($dst$$reg), __ S, as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct insertI(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg tmp1, pRegGov pTmp, rFlagsReg cr) +%{ + predicate(UseSVE > 0 && n->as_Vector()->length() > 32 && + (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE || + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || + n->bottom_type()->is_vect()->element_basic_type() == T_INT)); + match(Set dst (VectorInsert (Binary src val) idx)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP pTmp, KILL cr); + ins_cost(5 * SVE_COST); + format %{ "sve_index $tmp1, 0, 1\t# (B/S/I)\n\t" + "sve_dup $dst, $idx\t# (B/S/I)\n\t" + "sve_cmpeq $pTmp, $tmp1, 
$dst\n\t" + "sve_orr $dst, $src, $src\n\t" + "sve_cpy $dst, $pTmp, $val\t# insert into vector (B/S/I)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_index(as_FloatRegister($tmp1$$reg), size, 0, 1); + __ sve_dup(as_FloatRegister($dst$$reg), size, (int)($idx$$constant)); + __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, ptrue, + as_FloatRegister($tmp1$$reg), as_FloatRegister($dst$$reg)); + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), as_Register($val$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct insertL(vReg dst, vReg src, iRegL val, immI idx, pRegGov pTmp, rFlagsReg cr) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (VectorInsert (Binary src val) idx)); + effect(TEMP_DEF dst, TEMP pTmp, KILL cr); + ins_cost(4 * SVE_COST); + format %{ "sve_index $dst, D, -16, 1\n\t" + "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" + "sve_orr $dst, $src, $src\n\t" + "sve_cpy $dst, $pTmp, $val\t# insert into vector (L)" %} + ins_encode %{ + __ sve_index(as_FloatRegister($dst$$reg), __ D, -16, 1); + __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ D, ptrue, + as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16); + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + __ sve_cpy(as_FloatRegister($dst$$reg), __ D, as_PRegister($pTmp$$reg), as_Register($val$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct insertD(vReg dst, vReg src, vRegD val, immI idx, pRegGov pTmp, rFlagsReg cr) +%{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorInsert (Binary src val) idx)); + effect(TEMP_DEF dst, TEMP pTmp, KILL cr); + ins_cost(4 * SVE_COST); + format %{ "sve_index $dst, D, -16, 1\n\t" + "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" + "sve_orr $dst, $src, $src\n\t" + "sve_cpy $dst, $pTmp, $val\t# insert into vector (D)" %} + ins_encode %{ + __ sve_index(as_FloatRegister($dst$$reg), __ D, -16, 1); + __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ D, ptrue, + as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16); + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + __ sve_cpy(as_FloatRegister($dst$$reg), __ D, as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct insertF(vReg dst, vReg src, vRegF val, immI idx, vReg tmp1, pRegGov pTmp, rFlagsReg cr) +%{ + predicate(UseSVE > 0 && n->as_Vector()->length() > 32 && + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorInsert (Binary src val) idx)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP pTmp, KILL cr); + ins_cost(5 * SVE_COST); + format %{ "sve_index $tmp1, S, 0, 1\n\t" + "sve_dup $dst, S, $idx\n\t" + "sve_cmpeq $pTmp, $tmp1, $dst\n\t" + "sve_orr $dst, $src, $src\n\t" + "sve_cpy $dst, $pTmp, $val\t# insert into vector (F)" %} + ins_encode %{ + __ sve_index(as_FloatRegister($tmp1$$reg), __ S, 0, 1); + __ sve_dup(as_FloatRegister($dst$$reg), __ S, (int)($idx$$constant)); + __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ S, ptrue, + as_FloatRegister($tmp1$$reg), as_FloatRegister($dst$$reg)); + __ sve_orr(as_FloatRegister($dst$$reg), + 
as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + __ sve_cpy(as_FloatRegister($dst$$reg), __ S, + as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector shuffle ------------------------------- + +instruct loadshuffleB(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorLoadShuffle src)); ins_cost(SVE_COST); - format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (H)" %} + format %{ "sve_orr $dst, $src, $src\t# vector load shuffle (B)" %} ins_encode %{ - __ sve_sub(as_FloatRegister($dst$$reg), __ H, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); + if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) { + __ sve_orr(as_FloatRegister($dst$$reg), + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } %} ins_pipe(pipe_slow); %} -instruct vsubI(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); - match(Set dst (SubVI src1 src2)); +instruct loadshuffleS(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorLoadShuffle src)); ins_cost(SVE_COST); - format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (S)" %} + format %{ "sve_uunpklo $dst, $src\t# vector load shuffle (B to H)" %} ins_encode %{ - __ sve_sub(as_FloatRegister($dst$$reg), __ S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); + __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); %} ins_pipe(pipe_slow); %} -instruct vsubL(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); - match(Set dst (SubVL src1 src2)); +instruct loadshuffleI(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (VectorLoadShuffle src)); + ins_cost(2 * SVE_COST); + format %{ "sve_uunpklo $dst, H, $src\n\t" + "sve_uunpklo $dst, S, $dst\t# vector load shuffle (B to S)" %} + ins_encode %{ + __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); + __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct loadshuffleL(vReg dst, vReg src) +%{ + predicate(UseSVE > 0 && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set dst (VectorLoadShuffle src)); + ins_cost(3 * SVE_COST); + format %{ "sve_uunpklo $dst, H, $src\n\t" + "sve_uunpklo $dst, S, $dst\n\t" + "sve_uunpklo $dst, D, $dst\t# vector load shuffle (B to D)" %} + ins_encode %{ + __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); + __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg)); + __ sve_uunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector rearrange ------------------------------- + +instruct rearrange(vReg dst, vReg src, vReg shuffle) +%{ + predicate(UseSVE > 0); + match(Set dst (VectorRearrange src shuffle)); ins_cost(SVE_COST); - format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (D)" %} + format %{ "sve_tbl $dst, $src, $shuffle\t# vector rearrange" %} ins_encode %{ - __ sve_sub(as_FloatRegister($dst$$reg), __ D, - 
as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_tbl(as_FloatRegister($dst$$reg), size, + as_FloatRegister($src$$reg), as_FloatRegister($shuffle$$reg)); %} ins_pipe(pipe_slow); %} -instruct vsubF(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); - match(Set dst (SubVF src1 src2)); +// ------------------------------ Vector Load Gather --------------------------------- + +instruct gatherI(vReg dst, indirect mem, vReg idx) %{ + predicate(UseSVE > 0 && + n->as_LoadVectorGather()->memory_size() == MaxVectorSize && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (LoadVectorGather mem idx)); ins_cost(SVE_COST); - format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (S)" %} + format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (I/F)" %} ins_encode %{ - __ sve_fsub(as_FloatRegister($dst$$reg), __ S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); + __ sve_ld1w_gather(as_FloatRegister($dst$$reg), ptrue, + as_Register($mem$$base), as_FloatRegister($idx$$reg)); %} ins_pipe(pipe_slow); %} -instruct vsubD(vReg dst, vReg src1, vReg src2) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); - match(Set dst (SubVD src1 src2)); +instruct gatherL(vReg dst, indirect mem, vReg idx) %{ + predicate(UseSVE > 0 && + n->as_LoadVectorGather()->memory_size() == MaxVectorSize && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set dst (LoadVectorGather mem idx)); + ins_cost(2 * SVE_COST); + format %{ "sve_uunpklo $idx, $idx\n\t" + "load_vector_gather $dst, $mem, $idx\t# vector load gather (L/D)" %} + ins_encode %{ + __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); + __ sve_ld1d_gather(as_FloatRegister($dst$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector Load Gather Partial------------------------------- + +instruct gatherI_partial(vReg dst, indirect mem, vReg idx, pRegGov pTmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_LoadVectorGather()->memory_size() < MaxVectorSize && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (LoadVectorGather mem idx)); + effect(TEMP pTmp, KILL cr); + ins_cost(2 * SVE_COST + INSN_COST); + format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" + "load_vector_gather $dst, $pTmp, $mem, $idx\t# vector load gather partial (I/F)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ S, + Matcher::vector_length(this)); + __ sve_ld1w_gather(as_FloatRegister($dst$$reg), as_PRegister($pTmp$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct gatherL_partial(vReg dst, indirect mem, vReg idx, pRegGov pTmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_LoadVectorGather()->memory_size() < MaxVectorSize && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set dst (LoadVectorGather mem idx)); + effect(TEMP pTmp, KILL cr); + ins_cost(3 * SVE_COST + INSN_COST); + format %{ 
"sve_whilelo_zr_imm $pTmp, vector_length\n\t" + "sve_uunpklo $idx, $idx\n\t" + "load_vector_gather $dst, $pTmp, $mem, $idx\t# vector load gather partial (L/D)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ D, + Matcher::vector_length(this)); + __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); + __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($pTmp$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector Store Scatter ------------------------------- + +instruct scatterI(indirect mem, vReg src, vReg idx) %{ + predicate(UseSVE > 0 && + n->as_StoreVectorScatter()->memory_size() == MaxVectorSize && + (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set mem (StoreVectorScatter mem (Binary src idx))); ins_cost(SVE_COST); - format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (D)" %} + format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (I/F)" %} ins_encode %{ - __ sve_fsub(as_FloatRegister($dst$$reg), __ D, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); + __ sve_st1w_scatter(as_FloatRegister($src$$reg), ptrue, + as_Register($mem$$base), as_FloatRegister($idx$$reg)); %} ins_pipe(pipe_slow); %} -// vector mask cast +instruct scatterL(indirect mem, vReg src, vReg idx) %{ + predicate(UseSVE > 0 && + n->as_StoreVectorScatter()->memory_size() == MaxVectorSize && + (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set mem (StoreVectorScatter mem (Binary src idx))); + ins_cost(2 * SVE_COST); + format %{ "sve_uunpklo $idx, $idx\n\t" + "store_vector_scatter $mem, $idx, $src\t# vector store scatter (L/D)" %} + ins_encode %{ + __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, + as_FloatRegister($idx$$reg)); + __ sve_st1d_scatter(as_FloatRegister($src$$reg), ptrue, + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} -instruct vmaskcast(vReg dst) %{ - predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length() && - n->bottom_type()->is_vect()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes()); - match(Set dst (VectorMaskCast dst)); - ins_cost(0); - format %{ "vmaskcast $dst\t# empty (sve)" %} +// ------------------------------ Vector Store Scatter Partial------------------------------- + +instruct scatterI_partial(indirect mem, vReg src, vReg idx, pRegGov pTmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_StoreVectorScatter()->memory_size() < MaxVectorSize && + (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set mem (StoreVectorScatter mem (Binary src idx))); + effect(TEMP pTmp, KILL cr); + ins_cost(2 * SVE_COST + INSN_COST); + format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" + "store_vector_scatter $mem, $pTmp, $idx, $src\t# vector store scatter partial (I/F)" %} ins_encode %{ - // empty + __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ S, + Matcher::vector_length(this, $src)); + __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($pTmp$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); %} - ins_pipe(pipe_class_empty); + 
ins_pipe(pipe_slow); +%} + +instruct scatterL_partial(indirect mem, vReg src, vReg idx, pRegGov pTmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_StoreVectorScatter()->memory_size() < MaxVectorSize && + (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set mem (StoreVectorScatter mem (Binary src idx))); + effect(TEMP pTmp, KILL cr); + ins_cost(3 * SVE_COST + INSN_COST); + format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" + "sve_uunpklo $idx, $idx\n\t" + "store_vector_scatter $mem, $pTmp, $idx, $src\t# vector store scatter partial (L/D)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ D, + Matcher::vector_length(this, $src)); + __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); + __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($pTmp$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + + +// ------------------------------ Vector Load Const ------------------------------- + +instruct loadconB(vReg dst, immI0 src) %{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorLoadConst src)); + ins_cost(SVE_COST); + format %{ "sve_index $dst, 0, 1\t# generate iota indices" %} + ins_encode %{ + __ sve_index(as_FloatRegister($dst$$reg), __ B, 0, 1); + %} + ins_pipe(pipe_slow); +%} + +// Intrinsics for String.indexOf(char) + + +instruct stringL_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, + iRegI_R0 result, vReg ztmp1, vReg ztmp2, + pRegGov pgtmp, pReg ptmp, rFlagsReg cr) +%{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); + predicate((UseSVE > 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); + effect(TEMP ztmp1, TEMP ztmp2, TEMP pgtmp, TEMP ptmp, KILL cr); + + format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result # use sve" %} + + ins_encode %{ + __ string_indexof_char_sve($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, + as_FloatRegister($ztmp1$$reg), as_FloatRegister($ztmp2$$reg), + as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg), true /* isL */); + %} + ins_pipe(pipe_class_memory); +%} + +instruct stringU_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, + iRegI_R0 result, vReg ztmp1, vReg ztmp2, + pRegGov pgtmp, pReg ptmp, rFlagsReg cr) +%{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); + predicate((UseSVE > 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); + effect(TEMP ztmp1, TEMP ztmp2, TEMP pgtmp, TEMP ptmp, KILL cr); + + format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result # use sve" %} + + ins_encode %{ + __ string_indexof_char_sve($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, + as_FloatRegister($ztmp1$$reg), as_FloatRegister($ztmp2$$reg), + as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg), false /* isL */); + %} + ins_pipe(pipe_class_memory); +%} + +// ---------------------------- Vector mask reductions --------------------------- + +instruct vmask_truecount(iRegINoSp dst, vReg src, pReg ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (VectorMaskTrueCount src)); + effect(TEMP ptmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "vmask_truecount $dst, $src\t# vector mask truecount (sve)" %} + ins_encode %{ + __ 
sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, + as_FloatRegister($src$$reg), ptrue, as_PRegister($ptmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_firsttrue(iRegINoSp dst, vReg src, pReg ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (VectorMaskFirstTrue src)); + effect(TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "vmask_firsttrue $dst, $src\t# vector mask firsttrue (sve)" %} + ins_encode %{ + __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, + as_FloatRegister($src$$reg), ptrue, as_PRegister($ptmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_lasttrue(iRegINoSp dst, vReg src, pReg ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (VectorMaskLastTrue src)); + effect(TEMP ptmp, KILL cr); + ins_cost(4 * SVE_COST); + format %{ "vmask_lasttrue $dst, $src\t# vector mask lasttrue (sve)" %} + ins_encode %{ + __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, + as_FloatRegister($src$$reg), ptrue, as_PRegister($ptmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_truecount_partial(iRegINoSp dst, vReg src, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (VectorMaskTrueCount src)); + effect(TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "vmask_truecount $dst, $src\t# vector mask truecount partial (sve)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ B, + Matcher::vector_length(this, $src)); + __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, as_FloatRegister($src$$reg), + as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_firsttrue_partial(iRegINoSp dst, vReg src, pRegGov pgtmp, pReg ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (VectorMaskFirstTrue src)); + effect(TEMP pgtmp, TEMP ptmp, KILL cr); + ins_cost(4 * SVE_COST); + format %{ "vmask_firsttrue $dst, $src\t# vector mask firsttrue partial (sve)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ B, + Matcher::vector_length(this, $src)); + __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, as_FloatRegister($src$$reg), + as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_lasttrue_partial(iRegINoSp dst, vReg src, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (VectorMaskLastTrue src)); + effect(TEMP ptmp, KILL cr); + ins_cost(5 * SVE_COST); + format %{ "vmask_lasttrue $dst, $src\t# vector mask lasttrue partial (sve)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ B, + Matcher::vector_length(this, $src)); + __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, as_FloatRegister($src$$reg), + as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ----------------- Vector mask reductions combined with VectorMaskStore --------------- + +instruct vstoremask_truecount(iRegINoSp dst, vReg src, immI esize, pReg ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() == 
MaxVectorSize); + match(Set dst (VectorMaskTrueCount (VectorStoreMask src esize))); + effect(TEMP ptmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "vstoremask_truecount $dst, $src\t# vector mask truecount (sve)" %} + ins_encode %{ + unsigned size = $esize$$constant; + assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size"); + Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size); + __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg), + ptrue, as_PRegister($ptmp$$reg), Matcher::vector_length(this, $src)); + %} + ins_pipe(pipe_slow); +%} + +instruct vstoremask_firsttrue(iRegINoSp dst, vReg src, immI esize, pReg ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (VectorMaskFirstTrue (VectorStoreMask src esize))); + effect(TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "vstoremask_firsttrue $dst, $src\t# vector mask firsttrue (sve)" %} + ins_encode %{ + unsigned size = $esize$$constant; + assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size"); + Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size); + __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg), + ptrue, as_PRegister($ptmp$$reg), Matcher::vector_length(this, $src)); + %} + ins_pipe(pipe_slow); +%} + +instruct vstoremask_lasttrue(iRegINoSp dst, vReg src, immI esize, pReg ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (VectorMaskLastTrue (VectorStoreMask src esize))); + effect(TEMP ptmp, KILL cr); + ins_cost(4 * SVE_COST); + format %{ "vstoremask_lasttrue $dst, $src\t# vector mask lasttrue (sve)" %} + ins_encode %{ + unsigned size = $esize$$constant; + assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size"); + Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size); + __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg), + ptrue, as_PRegister($ptmp$$reg), Matcher::vector_length(this, $src)); + %} + ins_pipe(pipe_slow); +%} + +instruct vstoremask_truecount_partial(iRegINoSp dst, vReg src, immI esize, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (VectorMaskTrueCount (VectorStoreMask src esize))); + effect(TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "vstoremask_truecount $dst, $src\t# vector mask truecount partial (sve)" %} + ins_encode %{ + unsigned size = $esize$$constant; + assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size"); + Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, + Matcher::vector_length(this, $src)); + __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg), + as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), MaxVectorSize / size); + %} + ins_pipe(pipe_slow); +%} + +instruct vstoremask_firsttrue_partial(iRegINoSp dst, vReg src, immI esize, pRegGov pgtmp, pReg ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (VectorMaskFirstTrue (VectorStoreMask src esize))); + effect(TEMP 
pgtmp, TEMP ptmp, KILL cr); + ins_cost(4 * SVE_COST); + format %{ "vstoremask_firsttrue $dst, $src\t# vector mask firsttrue partial (sve)" %} + ins_encode %{ + unsigned size = $esize$$constant; + assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size"); + Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size); + __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), variant, + Matcher::vector_length(this, $src)); + __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg), + as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg), MaxVectorSize / size); + %} + ins_pipe(pipe_slow); %} +instruct vstoremask_lasttrue_partial(iRegINoSp dst, vReg src, immI esize, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (VectorMaskLastTrue (VectorStoreMask src esize))); + effect(TEMP ptmp, KILL cr); + ins_cost(5 * SVE_COST); + format %{ "vstoremask_lasttrue $dst, $src\t# vector mask lasttrue partial (sve)" %} + ins_encode %{ + unsigned size = $esize$$constant; + assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size"); + Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, + Matcher::vector_length(this, $src)); + __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg), + as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), MaxVectorSize / size); + %} + ins_pipe(pipe_slow); +%} diff --git a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 index 28a8a64a6f6c90f9834cdc1ec91b23fdf52f2530..dfdc6a2fda9e4420f0f4a60fea24037fba9fa429 100644 --- a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 @@ -29,34 +29,44 @@ dnl // AArch64 SVE Architecture Description File - -// 4 bit signed offset -- for predicated load/store dnl -dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET($1, $2, $3 ) -dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET(imm_type_abbr, imm_type, imm_len) +define(`TYPE2DATATYPE', +`ifelse($1, `B', `BYTE', + $1, `S', `SHORT', + $1, `I', `INT', + $1, `L', `LONG', + $1, `F', `FLOAT', + $1, `D', `DOUBLE', + `error($1)')')dnl +dnl +dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET($1, $2, $3 $4 ) +dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET(imm_type_abbr, imm_type, imm_len, scale) define(`OPERAND_VMEMORYA_IMMEDIATE_OFFSET', ` operand vmemA_imm$1Offset$3() %{ + // (esize / msize) = $4 predicate(Address::offset_ok_for_sve_immed(n->get_$2(), $3, - Matcher::scalable_vector_reg_size(T_BYTE))); + Matcher::scalable_vector_reg_size(T_BYTE)ifelse($4, `1', `', ` / $4'))); match(Con$1); op_cost(0); format %{ %} interface(CONST_INTER); %}')dnl -OPERAND_VMEMORYA_IMMEDIATE_OFFSET(I, int, 4) -OPERAND_VMEMORYA_IMMEDIATE_OFFSET(L, long, 4) + +// 4 bit signed offset -- for predicated load/store +OPERAND_VMEMORYA_IMMEDIATE_OFFSET(I, int, 4, 1) +OPERAND_VMEMORYA_IMMEDIATE_OFFSET(L, long, 4, 1) dnl dnl OPERAND_VMEMORYA_INDIRECT_OFFSET($1, $2 ) dnl OPERAND_VMEMORYA_INDIRECT_OFFSET(imm_type_abbr, imm_len) define(`OPERAND_VMEMORYA_INDIRECT_OFFSET', ` -operand vmemA_indOff$1$2(iRegP reg, vmemA_imm$1Offset$2 off) +operand vmemA_indOff$1$2$3(iRegP reg, vmemA_imm$1Offset$2 off) %{ constraint(ALLOC_IN_RC(ptr_reg)); match(AddP reg off); op_cost(0); - format %{ "[$reg, $off, MUL VL]" %} + format %{ "[$reg, $off]" %} interface(MEMORY_INTER) %{ base($reg); `index'(0xffffffff); @@ 
-67,133 +77,81 @@ operand vmemA_indOff$1$2(iRegP reg, vmemA_imm$1Offset$2 off) OPERAND_VMEMORYA_INDIRECT_OFFSET(I, 4) OPERAND_VMEMORYA_INDIRECT_OFFSET(L, 4) +// The indOff of vmemA is valid only when the vector element (load to/store from) +// size equals to memory element (load from/store to) size. opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4); source_hpp %{ - bool op_sve_supported(int opcode); + bool op_sve_supported(int opcode, int vlen, BasicType bt); %} source %{ - static inline BasicType vector_element_basic_type(const MachNode* n) { - const TypeVect* vt = n->bottom_type()->is_vect(); - return vt->element_basic_type(); - } - - static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) { - int def_idx = use->operand_index(opnd); - Node* def = use->in(def_idx); - const TypeVect* vt = def->bottom_type()->is_vect(); - return vt->element_basic_type(); - } - - static Assembler::SIMD_RegVariant elemBytes_to_regVariant(int esize) { - switch(esize) { - case 1: - return Assembler::B; - case 2: - return Assembler::H; - case 4: - return Assembler::S; - case 8: - return Assembler::D; - default: - assert(false, "unsupported"); - ShouldNotReachHere(); - } - return Assembler::INVALID; - } - - static Assembler::SIMD_RegVariant elemType_to_regVariant(BasicType bt) { - return elemBytes_to_regVariant(type2aelembytes(bt)); - } typedef void (C2_MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T, PRegister Pg, const Address &adr); // Predicated load/store, with optional ptrue to all elements of given predicate register. - static void loadStoreA_predicate(C2_MacroAssembler masm, bool is_store, - FloatRegister reg, PRegister pg, BasicType bt, - int opcode, Register base, int index, int size, int disp) { + static void loadStoreA_predicated(C2_MacroAssembler masm, bool is_store, FloatRegister reg, + PRegister pg, BasicType mem_elem_bt, BasicType vector_elem_bt, + int opcode, Register base, int index, int size, int disp) { sve_mem_insn_predicate insn; - Assembler::SIMD_RegVariant type; - int esize = type2aelembytes(bt); + int mesize = type2aelembytes(mem_elem_bt); if (index == -1) { assert(size == 0, "unsupported address mode: scale size = %d", size); - switch(esize) { + switch(mesize) { case 1: insn = is_store ? &C2_MacroAssembler::sve_st1b : &C2_MacroAssembler::sve_ld1b; - type = Assembler::B; break; case 2: insn = is_store ? &C2_MacroAssembler::sve_st1h : &C2_MacroAssembler::sve_ld1h; - type = Assembler::H; break; case 4: insn = is_store ? &C2_MacroAssembler::sve_st1w : &C2_MacroAssembler::sve_ld1w; - type = Assembler::S; break; case 8: insn = is_store ? 
&C2_MacroAssembler::sve_st1d : &C2_MacroAssembler::sve_ld1d; - type = Assembler::D; break; default: assert(false, "unsupported"); ShouldNotReachHere(); } - (masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE))); + int imm4 = disp / mesize / Matcher::scalable_vector_reg_size(vector_elem_bt); + (masm.*insn)(reg, Assembler::elemType_to_regVariant(vector_elem_bt), pg, Address(base, imm4)); } else { assert(false, "unimplemented"); ShouldNotReachHere(); } } - bool op_sve_supported(int opcode) { + bool op_sve_supported(int opcode, int vlen, BasicType bt) { + int length_in_bytes = vlen * type2aelembytes(bt); switch (opcode) { case Op_MulAddVS2VI: - // No multiply reduction instructions + // No multiply reduction instructions case Op_MulReductionVD: case Op_MulReductionVF: case Op_MulReductionVI: case Op_MulReductionVL: - // Others - case Op_Extract: - case Op_ExtractB: + // Others case Op_ExtractC: - case Op_ExtractD: - case Op_ExtractF: - case Op_ExtractI: - case Op_ExtractL: - case Op_ExtractS: case Op_ExtractUB: + return false; // Vector API specific - case Op_AndReductionV: - case Op_OrReductionV: - case Op_XorReductionV: - case Op_MaxReductionV: - case Op_MinReductionV: - case Op_LoadVectorGather: - case Op_StoreVectorScatter: - case Op_VectorBlend: - case Op_VectorCast: - case Op_VectorCastB2X: - case Op_VectorCastD2X: - case Op_VectorCastF2X: - case Op_VectorCastI2X: - case Op_VectorCastL2X: - case Op_VectorCastS2X: - case Op_VectorInsert: - case Op_VectorLoadConst: - case Op_VectorLoadMask: case Op_VectorLoadShuffle: - case Op_VectorMaskCmp: case Op_VectorRearrange: - case Op_VectorReinterpret: - case Op_VectorStoreMask: - case Op_VectorTest: - return false; + if (vlen < 4 || length_in_bytes > MaxVectorSize) { + return false; + } else { + return true; + } + case Op_LoadVector: + case Op_StoreVector: + return Matcher::vector_size_supported(bt, vlen); default: - return true; + break; } + // By default, we only support vector operations with no less than 8 bytes and 2 elements. 
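+ // Returning false here makes the C2 matcher reject vector nodes of that size. 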
+ return 8 <= length_in_bytes && length_in_bytes <= MaxVectorSize && vlen >= 2; } %} @@ -214,41 +172,147 @@ dnl // vector load/store -// Use predicated vector load/store +// Unpredicated vector load/store instruct loadV(vReg dst, vmemA mem) %{ - predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16); + predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16 && + n->as_LoadVector()->memory_size() == MaxVectorSize); match(Set dst (LoadVector mem)); - ins_cost(SVE_COST); - format %{ "sve_ldr $dst, $mem\t # vector (sve)" %} + ins_cost(4 * SVE_COST); + format %{ "sve_ldr $dst, $mem\t# vector (sve)" %} ins_encode %{ FloatRegister dst_reg = as_FloatRegister($dst$$reg); - loadStoreA_predicate(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue, - vector_element_basic_type(this), $mem->opcode(), - as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + BasicType bt = Matcher::vector_element_basic_type(this); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue, + bt, bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} ins_pipe(pipe_slow); %} instruct storeV(vReg src, vmemA mem) %{ - predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16); + predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16 && + n->as_StoreVector()->memory_size() == MaxVectorSize); match(Set mem (StoreVector mem src)); - ins_cost(SVE_COST); - format %{ "sve_str $mem, $src\t # vector (sve)" %} + ins_cost(4 * SVE_COST); + format %{ "sve_str $mem, $src\t# vector (sve)" %} + ins_encode %{ + FloatRegister src_reg = as_FloatRegister($src$$reg); + BasicType bt = Matcher::vector_element_basic_type(this, $src); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, src_reg, ptrue, + bt, bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%}dnl + +dnl +define(`VLoadStore', ` +// ifelse(load, $3, Load, Store) Vector ($6 bits) +instruct $3V$4_vreg`'(vReg $7, vmem$4 mem) +%{ + predicate(UseSVE > 0 && `n->as_'ifelse(load, $3, Load, Store)Vector()->memory_size() == $4); + match(Set ifelse(load, $3, dst (LoadVector mem), mem (StoreVector mem src))); + ins_cost(4 * INSN_COST); + format %{ "$1 ifelse(load, $3, `$dst,$mem', `$mem,$src')\t# vector ($6 bits)" %} + ins_encode( `aarch64_enc_'ifelse(load, $3, ldr, str)v$2($7, mem) ); + ins_pipe(v$3`_reg_mem'ifelse(eval($4 * 8), 128, 128, 64)); +%}')dnl +dnl $1 $2 $3 $4 $5 $6 $7 +VLoadStore(ldrh, H, load, 2, D, 16, dst) +VLoadStore(strh, H, store, 2, D, 16, src) +VLoadStore(ldrs, S, load, 4, D, 32, dst) +VLoadStore(strs, S, store, 4, D, 32, src) +VLoadStore(ldrd, D, load, 8, D, 64, dst) +VLoadStore(strd, D, store, 8, D, 64, src) +VLoadStore(ldrq, Q, load, 16, X, 128, dst) +VLoadStore(strq, Q, store, 16, X, 128, src) + +// Predicated vector load/store, based on the vector length of the node. +// Only load/store values in the range of the memory_size. This is needed +// when the memory_size is lower than the hardware supported max vector size. +// And this might happen for Vector API mask vector load/store. 
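An illustration of when these partial rules apply (not from the patch): the node's memory size is smaller than the hardware vector width whenever a fixed-size Vector API species runs on wider SVE registers. A hedged Java sketch, assuming 512-bit SVE hardware (MaxVectorSize = 64 bytes; the species choice is an assumption):

import jdk.incubator.vector.*;

public class SvePartialLoadExample {
    // A fixed 256-bit species: 32 bytes per vector, half the assumed
    // 64-byte SVE register, so memory_size is > 16 and < MaxVectorSize,
    // and loads can match loadV_partial below instead of loadV.
    static final VectorSpecies<Integer> S_256 = IntVector.SPECIES_256;

    public static void main(String[] args) {
        int[] a = new int[S_256.length()];
        for (int i = 0; i < a.length; i++) {
            a[i] = i;
        }
        IntVector v = IntVector.fromArray(S_256, a, 0);  // predicated load
        System.out.println(v.add(v));
    }
}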
+instruct loadV_partial(vReg dst, vmemA mem, pRegGov pTmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() > 16 && + n->as_LoadVector()->memory_size() < MaxVectorSize); + match(Set dst (LoadVector mem)); + effect(TEMP pTmp, KILL cr); + ins_cost(6 * SVE_COST); + format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" + "sve_ldr $dst, $pTmp, $mem\t# load vector predicated" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ elemType_to_regVariant(bt), + Matcher::vector_length(this)); + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, + as_PRegister($pTmp$$reg), bt, bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} + +instruct storeV_partial(vReg src, vmemA mem, pRegGov pTmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() > 16 && + n->as_StoreVector()->memory_size() < MaxVectorSize); + match(Set mem (StoreVector mem src)); + effect(TEMP pTmp, KILL cr); + ins_cost(5 * SVE_COST); + format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" + "sve_str $src, $pTmp, $mem\t# store vector predicated" %} ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src); + __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ elemType_to_regVariant(bt), + Matcher::vector_length(this, $src)); FloatRegister src_reg = as_FloatRegister($src$$reg); - loadStoreA_predicate(C2_MacroAssembler(&cbuf), true, src_reg, ptrue, - vector_element_basic_type(this, $src), $mem->opcode(), - as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, src_reg, + as_PRegister($pTmp$$reg), bt, bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} ins_pipe(pipe_slow); +%}dnl + + +// vector reinterpret + +instruct reinterpret(vReg dst) %{ + predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() == + n->in(1)->bottom_type()->is_vect()->length_in_bytes()); // src == dst + match(Set dst (VectorReinterpret dst)); + ins_cost(0); + format %{ "# reinterpret $dst\t# do nothing" %} + ins_encode %{ + // empty + %} + ins_pipe(pipe_class_empty); %} +instruct reinterpretResize(vReg dst, vReg src, pRegGov pTmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() != + n->in(1)->bottom_type()->is_vect()->length_in_bytes()); // src != dst + match(Set dst (VectorReinterpret src)); + effect(TEMP_DEF dst, TEMP pTmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "reinterpretResize $dst, $src\t# vector (sve)" %} + ins_encode %{ + uint length_in_bytes_src = Matcher::vector_length_in_bytes(this, $src); + uint length_in_bytes_dst = Matcher::vector_length_in_bytes(this); + uint length_in_bytes_resize = length_in_bytes_src < length_in_bytes_dst ? 
+                                      length_in_bytes_src : length_in_bytes_dst;
+    assert(length_in_bytes_src <= MaxVectorSize && length_in_bytes_dst <= MaxVectorSize,
+           "invalid vector length");
+    __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ B, length_in_bytes_resize);
+    __ sve_dup(as_FloatRegister($dst$$reg), __ B, 0);
+    __ sve_sel(as_FloatRegister($dst$$reg), __ B, as_PRegister($pTmp$$reg),
+               as_FloatRegister($src$$reg), as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
 dnl
 dnl UNARY_OP_TRUE_PREDICATE_ETYPE($1, $2, $3, $4, $5, $6 )
 dnl UNARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn)
 define(`UNARY_OP_TRUE_PREDICATE_ETYPE', `
 instruct $1(vReg dst, vReg src) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 &&
+  predicate(UseSVE > 0 &&
             n->bottom_type()->is_vect()->element_basic_type() == $3);
   match(Set dst ($2 src));
   ins_cost(SVE_COST);
@@ -260,6 +324,7 @@ instruct $1(vReg dst, vReg src) %{
   ins_pipe(pipe_slow);
 %}')dnl
 dnl
+
 // vector abs
 UNARY_OP_TRUE_PREDICATE_ETYPE(vabsB, AbsVB, T_BYTE, B, 16, sve_abs)
 UNARY_OP_TRUE_PREDICATE_ETYPE(vabsS, AbsVS, T_SHORT, H, 8, sve_abs)
@@ -272,7 +337,7 @@ dnl BINARY_OP_UNPREDICATED($1, $2 $3, $4 $5 )
 dnl BINARY_OP_UNPREDICATED(insn_name, op_name, size, min_vec_len, insn)
 define(`BINARY_OP_UNPREDICATED', `
 instruct $1(vReg dst, vReg src1, vReg src2) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
+  predicate(UseSVE > 0);
   match(Set dst ($2 src1 src2));
   ins_cost(SVE_COST);
   format %{ "$5 $dst, $src1, $src2\t # vector (sve) ($3)" %}
@@ -296,7 +361,7 @@ dnl BINARY_OP_UNSIZED($1, $2, $3, $4 )
 dnl BINARY_OP_UNSIZED(insn_name, op_name, min_vec_len, insn)
 define(`BINARY_OP_UNSIZED', `
 instruct $1(vReg dst, vReg src1, vReg src2) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $3);
+  predicate(UseSVE > 0);
   match(Set dst ($2 src1 src2));
   ins_cost(SVE_COST);
   format %{ "$4 $dst, $src1, $src2\t# vector (sve)" %}
@@ -327,7 +392,7 @@ define(`MATCH_RULE', `ifelse($1, I,
 dnl
 define(`VECTOR_NOT', `
 instruct vnot$1`'(vReg dst, vReg src, imm$1_M1 m1) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+  predicate(UseSVE > 0);
   MATCH_RULE($1)
   ins_cost(SVE_COST);
   format %{ "sve_not $dst, $src\t# vector (sve) $2" %}
@@ -352,7 +417,7 @@ define(`MATCH_RULE', `ifelse($1, I,
 dnl
 define(`VECTOR_AND_NOT', `
 instruct vand_not$1`'(vReg dst, vReg src1, vReg src2, imm$1_M1 m1) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+  predicate(UseSVE > 0);
   MATCH_RULE($1)
   ins_cost(SVE_COST);
   format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) $2" %}
@@ -372,7 +437,7 @@ dnl VDIVF($1, $2 , $3 )
 dnl VDIVF(name_suffix, size, min_vec_len)
 define(`VDIVF', `
 instruct vdiv$1(vReg dst_src1, vReg src2) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
+  predicate(UseSVE > 0);
   match(Set dst_src1 (DivV$1 dst_src1 src2));
   ins_cost(SVE_COST);
   format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) ($2)" %}
@@ -390,13 +455,13 @@ VDIVF(D, D, 2)
 
 // vector min/max
 
 instruct vmin(vReg dst_src1, vReg src2) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+  predicate(UseSVE > 0);
   match(Set dst_src1 (MinV dst_src1 src2));
   ins_cost(SVE_COST);
   format %{ "sve_min $dst_src1, $dst_src1, $src2\t # vector (sve)" %}
   ins_encode %{
-    BasicType bt = vector_element_basic_type(this);
-    Assembler::SIMD_RegVariant size = elemType_to_regVariant(bt);
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
     if (is_floating_point_type(bt)) {
       __ sve_fmin(as_FloatRegister($dst_src1$$reg), size,
                   ptrue, as_FloatRegister($src2$$reg));
@@ -410,13 +475,13 @@ instruct vmin(vReg dst_src1, vReg src2) %{
 %}
 
 instruct vmax(vReg dst_src1, vReg src2) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+  predicate(UseSVE > 0);
   match(Set dst_src1 (MaxV dst_src1 src2));
   ins_cost(SVE_COST);
   format %{ "sve_max $dst_src1, $dst_src1, $src2\t # vector (sve)" %}
   ins_encode %{
-    BasicType bt = vector_element_basic_type(this);
-    Assembler::SIMD_RegVariant size = elemType_to_regVariant(bt);
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
     if (is_floating_point_type(bt)) {
       __ sve_fmax(as_FloatRegister($dst_src1$$reg), size,
                   ptrue, as_FloatRegister($src2$$reg));
@@ -435,7 +500,7 @@ dnl VFMLA(name_suffix, size, min_vec_len)
 define(`VFMLA', `
 // dst_src1 = dst_src1 + src2 * src3
 instruct vfmla$1(vReg dst_src1, vReg src2, vReg src3) %{
-  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+  predicate(UseFMA && UseSVE > 0);
   match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 src3)));
   ins_cost(SVE_COST);
   format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
@@ -457,7 +522,7 @@ define(`VFMLS', `
 // dst_src1 = dst_src1 + -src2 * src3
 // dst_src1 = dst_src1 + src2 * -src3
 instruct vfmls$1(vReg dst_src1, vReg src2, vReg src3) %{
-  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+  predicate(UseFMA && UseSVE > 0);
   match(Set dst_src1 (FmaV$1 dst_src1 (Binary (NegV$1 src2) src3)));
   match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 (NegV$1 src3))));
   ins_cost(SVE_COST);
@@ -480,7 +545,7 @@ define(`VFNMLA', `
 // dst_src1 = -dst_src1 + -src2 * src3
 // dst_src1 = -dst_src1 + src2 * -src3
 instruct vfnmla$1(vReg dst_src1, vReg src2, vReg src3) %{
-  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+  predicate(UseFMA && UseSVE > 0);
   match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary (NegV$1 src2) src3)));
   match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 (NegV$1 src3))));
   ins_cost(SVE_COST);
@@ -502,7 +567,7 @@ dnl VFNMLS(name_suffix, size, min_vec_len)
 define(`VFNMLS', `
 // dst_src1 = -dst_src1 + src2 * src3
 instruct vfnmls$1(vReg dst_src1, vReg src2, vReg src3) %{
-  predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+  predicate(UseFMA && UseSVE > 0);
   match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 src3)));
   ins_cost(SVE_COST);
   format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
@@ -524,7 +589,7 @@ define(`VMLA', `
 // dst_src1 = dst_src1 + src2 * src3
 instruct vmla$1(vReg dst_src1, vReg src2, vReg src3) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
+  predicate(UseSVE > 0);
   match(Set dst_src1 (AddV$1 dst_src1 (MulV$1 src2 src3)));
   ins_cost(SVE_COST);
   format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) ($2)" %}
@@ -548,7 +613,7 @@ define(`VMLS', `
 // dst_src1 = dst_src1 - src2 * src3
 instruct vmls$1(vReg dst_src1, vReg src2, vReg src3) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
+  predicate(UseSVE > 0);
   match(Set dst_src1 (SubV$1 dst_src1 (MulV$1 src2 src3)));
   ins_cost(SVE_COST);
   format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) ($2)" %}
@@ -570,7 +635,7 @@ dnl BINARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 )
 dnl BINARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
 define(`BINARY_OP_TRUE_PREDICATE', `
 instruct $1(vReg dst_src1, vReg src2) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
+  predicate(UseSVE > 0);
   match(Set dst_src1 ($2 dst_src1 src2));
   ins_cost(SVE_COST);
   format %{ "$5 $dst_src1, $dst_src1, $src2\t # vector (sve) ($3)" %}
@@ -594,7 +659,7 @@ dnl UNARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 )
 dnl UNARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_bytes, insn)
 define(`UNARY_OP_TRUE_PREDICATE', `
 instruct $1(vReg dst, vReg src) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $4);
+  predicate(UseSVE > 0);
   match(Set dst ($2 src));
   ins_cost(SVE_COST);
   format %{ "$5 $dst, $src\t# vector (sve) ($3)" %}
@@ -612,216 +677,922 @@ UNARY_OP_TRUE_PREDICATE(vnegD, NegVD, D, 16, sve_fneg)
 
 // popcount vector
 
 instruct vpopcountI(vReg dst, vReg src) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+  predicate(UseSVE > 0);
   match(Set dst (PopCountVI src));
-  format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
+  format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
   ins_encode %{
     __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
   %}
   ins_pipe(pipe_slow);
-%}dnl
+%}
 
-dnl
-dnl REDUCE_ADD_EXT($1, $2, $3, $4, $5, $6, $7 )
-dnl REDUCE_ADD_EXT(insn_name, op_name, reg_dst, reg_src, size, elem_type, insn1)
-define(`REDUCE_ADD_EXT', `
-instruct $1($3 dst, $4 src1, vReg src2, vRegD tmp) %{
-  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
-            n->in(2)->bottom_type()->is_vect()->element_basic_type() == $6);
-  match(Set dst ($2 src1 src2));
-  effect(TEMP_DEF dst, TEMP tmp);
-  ins_cost(SVE_COST);
-  format %{ "sve_uaddv $tmp, $src2\t# vector (sve) ($5)\n\t"
-            "smov $dst, $tmp, $5, 0\n\t"
-            "addw $dst, $dst, $src1\n\t"
-            "$7 $dst, $dst\t # add reduction $5" %}
+// vector mask compare
+
+instruct vmaskcmp(vReg dst, vReg src1, vReg src2, immI cond, pRegGov pTmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0);
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  effect(TEMP pTmp, KILL cr);
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_cmp $pTmp, $src1, $src2\n\t"
+            "sve_cpy $dst, $pTmp, -1\t# vector mask cmp (sve)" %}
   ins_encode %{
-    __ sve_uaddv(as_FloatRegister($tmp$$reg), __ $5,
-                 ptrue, as_FloatRegister($src2$$reg));
-    __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ $5, 0);
-    __ addw($dst$$Register, $dst$$Register, $src1$$Register);
-    __ $7($dst$$Register, $dst$$Register);
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ sve_compare(as_PRegister($pTmp$$reg), bt, ptrue, as_FloatRegister($src1$$reg),
+                   as_FloatRegister($src2$$reg), (int)$cond$$constant);
+    __ sve_cpy(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
+               as_PRegister($pTmp$$reg), -1, false);
   %}
   ins_pipe(pipe_slow);
-%}')dnl
-dnl
-dnl REDUCE_ADD($1, $2, $3, $4, $5, $6, $7 )
-dnl REDUCE_ADD(insn_name, op_name, reg_dst, reg_src, size, elem_type, insn1)
-define(`REDUCE_ADD', `
-instruct $1($3 dst, $4 src1, vReg src2, vRegD tmp) %{
-  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
-            n->in(2)->bottom_type()->is_vect()->element_basic_type() == $6);
-  match(Set dst ($2 src1 src2));
-  effect(TEMP_DEF dst, TEMP tmp);
+%}
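+
+// Note: sve_compare leaves the comparison result in a predicate register,
+// and sve_cpy then materializes it as a -1 (true) / 0 (false) vector, the
+// boolean-vector encoding that the mask rules below also rely on.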
+
+// vector blend
+
+instruct vblend(vReg dst, vReg src1, vReg src2, vReg src3, pRegGov pTmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0);
+  match(Set dst (VectorBlend (Binary src1 src2) src3));
+  effect(TEMP pTmp, KILL cr);
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_cmpeq $pTmp, $src3, -1\n\t"
+            "sve_sel $dst, $pTmp, $src2, $src1\t# vector blend (sve)" %}
+  ins_encode %{
+    Assembler::SIMD_RegVariant size =
+      __ elemType_to_regVariant(Matcher::vector_element_basic_type(this));
+    __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size,
+               ptrue, as_FloatRegister($src3$$reg), -1);
+    __ sve_sel(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg),
+               as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector blend with compare
+
+instruct vblend_maskcmp(vReg dst, vReg src1, vReg src2, vReg src3,
+                        vReg src4, pRegGov pTmp, immI cond, rFlagsReg cr) %{
+  predicate(UseSVE > 0);
+  match(Set dst (VectorBlend (Binary src1 src2) (VectorMaskCmp (Binary src3 src4) cond)));
+  effect(TEMP pTmp, KILL cr);
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_cmp $pTmp, $src3, $src4\t# vector cmp (sve)\n\t"
+            "sve_sel $dst, $pTmp, $src2, $src1\t# vector blend (sve)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ sve_compare(as_PRegister($pTmp$$reg), bt, ptrue, as_FloatRegister($src3$$reg),
+                   as_FloatRegister($src4$$reg), (int)$cond$$constant);
+    __ sve_sel(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
+               as_PRegister($pTmp$$reg), as_FloatRegister($src2$$reg),
+               as_FloatRegister($src1$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector load mask
+
+instruct vloadmaskB(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+  match(Set dst (VectorLoadMask src));
   ins_cost(SVE_COST);
-  format %{ "sve_uaddv $tmp, $src2\t# vector (sve) ($5)\n\t"
-            "umov $dst, $tmp, $5, 0\n\t"
-            "$7 $dst, $dst, $src1\t # add reduction $5" %}
+  format %{ "sve_neg $dst, $src\t# vector load mask (B)" %}
   ins_encode %{
-    __ sve_uaddv(as_FloatRegister($tmp$$reg), __ $5,
-                 ptrue, as_FloatRegister($src2$$reg));
-    __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ $5, 0);
-    __ $7($dst$$Register, $dst$$Register, $src1$$Register);
+    __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, as_FloatRegister($src$$reg));
   %}
   ins_pipe(pipe_slow);
-%}')dnl
-dnl
-dnl REDUCE_ADDF($1, $2, $3, $4 )
-dnl REDUCE_ADDF(insn_name, op_name, reg_dst, size)
-define(`REDUCE_ADDF', `
-instruct $1($3 src1_dst, vReg src2) %{
-  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
-  match(Set src1_dst ($2 src1_dst src2));
+%}
+
+instruct vloadmaskS(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  match(Set dst (VectorLoadMask src));
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_uunpklo $dst, H, $src\n\t"
+            "sve_neg $dst, $dst\t# vector load mask (B to H)" %}
+  ins_encode %{
+    __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
+    __ sve_neg(as_FloatRegister($dst$$reg), __ H, ptrue, as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vloadmaskI(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+  match(Set dst (VectorLoadMask src));
+  ins_cost(3 * SVE_COST);
+  format %{ "sve_uunpklo $dst, H, $src\n\t"
+            "sve_uunpklo $dst, S, $dst\n\t"
+            "sve_neg $dst, $dst\t# vector load mask (B to S)" %}
+  ins_encode %{
+    __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
+    __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
+    __ sve_neg(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vloadmaskL(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
+             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
+  match(Set dst (VectorLoadMask src));
+  ins_cost(4 * SVE_COST);
+  format %{ "sve_uunpklo $dst, H, $src\n\t"
+            "sve_uunpklo $dst, S, $dst\n\t"
+            "sve_uunpklo $dst, D, $dst\n\t"
+            "sve_neg $dst, $dst\t# vector load mask (B to D)" %}
+  ins_encode %{
+    __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
+    __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
+    __ sve_uunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
+    __ sve_neg(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
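+
+// All vloadmask rules above share one recipe: a Java boolean vector is a
+// sequence of 0/1 bytes, sve_uunpklo zero-extends those bytes up to the
+// payload element size, and the final sve_neg rewrites 0/1 as 0/-1, the
+// mask encoding used elsewhere in this file.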
+
+// vector store mask
+
+instruct vstoremaskB(vReg dst, vReg src, immI_1 size) %{
+  predicate(UseSVE > 0);
+  match(Set dst (VectorStoreMask src size));
   ins_cost(SVE_COST);
-  format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) ($4)" %}
+  format %{ "sve_neg $dst, $src\t# vector store mask (B)" %}
   ins_encode %{
-    __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ $4,
-                 ptrue, as_FloatRegister($src2$$reg));
+    __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
+               as_FloatRegister($src$$reg));
   %}
   ins_pipe(pipe_slow);
-%}')dnl
+%}
 
-// vector add reduction
-REDUCE_ADD_EXT(reduce_addB, AddReductionVI, iRegINoSp, iRegIorL2I, B, T_BYTE, sxtb)
-REDUCE_ADD_EXT(reduce_addS, AddReductionVI, iRegINoSp, iRegIorL2I, H, T_SHORT, sxth)
-REDUCE_ADD(reduce_addI, AddReductionVI, iRegINoSp, iRegIorL2I, S, T_INT, addw)
-REDUCE_ADD(reduce_addL, AddReductionVL, iRegLNoSp, iRegL, D, T_LONG, add)
-REDUCE_ADDF(reduce_addF, AddReductionVF, vRegF, S)
-REDUCE_ADDF(reduce_addD, AddReductionVD, vRegD, D)
+instruct vstoremaskS(vReg dst, vReg src, vReg tmp, immI_2 size) %{
+  predicate(UseSVE > 0);
+  match(Set dst (VectorStoreMask src size));
+  effect(TEMP_DEF dst, TEMP tmp);
+  ins_cost(3 * SVE_COST);
+  format %{ "sve_dup $tmp, H, 0\n\t"
+            "sve_uzp1 $dst, B, $src, $tmp\n\t"
+            "sve_neg $dst, B, $dst\t# vector store mask (sve) (H to B)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
+    __ sve_uzp1(as_FloatRegister($dst$$reg), __ B,
+                as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
+               as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vstoremaskI(vReg dst, vReg src, vReg tmp, immI_4 size) %{
+  predicate(UseSVE > 0);
+  match(Set dst (VectorStoreMask src size));
+  effect(TEMP_DEF dst, TEMP tmp);
+  ins_cost(4 * SVE_COST);
+  format %{ "sve_dup $tmp, S, 0\n\t"
+            "sve_uzp1 $dst, H, $src, $tmp\n\t"
+            "sve_uzp1 $dst, B, $dst, $tmp\n\t"
+            "sve_neg $dst, B, $dst\t# vector store mask (sve) (S to B)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
+    __ sve_uzp1(as_FloatRegister($dst$$reg), __ H,
+                as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_uzp1(as_FloatRegister($dst$$reg), __ B,
+                as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
+               as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vstoremaskL(vReg dst, vReg src, vReg tmp, immI_8 size) %{
+  predicate(UseSVE > 0);
+  match(Set dst (VectorStoreMask src size));
+  effect(TEMP_DEF dst, TEMP tmp);
+  ins_cost(5 * SVE_COST);
+  format %{ "sve_dup $tmp, D, 0\n\t"
+            "sve_uzp1 $dst, S, $src, $tmp\n\t"
+            "sve_uzp1 $dst, H, $dst, $tmp\n\t"
+            "sve_uzp1 $dst, B, $dst, $tmp\n\t"
+            "sve_neg $dst, B, $dst\t# vector store mask (sve) (D to B)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($tmp$$reg), __ D, 0);
+    __ sve_uzp1(as_FloatRegister($dst$$reg), __ S,
+                as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_uzp1(as_FloatRegister($dst$$reg), __ H,
+                as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_uzp1(as_FloatRegister($dst$$reg), __ B,
+                as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
+               as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
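+
+// VectorStoreMask is the inverse of VectorLoadMask: sve_uzp1 against a
+// zeroed tmp concatenates the even-indexed elements, halving the element
+// size per step, and the trailing sve_neg maps the 0/-1 mask encoding back
+// to the 0/1 bytes of a Java boolean vector.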
 dnl
-dnl REDUCE_FMINMAX($1, $2, $3, $4, $5 )
-dnl REDUCE_FMINMAX(min_max, name_suffix, element_type, size, reg_src_dst)
-define(`REDUCE_FMINMAX', `
-instruct reduce_$1$2($5 dst, $5 src1, vReg src2) %{
-  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
-            n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
-  match(Set dst (translit($1, `m', `M')ReductionV src1 src2));
-  ins_cost(INSN_COST);
-  effect(TEMP_DEF dst);
-  format %{ "sve_f$1v $dst, $src2 # vector (sve) (S)\n\t"
-            "f$1s $dst, $dst, $src1\t # $1 reduction $2" %}
+dnl
+dnl VLOADMASK_LOADV($1, $2 )
+dnl VLOADMASK_LOADV(esize, cond)
+define(`VLOADMASK_LOADV', `
+instruct vloadmask_loadV_$1(vReg dst, ifelse($1, `byte', vmemA, indirect) mem) %{
+  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() == MaxVectorSize &&
+            type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) $2);
+  match(Set dst (VectorLoadMask (LoadVector mem)));
+  ins_cost(5 * SVE_COST);
+  format %{ "sve_ld1b $dst, $mem\n\t"
+            "sve_neg $dst, $dst\t# load vector mask (sve)" %}
   ins_encode %{
-    __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4,
-                ptrue, as_FloatRegister($src2$$reg));
-    __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    BasicType to_vect_bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant to_vect_variant = __ elemType_to_regVariant(to_vect_bt);
+    loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue,
+                          T_BOOLEAN, to_vect_bt, $mem->opcode(),
+                          as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+    __ sve_neg(dst_reg, to_vect_variant, ptrue, dst_reg);
   %}
   ins_pipe(pipe_slow);
 %}')dnl
 
-// vector max reduction
-REDUCE_FMINMAX(max, F, T_FLOAT, S, vRegF)
-REDUCE_FMINMAX(max, D, T_DOUBLE, D, vRegD)
+dnl
+define(`ARGLIST',
+`ifelse($1, `byte', vmemA, indirect) mem, vReg src, vReg tmp, ifelse($1, `byte', immI_1, immI_gt_1) esize')
+dnl
+dnl STOREV_VSTOREMASK($1, )
+dnl STOREV_VSTOREMASK(esize)
+define(`STOREV_VSTOREMASK', `
+instruct storeV_vstoremask_$1(ARGLIST($1)) %{
+  predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() *
+            n->as_StoreVector()->in(MemNode::ValueIn)->in(2)->get_int() == MaxVectorSize);
+  match(Set mem (StoreVector mem (VectorStoreMask src esize)));
+  effect(TEMP tmp);
+  ins_cost(5 * SVE_COST);
+  format %{ "sve_neg $tmp, $src\n\t"
+            "sve_st1b $tmp, $mem\t# store vector mask (sve)" %}
+  ins_encode %{
+    BasicType from_vect_bt = Matcher::vector_element_basic_type(this, $src);
+    assert(type2aelembytes(from_vect_bt) == (int)$esize$$constant, "unsupported type.");
+    Assembler::SIMD_RegVariant from_vect_variant = __ elemBytes_to_regVariant($esize$$constant);
+    __ sve_neg(as_FloatRegister($tmp$$reg), from_vect_variant, ptrue,
+               as_FloatRegister($src$$reg));
+    loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($tmp$$reg),
+                          ptrue, T_BOOLEAN, from_vect_bt, $mem->opcode(),
+                          as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+undefine(ARGLIST)dnl
+dnl
+// load/store mask vector
+VLOADMASK_LOADV(byte, == 1)
+VLOADMASK_LOADV(non_byte, > 1)
+STOREV_VSTOREMASK(byte)
+STOREV_VSTOREMASK(non_byte)
+
+-// vector min reduction
+-REDUCE_FMINMAX(min, F, T_FLOAT, S, vRegF)
+-REDUCE_FMINMAX(min, D, T_DOUBLE, D, vRegD)
++// vector add reduction
+ 
+-// vector Math.rint, floor, ceil
++instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
++  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
++  match(Set dst (AddReductionVI src1 src2));
++  effect(TEMP_DEF dst, TEMP vtmp);
++  ins_cost(SVE_COST);
++  format %{ "sve_reduce_addI $dst, $src1, $src2\t# addB/S/I reduction (sve) (may extend)" %}
++  ins_encode %{
++    BasicType bt = Matcher::vector_element_basic_type(this, $src2);
++    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
++    __ sve_uaddv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
++    __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
++    __ addw($dst$$Register, $dst$$Register, $src1$$Register);
++    if (bt == T_BYTE) {
++      __ sxtb($dst$$Register, $dst$$Register);
++    } else if (bt == T_SHORT) {
++      __ sxth($dst$$Register, $dst$$Register);
++    } else {
++      assert(bt == T_INT, "unsupported type");
++    }
++  %}
++  ins_pipe(pipe_slow);
++%}
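+
+// Note: sve_uaddv accumulates into a single wide element, so for byte and
+// short reductions the scalar result is re-sign-extended (sxtb/sxth) after
+// adding src1, preserving Java's int-typed add-reduction semantics.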
-instruct vroundD(vReg dst, vReg src, immI rmode) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
-            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
-  match(Set dst (RoundDoubleModeV src rmode));
-  format %{ "sve_frint $dst, $src, $rmode\t# vector (sve) (D)" %}
+
+instruct reduce_addI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
+                             pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+  match(Set dst (AddReductionVI src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_addI $dst, $src1, $src2\t# addI reduction partial (sve) (may extend)" %}
   ins_encode %{
-    switch ($rmode$$constant) {
-      case RoundDoubleModeNode::rmode_rint:
-        __ sve_frintn(as_FloatRegister($dst$$reg), __ D,
-                      ptrue, as_FloatRegister($src$$reg));
-        break;
-      case RoundDoubleModeNode::rmode_floor:
-        __ sve_frintm(as_FloatRegister($dst$$reg), __ D,
-                      ptrue, as_FloatRegister($src$$reg));
-        break;
-      case RoundDoubleModeNode::rmode_ceil:
-        __ sve_frintp(as_FloatRegister($dst$$reg), __ D,
-                      ptrue, as_FloatRegister($src$$reg));
-        break;
+    BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
+                          Matcher::vector_length(this, $src2));
+    __ sve_uaddv(as_FloatRegister($vtmp$$reg), variant,
+                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+    __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
+    __ addw($dst$$Register, $dst$$Register, $src1$$Register);
+    if (bt == T_BYTE) {
+      __ sxtb($dst$$Register, $dst$$Register);
+    } else if (bt == T_SHORT) {
+      __ sxth($dst$$Register, $dst$$Register);
+    } else {
+      assert(bt == T_INT, "unsupported type");
     }
   %}
   ins_pipe(pipe_slow);
 %}
-dnl
-dnl REPLICATE($1, $2, $3, $4, $5 )
-dnl REPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
-define(`REPLICATE', `
-instruct $1(vReg dst, $3 src) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
-  match(Set dst ($2 src));
+
+instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+  match(Set dst (AddReductionVL src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp);
   ins_cost(SVE_COST);
-  format %{ "sve_dup $dst, $src\t# vector (sve) ($4)" %}
+  format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction (sve)" %}
   ins_encode %{
-    __ sve_dup(as_FloatRegister($dst$$reg), __ $4, as_Register($src$$reg));
+    __ sve_uaddv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
+    __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
+    __ add($dst$$Register, $dst$$Register, $src1$$Register);
   %}
   ins_pipe(pipe_slow);
-%}')dnl
-dnl
-dnl REPLICATE_IMM8($1, $2, $3, $4, $5 )
-dnl REPLICATE_IMM8(insn_name, op_name, imm_type, size, min_vec_len)
-define(`REPLICATE_IMM8', `
-instruct $1(vReg dst, $3 con) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
-  match(Set dst ($2 con));
+%}
+
+instruct reduce_addL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
+                             pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+  match(Set dst (AddReductionVL src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
   ins_cost(SVE_COST);
-  format %{ "sve_dup $dst, $con\t# vector (sve) ($4)" %}
+  format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction partial (sve)" %}
   ins_encode %{
-    __ sve_dup(as_FloatRegister($dst$$reg), __ $4, $con$$constant);
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
+                          Matcher::vector_length(this, $src2));
+    __ sve_uaddv(as_FloatRegister($vtmp$$reg), __ D,
+                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+    __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
+    __ add($dst$$Register, $dst$$Register, $src1$$Register);
   %}
   ins_pipe(pipe_slow);
-%}')dnl
+%}
+
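+// Note: the "_partial" flavors handle vectors shorter than the hardware
+// register. sve_whilelo_zr_imm builds a governing predicate covering only
+// the first vector_length lanes, so the inactive tail of the register does
+// not leak into the reduction result.
+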
 dnl
-dnl FREPLICATE($1, $2, $3, $4, $5 )
-dnl FREPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
-define(`FREPLICATE', `
-instruct $1(vReg dst, $3 src) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
-  match(Set dst ($2 src));
+dnl
+dnl REDUCE_ADDF($1, $2, $3, $4 )
+dnl REDUCE_ADDF(insn_name, op_name, reg_dst, size)
+define(`REDUCE_ADDF', `
+instruct $1($3 src1_dst, vReg src2) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+  match(Set src1_dst (AddReductionV$2 src1_dst src2));
   ins_cost(SVE_COST);
-  format %{ "sve_cpy $dst, $src\t# vector (sve) ($4)" %}
+  format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) ($4)" %}
   ins_encode %{
-    __ sve_cpy(as_FloatRegister($dst$$reg), __ $4,
-               ptrue, as_FloatRegister($src$$reg));
+    __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ $4,
+                 ptrue, as_FloatRegister($src2$$reg));
   %}
   ins_pipe(pipe_slow);
 %}')dnl
-
-// vector replicate
-REPLICATE(replicateB, ReplicateB, iRegIorL2I, B, 16)
-REPLICATE(replicateS, ReplicateS, iRegIorL2I, H, 8)
-REPLICATE(replicateI, ReplicateI, iRegIorL2I, S, 4)
-REPLICATE(replicateL, ReplicateL, iRegL, D, 2)
-REPLICATE_IMM8(replicateB_imm8, ReplicateB, immI8, B, 16)
-REPLICATE_IMM8(replicateS_imm8, ReplicateS, immI8_shift8, H, 8)
-REPLICATE_IMM8(replicateI_imm8, ReplicateI, immI8_shift8, S, 4)
-REPLICATE_IMM8(replicateL_imm8, ReplicateL, immL8_shift8, D, 2)
-FREPLICATE(replicateF, ReplicateF, vRegF, S, 4)
-FREPLICATE(replicateD, ReplicateD, vRegD, D, 2)
 dnl
-dnl VSHIFT_TRUE_PREDICATE($1, $2, $3, $4, $5 )
-dnl VSHIFT_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
-define(`VSHIFT_TRUE_PREDICATE', `
-instruct $1(vReg dst, vReg shift) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
-  match(Set dst ($2 dst shift));
+dnl
+dnl REDUCE_ADDF_PARTIAL($1, $2, $3, $4 )
+dnl REDUCE_ADDF_PARTIAL(insn_name, suffix, reg_dst, size)
+define(`REDUCE_ADDF_PARTIAL', `
+instruct $1($3 src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+  match(Set src1_dst (AddReductionV$2 src1_dst src2));
   ins_cost(SVE_COST);
-  format %{ "$5 $dst, $dst, $shift\t# vector (sve) ($3)" %}
+  effect(TEMP ptmp, KILL cr);
+  format %{ "sve_reduce_add$2 $src1_dst, $src1_dst, $src2\t# add$2 reduction partial (sve) ($4)" %}
   ins_encode %{
-    __ $5(as_FloatRegister($dst$$reg), __ $3,
-          ptrue, as_FloatRegister($shift$$reg));
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ $4,
+                          Matcher::vector_length(this, $src2));
+    __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ $4,
+                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
   %}
   ins_pipe(pipe_slow);
 %}')dnl
 dnl
-dnl VSHIFT_IMM_UNPREDICATED($1, $2, $3, $4, $5, $6 )
-dnl VSHIFT_IMM_UNPREDICATED(insn_name, op_name, op_name2, size, min_vec_len, insn)
-define(`VSHIFT_IMM_UNPREDICATED', `
-instruct $1(vReg dst, vReg src, immI shift) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
-  match(Set dst ($2 src ($3 shift)));
+REDUCE_ADDF(reduce_addF, F, vRegF, S)
+REDUCE_ADDF_PARTIAL(reduce_addF_partial, F, vRegF, S)
+REDUCE_ADDF(reduce_addD, D, vRegD, D)
+REDUCE_ADDF_PARTIAL(reduce_addD_partial, D, vRegD, D)
+
+// vector and reduction
+
+instruct reduce_andI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+  match(Set dst (AndReductionV src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp);
   ins_cost(SVE_COST);
-  format %{ "$6 $dst, $src, $shift\t# vector (sve) ($4)" %}
+  format %{ "sve_reduce_andI $dst, $src1, $src2\t# andB/S/I reduction (sve) (may extend)" %}
   ins_encode %{
-    int con = (int)$shift$$constant;dnl
+    BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
+    __ sve_andv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
+    __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
+    __ andw($dst$$Register, $dst$$Register, $src1$$Register);
+    if (bt == T_BYTE) {
+      __ sxtb($dst$$Register, $dst$$Register);
+    } else if (bt == T_SHORT) {
+      __ sxth($dst$$Register, $dst$$Register);
+    } else {
+      assert(bt == T_INT, "unsupported type");
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_andI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
+                             pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+  match(Set dst (AndReductionV src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_andI $dst, $src1, $src2\t# andI reduction partial (sve) (may extend)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
+                          Matcher::vector_length(this, $src2));
+    __ sve_andv(as_FloatRegister($vtmp$$reg), variant,
+                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+    __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
+    __ andw($dst$$Register, $dst$$Register, $src1$$Register);
+    if (bt == T_BYTE) {
+      __ sxtb($dst$$Register, $dst$$Register);
+    } else if (bt == T_SHORT) {
+      __ sxth($dst$$Register, $dst$$Register);
+    } else {
+      assert(bt == T_INT, "unsupported type");
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_andL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+  match(Set dst (AndReductionV src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction (sve)" %}
+  ins_encode %{
+    __ sve_andv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
+    __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
+    __ andr($dst$$Register, $dst$$Register, $src1$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_andL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
+                             pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+  match(Set dst (AndReductionV src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction partial (sve)" %}
+  ins_encode %{
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
+                          Matcher::vector_length(this, $src2));
+    __ sve_andv(as_FloatRegister($vtmp$$reg), __ D,
+                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+    __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
+    __ andr($dst$$Register, $dst$$Register, $src1$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector or reduction
+
+instruct reduce_orI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+  match(Set dst (OrReductionV src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_orI $dst, $src1, $src2\t# orB/S/I reduction (sve) (may extend)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
+    __ sve_orv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
+    __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
+    __ orrw($dst$$Register, $dst$$Register, $src1$$Register);
+    if (bt == T_BYTE) {
+      __ sxtb($dst$$Register, $dst$$Register);
+    } else if (bt == T_SHORT) {
+      __ sxth($dst$$Register, $dst$$Register);
+    } else {
+      assert(bt == T_INT, "unsupported type");
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_orI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
+                            pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+  match(Set dst (OrReductionV src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_orI $dst, $src1, $src2\t# orI reduction partial (sve) (may extend)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
+                          Matcher::vector_length(this, $src2));
+    __ sve_orv(as_FloatRegister($vtmp$$reg), variant,
+               as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+    __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
+    __ orrw($dst$$Register, $dst$$Register, $src1$$Register);
+    if (bt == T_BYTE) {
+      __ sxtb($dst$$Register, $dst$$Register);
+    } else if (bt == T_SHORT) {
+      __ sxth($dst$$Register, $dst$$Register);
+    } else {
+      assert(bt == T_INT, "unsupported type");
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_orL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+  match(Set dst (OrReductionV src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction (sve)" %}
+  ins_encode %{
+    __ sve_orv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
+    __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
+    __ orr($dst$$Register, $dst$$Register, $src1$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_orL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
+                            pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+  match(Set dst (OrReductionV src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction partial (sve)" %}
+  ins_encode %{
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
+                          Matcher::vector_length(this, $src2));
+    __ sve_orv(as_FloatRegister($vtmp$$reg), __ D,
+               as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+    __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
+    __ orr($dst$$Register, $dst$$Register, $src1$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector xor reduction
+
+instruct reduce_eorI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+  match(Set dst (XorReductionV src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_eorI $dst, $src1, $src2\t# xorB/H/I reduction (sve) (may extend)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
+    __ sve_eorv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
+    __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
+    __ eorw($dst$$Register, $dst$$Register, $src1$$Register);
+    if (bt == T_BYTE) {
+      __ sxtb($dst$$Register, $dst$$Register);
+    } else if (bt == T_SHORT) {
+      __ sxth($dst$$Register, $dst$$Register);
+    } else {
+      assert(bt == T_INT, "unsupported type");
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_eorI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
+                             pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+  match(Set dst (XorReductionV src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_eorI $dst, $src1, $src2\t# xorI reduction partial (sve) (may extend)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
+                          Matcher::vector_length(this, $src2));
+    __ sve_eorv(as_FloatRegister($vtmp$$reg), variant,
+                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+    __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
+    __ eorw($dst$$Register, $dst$$Register, $src1$$Register);
+    if (bt == T_BYTE) {
+      __ sxtb($dst$$Register, $dst$$Register);
+    } else if (bt == T_SHORT) {
+      __ sxth($dst$$Register, $dst$$Register);
+    } else {
+      assert(bt == T_INT, "unsupported type");
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_eorL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+  match(Set dst (XorReductionV src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_eorL $dst, $src1, $src2\t# xorL reduction (sve)" %}
+  ins_encode %{
+    __ sve_eorv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
+    __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
+    __ eor($dst$$Register, $dst$$Register, $src1$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_eorL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
+                             pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+  match(Set dst (XorReductionV src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_eorL $dst, $src1, $src2\t# xorL reduction partial (sve)" %}
+  ins_encode %{
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
+                          Matcher::vector_length(this, $src2));
+    __ sve_eorv(as_FloatRegister($vtmp$$reg), __ D,
+                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+    __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
+    __ eor($dst$$Register, $dst$$Register, $src1$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+dnl
+dnl REDUCE_MAXMIN_I($1, $2, $3 )
+dnl REDUCE_MAXMIN_I(min_max, op_name, cmp)
+define(`REDUCE_MAXMIN_I', `
+instruct reduce_$1I(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
+            (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
+  match(Set dst ($2 src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_$1I $dst, $src1, $src2\t# reduce $1B/S/I (sve)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
+    __ sve_s$1v(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
+    __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
+    __ cmpw($dst$$Register, $src1$$Register);
+    __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::$3);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl REDUCE_MAXMIN_L($1, $2, $3 )
+dnl REDUCE_MAXMIN_L(min_max, op_name, cmp)
+define(`REDUCE_MAXMIN_L', `
+instruct reduce_$1L(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
+            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+  match(Set dst ($2 src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_$1L $dst, $src1, $src2\t# reduce $1L (sve)" %}
+  ins_encode %{
+    __ sve_s$1v(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
+    __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
+    __ cmp($dst$$Register, $src1$$Register);
+    __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::$3);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl REDUCE_MAXMIN_I_PARTIAL($1, $2, $3 )
+dnl REDUCE_MAXMIN_I_PARTIAL(min_max, op_name, cmp)
+define(`REDUCE_MAXMIN_I_PARTIAL', `
+instruct reduce_$1I_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
+                            pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
+            (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
+  match(Set dst ($2 src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_$1I $dst, $src1, $src2\t# reduce $1I partial (sve)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
+                          Matcher::vector_length(this, $src2));
+    __ sve_s$1v(as_FloatRegister($vtmp$$reg), variant,
+                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+    __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
+    __ cmpw($dst$$Register, $src1$$Register);
+    __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::$3);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl REDUCE_MAXMIN_L_PARTIAL($1, $2, $3 )
+dnl REDUCE_MAXMIN_L_PARTIAL(min_max, op_name, cmp)
+define(`REDUCE_MAXMIN_L_PARTIAL', `
+instruct reduce_$1L_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
+                            pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
+            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+  match(Set dst ($2 src1 src2));
+  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_$1L $dst, $src1, $src2\t# reduce $1L partial (sve)" %}
+  ins_encode %{
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
+                          Matcher::vector_length(this, $src2));
+    __ sve_s$1v(as_FloatRegister($vtmp$$reg), __ D,
+                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+    __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
+    __ cmp($dst$$Register, $src1$$Register);
+    __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::$3);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl REDUCE_FMINMAX($1, $2, $3, $4, $5 )
+dnl REDUCE_FMINMAX(min_max, name_suffix, element_type, size, reg_src_dst)
+define(`REDUCE_FMINMAX', `
+instruct reduce_$1$2($5 dst, $5 src1, vReg src2) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+  match(Set dst (translit($1, `m', `M')ReductionV src1 src2));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst);
+  format %{ "sve_f$1v $dst, $src2 # vector (sve) ($4)\n\t"
+            "f$1s $dst, $dst, $src1\t# $1 reduction $2" %}
+  ins_encode %{
+    __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4,
+                ptrue, as_FloatRegister($src2$$reg));
+    __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl
+dnl REDUCE_FMINMAX_PARTIAL($1, $2, $3, $4, $5 )
+dnl REDUCE_FMINMAX_PARTIAL(min_max, name_suffix, element_type, size, reg_src_dst)
+define(`REDUCE_FMINMAX_PARTIAL', `
+instruct reduce_$1$2_partial($5 dst, $5 src1, vReg src2,
+                             pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+  match(Set dst (translit($1, `m', `M')ReductionV src1 src2));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
+  format %{ "sve_reduce_$1$2 $dst, $src1, $src2\t# reduce $1 $4 partial (sve)" %}
+  ins_encode %{
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ $4,
+                          Matcher::vector_length(this, $src2));
+    __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4,
+                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+    __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+
+// vector max reduction
+REDUCE_MAXMIN_I(max, MaxReductionV, GT)
+REDUCE_MAXMIN_I_PARTIAL(max, MaxReductionV, GT)
+REDUCE_MAXMIN_L(max, MaxReductionV, GT)
+REDUCE_MAXMIN_L_PARTIAL(max, MaxReductionV, GT)
+REDUCE_FMINMAX(max, F, T_FLOAT, S, vRegF)
+REDUCE_FMINMAX_PARTIAL(max, F, T_FLOAT, S, vRegF)
+REDUCE_FMINMAX(max, D, T_DOUBLE, D, vRegD)
+REDUCE_FMINMAX_PARTIAL(max, D, T_DOUBLE, D, vRegD)
+
+// vector min reduction
+REDUCE_MAXMIN_I(min, MinReductionV, LT)
+REDUCE_MAXMIN_I_PARTIAL(min, MinReductionV, LT)
+REDUCE_MAXMIN_L(min, MinReductionV, LT)
+REDUCE_MAXMIN_L_PARTIAL(min, MinReductionV, LT)
+REDUCE_FMINMAX(min, F, T_FLOAT, S, vRegF)
+REDUCE_FMINMAX_PARTIAL(min, F, T_FLOAT, S, vRegF)
+REDUCE_FMINMAX(min, D, T_DOUBLE, D, vRegD)
+REDUCE_FMINMAX_PARTIAL(min, D, T_DOUBLE, D, vRegD)
+
+// vector Math.rint, floor, ceil
+
+instruct vroundD(vReg dst, vReg src, immI rmode) %{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+  match(Set dst (RoundDoubleModeV src rmode));
+  format %{ "sve_frint $dst, $src, $rmode\t# vector (sve) (D)" %}
+  ins_encode %{
+    switch ($rmode$$constant) {
+      case RoundDoubleModeNode::rmode_rint:
+        __ sve_frintn(as_FloatRegister($dst$$reg), __ D,
+                      ptrue, as_FloatRegister($src$$reg));
+        break;
+      case RoundDoubleModeNode::rmode_floor:
+        __ sve_frintm(as_FloatRegister($dst$$reg), __ D,
+                      ptrue, as_FloatRegister($src$$reg));
+        break;
+      case RoundDoubleModeNode::rmode_ceil:
+        __ sve_frintp(as_FloatRegister($dst$$reg), __ D,
+                      ptrue, as_FloatRegister($src$$reg));
+        break;
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+dnl
+dnl REPLICATE($1, $2, $3, $4, $5 )
+dnl REPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
+define(`REPLICATE', `
+instruct $1(vReg dst, $3 src) %{
+  predicate(UseSVE > 0);
+  match(Set dst ($2 src));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup $dst, $src\t# vector (sve) ($4)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ $4, as_Register($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl REPLICATE_IMM8($1, $2, $3, $4, $5 )
+dnl REPLICATE_IMM8(insn_name, op_name, imm_type, size, min_vec_len)
+define(`REPLICATE_IMM8', `
+instruct $1(vReg dst, $3 con) %{
+  predicate(UseSVE > 0);
+  match(Set dst ($2 con));
+  ins_cost(SVE_COST);
+  format %{ "sve_dup $dst, $con\t# vector (sve) ($4)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ $4, $con$$constant);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl FREPLICATE($1, $2, $3, $4)
+dnl FREPLICATE(insn_name, op_name, reg_src, size)
+define(`FREPLICATE', `
+instruct $1(vReg dst, $3 src) %{
+  predicate(UseSVE > 0);
+  match(Set dst ($2 src));
+  ins_cost(SVE_COST);
+  format %{ "sve_cpy $dst, $src\t# vector (sve) ($4)" %}
+  ins_encode %{
+    __ sve_cpy(as_FloatRegister($dst$$reg), __ $4,
+               ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+
+// vector replicate
+REPLICATE(replicateB, ReplicateB, iRegIorL2I, B, 16)
+REPLICATE(replicateS, ReplicateS, iRegIorL2I, H, 8)
+REPLICATE(replicateI, ReplicateI, iRegIorL2I, S, 4)
+REPLICATE(replicateL, ReplicateL, iRegL, D, 2)
+REPLICATE_IMM8(replicateB_imm8, ReplicateB, immI8, B, 16)
+REPLICATE_IMM8(replicateS_imm8, ReplicateS, immI8_shift8, H, 8)
+REPLICATE_IMM8(replicateI_imm8, ReplicateI, immI8_shift8, S, 4)
+REPLICATE_IMM8(replicateL_imm8, ReplicateL, immL8_shift8, D, 2)
+FREPLICATE(replicateF, ReplicateF, vRegF, S, 4)
+FREPLICATE(replicateD, ReplicateD, vRegD, D, 2)
+dnl
+dnl VSHIFT_TRUE_PREDICATE($1, $2, $3, $4, $5 )
+dnl VSHIFT_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
+define(`VSHIFT_TRUE_PREDICATE', `
+instruct $1(vReg dst, vReg shift) %{
+  predicate(UseSVE > 0);
+  match(Set dst ($2 dst shift));
+  ins_cost(SVE_COST);
+  format %{ "$5 $dst, $dst, $shift\t# vector (sve) ($3)" %}
+  ins_encode %{
+    __ $5(as_FloatRegister($dst$$reg), __ $3,
+          ptrue, as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl VSHIFT_IMM_UNPREDICATED($1, $2, $3, $4, $5, $6 )
+dnl VSHIFT_IMM_UNPREDICATED(insn_name, op_name, op_name2, size, min_vec_len, insn)
+define(`VSHIFT_IMM_UNPREDICATED', `
+instruct $1(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0);
+  match(Set dst ($2 src ($3 shift)));
+  ins_cost(SVE_COST);
+  format %{ "$6 $dst, $src, $shift\t# vector (sve) ($4)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;dnl
 ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, `
     if (con == 0) {
       __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
@@ -852,7 +1623,7 @@ dnl VSHIFT_COUNT($1, $2, $3, $4 )
 dnl VSHIFT_COUNT(insn_name, size, min_vec_len, type)
 define(`VSHIFT_COUNT', `
 instruct $1(vReg dst, iRegIorL2I cnt) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length() >= $3 &&
+  predicate(UseSVE > 0 &&
             ELEMENT_SHORT_CHAR($4, n));
   match(Set dst (LShiftCntV cnt));
   match(Set dst (RShiftCntV cnt));
@@ -919,3 +1690,923 @@ instruct vmaskcast(vReg dst) %{
   ins_pipe(pipe_class_empty);
 %}
 
+// ------------------------------ Vector cast -------------------------------
+dnl
+dnl
+define(`VECTOR_CAST_EXTEND1', `
+instruct vcvt$1to$2`'(vReg dst, vReg src)
+%{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+  match(Set dst (VectorCast$1`'2X src));
+  ins_cost(SVE_COST);
+  format %{ "sve_$3 $dst, $4, $src\t# convert $1 to $2 vector" %}
+  ins_encode %{
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl
+define(`VECTOR_CAST_EXTEND2', `
+instruct vcvt$1to$2`'(vReg dst, vReg src)
+%{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+  match(Set dst (VectorCast$1`'2X src));
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_$3 $dst, $4, $src\n\t"
+            "sve_$3 $dst, $5, $dst\t# convert $1 to $2 vector" %}
+  ins_encode %{
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg));
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $5, as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl
+define(`VECTOR_CAST_EXTEND3', `
+instruct vcvt$1to$2`'(vReg dst, vReg src)
+%{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+  match(Set dst (VectorCast$1`'2X src));
+  ins_cost(3 * SVE_COST);
+  format %{ "sve_$3 $dst, $4, $src\n\t"
+            "sve_$3 $dst, $5, $dst\n\t"
+            "sve_$3 $dst, $6, $dst\t# convert $1 to $2 vector" %}
+  ins_encode %{
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg));
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $5, as_FloatRegister($dst$$reg));
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl
+define(`VECTOR_CAST_NARROW1', `
+instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
+%{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+  match(Set dst (VectorCast$1`'2X src));
+  effect(TEMP tmp);
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_$3 $tmp, $4, 0\n\t"
+            "sve_$5 $dst, $4, $src, tmp\t# convert $1 to $2 vector" %}
+  ins_encode %{
+    __ sve_$3(as_FloatRegister($tmp$$reg), __ $4, 0);
+    __ sve_$5(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl
+define(`VECTOR_CAST_NARROW2', `
+instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
+%{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+  match(Set dst (VectorCast$1`'2X src));
+  effect(TEMP_DEF dst, TEMP tmp);
+  ins_cost(3 * SVE_COST);
+  format %{ "sve_$3 $tmp, $4, 0\n\t"
+            "sve_$5 $dst, $4, $src, tmp\n\t"
+            "sve_$5 $dst, $6, $dst, tmp\n\t# convert $1 to $2 vector" %}
+  ins_encode %{
+    __ sve_$3(as_FloatRegister($tmp$$reg), __ $4, 0);
+    __ sve_$5(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_$5(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl
+define(`VECTOR_CAST_NARROW3', `
+instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
+%{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+  match(Set dst (VectorCast$1`'2X src));
+  effect(TEMP_DEF dst, TEMP tmp);
+  ins_cost(4 * SVE_COST);
+  format %{ "sve_$3 $tmp, $4, 0\n\t"
+            "sve_$5 $dst, $4, $src, tmp\n\t"
+            "sve_$5 $dst, $6, $dst, tmp\n\t"
+            "sve_$5 $dst, $7, $dst, tmp\n\t# convert $1 to $2 vector" %}
+  ins_encode %{
+    __ sve_$3(as_FloatRegister($tmp$$reg), __ $4, 0);
+    __ sve_$5(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_$5(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_$5(as_FloatRegister($dst$$reg), __ $7, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl
+define(`VECTOR_CAST_I2F_EXTEND2', `
+instruct vcvt$1to$2`'(vReg dst, vReg src)
+%{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+  match(Set dst (VectorCast$1`'2X src));
+  ins_cost(3 * SVE_COST);
+  format %{ "sve_$3 $dst, $4, $src\n\t"
+            "sve_$3 $dst, $5, $dst\n\t"
+            "sve_$6 $dst, $5, $dst, $5\t# convert $1 to $2 vector" %}
+  ins_encode %{
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg));
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $5, as_FloatRegister($dst$$reg));
+    __ sve_$6(as_FloatRegister($dst$$reg), __ $5, ptrue, as_FloatRegister($dst$$reg), __ $5);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl
+define(`VECTOR_CAST_I2F_EXTEND3', `
+instruct vcvt$1to$2`'(vReg dst, vReg src)
+%{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+  match(Set dst (VectorCast$1`'2X src));
+  ins_cost(4 * SVE_COST);
+  format %{ "sve_$3 $dst, $4, $src\n\t"
+            "sve_$3 $dst, $5, $dst\n\t"
+            "sve_$3 $dst, $6, $dst\n\t"
+            "sve_$7 $dst, $6, $dst, $6\t# convert $1 to $2 vector" %}
+  ins_encode %{
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg));
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $5, as_FloatRegister($dst$$reg));
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg));
+    __ sve_$7(as_FloatRegister($dst$$reg), __ $6, ptrue, as_FloatRegister($dst$$reg), __ $6);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl
+define(`VECTOR_CAST_X2F_NARROW1', `
+instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
+%{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+  match(Set dst (VectorCast$1`'2X src));
+  effect(TEMP_DEF dst, TEMP tmp);
+  ins_cost(3 * SVE_COST);
+  format %{ "sve_$3 $dst, $4, $src, $5\n\t"
+            "sve_$6 $tmp, $7, 0\n\t"
+            "sve_$8 $dst, $7, $dst, $tmp\t# convert $1 to $2 vector" %}
+  ins_encode %{
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $5);
+    __ sve_$6(as_FloatRegister($tmp$$reg), __ $7, 0);
+    __ sve_$8(as_FloatRegister($dst$$reg), __ $7, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl
+define(`VECTOR_CAST_X2X', `
+instruct vcvt$1to$2`'(vReg dst, vReg src)
+%{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+  match(Set dst (VectorCast$1`'2X src));
+  ins_cost(SVE_COST);
+  format %{ "sve_$3 $dst, $4, $src, $4\t# convert $1 to $2 vector" %}
+  ins_encode %{
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl
+define(`VECTOR_CAST_X2F_EXTEND1', `
+instruct vcvt$1to$2`'(vReg dst, vReg src)
+%{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+  match(Set dst (VectorCast$1`'2X src));
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_$3 $dst, $4, $src\n\t"
+            "sve_$5 $dst, $4, $dst, $6\t# convert $1 to $2 vector" %}
+  ins_encode %{
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg));
+    __ sve_$5(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($dst$$reg), __ $6);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl
+define(`VECTOR_CAST_F2X_NARROW1', `
+instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
+%{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+  match(Set dst (VectorCast$1`'2X src));
+  effect(TEMP_DEF dst, TEMP tmp);
+  ins_cost(3 * SVE_COST);
+  format %{ "sve_$3 $dst, $4, $src, $4\n\t"
+            "sve_$5 $tmp, $6, 0\n\t"
+            "sve_$7 $dst, $6, $dst, tmp\t# convert $1 to $2 vector" %}
+  ins_encode %{
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
+    __ sve_$5(as_FloatRegister($tmp$$reg), __ $6, 0);
+    __ sve_$7(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl
+define(`VECTOR_CAST_F2X_NARROW2', `
+instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
+%{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+  match(Set dst (VectorCast$1`'2X src));
+  effect(TEMP_DEF dst, TEMP tmp);
+  ins_cost(4 * SVE_COST);
+  format %{ "sve_$3 $dst, $4, $src, $4\n\t"
+            "sve_$5 $tmp, $6, 0\n\t"
+            "sve_$7 $dst, $6, $dst, tmp\n\t"
+            "sve_$7 $dst, $8, $dst, tmp\n\t# convert $1 to $2 vector" %}
+  ins_encode %{
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
+    __ sve_$5(as_FloatRegister($tmp$$reg), __ $6, 0);
+    __ sve_$7(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_$7(as_FloatRegister($dst$$reg), __ $8, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl
+define(`VECTOR_CAST_F2X_EXTEND1', `
+instruct vcvt$1to$2`'(vReg dst, vReg src)
+%{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+  match(Set dst (VectorCast$1`'2X src));
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_$3 $dst, $4, $src, $4\n\t"
+            "sve_$5 $dst, $6, $dst\t# convert $1 to $2 vector" %}
+  ins_encode %{
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
+    __ sve_$5(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl
+define(`VECTOR_CAST_F2X_NARROW3', `
+instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
+%{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+  match(Set dst (VectorCast$1`'2X src));
+  effect(TEMP_DEF dst, TEMP tmp);
+  ins_cost(5 * SVE_COST);
+  format %{ "sve_$3 $dst, $4, $src, $4\n\t"
+            "sve_$5 $tmp, $6, 0\n\t"
+            "sve_$7 $dst, $6, $dst, tmp\n\t"
+            "sve_$7 $dst, $8, $dst, tmp\n\t"
+            "sve_$7 $dst, $9, $dst, tmp\n\t# convert $1 to $2 vector" %}
+  ins_encode %{
+    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
+    __ sve_$5(as_FloatRegister($tmp$$reg), __ $6, 0);
+    __ sve_$7(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_$7(as_FloatRegister($dst$$reg), __ $8, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_$7(as_FloatRegister($dst$$reg), __ $9, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
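+// The cast rules below are built from a small set of primitives: sunpklo
+// sign-extends the low half one element size up (B->H->S->D), uzp1 with a
+// zeroed tmp concatenates even-indexed elements to narrow one size down,
+// and scvtf/fcvtzs/fcvt handle the int/float conversions.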
+VECTOR_CAST_EXTEND1(B, S, sunpklo, H)
+VECTOR_CAST_EXTEND2(B, I, sunpklo, H, S)
+VECTOR_CAST_EXTEND3(B, L, sunpklo, H, S, D)
+VECTOR_CAST_I2F_EXTEND2(B, F, sunpklo, H, S, scvtf)
+VECTOR_CAST_I2F_EXTEND3(B, D, sunpklo, H, S, D, scvtf)
+dnl
+VECTOR_CAST_NARROW1(S, B, dup, B, uzp1)
+VECTOR_CAST_EXTEND1(S, I, sunpklo, S)
+VECTOR_CAST_EXTEND2(S, L, sunpklo, S, D)
+VECTOR_CAST_X2F_EXTEND1(S, F, sunpklo, S, scvtf, S)
+VECTOR_CAST_I2F_EXTEND2(S, D, sunpklo, S, D, scvtf)
+dnl
+VECTOR_CAST_NARROW2(I, B, dup, H, uzp1, B)
+VECTOR_CAST_NARROW1(I, S, dup, H, uzp1)
+VECTOR_CAST_EXTEND1(I, L, sunpklo, D)
+VECTOR_CAST_X2X(I, F, scvtf, S)
+VECTOR_CAST_X2F_EXTEND1(I, D, sunpklo, D, scvtf, D)
+dnl
+VECTOR_CAST_NARROW3(L, B, dup, S, uzp1, H, B)
+VECTOR_CAST_NARROW2(L, S, dup, S, uzp1, H)
+VECTOR_CAST_NARROW1(L, I, dup, S, uzp1)
+VECTOR_CAST_X2F_NARROW1(L, F, scvtf, S, D, dup, S, uzp1)
+VECTOR_CAST_X2X(L, D, scvtf, D)
+dnl
+VECTOR_CAST_F2X_NARROW2(F, B, fcvtzs, S, dup, H, uzp1, B)
+VECTOR_CAST_F2X_NARROW1(F, S, fcvtzs, S, dup, H, uzp1)
+VECTOR_CAST_X2X(F, I, fcvtzs, S)
+VECTOR_CAST_F2X_EXTEND1(F, L, fcvtzs, S, sunpklo, D)
+VECTOR_CAST_X2F_EXTEND1(F, D, sunpklo, D, fcvt, S)
+dnl
+VECTOR_CAST_F2X_NARROW3(D, B, fcvtzs, D, dup, S, uzp1, H, B)
+VECTOR_CAST_F2X_NARROW2(D, S, fcvtzs, D, dup, S, uzp1, H)
+VECTOR_CAST_F2X_NARROW1(D, I, fcvtzs, D, dup, S, uzp1)
+VECTOR_CAST_X2X(D, L, fcvtzs, D)
+VECTOR_CAST_X2F_NARROW1(D, F, fcvt, S, D, dup, S, uzp1)
+dnl
+dnl
+// ------------------------------ Vector extract ---------------------------------
+define(`VECTOR_EXTRACT_SXT', `
+instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
+%{
+  predicate(UseSVE > 0);
+  match(Set dst (Extract$1 src idx));
+  effect(TEMP pTmp, KILL cr);
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_extract $dst, $3, $pTmp, $src, $idx\n\t"
+            "sbfmw $dst, $dst, 0U, $5\t# extract from vector($1)" %}
+  ins_encode %{
+    __ sve_extract(as_$4($dst$$reg), __ $3, as_PRegister($pTmp$$reg),
+                   as_FloatRegister($src$$reg), (int)($idx$$constant));
+    __ sbfmw(as_$4($dst$$reg), as_$4($dst$$reg), 0U, $5);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl                $1 $2         $3 $4        $5
+VECTOR_EXTRACT_SXT(B, iRegINoSp, B, Register, 7U)
+VECTOR_EXTRACT_SXT(S, iRegINoSp, H, Register, 15U)
+
+dnl
+define(`VECTOR_EXTRACT', `
+instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
+%{
+  predicate(UseSVE > 0);
+  match(Set dst (Extract$1 src idx));
+  effect(TEMP pTmp, KILL cr);
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_extract $dst, $3, $pTmp, $src, $idx\t# extract from vector($1)" %}
+  ins_encode %{
+    __ sve_extract(as_$4($dst$$reg), __ $3, as_PRegister($pTmp$$reg),
+                   as_FloatRegister($src$$reg), (int)($idx$$constant));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl            $1 $2         $3 $4
+VECTOR_EXTRACT(I, iRegINoSp, S, Register)
+VECTOR_EXTRACT(L, iRegLNoSp, D, Register)
+VECTOR_EXTRACT(F, vRegF,     S, FloatRegister)
+VECTOR_EXTRACT(D, vRegD,     D, FloatRegister)
+
+// ------------------------------- VectorTest ----------------------------------
+dnl
+dnl VTEST($1, $2, $3, $4 )
+dnl VTEST(op_name, pred, imm, cond)
+define(`VTEST', `
+instruct vtest_$1`'(iRegINoSp dst, vReg src1, vReg src2, pReg pTmp, rFlagsReg cr)
+%{
+  predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
+            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::$2);
+  match(Set dst (VectorTest src1 src2));
+  effect(TEMP pTmp, KILL cr);
+  ins_cost(SVE_COST);
+  format %{ "sve_cmpeq $pTmp, $src1, $3\n\t"
+            "csetw $dst, $4\t# VectorTest (sve) - $1" %}
+  ins_encode %{
+    // "src2" is not used for sve.
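+    // alltrue is tested as "no lane equals 0": the SVE compare sets the Z
+    // flag when no active element matches, so csetw EQ yields 1 iff every
+    // lane is -1; anytrue compares against -1 and uses NE, i.e. at least
+    // one lane matched.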
+// ------------------------------ Vector insert ---------------------------------
+
+instruct insertI_small(vReg dst, vReg src, iRegIorL2I val, immI idx, pRegGov pTmp, rFlagsReg cr)
+%{
+  predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
+             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+             n->bottom_type()->is_vect()->element_basic_type() == T_INT));
+  match(Set dst (VectorInsert (Binary src val) idx));
+  effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
+  ins_cost(4 * SVE_COST);
+  format %{ "sve_index $dst, -16, 1\t# (B/S/I)\n\t"
+            "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
+            "sve_orr $dst, $src, $src\n\t"
+            "sve_cpy $dst, $pTmp, $val\t# insert into vector (B/S/I)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this, $src);
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+    __ sve_index(as_FloatRegister($dst$$reg), size, -16, 1);
+    __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, ptrue,
+               as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
+    __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+    __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), as_Register($val$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct insertF_small(vReg dst, vReg src, vRegF val, immI idx, pRegGov pTmp, rFlagsReg cr)
+%{
+  predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+  match(Set dst (VectorInsert (Binary src val) idx));
+  effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
+  ins_cost(4 * SVE_COST);
+  format %{ "sve_index $dst, S, -16, 1\n\t"
+            "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
+            "sve_orr $dst, $src, $src\n\t"
+            "sve_cpy $dst, $pTmp, $val\t# insert into vector (F)" %}
+  ins_encode %{
+    __ sve_index(as_FloatRegister($dst$$reg), __ S, -16, 1);
+    __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ S, ptrue,
+               as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
+    __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+    __ sve_cpy(as_FloatRegister($dst$$reg), __ S, as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct insertI(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg tmp1, pRegGov pTmp, rFlagsReg cr)
+%{
+  predicate(UseSVE > 0 && n->as_Vector()->length() > 32 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
+             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+             n->bottom_type()->is_vect()->element_basic_type() == T_INT));
+  match(Set dst (VectorInsert (Binary src val) idx));
+  effect(TEMP_DEF dst, TEMP tmp1, TEMP pTmp, KILL cr);
+  ins_cost(5 * SVE_COST);
+  format %{ "sve_index $tmp1, 0, 1\t# (B/S/I)\n\t"
+            "sve_dup $dst, $idx\t# (B/S/I)\n\t"
+            "sve_cmpeq $pTmp, $tmp1, $dst\n\t"
+            "sve_orr $dst, $src, $src\n\t"
+            "sve_cpy $dst, $pTmp, $val\t# insert into vector (B/S/I)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this, $src);
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+    __ sve_index(as_FloatRegister($tmp1$$reg), size, 0, 1);
+    __ sve_dup(as_FloatRegister($dst$$reg), size, (int)($idx$$constant));
+    __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, ptrue,
+               as_FloatRegister($tmp1$$reg), as_FloatRegister($dst$$reg));
+    __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+    __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), as_Register($val$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+dnl
+dnl
+define(`VECTOR_INSERT_D', `
+instruct insert$1`'(vReg dst, vReg src, $2 val, immI idx, pRegGov pTmp, rFlagsReg cr)
+%{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($1));
+  match(Set dst (VectorInsert (Binary src val) idx));
+  effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
+  ins_cost(4 * SVE_COST);
+  format %{ "sve_index $dst, $3, -16, 1\n\t"
+            "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
+            "sve_orr $dst, $src, $src\n\t"
+            "sve_cpy $dst, $pTmp, $val\t# insert into vector ($1)" %}
+  ins_encode %{
+    __ sve_index(as_FloatRegister($dst$$reg), __ $3, -16, 1);
+    __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ $3, ptrue,
+               as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
+    __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+    __ sve_cpy(as_FloatRegister($dst$$reg), __ $3, as_PRegister($pTmp$$reg), as_$4($val$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4
+VECTOR_INSERT_D(L, iRegL, D, Register)
+VECTOR_INSERT_D(D, vRegD, D, FloatRegister)
+
+instruct insertF(vReg dst, vReg src, vRegF val, immI idx, vReg tmp1, pRegGov pTmp, rFlagsReg cr)
+%{
+  predicate(UseSVE > 0 && n->as_Vector()->length() > 32 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+  match(Set dst (VectorInsert (Binary src val) idx));
+  effect(TEMP_DEF dst, TEMP tmp1, TEMP pTmp, KILL cr);
+  ins_cost(5 * SVE_COST);
+  format %{ "sve_index $tmp1, S, 0, 1\n\t"
+            "sve_dup $dst, S, $idx\n\t"
+            "sve_cmpeq $pTmp, $tmp1, $dst\n\t"
+            "sve_orr $dst, $src, $src\n\t"
+            "sve_cpy $dst, $pTmp, $val\t# insert into vector (F)" %}
+  ins_encode %{
+    __ sve_index(as_FloatRegister($tmp1$$reg), __ S, 0, 1);
+    __ sve_dup(as_FloatRegister($dst$$reg), __ S, (int)($idx$$constant));
+    __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ S, ptrue,
+               as_FloatRegister($tmp1$$reg), as_FloatRegister($dst$$reg));
+    __ sve_orr(as_FloatRegister($dst$$reg),
+               as_FloatRegister($src$$reg),
+               as_FloatRegister($src$$reg));
+    __ sve_cpy(as_FloatRegister($dst$$reg), __ S,
+               as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
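The *_small rules above lean on an encoding detail: the immediate form of sve_cmp only accepts a signed value in [-16, 15], so for vectors of at most 32 lanes the iota vector is generated starting at -16 and the requested lane number is shifted by the same offset before the compare; sve_cpy then writes val only under the single matching predicate lane. A scalar sketch of the idea (hypothetical helper, not part of the patch):

    #include <cstdint>
    #include <cstddef>

    // Scalar model of insertI_small/insertF_small for nlanes <= 32, where
    // idx - 16 always fits the [-16, 15] compare immediate.
    static void sve_insert_small_model(int32_t* dst, const int32_t* src,
                                       size_t nlanes, size_t idx, int32_t val) {
      for (size_t i = 0; i < nlanes; i++) {
        int lane_key = (int)i - 16;                   // sve_index: iota from -16
        dst[i] = src[i];                              // sve_orr: copy src into dst
        if (lane_key == (int)idx - 16) dst[i] = val;  // sve_cmp + sve_cpy
      }
    }

For longer vectors the insertI/insertF rules fall back to a zero-based iota plus an sve_dup broadcast of the index, at the cost of one extra instruction.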
+// ------------------------------ Vector shuffle -------------------------------
+
+instruct loadshuffleB(vReg dst, vReg src)
+%{
+  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+  match(Set dst (VectorLoadShuffle src));
+  ins_cost(SVE_COST);
+  format %{ "sve_orr $dst, $src, $src\t# vector load shuffle (B)" %}
+  ins_encode %{
+    if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+      __ sve_orr(as_FloatRegister($dst$$reg),
+                 as_FloatRegister($src$$reg),
+                 as_FloatRegister($src$$reg));
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct loadshuffleS(vReg dst, vReg src)
+%{
+  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  match(Set dst (VectorLoadShuffle src));
+  ins_cost(SVE_COST);
+  format %{ "sve_uunpklo $dst, $src\t# vector load shuffle (B to H)" %}
+  ins_encode %{
+    __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct loadshuffleI(vReg dst, vReg src)
+%{
+  predicate(UseSVE > 0 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+  match(Set dst (VectorLoadShuffle src));
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_uunpklo $dst, H, $src\n\t"
+            "sve_uunpklo $dst, S, $dst\t# vector load shuffle (B to S)" %}
+  ins_encode %{
+    __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
+    __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct loadshuffleL(vReg dst, vReg src)
+%{
+  predicate(UseSVE > 0 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
+             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
+  match(Set dst (VectorLoadShuffle src));
+  ins_cost(3 * SVE_COST);
+  format %{ "sve_uunpklo $dst, H, $src\n\t"
+            "sve_uunpklo $dst, S, $dst\n\t"
+            "sve_uunpklo $dst, D, $dst\t# vector load shuffle (B to D)" %}
+  ins_encode %{
+    __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
+    __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
+    __ sve_uunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
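VectorLoadShuffle receives its shuffle indices as bytes; for wider element types the rules above zero-extend the low-half indices in place, doubling the lane width once per uunpklo, until the index width matches the element width that the later sve_tbl will consume. A scalar model of a single widening pass (assumed semantics of the unpack-low step; helper name invented):

    #include <cstdint>
    #include <cstddef>

    // One sve_uunpklo pass (B -> H): the low half of the source lanes is
    // zero-extended to twice the width. loadshuffleI chains two such passes
    // (B -> H -> S); loadshuffleL chains three (B -> H -> S -> D).
    static void uunpklo_b_to_h(uint16_t* dst, const uint8_t* src, size_t h_lanes) {
      for (size_t i = 0; i < h_lanes; i++) {
        dst[i] = src[i];  // uunpkhi would read src[i + h_lanes] instead
      }
    }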
+// ------------------------------ Vector rearrange -------------------------------
+
+instruct rearrange(vReg dst, vReg src, vReg shuffle)
+%{
+  predicate(UseSVE > 0);
+  match(Set dst (VectorRearrange src shuffle));
+  ins_cost(SVE_COST);
+  format %{ "sve_tbl $dst, $src, $shuffle\t# vector rearrange" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this, $src);
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+    __ sve_tbl(as_FloatRegister($dst$$reg), size,
+               as_FloatRegister($src$$reg), as_FloatRegister($shuffle$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// ------------------------------ Vector Load Gather ---------------------------------
+
+instruct gatherI(vReg dst, indirect mem, vReg idx) %{
+  predicate(UseSVE > 0 &&
+            n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (LoadVectorGather mem idx)); + ins_cost(SVE_COST); + format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (I/F)" %} + ins_encode %{ + __ sve_ld1w_gather(as_FloatRegister($dst$$reg), ptrue, + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct gatherL(vReg dst, indirect mem, vReg idx) %{ + predicate(UseSVE > 0 && + n->as_LoadVectorGather()->memory_size() == MaxVectorSize && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set dst (LoadVectorGather mem idx)); + ins_cost(2 * SVE_COST); + format %{ "sve_uunpklo $idx, $idx\n\t" + "load_vector_gather $dst, $mem, $idx\t# vector load gather (L/D)" %} + ins_encode %{ + __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); + __ sve_ld1d_gather(as_FloatRegister($dst$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector Load Gather Partial------------------------------- + +instruct gatherI_partial(vReg dst, indirect mem, vReg idx, pRegGov pTmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_LoadVectorGather()->memory_size() < MaxVectorSize && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (LoadVectorGather mem idx)); + effect(TEMP pTmp, KILL cr); + ins_cost(2 * SVE_COST + INSN_COST); + format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" + "load_vector_gather $dst, $pTmp, $mem, $idx\t# vector load gather partial (I/F)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ S, + Matcher::vector_length(this)); + __ sve_ld1w_gather(as_FloatRegister($dst$$reg), as_PRegister($pTmp$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct gatherL_partial(vReg dst, indirect mem, vReg idx, pRegGov pTmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_LoadVectorGather()->memory_size() < MaxVectorSize && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set dst (LoadVectorGather mem idx)); + effect(TEMP pTmp, KILL cr); + ins_cost(3 * SVE_COST + INSN_COST); + format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" + "sve_uunpklo $idx, $idx\n\t" + "load_vector_gather $dst, $pTmp, $mem, $idx\t# vector load gather partial (L/D)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ D, + Matcher::vector_length(this)); + __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); + __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($pTmp$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector Store Scatter ------------------------------- + +instruct scatterI(indirect mem, vReg src, vReg idx) %{ + predicate(UseSVE > 0 && + n->as_StoreVectorScatter()->memory_size() == MaxVectorSize && + (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set mem (StoreVectorScatter mem (Binary src idx))); + ins_cost(SVE_COST); + format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (I/F)" %} + ins_encode %{ + __ 
sve_st1w_scatter(as_FloatRegister($src$$reg), ptrue,
+                        as_Register($mem$$base), as_FloatRegister($idx$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct scatterL(indirect mem, vReg src, vReg idx) %{
+  predicate(UseSVE > 0 &&
+            n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
+            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
+             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
+  match(Set mem (StoreVectorScatter mem (Binary src idx)));
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_uunpklo $idx, $idx\n\t"
+            "store_vector_scatter $mem, $idx, $src\t# vector store scatter (L/D)" %}
+  ins_encode %{
+    __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D,
+                   as_FloatRegister($idx$$reg));
+    __ sve_st1d_scatter(as_FloatRegister($src$$reg), ptrue,
+                        as_Register($mem$$base), as_FloatRegister($idx$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// ------------------------------ Vector Store Scatter Partial-------------------------------
+
+instruct scatterI_partial(indirect mem, vReg src, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 &&
+            n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
+            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+  match(Set mem (StoreVectorScatter mem (Binary src idx)));
+  effect(TEMP pTmp, KILL cr);
+  ins_cost(2 * SVE_COST + INSN_COST);
+  format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
+            "store_vector_scatter $mem, $pTmp, $idx, $src\t# vector store scatter partial (I/F)" %}
+  ins_encode %{
+    __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ S,
+                          Matcher::vector_length(this, $src));
+    __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($pTmp$$reg),
+                        as_Register($mem$$base), as_FloatRegister($idx$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct scatterL_partial(indirect mem, vReg src, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 &&
+            n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
+            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
+             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
+  match(Set mem (StoreVectorScatter mem (Binary src idx)));
+  effect(TEMP pTmp, KILL cr);
+  ins_cost(3 * SVE_COST + INSN_COST);
+  format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
+            "sve_uunpklo $idx, $idx\n\t"
+            "store_vector_scatter $mem, $pTmp, $idx, $src\t# vector store scatter partial (L/D)" %}
+  ins_encode %{
+    __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ D,
+                          Matcher::vector_length(this, $src));
+    __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
+    __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($pTmp$$reg),
+                        as_Register($mem$$base), as_FloatRegister($idx$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+
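All of the *_partial flavors share one preamble: sve_whilelo_zr_imm builds a governing predicate whose first vector_length lanes are active, so the gather, scatter, or reduction only touches the lanes of a vector that is smaller than the full SVE register. A scalar model of that predicate (invented helper, not part of the patch):

    #include <cstddef>

    // Model of the governing predicate produced by sve_whilelo_zr_imm:
    // lane i is active while i < vlen (an unsigned "while lower" loop
    // starting from zero).
    static void whilelo_zr_imm_model(bool* pred, size_t max_lanes, size_t vlen) {
      for (size_t i = 0; i < max_lanes; i++) {
        pred[i] = (i < vlen);
      }
    }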
+// ------------------------------ Vector Load Const -------------------------------
+
+instruct loadconB(vReg dst, immI0 src) %{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+  match(Set dst (VectorLoadConst src));
+  ins_cost(SVE_COST);
+  format %{ "sve_index $dst, 0, 1\t# generate iota indices" %}
+  ins_encode %{
+    __ sve_index(as_FloatRegister($dst$$reg), __ B, 0, 1);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// Intrinsics for String.indexOf(char)
+
+dnl
+define(`STRING_INDEXOF_CHAR', `
+instruct string$1_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
+                                   iRegI_R0 result, vReg ztmp1, vReg ztmp2,
+                                   pRegGov pgtmp, pReg ptmp, rFlagsReg cr)
+%{
+  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
+  predicate((UseSVE > 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::$1));
+  effect(TEMP ztmp1, TEMP ztmp2, TEMP pgtmp, TEMP ptmp, KILL cr);
+
+  format %{ "String$2 IndexOf char[] $str1,$cnt1,$ch -> $result # use sve" %}
+
+  ins_encode %{
+    __ string_indexof_char_sve($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
+                               as_FloatRegister($ztmp1$$reg), as_FloatRegister($ztmp2$$reg),
+                               as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg), $3 /* isL */);
+  %}
+  ins_pipe(pipe_class_memory);
+%}')dnl
+dnl $1 $2 $3
+STRING_INDEXOF_CHAR(L, Latin1, true)
+STRING_INDEXOF_CHAR(U, UTF16, false)
+
+dnl
+dnl VMASK_REDUCTION($1,     $2,      $3  )
+dnl VMASK_REDUCTION(suffix, op_name, cost)
+define(`VMASK_REDUCTION', `
+instruct vmask_$1(iRegINoSp dst, vReg src, pReg ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 &&
+            n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+  match(Set dst ($2 src));
+  effect(TEMP ptmp, KILL cr);
+  ins_cost($3 * SVE_COST);
+  format %{ "vmask_$1 $dst, $src\t# vector mask $1 (sve)" %}
+  ins_encode %{
+    __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B,
+                           as_FloatRegister($src$$reg), ptrue, as_PRegister($ptmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// ---------------------------- Vector mask reductions ---------------------------
+VMASK_REDUCTION(truecount, VectorMaskTrueCount, 2)
+VMASK_REDUCTION(firsttrue, VectorMaskFirstTrue, 3)
+VMASK_REDUCTION(lasttrue, VectorMaskLastTrue, 4)
+dnl
+dnl VMASK_REDUCTION_PARTIAL($1,     $2,      $3  )
+dnl VMASK_REDUCTION_PARTIAL(suffix, op_name, cost)
+define(`VMASK_REDUCTION_PARTIAL', `
+instruct vmask_$1_partial(iRegINoSp dst, vReg src, pRegGov ifelse($1, `firsttrue', `pgtmp, pReg ptmp', `ptmp'), rFlagsReg cr) %{
+  predicate(UseSVE > 0 &&
+            n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+  match(Set dst ($2 src));
+  effect(TEMP ifelse($1, `firsttrue', `pgtmp, TEMP ptmp', `ptmp'), KILL cr);
+  ins_cost($3 * SVE_COST);
+  format %{ "vmask_$1 $dst, $src\t# vector mask $1 partial (sve)" %}
+  ins_encode %{
+    __ sve_whilelo_zr_imm(as_PRegister(ifelse($1, `firsttrue', `$pgtmp', `$ptmp')$$reg), __ B,
+                          Matcher::vector_length(this, $src));
+    __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, as_FloatRegister($src$$reg),
+                           as_PRegister(ifelse($1, `firsttrue', `$pgtmp', `$ptmp')$$reg), as_PRegister($ptmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+VMASK_REDUCTION_PARTIAL(truecount, VectorMaskTrueCount, 3)
+VMASK_REDUCTION_PARTIAL(firsttrue, VectorMaskFirstTrue, 4)
+VMASK_REDUCTION_PARTIAL(lasttrue, VectorMaskLastTrue, 5)
+
+dnl
+dnl VSTOREMASK_REDUCTION($1,     $2,      $3  )
+dnl VSTOREMASK_REDUCTION(suffix, op_name, cost)
+define(`VSTOREMASK_REDUCTION', `
+instruct vstoremask_$1(iRegINoSp dst, vReg src, immI esize, pReg ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+  match(Set dst ($2 (VectorStoreMask src esize)));
+  effect(TEMP ptmp, KILL cr);
+  ins_cost($3 * SVE_COST);
+  format %{ "vstoremask_$1 $dst, $src\t# vector mask $1 (sve)" %}
+  ins_encode %{
+    unsigned size = $esize$$constant;
+    assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size");
+    Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size);
+    __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg),
+                           ptrue,
as_PRegister($ptmp$$reg), Matcher::vector_length(this, $src)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +// ----------------- Vector mask reductions combined with VectorMaskStore --------------- +VSTOREMASK_REDUCTION(truecount, VectorMaskTrueCount, 2) +VSTOREMASK_REDUCTION(firsttrue, VectorMaskFirstTrue, 3) +VSTOREMASK_REDUCTION(lasttrue, VectorMaskLastTrue, 4) +dnl +dnl VSTOREMASK_REDUCTION_PARTIAL($1, $2, $3 ) +dnl VSTOREMASK_REDUCTION_PARTIAL(suffix, op_name, cost) +define(`VSTOREMASK_REDUCTION_PARTIAL', ` +instruct vstoremask_$1_partial(iRegINoSp dst, vReg src, immI esize, pRegGov ifelse($1, `firsttrue', `pgtmp, pReg ptmp', `ptmp'), rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst ($2 (VectorStoreMask src esize))); + effect(TEMP ifelse($1, `firsttrue', `pgtmp, TEMP ptmp', `ptmp'), KILL cr); + ins_cost($3 * SVE_COST); + format %{ "vstoremask_$1 $dst, $src\t# vector mask $1 partial (sve)" %} + ins_encode %{ + unsigned size = $esize$$constant; + assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size"); + Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size); + __ sve_whilelo_zr_imm(as_PRegister(ifelse($1, `firsttrue', `$pgtmp', `$ptmp')$$reg), variant, + Matcher::vector_length(this, $src)); + __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg), + as_PRegister(ifelse($1, `firsttrue', `$pgtmp', `$ptmp')$$reg), as_PRegister($ptmp$$reg), MaxVectorSize / size); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +VSTOREMASK_REDUCTION_PARTIAL(truecount, VectorMaskTrueCount, 3) +VSTOREMASK_REDUCTION_PARTIAL(firsttrue, VectorMaskFirstTrue, 4) +VSTOREMASK_REDUCTION_PARTIAL(lasttrue, VectorMaskLastTrue, 5) diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp index f9974680d2e7b5585fd741c4d69a57876b342225..4e883838a6610ba34d7f737526f7e39b2aead592 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp @@ -54,10 +54,32 @@ Assembler::SIMD_Arrangement Assembler::_esize2arrangement_table[9][2] = { /* 8 */ {T1D, T2D} }; +Assembler::SIMD_RegVariant Assembler::_esize2regvariant[9] = { + INVALID, + B, + H, + INVALID, + S, + INVALID, + INVALID, + INVALID, + D, +}; + +Assembler::SIMD_Arrangement Assembler::esize2arrangement(unsigned esize, bool isQ) { + guarantee(esize < ARRAY_SIZE(_esize2arrangement_table) && + _esize2arrangement_table[esize][isQ] != INVALID_ARRANGEMENT, "unsupported element size"); + return _esize2arrangement_table[esize][isQ]; +} + +Assembler::SIMD_RegVariant Assembler::elemBytes_to_regVariant(unsigned esize) { + guarantee(esize < ARRAY_SIZE(_esize2regvariant) && _esize2regvariant[esize] != INVALID, + "unsupported element size"); + return _esize2regvariant[esize]; +} -Assembler::SIMD_Arrangement Assembler::esize2arrangement(int esize, bool isQ) { - guarantee(esize == 1 || esize == 2 || esize == 4 || esize == 8, "unsupported element size"); - return _esize2arrangement_table[esize][isQ]; +Assembler::SIMD_RegVariant Assembler::elemType_to_regVariant(BasicType bt) { + return elemBytes_to_regVariant(type2aelembytes(bt)); } void Assembler::emit_data64(jlong data, @@ -134,7 +156,16 @@ void Assembler::adrp(Register reg1, const Address &dest, uint64_t &byte_offset) #undef __ -#define starti Instruction_aarch64 do_not_use(this); set_current(&do_not_use) +#define starti Instruction_aarch64 current_insn(this); + +#define f 
current_insn.f +#define sf current_insn.sf +#define rf current_insn.rf +#define srf current_insn.srf +#define zrf current_insn.zrf +#define prf current_insn.prf +#define pgrf current_insn.pgrf +#define fixed current_insn.fixed void Assembler::adr(Register Rd, address adr) { intptr_t offset = adr - pc(); @@ -156,6 +187,53 @@ void Assembler::adrp(Register reg1, const Address &dest, uint64_t &byte_offset) rf(Rd, 0); } +// An "all-purpose" add/subtract immediate, per ARM documentation: +// A "programmer-friendly" assembler may accept a negative immediate +// between -(2^24 -1) and -1 inclusive, causing it to convert a +// requested ADD operation to a SUB, or vice versa, and then encode +// the absolute value of the immediate as for uimm24. +void Assembler::add_sub_immediate(Instruction_aarch64 ¤t_insn, + Register Rd, Register Rn, unsigned uimm, int op, + int negated_op) { + bool sets_flags = op & 1; // this op sets flags + union { + unsigned u; + int imm; + }; + u = uimm; + bool shift = false; + bool neg = imm < 0; + if (neg) { + imm = -imm; + op = negated_op; + } + assert(Rd != sp || imm % 16 == 0, "misaligned stack"); + if (imm >= (1 << 11) + && ((imm >> 12) << 12 == imm)) { + imm >>= 12; + shift = true; + } + f(op, 31, 29), f(0b10001, 28, 24), f(shift, 23, 22), f(imm, 21, 10); + + // add/subtract immediate ops with the S bit set treat r31 as zr; + // with S unset they use sp. + if (sets_flags) + zrf(Rd, 0); + else + srf(Rd, 0); + + srf(Rn, 5); +} + +#undef f +#undef sf +#undef rf +#undef srf +#undef zrf +#undef prf +#undef pgrf +#undef fixed + #undef starti Address::Address(address target, relocInfo::relocType rtype) : _mode(literal){ @@ -260,43 +338,6 @@ void Assembler::wrap_label(Label &L, prfop op, prefetch_insn insn) { } } -// An "all-purpose" add/subtract immediate, per ARM documentation: -// A "programmer-friendly" assembler may accept a negative immediate -// between -(2^24 -1) and -1 inclusive, causing it to convert a -// requested ADD operation to a SUB, or vice versa, and then encode -// the absolute value of the immediate as for uimm24. -void Assembler::add_sub_immediate(Register Rd, Register Rn, unsigned uimm, int op, - int negated_op) { - bool sets_flags = op & 1; // this op sets flags - union { - unsigned u; - int imm; - }; - u = uimm; - bool shift = false; - bool neg = imm < 0; - if (neg) { - imm = -imm; - op = negated_op; - } - assert(Rd != sp || imm % 16 == 0, "misaligned stack"); - if (imm >= (1 << 11) - && ((imm >> 12) << 12 == imm)) { - imm >>= 12; - shift = true; - } - f(op, 31, 29), f(0b10001, 28, 24), f(shift, 23, 22), f(imm, 21, 10); - - // add/subtract immediate ops with the S bit set treat r31 as zr; - // with S unset they use sp. 
-  if (sets_flags)
-    zrf(Rd, 0);
-  else
-    srf(Rd, 0);
-
-  srf(Rn, 5);
-}
-
 bool Assembler::operand_valid_for_add_sub_immediate(int64_t imm) {
   bool shift = false;
   uint64_t uimm = (uint64_t)uabs((jlong)imm);
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
index 78e8fc370ad1cdebf9cf15ea3a63b2200d9c6d84..abdc424062de4d860df7d914ed975f5faa27f114 100644
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
@@ -247,12 +247,12 @@ public:
     int nbits = msb - lsb + 1;
     guarantee(val < (1ULL << nbits), "Field too big for insn");
     assert_cond(msb >= lsb);
-    unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
     val <<= lsb;
-    mask <<= lsb;
     insn |= val;
-    assert_cond((bits & mask) == 0);
 #ifdef ASSERT
+    unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
+    mask <<= lsb;
+    assert_cond((bits & mask) == 0);
     bits |= mask;
 #endif
   }
@@ -313,7 +313,7 @@ public:
   }
 };
 
-#define starti Instruction_aarch64 do_not_use(this); set_current(&do_not_use)
+#define starti Instruction_aarch64 current_insn(this);
 
 class PrePost {
   int _offset;
@@ -647,22 +647,22 @@ typedef enum {
 
 class Assembler : public AbstractAssembler {
 
+public:
+
 #ifndef PRODUCT
   static const uintptr_t asm_bp;
 
-  void emit_long(jint x) {
+  void emit_int32(jint x) {
     if ((uintptr_t)pc() == asm_bp)
       NOP();
     AbstractAssembler::emit_int32(x);
   }
 #else
-  void emit_long(jint x) {
+  void emit_int32(jint x) {
     AbstractAssembler::emit_int32(x);
   }
 #endif
 
-public:
-
   enum { instruction_size = 4 };
 
   //---< calculate length of instruction >---
@@ -694,46 +694,14 @@ public:
 
   static address locate_next_instruction(address inst);
 
-  Instruction_aarch64* current;
-
-  void set_current(Instruction_aarch64* i) { current = i; }
-
-  void f(unsigned val, int msb, int lsb) {
-    current->f(val, msb, lsb);
-  }
-  void f(unsigned val, int msb) {
-    current->f(val, msb, msb);
-  }
-  void sf(int64_t val, int msb, int lsb) {
-    current->sf(val, msb, lsb);
-  }
-  void rf(Register reg, int lsb) {
-    current->rf(reg, lsb);
-  }
-  void srf(Register reg, int lsb) {
-    current->srf(reg, lsb);
-  }
-  void zrf(Register reg, int lsb) {
-    current->zrf(reg, lsb);
-  }
-  void rf(FloatRegister reg, int lsb) {
-    current->rf(reg, lsb);
-  }
-  void prf(PRegister reg, int lsb) {
-    current->prf(reg, lsb);
-  }
-  void pgrf(PRegister reg, int lsb) {
-    current->pgrf(reg, lsb);
-  }
-  void fixed(unsigned value, unsigned mask) {
-    current->fixed(value, mask);
-  }
-
-  void emit() {
-    emit_long(current->get_insn());
-    assert_cond(current->get_bits() == 0xffffffff);
-    current = NULL;
-  }
+#define f current_insn.f
+#define sf current_insn.sf
+#define rf current_insn.rf
+#define srf current_insn.srf
+#define zrf current_insn.zrf
+#define prf current_insn.prf
+#define pgrf current_insn.pgrf
+#define fixed current_insn.fixed
 
   typedef void (Assembler::* uncond_branch_insn)(address dest);
   typedef void (Assembler::* compare_and_branch_insn)(Register Rt, address dest);
@@ -764,8 +732,8 @@ public:
 
 #undef INSN
 
-  void add_sub_immediate(Register Rd, Register Rn, unsigned uimm, int op,
-                         int negated_op);
+  void add_sub_immediate(Instruction_aarch64 &current_insn, Register Rd, Register Rn,
+                         unsigned uimm, int op, int negated_op);
 
   // Add/subtract (immediate)
 #define INSN(NAME, decode, negated)                           \
   void NAME(Register Rd, Register Rn, unsigned imm) {         \
     starti;                                                   \
-    add_sub_immediate(Rd, Rn, imm, decode, negated);          \
+    add_sub_immediate(current_insn, Rd, Rn, imm, decode, negated); \
   }
 
   INSN(addsw, 0b001, 0b011);
@@ -790,7 +758,7 @@
public: #define INSN(NAME, decode, negated) \ void NAME(Register Rd, Register Rn, unsigned imm) { \ starti; \ - add_sub_immediate(Rd, Rn, imm, decode, negated); \ + add_sub_immediate(current_insn, Rd, Rn, imm, decode, negated); \ } INSN(addw, 0b000, 0b010); @@ -1092,7 +1060,7 @@ public: } void sys(int op1, int CRn, int CRm, int op2, - Register rt = (Register)0b11111) { + Register rt = as_Register(0b11111)) { system(0b01, op1, CRn, CRm, op2, rt); } @@ -1361,7 +1329,7 @@ public: starti; \ f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24), \ sf(offset, 23, 5); \ - rf((Register)Rt, 0); \ + rf(as_Register(Rt), 0); \ } INSN(ldrs, 0b00, 1); @@ -1375,7 +1343,7 @@ public: starti; \ f(size, 31, 30), f(0b111100, 29, 24), f(opc, 23, 22), f(0, 21); \ f(0, 20, 12), f(0b01, 11, 10); \ - rf(Rn, 5), rf((Register)Rt, 0); \ + rf(Rn, 5), rf(as_Register(Rt), 0); \ } INSN(ldrs, 0b10, 0b01); @@ -1408,9 +1376,9 @@ public: f(opc, 31, 30), f(p1, 29, 27), f(V, 26), f(L, 22); zrf(Rt2, 10), zrf(Rt1, 0); if (no_allocate) { - adr.encode_nontemporal_pair(current); + adr.encode_nontemporal_pair(¤t_insn); } else { - adr.encode_pair(current); + adr.encode_pair(¤t_insn); } } @@ -1436,7 +1404,8 @@ public: #define INSN(NAME, size, p1, V, L, no_allocate) \ void NAME(FloatRegister Rt1, FloatRegister Rt2, Address adr) { \ - ld_st1(size, p1, V, L, (Register)Rt1, (Register)Rt2, adr, no_allocate); \ + ld_st1(size, p1, V, L, \ + as_Register(Rt1), as_Register(Rt2), adr, no_allocate); \ } INSN(stps, 0b00, 0b101, 1, 0, false); @@ -1471,7 +1440,7 @@ public: f(size, 31, 30); f(op, 23, 22); // str - adr.encode(current); + adr.encode(¤t_insn); } #define INSN(NAME, size, op) \ @@ -1499,7 +1468,7 @@ public: #define INSN(NAME, size, op) \ void NAME(const Address &adr, prfop pfop = PLDL1KEEP) { \ - ld_st2((Register)pfop, adr, size, op); \ + ld_st2(as_Register(pfop), adr, size, op); \ } INSN(prfm, 0b11, 0b10); // FIXME: PRFM should not be used with @@ -1510,7 +1479,7 @@ public: #define INSN(NAME, size, op) \ void NAME(FloatRegister Rt, const Address &adr) { \ - ld_st2((Register)Rt, adr, size, op, 1); \ + ld_st2(as_Register(Rt), adr, size, op, 1); \ } INSN(strd, 0b11, 0b00); @@ -1533,21 +1502,24 @@ public: T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q, INVALID_ARRANGEMENT }; + enum SIMD_RegVariant { + B, H, S, D, Q, INVALID + }; + private: static SIMD_Arrangement _esize2arrangement_table[9][2]; + static SIMD_RegVariant _esize2regvariant[9]; public: - static SIMD_Arrangement esize2arrangement(int esize, bool isQ); - - enum SIMD_RegVariant { - B, H, S, D, Q, INVALID - }; + static SIMD_Arrangement esize2arrangement(unsigned esize, bool isQ); + static SIMD_RegVariant elemType_to_regVariant(BasicType bt); + static SIMD_RegVariant elemBytes_to_regVariant(unsigned esize); enum shift_kind { LSL, LSR, ASR, ROR }; - void op_shifted_reg(unsigned decode, + void op_shifted_reg(Instruction_aarch64 ¤t_insn, unsigned decode, enum shift_kind kind, unsigned shift, unsigned size, unsigned op) { f(size, 31); @@ -1558,14 +1530,14 @@ public: } // Logical (shifted register) -#define INSN(NAME, size, op, N) \ - void NAME(Register Rd, Register Rn, Register Rm, \ - enum shift_kind kind = LSL, unsigned shift = 0) { \ - starti; \ - guarantee(size == 1 || shift < 32, "incorrect shift"); \ - f(N, 21); \ - zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0); \ - op_shifted_reg(0b01010, kind, shift, size, op); \ +#define INSN(NAME, size, op, N) \ + void NAME(Register Rd, Register Rn, Register Rm, \ + enum shift_kind kind = LSL, unsigned shift = 0) { \ + starti; \ + guarantee(size == 1 
|| shift < 32, "incorrect shift"); \ + f(N, 21); \ + zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0); \ + op_shifted_reg(current_insn, 0b01010, kind, shift, size, op); \ } INSN(andr, 1, 0b00, 0); @@ -1585,7 +1557,7 @@ public: starti; \ f(N, 21); \ zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0); \ - op_shifted_reg(0b01010, kind, shift, size, op); \ + op_shifted_reg(current_insn, 0b01010, kind, shift, size, op); \ } \ \ /* These instructions have no immediate form. Provide an overload so \ @@ -1632,7 +1604,7 @@ void mvnw(Register Rd, Register Rm, assert_cond(kind != ROR); \ guarantee(size == 1 || shift < 32, "incorrect shift");\ zrf(Rd, 0), zrf(Rn, 5), zrf(Rm, 16); \ - op_shifted_reg(0b01011, kind, shift, size, op); \ + op_shifted_reg(current_insn, 0b01011, kind, shift, size, op); \ } INSN(add, 1, 0b000); @@ -1653,10 +1625,10 @@ void mvnw(Register Rd, Register Rm, ext::operation option, int amount = 0) { \ starti; \ zrf(Rm, 16), srf(Rn, 5), srf(Rd, 0); \ - add_sub_extended_reg(op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \ + add_sub_extended_reg(current_insn, op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \ } - void add_sub_extended_reg(unsigned op, unsigned decode, + void add_sub_extended_reg(Instruction_aarch64 ¤t_insn, unsigned op, unsigned decode, Register Rd, Register Rn, Register Rm, unsigned opt, ext::operation option, unsigned imm) { guarantee(imm <= 4, "shift amount must be <= 4"); @@ -1676,7 +1648,7 @@ void mvnw(Register Rd, Register Rm, ext::operation option, int amount = 0) { \ starti; \ zrf(Rm, 16), srf(Rn, 5), zrf(Rd, 0); \ - add_sub_extended_reg(op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \ + add_sub_extended_reg(current_insn, op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \ } INSN(addsw, 0b001); @@ -1777,7 +1749,7 @@ void mvnw(Register Rd, Register Rm, } #define INSN(NAME, op, op2) \ - void NAME(Register Rd, Register Rn, Register Rm, Condition cond) { \ + void NAME(Register Rd, Register Rn, Register Rm, Condition cond) { \ conditional_select(op, op2, Rd, Rn, Rm, cond); \ } @@ -1793,7 +1765,7 @@ void mvnw(Register Rd, Register Rm, #undef INSN // Data processing - void data_processing(unsigned op29, unsigned opcode, + void data_processing(Instruction_aarch64 ¤t_insn, unsigned op29, unsigned opcode, Register Rd, Register Rn) { f(op29, 31, 29), f(0b11010110, 28, 21); f(opcode, 15, 10); @@ -1801,11 +1773,11 @@ void mvnw(Register Rd, Register Rm, } // (1 source) -#define INSN(NAME, op29, opcode2, opcode) \ - void NAME(Register Rd, Register Rn) { \ - starti; \ - f(opcode2, 20, 16); \ - data_processing(op29, opcode, Rd, Rn); \ +#define INSN(NAME, op29, opcode2, opcode) \ + void NAME(Register Rd, Register Rn) { \ + starti; \ + f(opcode2, 20, 16); \ + data_processing(current_insn, op29, opcode, Rd, Rn); \ } INSN(rbitw, 0b010, 0b00000, 0b00000); @@ -1824,11 +1796,11 @@ void mvnw(Register Rd, Register Rm, #undef INSN // (2 sources) -#define INSN(NAME, op29, opcode) \ - void NAME(Register Rd, Register Rn, Register Rm) { \ - starti; \ - rf(Rm, 16); \ - data_processing(op29, opcode, Rd, Rn); \ +#define INSN(NAME, op29, opcode) \ + void NAME(Register Rd, Register Rn, Register Rm) { \ + starti; \ + rf(Rm, 16); \ + data_processing(current_insn, op29, opcode, Rd, Rn); \ } INSN(udivw, 0b000, 0b000010); @@ -1873,9 +1845,9 @@ void mvnw(Register Rd, Register Rm, #undef INSN -#define INSN(NAME, op54, op31, o0) \ - void NAME(Register Rd, Register Rn, Register Rm) { \ - data_processing(op54, op31, o0, Rd, Rn, Rm, (Register)31); \ +#define INSN(NAME, op54, op31, o0) \ + void NAME(Register Rd, Register Rn, Register 
Rm) { \ + data_processing(op54, op31, o0, Rd, Rn, Rm, as_Register(31)); \ } INSN(smulh, 0b100, 0b010, 0); @@ -2054,7 +2026,7 @@ public: #define INSN(NAME, op31, type, rmode, opcode) \ void NAME(Register Rd, FloatRegister Vn) { \ - float_int_convert(op31, type, rmode, opcode, Rd, (Register)Vn); \ + float_int_convert(op31, type, rmode, opcode, Rd, as_Register(Vn)); \ } INSN(fcvtzsw, 0b000, 0b00, 0b11, 0b000); @@ -2065,13 +2037,13 @@ public: INSN(fmovs, 0b000, 0b00, 0b00, 0b110); INSN(fmovd, 0b100, 0b01, 0b00, 0b110); - // INSN(fmovhid, 0b100, 0b10, 0b01, 0b110); + INSN(fmovhid, 0b100, 0b10, 0b01, 0b110); #undef INSN #define INSN(NAME, op31, type, rmode, opcode) \ void NAME(FloatRegister Vd, Register Rn) { \ - float_int_convert(op31, type, rmode, opcode, (Register)Vd, Rn); \ + float_int_convert(op31, type, rmode, opcode, as_Register(Vd), Rn); \ } INSN(fmovs, 0b000, 0b00, 0b00, 0b111); @@ -2126,7 +2098,7 @@ public: // Floating-point compare void float_compare(unsigned op31, unsigned type, unsigned op, unsigned op2, - FloatRegister Vn, FloatRegister Vm = (FloatRegister)0) { + FloatRegister Vn, FloatRegister Vm = as_FloatRegister(0)) { starti; f(op31, 31, 29); f(0b11110, 28, 24); @@ -2256,10 +2228,10 @@ private: static short SIMD_Size_in_bytes[]; public: -#define INSN(NAME, op) \ - void NAME(FloatRegister Rt, SIMD_RegVariant T, const Address &adr) { \ - ld_st2((Register)Rt, adr, (int)T & 3, op + ((T==Q) ? 0b10:0b00), 1); \ - } \ +#define INSN(NAME, op) \ + void NAME(FloatRegister Rt, SIMD_RegVariant T, const Address &adr) { \ + ld_st2(as_Register(Rt), adr, (int)T & 3, op + ((T==Q) ? 0b10:0b00), 1); \ + } INSN(ldr, 1); INSN(str, 0); @@ -2958,7 +2930,7 @@ public: f(0, 10), rf(Vn, 5), rf(Vd, 0); } -// SVE arithmetics - unpredicated +// SVE arithmetic - unpredicated #define INSN(NAME, opcode) \ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \ starti; \ @@ -2995,7 +2967,7 @@ private: public: -// SVE integer arithmetics - predicate +// SVE integer arithmetic - predicate #define INSN(NAME, op1, op2) \ void NAME(FloatRegister Zdn_or_Zd_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Znm_or_Vn) { \ assert(T != Q, "invalid register variant"); \ @@ -3023,7 +2995,7 @@ public: INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar #undef INSN -// SVE floating-point arithmetics - predicate +// SVE floating-point arithmetic - predicate #define INSN(NAME, op1, op2) \ void NAME(FloatRegister Zd_or_Zdn_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn_or_Zm) { \ assert(T == S || T == D, "invalid register variant"); \ @@ -3152,7 +3124,7 @@ private: public: -// SVE load/store - predicated +// SVE contiguous load/store #define INSN(NAME, op1, type, imm_op2, scalar_op2) \ void NAME(FloatRegister Zt, SIMD_RegVariant T, PRegister Pg, const Address &a) { \ assert(T != Q, "invalid register variant"); \ @@ -3169,6 +3141,23 @@ public: INSN(sve_st1d, 0b1110010, 0b11, 0b111, 0b010); #undef INSN +// Gather/scatter load/store (SVE) - scalar plus vector +#define INSN(NAME, op1, type, op2, op3) \ + void NAME(FloatRegister Zt, PRegister Pg, Register Xn, FloatRegister Zm) { \ + starti; \ + f(op1, 31, 25), f(type, 24, 23), f(op2, 22, 21), rf(Zm, 16); \ + f(op3, 15, 13), pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0); \ + } + // SVE 32-bit gather load words (scalar plus 32-bit scaled offsets) + INSN(sve_ld1w_gather, 0b1000010, 0b10, 0b01, 0b010); + // SVE 64-bit gather load (scalar plus 32-bit unpacked scaled offsets) + INSN(sve_ld1d_gather, 0b1100010, 0b11, 0b01, 
0b010);
+  // SVE 32-bit scatter store (scalar plus 32-bit scaled offsets)
+  INSN(sve_st1w_scatter, 0b1110010, 0b10, 0b11, 0b100);
+  // SVE 64-bit scatter store (scalar plus unpacked 32-bit scaled offsets)
+  INSN(sve_st1d_scatter, 0b1110010, 0b11, 0b01, 0b100);
+#undef INSN
+
 // SVE load/store - unpredicated
 #define INSN(NAME, op1) \
   void NAME(FloatRegister Zt, const Address &a) { \
@@ -3182,6 +3171,7 @@ public:
   INSN(sve_str, 0b111); // STR (vector)
 #undef INSN
 
+// SVE stack frame adjustment
 #define INSN(NAME, op) \
   void NAME(Register Xd, Register Xn, int imm6) { \
     starti; \
@@ -3189,8 +3179,8 @@ public:
     srf(Xn, 16), f(0b01010, 15, 11), sf(imm6, 10, 5), srf(Xd, 0); \
   }
 
-  INSN(sve_addvl, 0b01);
-  INSN(sve_addpl, 0b11);
+  INSN(sve_addvl, 0b01); // Add multiple of vector register size to scalar register
+  INSN(sve_addpl, 0b11); // Add multiple of predicate register size to scalar register
 #undef INSN
 
 // SVE inc/dec register by element count
@@ -3206,15 +3196,15 @@ public:
   INSN(sve_dec, 1);
 #undef INSN
 
-  // SVE predicate count
-  void sve_cntp(Register Xd, SIMD_RegVariant T, PRegister Pg, PRegister Pn) {
+  // SVE increment register by predicate count
+  void sve_incp(const Register rd, SIMD_RegVariant T, PRegister pg) {
     starti;
     assert(T != Q, "invalid size");
-    f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000010, 21, 14);
-    prf(Pg, 10), f(0, 9), prf(Pn, 5), rf(Xd, 0);
+    f(0b00100101, 31, 24), f(T, 23, 22), f(0b1011001000100, 21, 9),
+    prf(pg, 5), rf(rd, 0);
   }
 
-  // SVE dup scalar
+  // SVE broadcast general-purpose register to vector elements (unpredicated)
   void sve_dup(FloatRegister Zd, SIMD_RegVariant T, Register Rn) {
     starti;
     assert(T != Q, "invalid size");
@@ -3222,7 +3212,7 @@ public:
     srf(Rn, 5), rf(Zd, 0);
   }
 
-  // SVE dup imm
+  // SVE broadcast signed immediate to vector elements (unpredicated)
  void sve_dup(FloatRegister Zd, SIMD_RegVariant T, int imm8) {
    starti;
    assert(T != Q, "invalid size");
@@ -3245,6 +3235,289 @@ public:
     f(pattern, 9, 5), f(0b0, 4), prf(pd, 0);
   }
 
+  // SVE copy general-purpose register to vector elements (predicated)
+  void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, Register Rn) {
+    starti;
+    assert(T != Q, "invalid size");
+    f(0b00000101, 31, 24), f(T, 23, 22), f(0b101000101, 21, 13);
+    pgrf(Pg, 10), srf(Rn, 5), rf(Zd, 0);
+  }
+
+  // SVE copy signed integer immediate to vector elements (predicated)
+  void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int imm8, bool isMerge) {
+    starti;
+    assert(T != Q, "invalid size");
+    int sh = 0;
+    if (imm8 <= 127 && imm8 >= -128) {
+      sh = 0;
+    } else if (T != B && imm8 <= 32512 && imm8 >= -32768 && (imm8 & 0xff) == 0) {
+      sh = 1;
+      imm8 = (imm8 >> 8);
+    } else {
+      guarantee(false, "invalid immediate");
+    }
+    int m = isMerge ? 1 : 0;
+    f(0b00000101, 31, 24), f(T, 23, 22), f(0b01, 21, 20);
+    prf(Pg, 16), f(0b0, 15), f(m, 14), f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0);
+  }
+
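The immediate form of sve_cpy above splits its operand into an 8-bit payload plus an optional left shift by 8: values in [-128, 127] encode directly, while multiples of 256 in [-32768, 32512] use the shifted form (32512 = 127 << 8); anything else is rejected. A standalone sketch of that split (hypothetical helper, not part of the patch):

    #include <cassert>

    // Mirror of the sh/imm8 computation in sve_cpy. For non-byte lanes,
    // e.g. 4608 (0x1200) encodes as imm8 = 0x12 with sh = 1.
    static void split_cpy_imm(int imm, bool byte_lanes, int* imm8, int* sh) {
      if (imm >= -128 && imm <= 127) {
        *imm8 = imm;
        *sh = 0;
      } else {
        assert(!byte_lanes && imm >= -32768 && imm <= 32512 && (imm & 0xff) == 0);
        *imm8 = imm >> 8;  // the hardware scales the payload back up by 256
        *sh = 1;
      }
    }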
+  // SVE conditionally select elements from two vectors
+  void sve_sel(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg,
+               FloatRegister Zn, FloatRegister Zm) {
+    starti;
+    assert(T != Q, "invalid size");
+    f(0b00000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16);
+    f(0b11, 15, 14), prf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
+  }
+
+// SVE Integer/Floating-Point Compare - Vectors
+#define INSN(NAME, op1, op2, fp) \
+  void NAME(Condition cond, PRegister Pd, SIMD_RegVariant T, PRegister Pg, \
+            FloatRegister Zn, FloatRegister Zm) { \
+    starti; \
+    if (fp == 0) { \
+      assert(T != Q, "invalid size"); \
+    } else { \
+      assert(T != B && T != Q, "invalid size"); \
+      assert(cond != HI && cond != HS, "invalid condition for fcm"); \
+    } \
+    int cond_op; \
+    switch(cond) { \
+      case EQ: cond_op = (op2 << 2) | 0b10; break; \
+      case NE: cond_op = (op2 << 2) | 0b11; break; \
+      case GE: cond_op = (op2 << 2) | 0b00; break; \
+      case GT: cond_op = (op2 << 2) | 0b01; break; \
+      case HI: cond_op = 0b0001; break; \
+      case HS: cond_op = 0b0000; break; \
+      default: \
+        ShouldNotReachHere(); \
+    } \
+    f(op1, 31, 24), f(T, 23, 22), f(0, 21), rf(Zm, 16), f((cond_op >> 1) & 7, 15, 13); \
+    pgrf(Pg, 10), rf(Zn, 5), f(cond_op & 1, 4), prf(Pd, 0); \
+  }
+
+  INSN(sve_cmp, 0b00100100, 0b10, 0);
+  INSN(sve_fcm, 0b01100101, 0b01, 1);
+#undef INSN
+
+// SVE Integer Compare - Signed Immediate
+void sve_cmp(Condition cond, PRegister Pd, SIMD_RegVariant T,
+             PRegister Pg, FloatRegister Zn, int imm5) {
+  starti;
+  assert(T != Q, "invalid size");
+  guarantee(-16 <= imm5 && imm5 <= 15, "invalid immediate");
+  int cond_op;
+  switch(cond) {
+    case EQ: cond_op = 0b1000; break;
+    case NE: cond_op = 0b1001; break;
+    case GE: cond_op = 0b0000; break;
+    case GT: cond_op = 0b0001; break;
+    case LE: cond_op = 0b0011; break;
+    case LT: cond_op = 0b0010; break;
+    default:
+      ShouldNotReachHere();
+  }
+  f(0b00100101, 31, 24), f(T, 23, 22), f(0b0, 21), sf(imm5, 20, 16),
+  f((cond_op >> 1) & 0x7, 15, 13), pgrf(Pg, 10), rf(Zn, 5);
+  f(cond_op & 0x1, 4), prf(Pd, 0);
+}
+
+// SVE unpack vector elements
+#define INSN(NAME, op) \
+  void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn) { \
+    starti; \
+    assert(T != B && T != Q, "invalid size"); \
+    f(0b00000101, 31, 24), f(T, 23, 22), f(0b1100, 21, 18); \
+    f(op, 17, 16), f(0b001110, 15, 10), rf(Zn, 5), rf(Zd, 0); \
+  }
+
+  INSN(sve_uunpkhi, 0b11); // Unsigned unpack and extend half of vector - high half
+  INSN(sve_uunpklo, 0b10); // Unsigned unpack and extend half of vector - low half
+  INSN(sve_sunpkhi, 0b01); // Signed unpack and extend half of vector - high half
+  INSN(sve_sunpklo, 0b00); // Signed unpack and extend half of vector - low half
+#undef INSN
+
+// SVE permute vector elements
+#define INSN(NAME, op) \
+  void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
+    starti; \
+    assert(T != Q, "invalid size"); \
+    f(0b00000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16); \
+    f(0b01101, 15, 11), f(op, 10), rf(Zn, 5), rf(Zd, 0); \
+  }
+
+  INSN(sve_uzp1, 0b0); // Concatenate even elements from two vectors
+  INSN(sve_uzp2, 0b1); // Concatenate odd elements from two vectors
+#undef INSN
+
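The uzp1/uzp2 pair is what the VECTOR_CAST_*NARROW* rules earlier in this patch build on: uzp1 concatenates the even-numbered elements of its two sources, which, viewed at the narrower lane size, keeps the low half of every wide lane; pairing it with a zeroed second source narrows a vector in place. A scalar model (invented helper, not part of the patch):

    #include <cstdint>
    #include <cstddef>

    // Model of sve_uzp1 at H granularity: even lanes of zn fill the low half
    // of the result, even lanes of zm fill the high half.
    static void uzp1_model(uint16_t* dst, const uint16_t* zn,
                           const uint16_t* zm, size_t lanes) {
      for (size_t i = 0; i < lanes / 2; i++) {
        dst[i] = zn[2 * i];
        dst[i + lanes / 2] = zm[2 * i];
      }
    }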
+// Predicate counted loop (SVE) (32-bit variants are not included)
+#define INSN(NAME, decode) \
+  void NAME(PRegister Pd, SIMD_RegVariant T, Register Rn, Register Rm) { \
+    starti; \
+    assert(T != Q, "invalid register variant"); \
+    f(0b00100101, 31, 24), f(T, 23, 22), f(1, 21), \
+    zrf(Rm, 16), f(0, 15, 13), f(1, 12), f(decode >> 1, 11, 10), \
+    zrf(Rn, 5), f(decode & 1, 4), prf(Pd, 0); \
+  }
+
+  INSN(sve_whilelt, 0b010); // While incrementing signed scalar less than scalar
+  INSN(sve_whilele, 0b011); // While incrementing signed scalar less than or equal to scalar
+  INSN(sve_whilelo, 0b110); // While incrementing unsigned scalar lower than scalar
+  INSN(sve_whilels, 0b111); // While incrementing unsigned scalar lower than or the same as scalar
+#undef INSN
+
+  // SVE predicate reverse
+  void sve_rev(PRegister Pd, SIMD_RegVariant T, PRegister Pn) {
+    starti;
+    assert(T != Q, "invalid size");
+    f(0b00000101, 31, 24), f(T, 23, 22), f(0b1101000100000, 21, 9);
+    prf(Pn, 5), f(0, 4), prf(Pd, 0);
+  }
+
+// SVE partition break condition
+#define INSN(NAME, op) \
+  void NAME(PRegister Pd, PRegister Pg, PRegister Pn, bool isMerge) { \
+    starti; \
+    f(0b00100101, 31, 24), f(op, 23, 22), f(0b01000001, 21, 14); \
+    prf(Pg, 10), f(0b0, 9), prf(Pn, 5), f(isMerge ? 1 : 0, 4), prf(Pd, 0); \
+  }
+
+  INSN(sve_brka, 0b00); // Break after first true condition
+  INSN(sve_brkb, 0b10); // Break before first true condition
+#undef INSN
+
+// Element count and increment scalar (SVE)
+#define INSN(NAME, TYPE) \
+  void NAME(Register Xdn, unsigned imm4 = 1, int pattern = 0b11111) { \
+    starti; \
+    f(0b00000100, 31, 24), f(TYPE, 23, 22), f(0b10, 21, 20); \
+    f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(0, 10), f(pattern, 9, 5), rf(Xdn, 0); \
+  }
+
+  INSN(sve_cntb, B); // Set scalar to multiple of 8-bit predicate constraint element count
+  INSN(sve_cnth, H); // Set scalar to multiple of 16-bit predicate constraint element count
+  INSN(sve_cntw, S); // Set scalar to multiple of 32-bit predicate constraint element count
+  INSN(sve_cntd, D); // Set scalar to multiple of 64-bit predicate constraint element count
+#undef INSN
+
+  // Set scalar to active predicate element count
+  void sve_cntp(Register Xd, SIMD_RegVariant T, PRegister Pg, PRegister Pn) {
+    starti;
+    assert(T != Q, "invalid size");
+    f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000010, 21, 14);
+    prf(Pg, 10), f(0, 9), prf(Pn, 5), rf(Xd, 0);
+  }
+
+  // SVE convert signed integer to floating-point (predicated)
+  void sve_scvtf(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg,
+                 FloatRegister Zn, SIMD_RegVariant T_src) {
+    starti;
+    assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q &&
+           (T_src != H || T_dst == T_src), "invalid register variant");
+    int opc = T_dst;
+    int opc2 = T_src;
+    // In most cases we can treat T_dst, T_src as opc, opc2,
+    // except for the following two combinations.
+ // +-----+------+---+------------------------------------+ + // | opc | opc2 | U | Instruction Details | + // +-----+------+---+------------------------------------+ + // | 11 | 00 | 0 | SCVTF - 32-bit to double-precision | + // | 11 | 10 | 0 | SCVTF - 64-bit to single-precision | + // +-----+------+---+------------------------------------+ + if (T_src == S && T_dst == D) { + opc = 0b11; + opc2 = 0b00; + } else if (T_src == D && T_dst == S) { + opc = 0b11; + opc2 = 0b10; + } + f(0b01100101, 31, 24), f(opc, 23, 22), f(0b010, 21, 19); + f(opc2, 18, 17), f(0b0101, 16, 13); + pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0); + } + + // SVE floating-point convert to signed integer, rounding toward zero (predicated) + void sve_fcvtzs(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg, + FloatRegister Zn, SIMD_RegVariant T_src) { + starti; + assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q && + (T_dst != H || T_src == H), "invalid register variant"); + int opc = T_src; + int opc2 = T_dst; + // In most cases we can treat T_src, T_dst as opc, opc2, + // except for the following two combinations. + // +-----+------+---+-------------------------------------+ + // | opc | opc2 | U | Instruction Details | + // +-----+------+---+-------------------------------------+ + // | 11 | 10 | 0 | FCVTZS - single-precision to 64-bit | + // | 11 | 00 | 0 | FCVTZS - double-precision to 32-bit | + // +-----+------+---+-------------------------------------+ + if (T_src == S && T_dst == D) { + opc = 0b11; + opc2 = 0b10; + } else if (T_src == D && T_dst == S) { + opc = 0b11; + opc2 = 0b00; + } + f(0b01100101, 31, 24), f(opc, 23, 22), f(0b011, 21, 19); + f(opc2, 18, 17), f(0b0101, 16, 13); + pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0); + } + + // SVE floating-point convert precision (predicated) + void sve_fcvt(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg, + FloatRegister Zn, SIMD_RegVariant T_src) { + starti; + assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q && + T_src != T_dst, "invalid register variant"); + guarantee(T_src != H && T_dst != H, "half-precision unsupported"); + f(0b01100101, 31, 24), f(0b11, 23, 22), f(0b0010, 21, 18); + f(T_dst, 17, 16), f(0b101, 15, 13); + pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0); + } + +// SVE extract element to general-purpose register +#define INSN(NAME, before) \ + void NAME(Register Rd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn) { \ + starti; \ + f(0b00000101, 31, 24), f(T, 23, 22), f(0b10000, 21, 17); \ + f(before, 16), f(0b101, 15, 13); \ + pgrf(Pg, 10), rf(Zn, 5), rf(Rd, 0); \ + } + + INSN(sve_lasta, 0b0); + INSN(sve_lastb, 0b1); +#undef INSN + +// SVE extract element to SIMD&FP scalar register +#define INSN(NAME, before) \ + void NAME(FloatRegister Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn) { \ + starti; \ + f(0b00000101, 31, 24), f(T, 23, 22), f(0b10001, 21, 17); \ + f(before, 16), f(0b100, 15, 13); \ + pgrf(Pg, 10), rf(Zn, 5), rf(Vd, 0); \ + } + + INSN(sve_lasta, 0b0); + INSN(sve_lastb, 0b1); +#undef INSN + + // SVE create index starting from and incremented by immediate + void sve_index(FloatRegister Zd, SIMD_RegVariant T, int imm1, int imm2) { + starti; + f(0b00000100, 31, 24), f(T, 23, 22), f(0b1, 21); + sf(imm2, 20, 16), f(0b010000, 15, 10); + sf(imm1, 9, 5), rf(Zd, 0); + } + + // SVE programmable table lookup/permute using vector of element indices + void sve_tbl(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { + starti; + assert(T != Q, "invalid size"); + f(0b00000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16); 
+ f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0); + } + Assembler(CodeBuffer* code) : AbstractAssembler(code) { } @@ -3265,9 +3538,19 @@ inline Assembler::Membar_mask_bits operator|(Assembler::Membar_mask_bits a, } Instruction_aarch64::~Instruction_aarch64() { - assem->emit(); + assem->emit_int32(insn); + assert_cond(get_bits() == 0xffffffff); } +#undef f +#undef sf +#undef rf +#undef srf +#undef zrf +#undef prf +#undef pgrf +#undef fixed + #undef starti // Invert a condition @@ -3275,8 +3558,6 @@ inline const Assembler::Condition operator~(const Assembler::Condition cond) { return Assembler::Condition(int(cond) ^ 1); } -class BiasedLockingCounters; - extern "C" void das(uint64_t start, int len); #endif // CPU_AARCH64_ASSEMBLER_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/atomic_aarch64.hpp b/src/hotspot/cpu/aarch64/atomic_aarch64.hpp index ac12ba9e23d7d1e4657b671d782f42457fd2fde3..f1e1f04c2442261ec991d97a591cec4a1579c113 100644 --- a/src/hotspot/cpu/aarch64/atomic_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/atomic_aarch64.hpp @@ -37,6 +37,8 @@ typedef uint64_t (*aarch64_atomic_stub_t)(volatile void *ptr, uint64_t arg1, uin // Pointers to stubs extern aarch64_atomic_stub_t aarch64_atomic_fetch_add_4_impl; extern aarch64_atomic_stub_t aarch64_atomic_fetch_add_8_impl; +extern aarch64_atomic_stub_t aarch64_atomic_fetch_add_4_relaxed_impl; +extern aarch64_atomic_stub_t aarch64_atomic_fetch_add_8_relaxed_impl; extern aarch64_atomic_stub_t aarch64_atomic_xchg_4_impl; extern aarch64_atomic_stub_t aarch64_atomic_xchg_8_impl; extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_1_impl; @@ -45,5 +47,9 @@ extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_8_impl; extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_1_relaxed_impl; extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_4_relaxed_impl; extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_8_relaxed_impl; +extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_4_release_impl; +extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_8_release_impl; +extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_4_seq_cst_impl; +extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_8_seq_cst_impl; #endif // CPU_AARCH64_ATOMIC_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp index 63f03db2dc5db11807febe684e4b90d518ac90c2..826d11b2aaefec630d9eb477cb9e1a2a0c8ee056 100644 --- a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp @@ -361,8 +361,10 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) { ce->add_call_info_here(info()); #ifndef PRODUCT - __ lea(rscratch2, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); - __ incrementw(Address(rscratch2)); + if (PrintC1Statistics) { + __ lea(rscratch2, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); + __ incrementw(Address(rscratch2)); + } #endif __ b(_continuation); diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index e8d0c537d6de73aa9a0bc772bb3116a2548fd5e2..a3a3778783eecc743b808038da2efe5c7e5b6f24 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -784,7 +784,7 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool po } -void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { +void 
LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) { LIR_Address* to_addr = dest->as_address_ptr(); PatchingStub* patch = NULL; Register compressed_src = rscratch1; @@ -941,7 +941,7 @@ void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { } -void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { +void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide) { LIR_Address* addr = src->as_address_ptr(); LIR_Address* from_addr = src->as_address_ptr(); @@ -2577,13 +2577,9 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { if (!UseFastLocking) { __ b(*op->stub()->entry()); } else if (op->code() == lir_lock) { - Register scratch = noreg; - if (UseBiasedLocking) { - scratch = op->scratch_opr()->as_register(); - } assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); // add debug info for NullPointerException only if one is possible - int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); + int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry()); if (op->info() != NULL) { add_debug_info_for_null_check(null_check_offset, op->info()); } @@ -2837,7 +2833,7 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { } #endif // first time here. Set profile type. - __ ldr(tmp, mdo_addr); + __ str(tmp, mdo_addr); } else { assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); @@ -2911,7 +2907,7 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* arg void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { if (dest->is_address() || src->is_address()) { move_op(src, dest, type, lir_patch_none, info, - /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false); + /*pop_fpu_stack*/false, /*wide*/false); } else { ShouldNotReachHere(); } diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp index df1b877ee3f3c9c20a50a062a2e0d129a40c4e10..700cda5d4662a9be312bb0af8257601a792600f1 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp @@ -331,11 +331,6 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { // "lock" stores the address of the monitor stack slot, so this is not an oop LIR_Opr lock = new_register(T_INT); - // Need a scratch register for biased locking - LIR_Opr scratch = LIR_OprFact::illegalOpr; - if (UseBiasedLocking) { - scratch = new_register(T_INT); - } CodeEmitInfo* info_for_exception = NULL; if (x->needs_null_check()) { @@ -344,7 +339,7 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { // this CodeEmitInfo must not have the xhandlers because here the // object is already locked (xhandlers expect object to be unlocked) CodeEmitInfo* info = state_for(x, x->state(), true); - monitor_enter(obj.result(), lock, syncTempOpr(), scratch, + monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no(), info_for_exception, info); } @@ -1132,8 +1127,8 @@ void LIRGenerator::do_NewInstance(NewInstance* x) { CodeEmitInfo* info = state_for(x, x->state()); LIR_Opr reg = result_register_for(x->type()); 
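+ // The temp registers below are retargeted to r10/r11 so that the allocation + // path lines up with initialize_body(), which now asserts t1 == r10 && t2 == r11 + // in order to call zero_words() directly (see c1_MacroAssembler_aarch64.cpp).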
new_instance(reg, x->klass(), x->is_unresolved(), - FrameMap::r2_oop_opr, - FrameMap::r5_oop_opr, + FrameMap::r10_oop_opr, + FrameMap::r11_oop_opr, FrameMap::r4_oop_opr, LIR_OprFact::illegalOpr, FrameMap::r3_metadata_opr, info); @@ -1148,8 +1143,8 @@ void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { length.load_item_force(FrameMap::r19_opr); LIR_Opr reg = result_register_for(x->type()); - LIR_Opr tmp1 = FrameMap::r2_oop_opr; - LIR_Opr tmp2 = FrameMap::r4_oop_opr; + LIR_Opr tmp1 = FrameMap::r10_oop_opr; + LIR_Opr tmp2 = FrameMap::r11_oop_opr; LIR_Opr tmp3 = FrameMap::r5_oop_opr; LIR_Opr tmp4 = reg; LIR_Opr klass_reg = FrameMap::r3_metadata_opr; @@ -1177,8 +1172,8 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { CodeEmitInfo* info = state_for(x, x->state()); LIR_Opr reg = result_register_for(x->type()); - LIR_Opr tmp1 = FrameMap::r2_oop_opr; - LIR_Opr tmp2 = FrameMap::r4_oop_opr; + LIR_Opr tmp1 = FrameMap::r10_oop_opr; + LIR_Opr tmp2 = FrameMap::r11_oop_opr; LIR_Opr tmp3 = FrameMap::r5_oop_opr; LIR_Opr tmp4 = reg; LIR_Opr klass_reg = FrameMap::r3_metadata_opr; diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp index 6ce4af0372d6cf028604403f260fcf0912fbe7f6..661fad89e47edf1289274d09d6929915b60d7b53 100644 --- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,7 +33,6 @@ #include "oops/arrayOop.hpp" #include "oops/markWord.hpp" #include "runtime/basicLock.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/os.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -61,7 +60,7 @@ void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, } } -int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { +int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { const int aligned_mask = BytesPerWord -1; const int hdr_offset = oopDesc::mark_offset_in_bytes(); assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); @@ -82,11 +81,6 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr br(Assembler::NE, slow_case); } - if (UseBiasedLocking) { - assert(scratch != noreg, "should have scratch register at this point"); - biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); - } - // Load object header ldr(hdr, Address(obj, hdr_offset)); // and mark it as unlocked @@ -122,10 +116,6 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr cbnz(hdr, slow_case); // done bind(done); - if (PrintBiasedLockingStatistics) { - lea(rscratch2, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); - addmw(Address(rscratch2, 0), 1, rscratch1); - } return null_check_offset; } @@ -136,21 +126,13 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); Label done; - if (UseBiasedLocking) { - // load object - ldr(obj, 
Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); - biased_locking_exit(obj, hdr, done); - } - // load displaced header ldr(hdr, Address(disp_hdr, 0)); // if the loaded hdr is NULL we had recursive locking // if we had recursive locking, we are done cbz(hdr, done); - if (!UseBiasedLocking) { - // load object - ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); - } + // load object + ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); verify_oop(obj); // test if object header is pointing to the displaced header, and if so, restore // the displaced header in the object - if the object header is not pointing to @@ -179,13 +161,8 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) { assert_different_registers(obj, klass, len); - if (UseBiasedLocking && !len->is_valid()) { - assert_different_registers(obj, klass, len, t1, t2); - ldr(t1, Address(klass, Klass::prototype_header_offset())); - } else { - // This assumes that all prototype bits fit in an int32_t - mov(t1, (int32_t)(intptr_t)markWord::prototype().value()); - } + // This assumes that all prototype bits fit in an int32_t + mov(t1, (int32_t)(intptr_t)markWord::prototype().value()); str(t1, Address(obj, oopDesc::mark_offset_in_bytes())); if (UseCompressedClassPointers) { // Take care not to kill klass @@ -203,20 +180,24 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register } // preserves obj, destroys len_in_bytes -void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) { +// +// Scratch registers: t1 = r10, t2 = r11 +// +void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2) { assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); + assert(t1 == r10 && t2 == r11, "must be"); + Label done; // len_in_bytes is positive and ptr sized subs(len_in_bytes, len_in_bytes, hdr_size_in_bytes); br(Assembler::EQ, done); - // Preserve obj - if (hdr_size_in_bytes) - add(obj, obj, hdr_size_in_bytes); - zero_memory(obj, len_in_bytes, t1); - if (hdr_size_in_bytes) - sub(obj, obj, hdr_size_in_bytes); + // zero_words() takes ptr in r10 and count in words in r11 + mov(rscratch1, len_in_bytes); + lea(t1, Address(obj, hdr_size_in_bytes)); + lsr(t2, rscratch1, LogBytesPerWord); + zero_words(t1, t2); bind(done); } @@ -231,6 +212,7 @@ void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB); } +// Scratch registers: t1 = r10, t2 = r11 void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) { assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "con_size_in_bytes is not multiple of alignment"); @@ -241,45 +223,13 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { // clear rest of allocated space const Register index = t2; - const int threshold = 16 * BytesPerWord; // approximate break even point for code size (see comments below) if (var_size_in_bytes != noreg) { mov(index, var_size_in_bytes); - initialize_body(obj, index, hdr_size_in_bytes, t1); - } else if (con_size_in_bytes <= threshold) { - // use 
explicit null stores - int i = hdr_size_in_bytes; - if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) { - str(zr, Address(obj, i)); - i += BytesPerWord; - } - for (; i < con_size_in_bytes; i += 2 * BytesPerWord) - stp(zr, zr, Address(obj, i)); + initialize_body(obj, index, hdr_size_in_bytes, t1, t2); } else if (con_size_in_bytes > hdr_size_in_bytes) { - block_comment("zero memory"); - // use loop to null out the fields - - int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord; - mov(index, words / 8); - - const int unroll = 8; // Number of str(zr) instructions we'll unroll - int remainder = words % unroll; - lea(rscratch1, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord)); - - Label entry_point, loop; - b(entry_point); - - bind(loop); - sub(index, index, 1); - for (int i = -unroll; i < 0; i++) { - if (-i == remainder) - bind(entry_point); - str(zr, Address(rscratch1, i * wordSize)); - } - if (remainder == 0) - bind(entry_point); - add(rscratch1, rscratch1, unroll * wordSize); - cbnz(index, loop); - + con_size_in_bytes -= hdr_size_in_bytes; + lea(t1, Address(obj, hdr_size_in_bytes)); + zero_words(t1, con_size_in_bytes / BytesPerWord); } } @@ -314,8 +264,7 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, initialize_header(obj, klass, len, t1, t2); // clear rest of allocated space - const Register len_zero = len; - initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero); + initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2); membar(StoreStore); diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp index 67330bcda1b7f230ffe7fb69359865127bd55373..92b293ca6be3e33b5932c125e856a8d58b21a15f 100644 --- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -48,7 +48,7 @@ using MacroAssembler::null_check; ); void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); - void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1); + void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2); void float_cmp(bool is_float, int unordered_result, FloatRegister f0, FloatRegister f1, @@ -58,9 +58,8 @@ using MacroAssembler::null_check; // hdr : must be r0, contents destroyed // obj : must point to the object to lock, contents preserved // disp_hdr: must point to the displaced header location, contents preserved - // scratch : scratch register, contents destroyed // returns code offset at which to add null check debug information - int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + int lock_object (Register swap, Register obj, Register disp_hdr, Label& slow_case); // unlocking // hdr : contents destroyed diff --git a/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp index e68942f5205d880adcc831f6f7c6ebd06dd3d6b5..e02989c1e9f8d03eadbbe3a0dd41333993c063fc 100644 --- a/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -656,9 +656,9 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { Label slow_path; - Register obj_size = r2; - Register t1 = r19; - Register t2 = r4; + Register obj_size = r19; + Register t1 = r10; + Register t2 = r11; assert_different_registers(klass, obj, obj_size, t1, t2); __ stp(r19, zr, Address(__ pre(sp, -2 * wordSize))); @@ -769,9 +769,9 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { // allocations. // Otherwise, just go to the slow path. if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { - Register arr_size = r4; - Register t1 = r2; - Register t2 = r5; + Register arr_size = r5; + Register t1 = r10; + Register t2 = r11; Label slow_path; assert_different_registers(length, klass, obj, arr_size, t1, t2); @@ -801,7 +801,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { __ andr(t1, t1, Klass::_lh_header_size_mask); __ sub(arr_size, arr_size, t1); // body length __ add(t1, t1, obj); // body start - __ initialize_body(t1, arr_size, 0, t2); + __ initialize_body(t1, arr_size, 0, t1, t2); __ membar(Assembler::StoreStore); __ verify_oop(obj); diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index fab8ff669b1b8eabec00afd05e99055c47c05f80..2713576bf4ce8310b901f1d27bd9782b8c2b59de 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,6 +27,7 @@ #include "asm/assembler.inline.hpp" #include "opto/c2_MacroAssembler.hpp" #include "opto/intrinsicnode.hpp" +#include "opto/subnode.hpp" #include "runtime/stubRoutines.hpp" #ifdef PRODUCT @@ -539,6 +540,75 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, BIND(DONE); } +void C2_MacroAssembler::string_indexof_char_sve(Register str1, Register cnt1, + Register ch, Register result, + FloatRegister ztmp1, + FloatRegister ztmp2, + PRegister tmp_pg, + PRegister tmp_pdn, bool isL) +{ + // Note that `tmp_pdn` should *NOT* be used as a governing predicate register. + assert(tmp_pg->is_governing(), + "this register has to be a governing predicate register"); + + Label LOOP, MATCH, DONE, NOMATCH; + Register vec_len = rscratch1; + Register idx = rscratch2; + + SIMD_RegVariant T = isL ? B : H; + + cbz(cnt1, NOMATCH); + + // Replicate the search char into every lane of the vector. + sve_dup(ztmp2, T, ch); + if (isL) { + sve_cntb(vec_len); + } else { + sve_cnth(vec_len); + } + mov(idx, 0); + + // Generate a predicate to control the reading of the input string. + sve_whilelt(tmp_pg, T, idx, cnt1); + + BIND(LOOP); + // Read a vector of 8- or 16-bit data depending on the string type. Note + // that inactive elements indicated by the predicate register won't cause + // a data read from memory to the destination vector. + if (isL) { + sve_ld1b(ztmp1, T, tmp_pg, Address(str1, idx)); + } else { + sve_ld1h(ztmp1, T, tmp_pg, Address(str1, idx, Address::lsl(1))); + } + add(idx, idx, vec_len); + + // Perform the comparison. An element of the destination predicate is set + // to active if the particular char is matched. + sve_cmp(Assembler::EQ, tmp_pdn, T, tmp_pg, ztmp1, ztmp2); + + // Branch if the particular char is found. + br(NE, MATCH); + + sve_whilelt(tmp_pg, T, idx, cnt1); + + // Loop back if the particular char is not found. + br(MI, LOOP); + + BIND(NOMATCH); + mov(result, -1); + b(DONE); + + BIND(MATCH); + // Undo the index increment. + sub(idx, idx, vec_len); + + // Compute the position of the first match from the predicate.
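+ // Illustrative arithmetic (not in the source): BRKA leaves active only the + // lanes up to and including the first match in tmp_pdn, and INCP adds the + // number of active lanes to 'result'. With a match in lane 2, three lanes + // stay active, so result = (idx - 1) + 3 = idx + 2, the match's element index.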
+ sve_brka(tmp_pdn, tmp_pg, tmp_pdn, false /* isMerge */); + add(result, idx, -1); + sve_incp(result, T, tmp_pdn); + BIND(DONE); +} + void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, Register tmp1, Register tmp2, Register tmp3) @@ -832,3 +902,98 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2, BLOCK_COMMENT("} string_compare"); } + +void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1, + FloatRegister src2, int cond, bool isQ) { + SIMD_Arrangement size = esize2arrangement((unsigned)type2aelembytes(bt), isQ); + if (bt == T_FLOAT || bt == T_DOUBLE) { + switch (cond) { + case BoolTest::eq: fcmeq(dst, size, src1, src2); break; + case BoolTest::ne: { + fcmeq(dst, size, src1, src2); + notr(dst, T16B, dst); + break; + } + case BoolTest::ge: fcmge(dst, size, src1, src2); break; + case BoolTest::gt: fcmgt(dst, size, src1, src2); break; + case BoolTest::le: fcmge(dst, size, src2, src1); break; + case BoolTest::lt: fcmgt(dst, size, src2, src1); break; + default: + assert(false, "unsupported"); + ShouldNotReachHere(); + } + } else { + switch (cond) { + case BoolTest::eq: cmeq(dst, size, src1, src2); break; + case BoolTest::ne: { + cmeq(dst, size, src1, src2); + notr(dst, T16B, dst); + break; + } + case BoolTest::ge: cmge(dst, size, src1, src2); break; + case BoolTest::gt: cmgt(dst, size, src1, src2); break; + case BoolTest::le: cmge(dst, size, src2, src1); break; + case BoolTest::lt: cmgt(dst, size, src2, src1); break; + case BoolTest::uge: cmhs(dst, size, src1, src2); break; + case BoolTest::ugt: cmhi(dst, size, src1, src2); break; + case BoolTest::ult: cmhi(dst, size, src2, src1); break; + case BoolTest::ule: cmhs(dst, size, src2, src1); break; + default: + assert(false, "unsupported"); + ShouldNotReachHere(); + } + } +} + +void C2_MacroAssembler::sve_compare(PRegister pd, BasicType bt, PRegister pg, + FloatRegister zn, FloatRegister zm, int cond) { + assert(pg->is_governing(), "This register has to be a governing predicate register"); + FloatRegister z1 = zn, z2 = zm; + // Convert the original BoolTest condition to Assembler::condition. 
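+ // Only the EQ/NE/GE/GT forms are emitted below; 'lt' and 'le' are handled + // by swapping the two source vectors, since a < b iff b > a and a <= b iff + // b >= a.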
+ Condition condition; + switch (cond) { + case BoolTest::eq: condition = Assembler::EQ; break; + case BoolTest::ne: condition = Assembler::NE; break; + case BoolTest::le: z1 = zm; z2 = zn; condition = Assembler::GE; break; + case BoolTest::ge: condition = Assembler::GE; break; + case BoolTest::lt: z1 = zm; z2 = zn; condition = Assembler::GT; break; + case BoolTest::gt: condition = Assembler::GT; break; + default: + assert(false, "unsupported compare condition"); + ShouldNotReachHere(); + } + + SIMD_RegVariant size = elemType_to_regVariant(bt); + if (bt == T_FLOAT || bt == T_DOUBLE) { + sve_fcm(condition, pd, size, pg, z1, z2); + } else { + assert(is_integral_type(bt), "unsupported element type"); + sve_cmp(condition, pd, size, pg, z1, z2); + } +} + +void C2_MacroAssembler::sve_vmask_reduction(int opc, Register dst, SIMD_RegVariant size, FloatRegister src, + PRegister pg, PRegister pn, int length) { + assert(pg->is_governing(), "This register has to be a governing predicate register"); + // The condition flags will be clobbered by this function + sve_cmp(Assembler::NE, pn, size, pg, src, 0); + switch (opc) { + case Op_VectorMaskTrueCount: + sve_cntp(dst, size, ptrue, pn); + break; + case Op_VectorMaskFirstTrue: + sve_brkb(pn, pg, pn, false); + sve_cntp(dst, size, ptrue, pn); + break; + case Op_VectorMaskLastTrue: + sve_rev(pn, size, pn); + sve_brkb(pn, ptrue, pn, false); + sve_cntp(dst, size, ptrue, pn); + movw(rscratch1, length - 1); + subw(dst, rscratch1, dst); + break; + default: + assert(false, "unsupported"); + ShouldNotReachHere(); + } +} diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp index b2f6226bf9e59e6f9a581f085189d3a2771eed59..fb0fbabea9ed70dcf5cfc538ae6b15cc43bac85a 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -46,7 +46,42 @@ Register tmp1, Register tmp2, Register tmp3); void stringL_indexof_char(Register str1, Register cnt1, - Register ch, Register result, - Register tmp1, Register tmp2, Register tmp3); + Register ch, Register result, + Register tmp1, Register tmp2, Register tmp3); + + void string_indexof_char_sve(Register str1, Register cnt1, + Register ch, Register result, + FloatRegister ztmp1, FloatRegister ztmp2, + PRegister pgtmp, PRegister ptmp, bool isL); + + // SIMD&FP comparison + void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1, + FloatRegister src2, int cond, bool isQ); + + void sve_compare(PRegister pd, BasicType bt, PRegister pg, + FloatRegister zn, FloatRegister zm, int cond); + + void sve_vmask_reduction(int opc, Register dst, SIMD_RegVariant size, FloatRegister src, + PRegister pg, PRegister pn, int length = MaxVectorSize); + + // Generate a predicate via whilelo, comparing ZR with an unsigned + // immediate. rscratch1 will be clobbered. + inline void sve_whilelo_zr_imm(PRegister pd, SIMD_RegVariant size, uint imm) { + assert(UseSVE > 0, "not supported"); + mov(rscratch1, imm); + sve_whilelo(pd, size, zr, rscratch1); + } + + // Extract a scalar element from an SVE vector at position 'idx'. + // rscratch1 will be clobbered. + // T could be FloatRegister or Register.
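+ // A hypothetical use (register names illustrative): sve_extract(v0, S, p1, v16, 3) + // activates lanes 0..3 via whilele, and lastb then copies lane 3 of v16, viewed + // as an SVE vector, into v0.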
+ template <class T> + inline void sve_extract(T dst, SIMD_RegVariant size, PRegister pg, FloatRegister src, int idx) { + assert(UseSVE > 0, "not supported"); + assert(pg->is_governing(), "This register has to be a governing predicate register"); + mov(rscratch1, idx); + sve_whilele(pg, size, zr, rscratch1); + sve_lastb(dst, size, pg, src); + } #endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp index f15b6faa79d06a8d1cac9b17ac3dae3d8899b15e..34e6e688abbca070d5b8807c2959a17b8753aab0 100644 --- a/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp @@ -44,10 +44,8 @@ define_pd_global(intx, CompileThreshold, 10000); define_pd_global(intx, OnStackReplacePercentage, 140); define_pd_global(intx, ConditionalMoveLimit, 3); -define_pd_global(intx, FLOATPRESSURE, 32); define_pd_global(intx, FreqInlineSize, 325); define_pd_global(intx, MinJumpTableSize, 10); -define_pd_global(intx, INTPRESSURE, 24); define_pd_global(intx, InteriorEntryAlignment, 16); define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); define_pd_global(intx, LoopUnrollLimit, 60); diff --git a/src/hotspot/cpu/aarch64/foreign_globals_aarch64.cpp b/src/hotspot/cpu/aarch64/foreign_globals_aarch64.cpp index 6531eb03edc47ba5bbb0187eea7385644bf3fd4d..d08afc79a52451ed082ce5dafb7e2c24e8c4ec20 100644 --- a/src/hotspot/cpu/aarch64/foreign_globals_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/foreign_globals_aarch64.cpp @@ -45,17 +45,18 @@ bool ABIDescriptor::is_volatile_reg(FloatRegister reg) const { const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { oop abi_oop = JNIHandles::resolve_non_null(jabi); ABIDescriptor abi; + const Register (*to_Register)(int) = as_Register; objArrayOop inputStorage = cast<objArrayOop>(abi_oop->obj_field(ABI.inputStorage_offset)); - loadArray(inputStorage, INTEGER_TYPE, abi._integer_argument_registers, as_Register); + loadArray(inputStorage, INTEGER_TYPE, abi._integer_argument_registers, to_Register); loadArray(inputStorage, VECTOR_TYPE, abi._vector_argument_registers, as_FloatRegister); objArrayOop outputStorage = cast<objArrayOop>(abi_oop->obj_field(ABI.outputStorage_offset)); - loadArray(outputStorage, INTEGER_TYPE, abi._integer_return_registers, as_Register); + loadArray(outputStorage, INTEGER_TYPE, abi._integer_return_registers, to_Register); loadArray(outputStorage, VECTOR_TYPE, abi._vector_return_registers, as_FloatRegister); objArrayOop volatileStorage = cast<objArrayOop>(abi_oop->obj_field(ABI.volatileStorage_offset)); - loadArray(volatileStorage, INTEGER_TYPE, abi._integer_additional_volatile_registers, as_Register); + loadArray(volatileStorage, INTEGER_TYPE, abi._integer_additional_volatile_registers, to_Register); loadArray(volatileStorage, VECTOR_TYPE, abi._vector_additional_volatile_registers, as_FloatRegister); abi._stack_alignment_bytes = abi_oop->int_field(ABI.stackAlignment_offset); diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.cpp b/src/hotspot/cpu/aarch64/frame_aarch64.cpp index db9fe41569c6242ae04a89a6bee3091f560a7fee..35cc1ee1f8402e4c52ed8dfe3af2a9686aa9db5e 100644 --- a/src/hotspot/cpu/aarch64/frame_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/frame_aarch64.cpp @@ -362,11 +362,16 @@ frame frame::sender_for_entry_frame(RegisterMap* map) const { return fr; } -JavaFrameAnchor* OptimizedEntryBlob::jfa_for_frame(const frame& frame) const { +OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const {
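+ // Stubbed out on AArch64: reaching this accessor is a fatal error + // (ShouldNotCallThis), as is sender_for_optimized_entry_frame below.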
ShouldNotCallThis(); return nullptr; } +bool frame::optimized_entry_frame_is_first() const { + ShouldNotCallThis(); + return false; +} + frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { ShouldNotCallThis(); return {}; @@ -809,7 +814,6 @@ frame::frame(void* sp, void* fp, void* pc) { init((intptr_t*)sp, (intptr_t*)fp, (address)pc); } -void frame::pd_ps() {} #endif void JavaFrameAnchor::make_walkable(JavaThread* thread) { diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/frame_aarch64.inline.hpp index 61702f9ad3978515da69e8ce38d4d31cecd3dfe7..5be02aa57e7a3c8c56c21c19fe79c46060b08f92 100644 --- a/src/hotspot/cpu/aarch64/frame_aarch64.inline.hpp +++ b/src/hotspot/cpu/aarch64/frame_aarch64.inline.hpp @@ -236,14 +236,20 @@ inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { // Compiled frames inline oop frame::saved_oop_result(RegisterMap* map) const { + PRAGMA_DIAG_PUSH + PRAGMA_NONNULL_IGNORED oop* result_adr = (oop *)map->location(r0->as_VMReg()); + PRAGMA_DIAG_POP guarantee(result_adr != NULL, "bad register save location"); return (*result_adr); } inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + PRAGMA_DIAG_PUSH + PRAGMA_NONNULL_IGNORED oop* result_adr = (oop *)map->location(r0->as_VMReg()); + PRAGMA_DIAG_POP guarantee(result_adr != NULL, "bad register save location"); *result_adr = obj; diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp index f72e8b3f3636af0c7a3fa5b281a14c036ea27c1d..77a6dc8b7396ff06d5524435c7a9c3c0b217f5e9 100644 --- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp @@ -343,7 +343,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier Register pre_val_reg = stub->pre_val()->as_register(); if (stub->do_load()) { - ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/); } __ cbz(pre_val_reg, *stub->continuation()); ce->store_parameter(stub->pre_val()->as_register(), 0); diff --git a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp index bc3fc6355d0183b4cad5ca7ff506c7afa1d6ffb1..a4a2b14203976333a707fa734ba02aedf4dca30a 100644 --- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp @@ -276,7 +276,7 @@ void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { __ load_method_holder_cld(rscratch1, rmethod); // Is it a strong CLD? - __ ldr(rscratch2, Address(rscratch1, ClassLoaderData::keep_alive_offset())); + __ ldrw(rscratch2, Address(rscratch1, ClassLoaderData::keep_alive_offset())); __ cbnz(rscratch2, method_live); // Is it a weak but alive CLD? 
diff --git a/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp index 8598fb7e7c124cf0d38414c107089a5e4feec0d5..4ce9869a157d23410a6cb951a0fbe2b2cf343008 100644 --- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp @@ -116,7 +116,7 @@ void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", nm->method()->name_and_sig_as_C_string(), nm, *(address *) return_address_ptr, nm->is_osr_method(), thread, - thread->get_thread_name(), frame.sp(), nm->verified_entry_point()); + thread->name(), frame.sp(), nm->verified_entry_point()); } new_frame->sp = frame.sp(); diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp index 77e9161aeea9a0d2a7451299f6251539e117af90..53de1d921fca33dc4213bbdfb1574c48d9a901eb 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp @@ -613,7 +613,7 @@ void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, Shen Register pre_val_reg = stub->pre_val()->as_register(); if (stub->do_load()) { - ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/); } __ cbz(pre_val_reg, *stub->continuation()); ce->store_parameter(stub->pre_val()->as_register(), 0); diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp index a3159db967a7bdb55c73ceb3c29f4af11c9e78af..aa6c3ad95f53409028067fd3d90a1e5180883636 100644 --- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp @@ -39,7 +39,6 @@ define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. 
define_pd_global(intx, CodeEntryAlignment, 64); define_pd_global(intx, OptoLoopAlignment, 16); -define_pd_global(intx, InlineFrequencyCount, 100); #define DEFAULT_STACK_YELLOW_PAGES (2) #define DEFAULT_STACK_RED_PAGES (1) diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp index bbecb7d3581b835941250e8b54090a534492222d..47449cca6f75320ec9a39873b93cec7348ce3d23 100644 --- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp @@ -39,7 +39,6 @@ #include "prims/jvmtiExport.hpp" #include "prims/jvmtiThreadState.hpp" #include "runtime/basicLock.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/frame.inline.hpp" #include "runtime/safepointMechanism.hpp" #include "runtime/sharedRuntime.hpp" @@ -754,10 +753,6 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) br(Assembler::NE, slow_case); } - if (UseBiasedLocking) { - biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); - } - // Load (object->mark() | 1) into swap_reg ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes())); orr(swap_reg, rscratch1, 1); @@ -769,17 +764,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) "displaced header must be first word in BasicObjectLock"); Label fail; - if (PrintBiasedLockingStatistics) { - Label fast; - cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail); - bind(fast); - atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), - rscratch2, rscratch1, tmp); - b(done); - bind(fail); - } else { - cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL); - } + cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL); // Fast check for recursive lock. 
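// (A stack lock is recursive when the current thread already owns the object: // the CAS above fails and the test below checks, roughly, that the mark word // points into the current thread's stack; the zero test result is then saved // as the displaced header for the recursive case.)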
// @@ -816,12 +801,6 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) // Save the test result, for recursive case, the result is zero str(swap_reg, Address(lock_reg, mark_offset)); - - if (PrintBiasedLockingStatistics) { - br(Assembler::NE, slow_case); - atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), - rscratch2, rscratch1, tmp); - } br(Assembler::EQ, done); bind(slow_case); @@ -872,10 +851,6 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) // Free entry str(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); - if (UseBiasedLocking) { - biased_locking_exit(obj_reg, header_reg, done); - } - // Load the old header from BasicLock structure ldr(header_reg, Address(swap_reg, BasicLock::displaced_header_offset_in_bytes())); diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 2ea457ed18422424f405263077cf70544367ef90..5c9b1fc327d09af0d73b39e40e8b54bb106a1b90 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -37,6 +37,7 @@ #include "gc/shared/tlab_globals.hpp" #include "interpreter/bytecodeHistogram.hpp" #include "interpreter/interpreter.hpp" +#include "compiler/compileTask.hpp" #include "compiler/disassembler.hpp" #include "memory/resourceArea.hpp" #include "memory/universe.hpp" @@ -44,7 +45,6 @@ #include "oops/accessDecorators.hpp" #include "oops/compressedOops.inline.hpp" #include "oops/klass.inline.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/icache.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/jniHandles.inline.hpp" @@ -442,178 +442,6 @@ void MacroAssembler::reserved_stack_check() { bind(no_reserved_zone_enabling); } -void MacroAssembler::biased_locking_enter(Register lock_reg, - Register obj_reg, - Register swap_reg, - Register tmp_reg, - bool swap_reg_contains_mark, - Label& done, - Label* slow_case, - BiasedLockingCounters* counters) { - assert(UseBiasedLocking, "why call this otherwise?"); - assert_different_registers(lock_reg, obj_reg, swap_reg); - - if (PrintBiasedLockingStatistics && counters == NULL) - counters = BiasedLocking::counters(); - - assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, rscratch1, rscratch2, noreg); - assert(markWord::age_shift == markWord::lock_bits + markWord::biased_lock_bits, "biased locking makes assumptions about bit layout"); - Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); - Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); - Address saved_mark_addr(lock_reg, 0); - - // Biased locking - // See whether the lock is currently biased toward our thread and - // whether the epoch is still valid - // Note that the runtime guarantees sufficient alignment of JavaThread - // pointers to allow age to be placed into low bits - // First check to see whether biasing is even enabled for this object - Label cas_label; - if (!swap_reg_contains_mark) { - ldr(swap_reg, mark_addr); - } - andr(tmp_reg, swap_reg, markWord::biased_lock_mask_in_place); - cmp(tmp_reg, (u1)markWord::biased_lock_pattern); - br(Assembler::NE, cas_label); - // The bias pattern is present in the object's header. Need to check - // whether the bias owner and the epoch are both still current. 
- load_prototype_header(tmp_reg, obj_reg); - orr(tmp_reg, tmp_reg, rthread); - eor(tmp_reg, swap_reg, tmp_reg); - andr(tmp_reg, tmp_reg, ~((int) markWord::age_mask_in_place)); - if (counters != NULL) { - Label around; - cbnz(tmp_reg, around); - atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, rscratch1, rscratch2); - b(done); - bind(around); - } else { - cbz(tmp_reg, done); - } - - Label try_revoke_bias; - Label try_rebias; - - // At this point we know that the header has the bias pattern and - // that we are not the bias owner in the current epoch. We need to - // figure out more details about the state of the header in order to - // know what operations can be legally performed on the object's - // header. - - // If the low three bits in the xor result aren't clear, that means - // the prototype header is no longer biased and we have to revoke - // the bias on this object. - andr(rscratch1, tmp_reg, markWord::biased_lock_mask_in_place); - cbnz(rscratch1, try_revoke_bias); - - // Biasing is still enabled for this data type. See whether the - // epoch of the current bias is still valid, meaning that the epoch - // bits of the mark word are equal to the epoch bits of the - // prototype header. (Note that the prototype header's epoch bits - // only change at a safepoint.) If not, attempt to rebias the object - // toward the current thread. Note that we must be absolutely sure - // that the current epoch is invalid in order to do this because - // otherwise the manipulations it performs on the mark word are - // illegal. - andr(rscratch1, tmp_reg, markWord::epoch_mask_in_place); - cbnz(rscratch1, try_rebias); - - // The epoch of the current bias is still valid but we know nothing - // about the owner; it might be set or it might be clear. Try to - // acquire the bias of the object using an atomic operation. If this - // fails we will go in to the runtime to revoke the object's bias. - // Note that we first construct the presumed unbiased header so we - // don't accidentally blow away another thread's valid bias. - { - Label here; - mov(rscratch1, markWord::biased_lock_mask_in_place | markWord::age_mask_in_place | markWord::epoch_mask_in_place); - andr(swap_reg, swap_reg, rscratch1); - orr(tmp_reg, swap_reg, rthread); - cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, rscratch1, here, slow_case); - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - bind(here); - if (counters != NULL) { - atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), - tmp_reg, rscratch1, rscratch2); - } - } - b(done); - - bind(try_rebias); - // At this point we know the epoch has expired, meaning that the - // current "bias owner", if any, is actually invalid. Under these - // circumstances _only_, we are allowed to use the current header's - // value as the comparison value when doing the cas to acquire the - // bias in the current epoch. In other words, we allow transfer of - // the bias from one thread to another directly in this situation. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. 
- { - Label here; - load_prototype_header(tmp_reg, obj_reg); - orr(tmp_reg, rthread, tmp_reg); - cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, rscratch1, here, slow_case); - // If the biasing toward our thread failed, then another thread - // succeeded in biasing it toward itself and we need to revoke that - // bias. The revocation will occur in the runtime in the slow case. - bind(here); - if (counters != NULL) { - atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), - tmp_reg, rscratch1, rscratch2); - } - } - b(done); - - bind(try_revoke_bias); - // The prototype mark in the klass doesn't have the bias bit set any - // more, indicating that objects of this data type are not supposed - // to be biased any more. We are going to try to reset the mark of - // this object to the prototype value and fall through to the - // CAS-based locking scheme. Note that if our CAS fails, it means - // that another thread raced us for the privilege of revoking the - // bias of this particular object, so it's okay to continue in the - // normal locking code. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - { - Label here, nope; - load_prototype_header(tmp_reg, obj_reg); - cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, rscratch1, here, &nope); - bind(here); - - // Fall through to the normal CAS-based lock, because no matter what - // the result of the above CAS, some thread must have succeeded in - // removing the bias bit from the object's header. - if (counters != NULL) { - atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, - rscratch1, rscratch2); - } - bind(nope); - } - - bind(cas_label); -} - -void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { - assert(UseBiasedLocking, "why call this otherwise?"); - - // Check for biased locking unlock case, which is a no-op - // Note: we do not have to check the thread ID for two reasons. - // First, the interpreter checks for IllegalMonitorStateException at - // a higher level. Second, if the bias was revoked while we held the - // lock, the object could not be rebiased toward another thread, so - // the bias bit would be clear. - ldr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - andr(temp_reg, temp_reg, markWord::biased_lock_mask_in_place); - cmp(temp_reg, (u1)markWord::biased_lock_pattern); - br(Assembler::EQ, done); -} - static void pass_arg0(MacroAssembler* masm, Register arg) { if (c_rarg0 != arg ) { masm->mov(c_rarg0, arg); @@ -2027,15 +1855,6 @@ void MacroAssembler::increment(Address dst, int value) str(rscratch1, dst); } - -void MacroAssembler::pusha() { - push(0x7fffffff, sp); -} - -void MacroAssembler::popa() { - pop(0x7fffffff, sp); -} - // Push lots of registers in the bit set supplied. Don't push sp. 
// Return the number of words pushed int MacroAssembler::push(unsigned int bitset, Register stack) { @@ -2677,7 +2496,7 @@ void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { void MacroAssembler::push_CPU_state(bool save_vectors, bool use_sve, int sve_vector_size_in_bytes) { - push(0x3fffffff, sp); // integer registers except lr & sp + push(RegSet::range(r0, r29), sp); // integer registers except lr & sp if (save_vectors && use_sve && sve_vector_size_in_bytes > 16) { sub(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers); for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) { @@ -2709,11 +2528,20 @@ void MacroAssembler::pop_CPU_state(bool restore_vectors, bool use_sve, as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step))); } - if (restore_vectors) { + // We may use predicate registers and rely on ptrue with SVE, + // regardless of whether wide vectors (> 8 bytes) are used. + if (use_sve) { reinitialize_ptrue(); } - pop(0x3fffffff, sp); // integer registers except lr & sp + // integer registers except lr & sp + pop(RegSet::range(r0, r17), sp); +#ifdef R18_RESERVED + ldp(zr, r19, Address(post(sp, 2 * wordSize))); + pop(RegSet::range(r20, r29), sp); +#else + pop(RegSet::range(r18_tls, r29), sp); +#endif } /** @@ -3835,11 +3663,6 @@ void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) cmp(trial_klass, tmp); } -void MacroAssembler::load_prototype_header(Register dst, Register src) { - load_klass(dst, src); - ldr(dst, Address(dst, Klass::prototype_header_offset())); -} - void MacroAssembler::store_klass(Register dst, Register src) { // FIXME: Should this be a store release? concurrent GCs assume // klass length is valid if klass field is not null. @@ -4266,68 +4089,6 @@ void MacroAssembler::eden_allocate(Register obj, bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); } -// Zero words; len is in bytes -// Destroys all registers except addr -// len must be a nonzero multiple of wordSize -void MacroAssembler::zero_memory(Register addr, Register len, Register t1) { - assert_different_registers(addr, len, t1, rscratch1, rscratch2); - -#ifdef ASSERT - { Label L; - tst(len, BytesPerWord - 1); - br(Assembler::EQ, L); - stop("len is not a multiple of BytesPerWord"); - bind(L); - } -#endif - -#ifndef PRODUCT - block_comment("zero memory"); -#endif - - Label loop; - Label entry; - -// Algorithm: -// -// scratch1 = cnt & 7; -// cnt -= scratch1; -// p += scratch1; -// switch (scratch1) { -// do { -// cnt -= 8; -// p[-8] = 0; -// case 7: -// p[-7] = 0; -// case 6: -// p[-6] = 0; -// // ... 
-// case 1: -// p[-1] = 0; -// case 0: -// p += 8; -// } while (cnt); -// } - - const int unroll = 8; // Number of str(zr) instructions we'll unroll - - lsr(len, len, LogBytesPerWord); - andr(rscratch1, len, unroll - 1); // tmp1 = cnt % unroll - sub(len, len, rscratch1); // cnt -= unroll - // t1 always points to the end of the region we're about to zero - add(t1, addr, rscratch1, Assembler::LSL, LogBytesPerWord); - adr(rscratch2, entry); - sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2); - br(rscratch2); - bind(loop); - sub(len, len, unroll); - for (int i = -unroll; i < 0; i++) - Assembler::str(zr, Address(t1, i * wordSize)); - bind(entry); - add(t1, t1, unroll * wordSize); - cbnz(len, loop); -} - void MacroAssembler::verify_tlab() { #ifdef ASSERT if (UseTLAB && VerifyOops) { @@ -4846,10 +4607,11 @@ void MacroAssembler::string_equals(Register a1, Register a2, // handle anything smaller than this ourselves in zero_words(). const int MacroAssembler::zero_words_block_size = 8; -// zero_words() is used by C2 ClearArray patterns. It is as small as -// possible, handling small word counts locally and delegating -// anything larger to the zero_blocks stub. It is expanded many times -// in compiled code, so it is important to keep it short. +// zero_words() is used by C2 ClearArray patterns and by +// C1_MacroAssembler. It is as small as possible, handling small word +// counts locally and delegating anything larger to the zero_blocks +// stub. It is expanded many times in compiled code, so it is +// important to keep it short. // ptr: Address of a buffer to be zeroed. // cnt: Count in HeapWords. @@ -4858,32 +4620,46 @@ const int MacroAssembler::zero_words_block_size = 8; address MacroAssembler::zero_words(Register ptr, Register cnt) { assert(is_power_of_2(zero_words_block_size), "adjust this"); - assert(ptr == r10 && cnt == r11, "mismatch in register usage"); BLOCK_COMMENT("zero_words {"); - cmp(cnt, (u1)zero_words_block_size); + assert(ptr == r10 && cnt == r11, "mismatch in register usage"); + RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::aarch64::zero_blocks()); + assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); + + subs(rscratch1, cnt, zero_words_block_size); Label around; br(LO, around); { RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::aarch64::zero_blocks()); assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); - if (StubRoutines::aarch64::complete()) { + // Make sure this is a C2 compilation. C1 allocates space only for + // trampoline stubs generated by Call LIR ops, and in any case it + // makes sense for a C1 compilation task to proceed as quickly as + // possible. + CompileTask* task; + if (StubRoutines::aarch64::complete() + && Thread::current()->is_Compiler_thread() + && (task = ciEnv::current()->task()) + && is_c2_compile(task->comp_level())) { address tpc = trampoline_call(zero_blocks); if (tpc == NULL) { DEBUG_ONLY(reset_labels(around)); - postcond(pc() == badAddress); + assert(false, "failed to allocate space for trampoline"); return NULL; } } else { - bl(zero_blocks); + far_call(zero_blocks); } } bind(around); + + // We have a few words left to do. zero_blocks has adjusted r10 and r11 + // for us. 
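+ // Fewer than zero_words_block_size words remain. Peel them off by testing + // successive bits of cnt: each tbz below skips the paired stores for its + // power-of-two chunk, and a final single store handles an odd trailing word.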
for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { Label l; tbz(cnt, exact_log2(i), l); for (int j = 0; j < i; j += 2) { - stp(zr, zr, post(ptr, 16)); + stp(zr, zr, post(ptr, 2 * BytesPerWord)); } bind(l); } @@ -4893,46 +4669,56 @@ address MacroAssembler::zero_words(Register ptr, Register cnt) str(zr, Address(ptr)); bind(l); } + BLOCK_COMMENT("} zero_words"); - postcond(pc() != badAddress); return pc(); } // base: Address of a buffer to be zeroed, 8 bytes aligned. // cnt: Immediate count in HeapWords. -#define SmallArraySize (18 * BytesPerLong) +// +// r10, r11, rscratch1, and rscratch2 are clobbered. void MacroAssembler::zero_words(Register base, uint64_t cnt) { - BLOCK_COMMENT("zero_words {"); - int i = cnt & 1; // store any odd word to start - if (i) str(zr, Address(base)); - - if (cnt <= SmallArraySize / BytesPerLong) { + guarantee(zero_words_block_size < BlockZeroingLowLimit, + "increase BlockZeroingLowLimit"); + if (cnt <= (uint64_t)BlockZeroingLowLimit / BytesPerWord) { +#ifndef PRODUCT + { + char buf[64]; + snprintf(buf, sizeof buf, "zero_words (count = %" PRIu64 ") {", cnt); + BLOCK_COMMENT(buf); + } +#endif + if (cnt >= 16) { + uint64_t loops = cnt/16; + if (loops > 1) { + mov(rscratch2, loops - 1); + } + { + Label loop; + bind(loop); + for (int i = 0; i < 16; i += 2) { + stp(zr, zr, Address(base, i * BytesPerWord)); + } + add(base, base, 16 * BytesPerWord); + if (loops > 1) { + subs(rscratch2, rscratch2, 1); + br(GE, loop); + } + } + } + cnt %= 16; + int i = cnt & 1; // store any odd word to start + if (i) str(zr, Address(base)); for (; i < (int)cnt; i += 2) { stp(zr, zr, Address(base, i * wordSize)); } + BLOCK_COMMENT("} zero_words"); } else { - const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll - int remainder = cnt % (2 * unroll); - for (; i < remainder; i += 2) { - stp(zr, zr, Address(base, i * wordSize)); - } - Label loop; - Register cnt_reg = rscratch1; - Register loop_base = rscratch2; - cnt = cnt - remainder; - mov(cnt_reg, cnt); - // adjust base and prebias by -2 * wordSize so we can pre-increment - add(loop_base, base, (remainder - 2) * wordSize); - bind(loop); - sub(cnt_reg, cnt_reg, 2 * unroll); - for (i = 1; i < unroll; i++) { - stp(zr, zr, Address(loop_base, 2 * i * wordSize)); - } - stp(zr, zr, Address(pre(loop_base, 2 * unroll * wordSize))); - cbnz(cnt_reg, loop); + mov(r10, base); mov(r11, cnt); + zero_words(r10, r11); } - BLOCK_COMMENT("} zero_words"); } // Zero blocks of memory by using DC ZVA. @@ -4987,23 +4773,37 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value) { // Algorithm: // -// scratch1 = cnt & 7; +// if (cnt == 0) { +// return; +// } +// if ((p & 8) != 0) { +// *p++ = v; +// } +// +// scratch1 = cnt & 14; // cnt -= scratch1; // p += scratch1; -// switch (scratch1) { +// switch (scratch1 / 2) { // do { -// cnt -= 8; -// p[-8] = v; +// cnt -= 16; +// p[-16] = v; +// p[-15] = v; // case 7: -// p[-7] = v; +// p[-14] = v; +// p[-13] = v; // case 6: -// p[-6] = v; +// p[-12] = v; +// p[-11] = v; // // ... 
// case 1: +// p[-2] = v; // p[-1] = v; // case 0: -// p += 8; +// p += 16; // } while (cnt); +// } +// if ((cnt & 1) == 1) { +// *p++ = v; // } assert_different_registers(base, cnt, value, rscratch1, rscratch2); @@ -5339,49 +5139,6 @@ void MacroAssembler::safepoint_isb() { #endif } -void MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1, - FloatRegister src2, int cond, bool isQ) { - SIMD_Arrangement size = esize2arrangement(type2aelembytes(bt), isQ); - if (bt == T_FLOAT || bt == T_DOUBLE) { - switch (cond) { - case BoolTest::eq: fcmeq(dst, size, src1, src2); break; - case BoolTest::ne: { - fcmeq(dst, size, src1, src2); - notr(dst, T16B, dst); - break; - } - case BoolTest::ge: fcmge(dst, size, src1, src2); break; - case BoolTest::gt: fcmgt(dst, size, src1, src2); break; - case BoolTest::le: fcmge(dst, size, src2, src1); break; - case BoolTest::lt: fcmgt(dst, size, src2, src1); break; - default: - assert(false, "unsupported"); - ShouldNotReachHere(); - } - } else { - switch (cond) { - case BoolTest::eq: cmeq(dst, size, src1, src2); break; - case BoolTest::ne: { - cmeq(dst, size, src1, src2); - notr(dst, T16B, dst); - break; - } - case BoolTest::ge: cmge(dst, size, src1, src2); break; - case BoolTest::gt: cmgt(dst, size, src1, src2); break; - case BoolTest::le: cmge(dst, size, src2, src1); break; - case BoolTest::lt: cmgt(dst, size, src2, src1); break; - case BoolTest::uge: cmhs(dst, size, src1, src2); break; - case BoolTest::ugt: cmhi(dst, size, src1, src2); break; - case BoolTest::ult: cmhi(dst, size, src2, src1); break; - case BoolTest::ule: cmhs(dst, size, src2, src1); break; - default: - assert(false, "unsupported"); - ShouldNotReachHere(); - } - } -} - - #ifndef PRODUCT void MacroAssembler::verify_cross_modify_fence_not_required() { if (VerifyCrossModifyFence) { diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index db3006fe0938f0b94b92159f58a8232b231bb0b7..3287f153ab8f846c74619b0e774dab5484896201 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -105,20 +105,6 @@ class MacroAssembler: public Assembler { void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); - // Biased locking support - // lock_reg and obj_reg must be loaded up with the appropriate values. - // swap_reg is killed. - // tmp_reg must be supplied and must not be rscratch1 or rscratch2 - // Optional slow case is for implementations (interpreter and C1) which branch to - // slow case directly. Leaves condition codes set for C2's Fast_Lock node. - void biased_locking_enter(Register lock_reg, Register obj_reg, - Register swap_reg, Register tmp_reg, - bool swap_reg_contains_mark, - Label& done, Label* slow_case = NULL, - BiasedLockingCounters* counters = NULL); - void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); - - // Helper functions for statistics gathering. // Unconditional atomic increment. void atomic_incw(Register counter_addr, Register tmp, Register tmp2); @@ -847,8 +833,6 @@ public: // stored using routines that take a jobject. 
void store_heap_oop_null(Address dst); - void load_prototype_header(Register dst, Register src); - void store_klass_gap(Register dst, Register src); // This dummy is to prevent a call to store_heap_oop from @@ -904,7 +888,6 @@ public: Register t2, // temp register Label& slow_case // continuation point if fast allocation fails ); - void zero_memory(Register addr, Register len, Register t1); void verify_tlab(); // interface method calling @@ -1058,8 +1041,6 @@ public: bool acquire, bool release, bool weak, Register result); - // SIMD&FP comparison - void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1, FloatRegister src2, int cond, bool isQ); private: void compare_eq(Register rn, Register rm, enum operand_size size); @@ -1125,10 +1106,6 @@ public: void push(Register src); void pop(Register dst); - // push all registers onto the stack - void pusha(); - void popa(); - void repne_scan(Register addr, Register value, Register count, Register scratch); void repne_scanw(Register addr, Register value, Register count, @@ -1315,11 +1292,37 @@ public: void kernel_crc32c_using_crc32c(Register crc, Register buf, Register len, Register tmp0, Register tmp1, Register tmp2, Register tmp3); + + void ghash_modmul (FloatRegister result, + FloatRegister result_lo, FloatRegister result_hi, FloatRegister b, + FloatRegister a, FloatRegister vzr, FloatRegister a1_xor_a0, FloatRegister p, + FloatRegister t1, FloatRegister t2, FloatRegister t3); + void ghash_load_wide(int index, Register data, FloatRegister result, FloatRegister state); public: void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6, Register tmp7); void mul_add(Register out, Register in, Register offs, Register len, Register k); + void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi, + FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0, + FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3); + void ghash_multiply_wide(int index, + FloatRegister result_lo, FloatRegister result_hi, + FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0, + FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3); + void ghash_reduce(FloatRegister result, FloatRegister lo, FloatRegister hi, + FloatRegister p, FloatRegister z, FloatRegister t1); + void ghash_reduce_wide(int index, FloatRegister result, FloatRegister lo, FloatRegister hi, + FloatRegister p, FloatRegister z, FloatRegister t1); + void ghash_processBlocks_wide(address p, Register state, Register subkeyH, + Register data, Register blocks, int unrolls); + + + void aesenc_loadkeys(Register key, Register keylen); + void aesecb_encrypt(Register from, Register to, Register keylen, + FloatRegister data = v0, int unrolls = 1); + void aesecb_decrypt(Register from, Register to, Register key, Register keylen); + void aes_round(FloatRegister input, FloatRegister subkey); // Place an ISB after code may have been modified due to a safepoint. void safepoint_isb(); diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64_aes.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64_aes.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bf2b8768e11120ef16abea550fdd93d62b2b9f73 --- /dev/null +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64_aes.cpp @@ -0,0 +1,680 @@ +/* + * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" + +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "macroAssembler_aarch64.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/stubRoutines.hpp" + +void MacroAssembler::aesecb_decrypt(Register from, Register to, Register key, Register keylen) { + Label L_doLast; + + ld1(v0, T16B, from); // get 16 bytes of input + + ld1(v5, T16B, post(key, 16)); + rev32(v5, T16B, v5); + + ld1(v1, v2, v3, v4, T16B, post(key, 64)); + rev32(v1, T16B, v1); + rev32(v2, T16B, v2); + rev32(v3, T16B, v3); + rev32(v4, T16B, v4); + aesd(v0, v1); + aesimc(v0, v0); + aesd(v0, v2); + aesimc(v0, v0); + aesd(v0, v3); + aesimc(v0, v0); + aesd(v0, v4); + aesimc(v0, v0); + + ld1(v1, v2, v3, v4, T16B, post(key, 64)); + rev32(v1, T16B, v1); + rev32(v2, T16B, v2); + rev32(v3, T16B, v3); + rev32(v4, T16B, v4); + aesd(v0, v1); + aesimc(v0, v0); + aesd(v0, v2); + aesimc(v0, v0); + aesd(v0, v3); + aesimc(v0, v0); + aesd(v0, v4); + aesimc(v0, v0); + + ld1(v1, v2, T16B, post(key, 32)); + rev32(v1, T16B, v1); + rev32(v2, T16B, v2); + + cmpw(keylen, 44); + br(Assembler::EQ, L_doLast); + + aesd(v0, v1); + aesimc(v0, v0); + aesd(v0, v2); + aesimc(v0, v0); + + ld1(v1, v2, T16B, post(key, 32)); + rev32(v1, T16B, v1); + rev32(v2, T16B, v2); + + cmpw(keylen, 52); + br(Assembler::EQ, L_doLast); + + aesd(v0, v1); + aesimc(v0, v0); + aesd(v0, v2); + aesimc(v0, v0); + + ld1(v1, v2, T16B, post(key, 32)); + rev32(v1, T16B, v1); + rev32(v2, T16B, v2); + + bind(L_doLast); + + aesd(v0, v1); + aesimc(v0, v0); + aesd(v0, v2); + + eor(v0, T16B, v0, v5); + + st1(v0, T16B, to); + + // Preserve the address of the start of the key + sub(key, key, keylen, LSL, exact_log2(sizeof (jint))); +} + +// Load expanded key into v17..v31 +void MacroAssembler::aesenc_loadkeys(Register key, Register keylen) { + Label L_loadkeys_44, L_loadkeys_52; + cmpw(keylen, 52); + br(Assembler::LO, L_loadkeys_44); + br(Assembler::EQ, L_loadkeys_52); + + ld1(v17, v18, T16B, post(key, 32)); + rev32(v17, T16B, v17); + rev32(v18, T16B, v18); + bind(L_loadkeys_52); + ld1(v19, v20, T16B, post(key, 32)); + rev32(v19, T16B, v19); + rev32(v20, T16B, v20); + bind(L_loadkeys_44); + ld1(v21, v22, v23, v24, T16B, post(key, 64)); + rev32(v21, T16B, v21); + rev32(v22, T16B, v22); + rev32(v23, T16B, v23); + rev32(v24, T16B, v24); + ld1(v25, v26, v27, v28, T16B, post(key, 64)); + rev32(v25, T16B, v25); + rev32(v26, T16B, v26); + rev32(v27, T16B, v27); + rev32(v28, T16B, v28); + ld1(v29, v30, v31, T16B, post(key, 48)); + rev32(v29, T16B, v29); + rev32(v30, 
T16B, v30);
+  rev32(v31, T16B, v31);
+
+  // Preserve the address of the start of the key
+  sub(key, key, keylen, LSL, exact_log2(sizeof (jint)));
+}
+
+// Neoverse(TM) N1 Software Optimization Guide:
+// Adjacent AESE/AESMC instruction pairs and adjacent AESD/AESIMC
+// instruction pairs will exhibit the performance characteristics
+// described in Section 4.6.
+void MacroAssembler::aes_round(FloatRegister input, FloatRegister subkey) {
+  aese(input, subkey); aesmc(input, input);
+}
+
+// KernelGenerator
+//
+// The abstract base class of an unrolled function generator.
+// Subclasses override generate(), length(), and next() to generate
+// unrolled and interleaved functions.
+//
+// The core idea is that a subclass defines a method which generates
+// the base case of a function and a method to generate a clone of it,
+// shifted to a different set of registers. KernelGenerator will then
+// generate several interleaved copies of the function, with each one
+// using a different set of registers.
+
+// The subclass must implement three methods: length(), which is the
+// number of instruction bundles in the intrinsic, generate(int n),
+// which emits the nth instruction bundle in the intrinsic, and next(),
+// which takes an instance of the generator and returns a version of it,
+// shifted to a new set of registers.
+
+class KernelGenerator: public MacroAssembler {
+protected:
+  const int _unrolls;
+public:
+  KernelGenerator(Assembler *as, int unrolls)
+    : MacroAssembler(as->code()), _unrolls(unrolls) { }
+  virtual void generate(int index) = 0;
+  virtual int length() = 0;
+  virtual KernelGenerator *next() = 0;
+  int unrolls() { return _unrolls; }
+  void unroll();
+};
+
+void KernelGenerator::unroll() {
+  ResourceMark rm;
+  KernelGenerator **generators
+    = NEW_RESOURCE_ARRAY(KernelGenerator *, unrolls());
+
+  generators[0] = this;
+  for (int i = 1; i < unrolls(); i++) {
+    generators[i] = generators[i-1]->next();
+  }
+
+  for (int j = 0; j < length(); j++) {
+    for (int i = 0; i < unrolls(); i++) {
+      generators[i]->generate(j);
+    }
+  }
+}
+
+// An unrolled and interleaved generator for AES encryption.
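Before the AES specialization below, it may help to see unroll()'s emission order in isolation: bundle j of every clone is emitted before bundle j+1 of any clone, so the unrolled copies interleave in the instruction stream and independent instructions from different register sets sit next to each other. A toy model (all names here are ours, not HotSpot's):

#include <cstdio>
#include <vector>

// Toy stand-in for a KernelGenerator subclass: three "bundles" per copy,
// and next() shifts the clone to the next register lane.
struct ToyKernel {
  int lane;
  explicit ToyKernel(int l) : lane(l) {}
  int length() const { return 3; }
  void generate(int j) const { std::printf("lane %d, bundle %d\n", lane, j); }
  ToyKernel next() const { return ToyKernel(lane + 1); }
};

static void unroll(const ToyKernel& first, int unrolls) {
  std::vector<ToyKernel> gens;
  gens.push_back(first);
  for (int i = 1; i < unrolls; i++) gens.push_back(gens.back().next());
  for (int j = 0; j < first.length(); j++)   // outer loop: bundle index
    for (int i = 0; i < unrolls; i++)        // inner loop: clone index
      gens[i].generate(j);
}

int main() { unroll(ToyKernel(0), 4); return 0; }

For unrolls = 4 this prints lanes 0 through 3 for bundle 0, then lanes 0 through 3 for bundle 1, and so on: the same order in which KernelGenerator::unroll() above calls generate().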
+class AESKernelGenerator: public KernelGenerator { + Register _from, _to; + const Register _keylen; + FloatRegister _data; + const FloatRegister _subkeys; + bool _once; + Label _rounds_44, _rounds_52; + +public: + AESKernelGenerator(Assembler *as, int unrolls, + Register from, Register to, Register keylen, FloatRegister data, + FloatRegister subkeys, bool once = true) + : KernelGenerator(as, unrolls), + _from(from), _to(to), _keylen(keylen), _data(data), + _subkeys(subkeys), _once(once) { + } + + virtual void generate(int index) { + switch (index) { + case 0: + if (_from != noreg) { + ld1(_data, T16B, _from); // get 16 bytes of input + } + break; + case 1: + if (_once) { + cmpw(_keylen, 52); + br(Assembler::LO, _rounds_44); + br(Assembler::EQ, _rounds_52); + } + break; + case 2: aes_round(_data, _subkeys + 0); break; + case 3: aes_round(_data, _subkeys + 1); break; + case 4: + if (_once) bind(_rounds_52); + break; + case 5: aes_round(_data, _subkeys + 2); break; + case 6: aes_round(_data, _subkeys + 3); break; + case 7: + if (_once) bind(_rounds_44); + break; + case 8: aes_round(_data, _subkeys + 4); break; + case 9: aes_round(_data, _subkeys + 5); break; + case 10: aes_round(_data, _subkeys + 6); break; + case 11: aes_round(_data, _subkeys + 7); break; + case 12: aes_round(_data, _subkeys + 8); break; + case 13: aes_round(_data, _subkeys + 9); break; + case 14: aes_round(_data, _subkeys + 10); break; + case 15: aes_round(_data, _subkeys + 11); break; + case 16: aes_round(_data, _subkeys + 12); break; + case 17: aese(_data, _subkeys + 13); break; + case 18: eor(_data, T16B, _data, _subkeys + 14); break; + case 19: + if (_to != noreg) { + st1(_data, T16B, _to); + } + break; + default: ShouldNotReachHere(); + } + } + + virtual KernelGenerator *next() { + return new AESKernelGenerator(this, _unrolls, + _from, _to, _keylen, + _data + 1, _subkeys, /*once*/false); + } + + virtual int length() { return 20; } +}; + +// Uses expanded key in v17..v31 +// Returns encrypted values in inputs. +// If to != noreg, store value at to; likewise from +// Preserves key, keylen +// Increments from, to +// Input data in v0, v1, ... +// unrolls controls the number of times to unroll the generated function +void MacroAssembler::aesecb_encrypt(Register from, Register to, Register keylen, + FloatRegister data, int unrolls) { + AESKernelGenerator(this, unrolls, from, to, keylen, data, v17) .unroll(); +} + +// ghash_multiply and ghash_reduce are the non-unrolled versions of +// the GHASH function generators. +void MacroAssembler::ghash_multiply(FloatRegister result_lo, FloatRegister result_hi, + FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0, + FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3) { + // Karatsuba multiplication performs a 128*128 -> 256-bit + // multiplication in three 128-bit multiplications and a few + // additions. 
+ // + // (C1:C0) = A1*B1, (D1:D0) = A0*B0, (E1:E0) = (A0+A1)(B0+B1) + // (A1:A0)(B1:B0) = C1:(C0+C1+D1+E1):(D1+C0+D0+E0):D0 + // + // Inputs: + // + // A0 in a.d[0] (subkey) + // A1 in a.d[1] + // (A1+A0) in a1_xor_a0.d[0] + // + // B0 in b.d[0] (state) + // B1 in b.d[1] + + ext(tmp1, T16B, b, b, 0x08); + pmull2(result_hi, T1Q, b, a, T2D); // A1*B1 + eor(tmp1, T16B, tmp1, b); // (B1+B0) + pmull(result_lo, T1Q, b, a, T1D); // A0*B0 + pmull(tmp2, T1Q, tmp1, a1_xor_a0, T1D); // (A1+A0)(B1+B0) + + ext(tmp1, T16B, result_lo, result_hi, 0x08); + eor(tmp3, T16B, result_hi, result_lo); // A1*B1+A0*B0 + eor(tmp2, T16B, tmp2, tmp1); + eor(tmp2, T16B, tmp2, tmp3); + + // Register pair holds the result of carry-less multiplication + ins(result_hi, D, tmp2, 0, 1); + ins(result_lo, D, tmp2, 1, 0); +} + +void MacroAssembler::ghash_reduce(FloatRegister result, FloatRegister lo, FloatRegister hi, + FloatRegister p, FloatRegister vzr, FloatRegister t1) { + const FloatRegister t0 = result; + + // The GCM field polynomial f is z^128 + p(z), where p = + // z^7+z^2+z+1. + // + // z^128 === -p(z) (mod (z^128 + p(z))) + // + // so, given that the product we're reducing is + // a == lo + hi * z^128 + // substituting, + // === lo - hi * p(z) (mod (z^128 + p(z))) + // + // we reduce by multiplying hi by p(z) and subtracting the result + // from (i.e. XORing it with) lo. Because p has no nonzero high + // bits we can do this with two 64-bit multiplications, lo*p and + // hi*p. + + pmull2(t0, T1Q, hi, p, T2D); + ext(t1, T16B, t0, vzr, 8); + eor(hi, T16B, hi, t1); + ext(t1, T16B, vzr, t0, 8); + eor(lo, T16B, lo, t1); + pmull(t0, T1Q, hi, p, T1D); + eor(result, T16B, lo, t0); +} + +class GHASHMultiplyGenerator: public KernelGenerator { + FloatRegister _result_lo, _result_hi, _b, + _a, _vzr, _a1_xor_a0, _p, + _tmp1, _tmp2, _tmp3; + +public: + GHASHMultiplyGenerator(Assembler *as, int unrolls, + /* offsetted registers */ + FloatRegister result_lo, FloatRegister result_hi, + FloatRegister b, + /* non-offsetted (shared) registers */ + FloatRegister a, FloatRegister a1_xor_a0, FloatRegister p, FloatRegister vzr, + /* offseted (temp) registers */ + FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3) + : KernelGenerator(as, unrolls), + _result_lo(result_lo), _result_hi(result_hi), _b(b), + _a(a), _vzr(vzr), _a1_xor_a0(a1_xor_a0), _p(p), + _tmp1(tmp1), _tmp2(tmp2), _tmp3(tmp3) { } + + int register_stride = 7; + + virtual void generate(int index) { + // Karatsuba multiplication performs a 128*128 -> 256-bit + // multiplication in three 128-bit multiplications and a few + // additions. 
+ // + // (C1:C0) = A1*B1, (D1:D0) = A0*B0, (E1:E0) = (A0+A1)(B0+B1) + // (A1:A0)(B1:B0) = C1:(C0+C1+D1+E1):(D1+C0+D0+E0):D0 + // + // Inputs: + // + // A0 in a.d[0] (subkey) + // A1 in a.d[1] + // (A1+A0) in a1_xor_a0.d[0] + // + // B0 in b.d[0] (state) + // B1 in b.d[1] + + switch (index) { + case 0: ext(_tmp1, T16B, _b, _b, 0x08); break; + case 1: pmull2(_result_hi, T1Q, _b, _a, T2D); // A1*B1 + break; + case 2: eor(_tmp1, T16B, _tmp1, _b); // (B1+B0) + break; + case 3: pmull(_result_lo, T1Q, _b, _a, T1D); // A0*B0 + break; + case 4: pmull(_tmp2, T1Q, _tmp1, _a1_xor_a0, T1D); // (A1+A0)(B1+B0) + break; + + case 5: ext(_tmp1, T16B, _result_lo, _result_hi, 0x08); break; + case 6: eor(_tmp3, T16B, _result_hi, _result_lo); // A1*B1+A0*B0 + break; + case 7: eor(_tmp2, T16B, _tmp2, _tmp1); break; + case 8: eor(_tmp2, T16B, _tmp2, _tmp3); break; + + // Register pair <_result_hi:_result_lo> holds the _result of carry-less multiplication + case 9: ins(_result_hi, D, _tmp2, 0, 1); break; + case 10: ins(_result_lo, D, _tmp2, 1, 0); break; + default: ShouldNotReachHere(); + } + } + + virtual KernelGenerator *next() { + GHASHMultiplyGenerator *result = new GHASHMultiplyGenerator(*this); + result->_result_lo += register_stride; + result->_result_hi += register_stride; + result->_b += register_stride; + result->_tmp1 += register_stride; + result->_tmp2 += register_stride; + result->_tmp3 += register_stride; + return result; + } + + virtual int length() { return 11; } +}; + +// Reduce the 128-bit product in hi:lo by the GCM field polynomial. +// The FloatRegister argument called data is optional: if it is a +// valid register, we interleave LD1 instructions with the +// reduction. This is to reduce latency next time around the loop. +class GHASHReduceGenerator: public KernelGenerator { + FloatRegister _result, _lo, _hi, _p, _vzr, _data, _t1; + int _once; +public: + GHASHReduceGenerator(Assembler *as, int unrolls, + /* offsetted registers */ + FloatRegister result, FloatRegister lo, FloatRegister hi, + /* non-offsetted (shared) registers */ + FloatRegister p, FloatRegister vzr, FloatRegister data, + /* offseted (temp) registers */ + FloatRegister t1) + : KernelGenerator(as, unrolls), + _result(result), _lo(lo), _hi(hi), + _p(p), _vzr(vzr), _data(data), _t1(t1), _once(true) { } + + int register_stride = 7; + + virtual void generate(int index) { + const FloatRegister t0 = _result; + + switch (index) { + // The GCM field polynomial f is z^128 + p(z), where p = + // z^7+z^2+z+1. + // + // z^128 === -p(z) (mod (z^128 + p(z))) + // + // so, given that the product we're reducing is + // a == lo + hi * z^128 + // substituting, + // === lo - hi * p(z) (mod (z^128 + p(z))) + // + // we reduce by multiplying hi by p(z) and subtracting the _result + // from (i.e. XORing it with) lo. Because p has no nonzero high + // bits we can do this with two 64-bit multiplications, lo*p and + // hi*p. 
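(The reduce generator's switch continues in the next hunk.) For reference, the algebra behind both generator classes, with all arithmetic over GF(2) so that addition is XOR. The Karatsuba line in the multiply comments is, written as polynomials,

$$(A_1 z^{64} + A_0)(B_1 z^{64} + B_0) = A_1 B_1\,z^{128} + \bigl((A_1{+}A_0)(B_1{+}B_0) + A_1 B_1 + A_0 B_0\bigr)z^{64} + A_0 B_0,$$

three 128-bit carry-less multiplications instead of four. The reduction comment's congruence is

$$\mathrm{hi}(z)\,z^{128} + \mathrm{lo}(z) \;\equiv\; \mathrm{lo}(z) + \mathrm{hi}(z)\,p(z) \pmod{z^{128} + p(z)},$$

and since deg(hi · p) ≤ 127 + 7 = 134, a few product bits still land above z^127. Folding the high 64-bit half of hi first (the pmull2 case) keeps every partial product within 128 bits; the final pmull then reduces the combined low half in one step.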
+
+    case 0: pmull2(t0, T1Q, _hi, _p, T2D); break;
+    case 1: ext(_t1, T16B, t0, _vzr, 8); break;
+    case 2: eor(_hi, T16B, _hi, _t1); break;
+    case 3: ext(_t1, T16B, _vzr, t0, 8); break;
+    case 4: eor(_lo, T16B, _lo, _t1); break;
+    case 5: pmull(t0, T1Q, _hi, _p, T1D); break;
+    case 6: eor(_result, T16B, _lo, t0); break;
+    default: ShouldNotReachHere();
+    }
+
+    // Sprinkle load instructions into the generated instructions
+    if (_data->is_valid() && _once) {
+      assert(length() >= unrolls(), "not enough room for interleaved loads");
+      if (index < unrolls()) {
+        ld1((_data + index*register_stride), T16B, post(r2, 0x10));
+      }
+    }
+  }
+
+  virtual KernelGenerator *next() {
+    GHASHReduceGenerator *result = new GHASHReduceGenerator(*this);
+    result->_result += register_stride;
+    result->_hi += register_stride;
+    result->_lo += register_stride;
+    result->_t1 += register_stride;
+    result->_once = false;
+    return result;
+  }
+
+  int length() { return 7; }
+};
+
+// Perform a GHASH multiply/reduce on a single FloatRegister.
+void MacroAssembler::ghash_modmul(FloatRegister result,
+                                  FloatRegister result_lo, FloatRegister result_hi, FloatRegister b,
+                                  FloatRegister a, FloatRegister vzr, FloatRegister a1_xor_a0, FloatRegister p,
+                                  FloatRegister t1, FloatRegister t2, FloatRegister t3) {
+  ghash_multiply(result_lo, result_hi, a, b, a1_xor_a0, t1, t2, t3);
+  ghash_reduce(result, result_lo, result_hi, p, vzr, t1);
+}
+
+// Interleaved GHASH processing.
+//
+// Clobbers all vector registers.
+//
+void MacroAssembler::ghash_processBlocks_wide(address field_polynomial, Register state,
+                                              Register subkeyH,
+                                              Register data, Register blocks, int unrolls) {
+  int register_stride = 7;
+
+  // Bafflingly, GCM uses little-endian for the byte order, but
+  // big-endian for the bit order. For example, the polynomial 1 is
+  // represented as the 16-byte string 80 00 00 00 | 12 bytes of 00.
+  //
+  // So, we must either reverse the bytes in each word and do
+  // everything big-endian or reverse the bits in each byte and do
+  // it little-endian. On AArch64 it's more idiomatic to reverse
+  // the bits in each byte (we have an instruction, RBIT, to do
+  // that) and keep the data in little-endian bit order throughout the
+  // calculation, bit-reversing the inputs and outputs.
+
+  assert(unrolls * register_stride < 32, "out of registers");
+
+  FloatRegister a1_xor_a0 = v28;
+  FloatRegister Hprime = v29;
+  FloatRegister vzr = v30;
+  FloatRegister p = v31;
+  eor(vzr, T16B, vzr, vzr); // zero register
+
+  ldrq(p, field_polynomial); // The field polynomial
+
+  ldrq(v0, Address(state));
+  ldrq(Hprime, Address(subkeyH));
+
+  rev64(v0, T16B, v0);      // Bit-reverse words in state and subkeyH
+  rbit(v0, T16B, v0);
+  rev64(Hprime, T16B, Hprime);
+  rbit(Hprime, T16B, Hprime);
+
+  // Powers of H -> Hprime
+
+  Label already_calculated, done;
+  {
+    // The first time around we'll have to calculate H**2, H**3, etc.
+    // Look at the largest power of H in the subkeyH array to see if
+    // it's already been calculated.
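The byte-order remark above is the standard reflected-GHASH trick: carry-less multiplication commutes with bit reversal up to a one-bit shift, so the whole computation can run on RBIT-reversed data and be reversed back once at the end. A scalar demonstration of the identity at 8-bit width (a standalone sketch, not HotSpot code):

#include <cstdint>
#include <cstdio>

// Carry-less (polynomial) multiply of two 8-bit values; 16-bit result.
static uint16_t clmul8(uint8_t a, uint8_t b) {
  uint16_t r = 0;
  for (int i = 0; i < 8; i++)
    if (b & (1u << i)) r ^= (uint16_t)(a << i);
  return r;
}

static uint8_t rbit8(uint8_t x) {   // what RBIT does to each byte
  uint8_t r = 0;
  for (int i = 0; i < 8; i++) { r = (uint8_t)((r << 1) | (x & 1)); x >>= 1; }
  return r;
}

static uint16_t rbit16(uint16_t x) {
  uint16_t r = 0;
  for (int i = 0; i < 16; i++) { r = (uint16_t)((r << 1) | (x & 1)); x >>= 1; }
  return r;
}

int main() {
  uint8_t a = 0xC1, b = 0x5A;       // arbitrary operands
  uint16_t direct    = clmul8(a, b);
  uint16_t reflected = clmul8(rbit8(a), rbit8(b));
  // Reversed inputs give the reversed product, one bit short of the top:
  std::printf("%04x %04x\n", (unsigned)(rbit16(direct) >> 1), (unsigned)reflected);
  return 0;
}

Both printed words are equal. The same identity at PMULL width is what lets the routine bit-reverse its inputs once, multiply freely, and bit-reverse the final state back.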
+ ldp(rscratch1, rscratch2, Address(subkeyH, 16 * (unrolls - 1))); + orr(rscratch1, rscratch1, rscratch2); + cbnz(rscratch1, already_calculated); + + orr(v6, T16B, Hprime, Hprime); // Start with H in v6 and Hprime + for (int i = 1; i < unrolls; i++) { + ext(a1_xor_a0, T16B, Hprime, Hprime, 0x08); // long-swap subkeyH into a1_xor_a0 + eor(a1_xor_a0, T16B, a1_xor_a0, Hprime); // xor subkeyH into subkeyL (Karatsuba: (A1+A0)) + ghash_modmul(/*result*/v6, /*result_lo*/v5, /*result_hi*/v4, /*b*/v6, + Hprime, vzr, a1_xor_a0, p, + /*temps*/v1, v3, v2); + rev64(v1, T16B, v6); + rbit(v1, T16B, v1); + strq(v1, Address(subkeyH, 16 * i)); + } + b(done); + } + { + bind(already_calculated); + + // Load the largest power of H we need into v6. + ldrq(v6, Address(subkeyH, 16 * (unrolls - 1))); + rev64(v6, T16B, v6); + rbit(v6, T16B, v6); + } + bind(done); + + orr(Hprime, T16B, v6, v6); // Move H ** unrolls into Hprime + + // Hprime contains (H ** 1, H ** 2, ... H ** unrolls) + // v0 contains the initial state. Clear the others. + for (int i = 1; i < unrolls; i++) { + int ofs = register_stride * i; + eor(ofs+v0, T16B, ofs+v0, ofs+v0); // zero each state register + } + + ext(a1_xor_a0, T16B, Hprime, Hprime, 0x08); // long-swap subkeyH into a1_xor_a0 + eor(a1_xor_a0, T16B, a1_xor_a0, Hprime); // xor subkeyH into subkeyL (Karatsuba: (A1+A0)) + + // Load #unrolls blocks of data + for (int ofs = 0; ofs < unrolls * register_stride; ofs += register_stride) { + ld1(v2+ofs, T16B, post(data, 0x10)); + } + + // Register assignments, replicated across 4 clones, v0 ... v23 + // + // v0: input / output: current state, result of multiply/reduce + // v1: temp + // v2: input: one block of data (the ciphertext) + // also used as a temp once the data has been consumed + // v3: temp + // v4: output: high part of product + // v5: output: low part ... + // v6: unused + // + // Not replicated: + // + // v28: High part of H xor low part of H' + // v29: H' (hash subkey) + // v30: zero + // v31: Reduction polynomial of the Galois field + + // Inner loop. + // Do the whole load/add/multiply/reduce over all our data except + // the last few rows. + { + Label L_ghash_loop; + bind(L_ghash_loop); + + // Prefetching doesn't help here. In fact, on Neoverse N1 it's worse. + // prfm(Address(data, 128), PLDL1KEEP); + + // Xor data into current state + for (int ofs = 0; ofs < unrolls * register_stride; ofs += register_stride) { + rbit((v2+ofs), T16B, (v2+ofs)); + eor((v2+ofs), T16B, v0+ofs, (v2+ofs)); // bit-swapped data ^ bit-swapped state + } + + // Generate fully-unrolled multiply-reduce in two stages. + + GHASHMultiplyGenerator(this, unrolls, + /*result_lo*/v5, /*result_hi*/v4, /*data*/v2, + Hprime, a1_xor_a0, p, vzr, + /*temps*/v1, v3, /* reuse b*/v2) .unroll(); + + // NB: GHASHReduceGenerator also loads the next #unrolls blocks of + // data into v0, v0+ofs, the current state. + GHASHReduceGenerator (this, unrolls, + /*result*/v0, /*lo*/v5, /*hi*/v4, p, vzr, + /*data*/v2, /*temp*/v3) .unroll(); + + sub(blocks, blocks, unrolls); + cmp(blocks, (unsigned char)(unrolls * 2)); + br(GE, L_ghash_loop); + } + + // Merge the #unrolls states. Note that the data for the next + // iteration has already been loaded into v4, v4+ofs, etc... + + // First, we multiply/reduce each clone by the appropriate power of H. 
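The merge loop below implements the unrolled form of the GHASH recurrence $Y_{i+1} = (Y_i \oplus C_{i+1})\,H$. Applying the recurrence four times gives

$$Y_{i+4} = (Y_i \oplus C_{i+1})\,H^4 \oplus C_{i+2}\,H^3 \oplus C_{i+3}\,H^2 \oplus C_{i+4}\,H,$$

so each of the four lanes can absorb every fourth ciphertext block and multiply by H^4 per iteration, and the final merge multiplies lane j by H^(4-j) (hence the descending subkeyH loads) before XOR-summing the lanes into v0. This is standard GHASH algebra, stated here for reference; it is also why the subkeyH table must hold H through H^unrolls, as computed above.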
+ for (int i = 0; i < unrolls; i++) { + int ofs = register_stride * i; + ldrq(Hprime, Address(subkeyH, 16 * (unrolls - i - 1))); + + rbit(v2+ofs, T16B, v2+ofs); + eor(v2+ofs, T16B, ofs+v0, v2+ofs); // bit-swapped data ^ bit-swapped state + + rev64(Hprime, T16B, Hprime); + rbit(Hprime, T16B, Hprime); + ext(a1_xor_a0, T16B, Hprime, Hprime, 0x08); // long-swap subkeyH into a1_xor_a0 + eor(a1_xor_a0, T16B, a1_xor_a0, Hprime); // xor subkeyH into subkeyL (Karatsuba: (A1+A0)) + ghash_modmul(/*result*/v0+ofs, /*result_lo*/v5+ofs, /*result_hi*/v4+ofs, /*b*/v2+ofs, + Hprime, vzr, a1_xor_a0, p, + /*temps*/v1+ofs, v3+ofs, /* reuse b*/v2+ofs); + } + + // Then we sum the results. + for (int i = 0; i < unrolls - 1; i++) { + int ofs = register_stride * i; + eor(v0, T16B, v0, v0 + register_stride + ofs); + } + + sub(blocks, blocks, (unsigned char)unrolls); + + // And finally bit-reverse the state back to big endian. + rev64(v0, T16B, v0); + rbit(v0, T16B, v0); + st1(v0, T16B, state); +} diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64_log.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64_log.cpp index 6787aeffd18893e11f8cadf9f2efa790714e1fd0..d65c3df226d7d4270d33bc03eb21778f8ce6cdc7 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64_log.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64_log.cpp @@ -1,5 +1,5 @@ /* Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) - * Copyright (c) 2016, Intel Corporation. + * Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. * Intel Math Library (LIBM) Source Code * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. diff --git a/src/hotspot/cpu/aarch64/matcher_aarch64.hpp b/src/hotspot/cpu/aarch64/matcher_aarch64.hpp index f08c0d494aa657ccc3bca289438befa1728fb24b..e0bb39f9e5ffcf61690572f368aa84dc39796cff 100644 --- a/src/hotspot/cpu/aarch64/matcher_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/matcher_aarch64.hpp @@ -55,6 +55,9 @@ // No support for generic vector operands. static const bool supports_generic_vector_operands = false; + // No support for 48 extra htbl entries in aes-gcm intrinsic + static const int htbl_entries = 0; + static constexpr bool isSimpleConstant64(jlong value) { // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. // Probably always true, even if a temp register is required. @@ -138,6 +141,11 @@ return false; } + // Does the CPU supports vector constant rotate instructions? + static constexpr bool supports_vector_constant_rotates(int shift) { + return false; + } + // Does the CPU supports vector unsigned comparison instructions? static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { // Not supported on SVE yet. 
@@ -155,4 +163,7 @@ return true; } + // Implements a variant of EncodeISOArrayNode that encode ASCII only + static const bool supports_encode_ascii_array = false; + #endif // CPU_AARCH64_MATCHER_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/pauth_aarch64.hpp b/src/hotspot/cpu/aarch64/pauth_aarch64.hpp index 6109964458fb8bc8da296774f6c2fee1f192c5d5..e12a671daf1e2552cab87b3ac3344bb9a5d61b65 100644 --- a/src/hotspot/cpu/aarch64/pauth_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/pauth_aarch64.hpp @@ -22,8 +22,8 @@ * */ -#ifndef CPU_AARCH64_PAUTH_AARCH64_INLINE_HPP -#define CPU_AARCH64_PAUTH_AARCH64_INLINE_HPP +#ifndef CPU_AARCH64_PAUTH_AARCH64_HPP +#define CPU_AARCH64_PAUTH_AARCH64_HPP #include OS_CPU_HEADER_INLINE(pauth) @@ -32,4 +32,4 @@ inline bool pauth_ptr_is_raw(address ptr) { return ptr == pauth_strip_pointer(ptr); } -#endif // CPU_AARCH64_PAUTH_AARCH64_INLINE_HPP +#endif // CPU_AARCH64_PAUTH_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/register_aarch64.hpp b/src/hotspot/cpu/aarch64/register_aarch64.hpp index 5a152d62777d868675c2a0e113acdc6b16b2205a..479bd1f37c43563dcd49e5cc7ab301a3754cce2b 100644 --- a/src/hotspot/cpu/aarch64/register_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/register_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -36,7 +36,7 @@ typedef VMRegImpl* VMReg; class RegisterImpl; typedef RegisterImpl* Register; -inline Register as_Register(int encoding) { +inline const Register as_Register(int encoding) { return (Register)(intptr_t) encoding; } @@ -53,7 +53,7 @@ class RegisterImpl: public AbstractRegisterImpl { Register successor() const { return as_Register(encoding() + 1); } // construction - inline friend Register as_Register(int encoding); + inline friend const Register as_Register(int encoding); VMReg as_VMReg(); @@ -242,6 +242,7 @@ class PRegisterImpl: public AbstractRegisterImpl { public: enum { number_of_registers = 16, + number_of_governing_registers = 8, max_slots_per_register = 1 }; @@ -257,6 +258,7 @@ class PRegisterImpl: public AbstractRegisterImpl { int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } int encoding_nocheck() const { return (intptr_t)this; } bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + bool is_governing() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_governing_registers; } const char* name() const; }; @@ -420,4 +422,8 @@ inline FloatRegister AbstractRegSet::first() { return first ? 
as_FloatRegister(exact_log2(first)) : fnoreg; } +inline Register as_Register(FloatRegister reg) { + return as_Register(reg->encoding()); +} + #endif // CPU_AARCH64_REGISTER_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp index da1584d796952421cec9cbb4b47a349294d279c4..b6ee437603b234a0a8b0c32fb21db12c37225b48 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -1773,10 +1773,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Load the oop from the handle __ ldr(obj_reg, Address(oop_handle_reg, 0)); - if (UseBiasedLocking) { - __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); - } - // Load (object->mark() | 1) into swap_reg %r0 __ ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes())); __ orr(swap_reg, rscratch1, 1); @@ -1924,11 +1920,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ ldr(obj_reg, Address(oop_handle_reg, 0)); Label done; - - if (UseBiasedLocking) { - __ biased_locking_exit(obj_reg, old_hdr, done); - } - // Simple recursive lock? __ ldr(rscratch1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp index 331b29ea3725937a7a368acd2ffb15574a6f387b..0388bb73b919b912162fdf80aadeef45658d398e 100644 --- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp @@ -2560,8 +2560,6 @@ class StubGenerator: public StubCodeGenerator { __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); - Label L_doLast; - const Register from = c_rarg0; // source array address const Register to = c_rarg1; // destination array address const Register key = c_rarg2; // key array address @@ -2572,75 +2570,8 @@ class StubGenerator: public StubCodeGenerator { __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - __ ld1(v0, __ T16B, from); // get 16 bytes of input - - __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64)); - __ rev32(v1, __ T16B, v1); - __ rev32(v2, __ T16B, v2); - __ rev32(v3, __ T16B, v3); - __ rev32(v4, __ T16B, v4); - __ aese(v0, v1); - __ aesmc(v0, v0); - __ aese(v0, v2); - __ aesmc(v0, v0); - __ aese(v0, v3); - __ aesmc(v0, v0); - __ aese(v0, v4); - __ aesmc(v0, v0); - - __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64)); - __ rev32(v1, __ T16B, v1); - __ rev32(v2, __ T16B, v2); - __ rev32(v3, __ T16B, v3); - __ rev32(v4, __ T16B, v4); - __ aese(v0, v1); - __ aesmc(v0, v0); - __ aese(v0, v2); - __ aesmc(v0, v0); - __ aese(v0, v3); - __ aesmc(v0, v0); - __ aese(v0, v4); - __ aesmc(v0, v0); - - __ ld1(v1, v2, __ T16B, __ post(key, 32)); - __ rev32(v1, __ T16B, v1); - __ rev32(v2, __ T16B, v2); - - __ cmpw(keylen, 44); - __ br(Assembler::EQ, L_doLast); - - __ aese(v0, v1); - __ aesmc(v0, v0); - __ aese(v0, v2); - __ aesmc(v0, v0); - - __ ld1(v1, v2, __ T16B, __ post(key, 32)); - __ rev32(v1, __ T16B, v1); - __ rev32(v2, __ T16B, v2); - - __ cmpw(keylen, 52); - __ br(Assembler::EQ, L_doLast); - - __ aese(v0, v1); - __ aesmc(v0, v0); - __ aese(v0, v2); - __ aesmc(v0, v0); - - __ ld1(v1, v2, __ T16B, __ post(key, 32)); - __ rev32(v1, __ T16B, v1); - __ rev32(v2, __ T16B, v2); - - __ BIND(L_doLast); - - __ aese(v0, v1); - __ aesmc(v0, v0); - __ aese(v0, v2); - - __ ld1(v1, __ 
T16B, key); - __ rev32(v1, __ T16B, v1); - __ eor(v0, __ T16B, v0, v1); - - __ st1(v0, __ T16B, to); + __ aesenc_loadkeys(key, keylen); + __ aesecb_encrypt(from, to, keylen); __ mov(r0, 0); @@ -2673,76 +2604,7 @@ class StubGenerator: public StubCodeGenerator { __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - __ ld1(v0, __ T16B, from); // get 16 bytes of input - - __ ld1(v5, __ T16B, __ post(key, 16)); - __ rev32(v5, __ T16B, v5); - - __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64)); - __ rev32(v1, __ T16B, v1); - __ rev32(v2, __ T16B, v2); - __ rev32(v3, __ T16B, v3); - __ rev32(v4, __ T16B, v4); - __ aesd(v0, v1); - __ aesimc(v0, v0); - __ aesd(v0, v2); - __ aesimc(v0, v0); - __ aesd(v0, v3); - __ aesimc(v0, v0); - __ aesd(v0, v4); - __ aesimc(v0, v0); - - __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64)); - __ rev32(v1, __ T16B, v1); - __ rev32(v2, __ T16B, v2); - __ rev32(v3, __ T16B, v3); - __ rev32(v4, __ T16B, v4); - __ aesd(v0, v1); - __ aesimc(v0, v0); - __ aesd(v0, v2); - __ aesimc(v0, v0); - __ aesd(v0, v3); - __ aesimc(v0, v0); - __ aesd(v0, v4); - __ aesimc(v0, v0); - - __ ld1(v1, v2, __ T16B, __ post(key, 32)); - __ rev32(v1, __ T16B, v1); - __ rev32(v2, __ T16B, v2); - - __ cmpw(keylen, 44); - __ br(Assembler::EQ, L_doLast); - - __ aesd(v0, v1); - __ aesimc(v0, v0); - __ aesd(v0, v2); - __ aesimc(v0, v0); - - __ ld1(v1, v2, __ T16B, __ post(key, 32)); - __ rev32(v1, __ T16B, v1); - __ rev32(v2, __ T16B, v2); - - __ cmpw(keylen, 52); - __ br(Assembler::EQ, L_doLast); - - __ aesd(v0, v1); - __ aesimc(v0, v0); - __ aesd(v0, v2); - __ aesimc(v0, v0); - - __ ld1(v1, v2, __ T16B, __ post(key, 32)); - __ rev32(v1, __ T16B, v1); - __ rev32(v2, __ T16B, v2); - - __ BIND(L_doLast); - - __ aesd(v0, v1); - __ aesimc(v0, v0); - __ aesd(v0, v2); - - __ eor(v0, __ T16B, v0, v5); - - __ st1(v0, __ T16B, to); + __ aesecb_decrypt(from, to, key, keylen); __ mov(r0, 0); @@ -2964,6 +2826,390 @@ class StubGenerator: public StubCodeGenerator { return start; } + // CTR AES crypt. + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // c_rarg3 - counter vector byte array address + // c_rarg4 - input length + // c_rarg5 - saved encryptedCounter start + // c_rarg6 - saved used length + // + // Output: + // r0 - input length + // + address generate_counterMode_AESCrypt() { + const Register in = c_rarg0; + const Register out = c_rarg1; + const Register key = c_rarg2; + const Register counter = c_rarg3; + const Register saved_len = c_rarg4, len = r10; + const Register saved_encrypted_ctr = c_rarg5; + const Register used_ptr = c_rarg6, used = r12; + + const Register offset = r7; + const Register keylen = r11; + + const unsigned char block_size = 16; + const int bulk_width = 4; + // NB: bulk_width can be 4 or 8. 8 gives slightly faster + // performance with larger data sizes, but it also means that the + // fast path isn't used until you have at least 8 blocks, and up + // to 127 bytes of data will be executed on the slow path. For + // that reason, and also so as not to blow away too much icache, 4 + // blocks seems like a sensible compromise. 
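One detail worth making concrete before the algorithm outline that follows: every counter update in this stub is the same sequence (REV32, vector add of {0, 0, 0, 1}, REV32), which treats the last four bytes of the 16-byte block as one big-endian 32-bit counter that wraps without carrying into the upper 96 bits. A scalar equivalent (a sketch; the function name is ours):

#include <cstdint>

// Increment the big-endian 32-bit counter in bytes 12..15 of the block,
// mod 2^32, leaving bytes 0..11 untouched.
static void ctr32_increment(uint8_t block[16]) {
  uint32_t c = (uint32_t)block[12] << 24 | (uint32_t)block[13] << 16 |
               (uint32_t)block[14] << 8  | (uint32_t)block[15];
  c += 1;
  block[12] = (uint8_t)(c >> 24);
  block[13] = (uint8_t)(c >> 16);
  block[14] = (uint8_t)(c >> 8);
  block[15] = (uint8_t)c;
}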
+ + // Algorithm: + // + // if (len == 0) { + // goto DONE; + // } + // int result = len; + // do { + // if (used >= blockSize) { + // if (len >= bulk_width * blockSize) { + // CTR_large_block(); + // if (len == 0) + // goto DONE; + // } + // for (;;) { + // 16ByteVector v0 = counter; + // embeddedCipher.encryptBlock(v0, 0, encryptedCounter, 0); + // used = 0; + // if (len < blockSize) + // break; /* goto NEXT */ + // 16ByteVector v1 = load16Bytes(in, offset); + // v1 = v1 ^ encryptedCounter; + // store16Bytes(out, offset); + // used = blockSize; + // offset += blockSize; + // len -= blockSize; + // if (len == 0) + // goto DONE; + // } + // } + // NEXT: + // out[outOff++] = (byte)(in[inOff++] ^ encryptedCounter[used++]); + // len--; + // } while (len != 0); + // DONE: + // return result; + // + // CTR_large_block() + // Wide bulk encryption of whole blocks. + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt"); + const address start = __ pc(); + __ enter(); + + Label DONE, CTR_large_block, large_block_return; + __ ldrw(used, Address(used_ptr)); + __ cbzw(saved_len, DONE); + + __ mov(len, saved_len); + __ mov(offset, 0); + + // Compute #rounds for AES based on the length of the key array + __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + + __ aesenc_loadkeys(key, keylen); + + { + Label L_CTR_loop, NEXT; + + __ bind(L_CTR_loop); + + __ cmp(used, block_size); + __ br(__ LO, NEXT); + + // Maybe we have a lot of data + __ subsw(rscratch1, len, bulk_width * block_size); + __ br(__ HS, CTR_large_block); + __ BIND(large_block_return); + __ cbzw(len, DONE); + + // Setup the counter + __ movi(v4, __ T4S, 0); + __ movi(v5, __ T4S, 1); + __ ins(v4, __ S, v5, 3, 3); // v4 contains { 0, 0, 0, 1 } + + __ ld1(v0, __ T16B, counter); // Load the counter into v0 + __ rev32(v16, __ T16B, v0); + __ addv(v16, __ T4S, v16, v4); + __ rev32(v16, __ T16B, v16); + __ st1(v16, __ T16B, counter); // Save the incremented counter back + + { + // We have fewer than bulk_width blocks of data left. Encrypt + // them one by one until there is less than a full block + // remaining, being careful to save both the encrypted counter + // and the counter. + + Label inner_loop; + __ bind(inner_loop); + // Counter to encrypt is in v0 + __ aesecb_encrypt(noreg, noreg, keylen); + __ st1(v0, __ T16B, saved_encrypted_ctr); + + // Do we have a remaining full block? + + __ mov(used, 0); + __ cmp(len, block_size); + __ br(__ LO, NEXT); + + // Yes, we have a full block + __ ldrq(v1, Address(in, offset)); + __ eor(v1, __ T16B, v1, v0); + __ strq(v1, Address(out, offset)); + __ mov(used, block_size); + __ add(offset, offset, block_size); + + __ subw(len, len, block_size); + __ cbzw(len, DONE); + + // Increment the counter, store it back + __ orr(v0, __ T16B, v16, v16); + __ rev32(v16, __ T16B, v16); + __ addv(v16, __ T4S, v16, v4); + __ rev32(v16, __ T16B, v16); + __ st1(v16, __ T16B, counter); // Save the incremented counter back + + __ b(inner_loop); + } + + __ BIND(NEXT); + + // Encrypt a single byte, and loop. + // We expect this to be a rare event. 
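The byte-at-a-time tail continues in the next hunk; its contract with the fast path is easiest to see as a byte-level model of the whole loop, where used counts the consumed bytes of the current encrypted counter and a fresh counter is encrypted only when it runs out. A sketch (encrypt_block stands in for the AES kernel; all names are ours):

#include <cstddef>
#include <cstdint>

using BlockCipher = void (*)(const uint8_t in[16], uint8_t out[16]);

static size_t ctr_crypt_model(BlockCipher encrypt_block,
                              uint8_t counter[16], uint8_t enc_ctr[16],
                              unsigned& used,
                              const uint8_t* in, uint8_t* out, size_t len) {
  for (size_t i = 0; i < len; i++) {
    if (used >= 16) {
      encrypt_block(counter, enc_ctr);            // refresh keystream block
      for (int j = 15; j >= 12; j--)              // inc32, as sketched above
        if (++counter[j] != 0) break;
      used = 0;
    }
    out[i] = in[i] ^ enc_ctr[used++];
  }
  return len;                                     // the stub returns saved_len
}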
+ __ ldrb(rscratch1, Address(in, offset)); + __ ldrb(rscratch2, Address(saved_encrypted_ctr, used)); + __ eor(rscratch1, rscratch1, rscratch2); + __ strb(rscratch1, Address(out, offset)); + __ add(offset, offset, 1); + __ add(used, used, 1); + __ subw(len, len,1); + __ cbnzw(len, L_CTR_loop); + } + + __ bind(DONE); + __ strw(used, Address(used_ptr)); + __ mov(r0, saved_len); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(lr); + + // Bulk encryption + + __ BIND (CTR_large_block); + assert(bulk_width == 4 || bulk_width == 8, "must be"); + + if (bulk_width == 8) { + __ sub(sp, sp, 4 * 16); + __ st1(v12, v13, v14, v15, __ T16B, Address(sp)); + } + __ sub(sp, sp, 4 * 16); + __ st1(v8, v9, v10, v11, __ T16B, Address(sp)); + RegSet saved_regs = (RegSet::of(in, out, offset) + + RegSet::of(saved_encrypted_ctr, used_ptr, len)); + __ push(saved_regs, sp); + __ andr(len, len, -16 * bulk_width); // 8/4 encryptions, 16 bytes per encryption + __ add(in, in, offset); + __ add(out, out, offset); + + // Keys should already be loaded into the correct registers + + __ ld1(v0, __ T16B, counter); // v0 contains the first counter + __ rev32(v16, __ T16B, v0); // v16 contains byte-reversed counter + + // AES/CTR loop + { + Label L_CTR_loop; + __ BIND(L_CTR_loop); + + // Setup the counters + __ movi(v8, __ T4S, 0); + __ movi(v9, __ T4S, 1); + __ ins(v8, __ S, v9, 3, 3); // v8 contains { 0, 0, 0, 1 } + + for (FloatRegister f = v0; f < v0 + bulk_width; f++) { + __ rev32(f, __ T16B, v16); + __ addv(v16, __ T4S, v16, v8); + } + + __ ld1(v8, v9, v10, v11, __ T16B, __ post(in, 4 * 16)); + + // Encrypt the counters + __ aesecb_encrypt(noreg, noreg, keylen, v0, bulk_width); + + if (bulk_width == 8) { + __ ld1(v12, v13, v14, v15, __ T16B, __ post(in, 4 * 16)); + } + + // XOR the encrypted counters with the inputs + for (int i = 0; i < bulk_width; i++) { + __ eor(v0 + i, __ T16B, v0 + i, v8 + i); + } + + // Write the encrypted data + __ st1(v0, v1, v2, v3, __ T16B, __ post(out, 4 * 16)); + if (bulk_width == 8) { + __ st1(v4, v5, v6, v7, __ T16B, __ post(out, 4 * 16)); + } + + __ subw(len, len, 16 * bulk_width); + __ cbnzw(len, L_CTR_loop); + } + + // Save the counter back where it goes + __ rev32(v16, __ T16B, v16); + __ st1(v16, __ T16B, counter); + + __ pop(saved_regs, sp); + + __ ld1(v8, v9, v10, v11, __ T16B, __ post(sp, 4 * 16)); + if (bulk_width == 8) { + __ ld1(v12, v13, v14, v15, __ T16B, __ post(sp, 4 * 16)); + } + + __ andr(rscratch1, len, -16 * bulk_width); + __ sub(len, len, rscratch1); + __ add(offset, offset, rscratch1); + __ mov(used, 16); + __ strw(used, Address(used_ptr)); + __ b(large_block_return); + + return start; + } + + // Vector AES Galois Counter Mode implementation. Parameters: + // + // in = c_rarg0 + // len = c_rarg1 + // ct = c_rarg2 - ciphertext that ghash will read (in for encrypt, out for decrypt) + // out = c_rarg3 + // key = c_rarg4 + // state = c_rarg5 - GHASH.state + // subkeyHtbl = c_rarg6 - powers of H + // subkeyHtbl_48_entries = c_rarg7 (not used) + // counter = [sp, #0] pointer to 16 bytes of CTR + // return - number of processed bytes + address generate_galoisCounterMode_AESCrypt() { + address ghash_polynomial = __ pc(); + __ emit_int64(0x87); // The low-order bits of the field + // polynomial (i.e. 
p = z^7+z^2+z+1) + // repeated in the low and high parts of a + // 128-bit vector + __ emit_int64(0x87); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "galoisCounterMode_AESCrypt"); + address start = __ pc(); + __ enter(); + + const Register in = c_rarg0; + const Register len = c_rarg1; + const Register ct = c_rarg2; + const Register out = c_rarg3; + // and updated with the incremented counter in the end + + const Register key = c_rarg4; + const Register state = c_rarg5; + + const Register subkeyHtbl = c_rarg6; + + // Pointer to CTR is passed on the stack before the (fp, lr) pair. + const Address counter_mem(sp, 2 * wordSize); + const Register counter = c_rarg7; + __ ldr(counter, counter_mem); + + const Register keylen = r10; + // Save state before entering routine + __ sub(sp, sp, 4 * 16); + __ st1(v12, v13, v14, v15, __ T16B, Address(sp)); + __ sub(sp, sp, 4 * 16); + __ st1(v8, v9, v10, v11, __ T16B, Address(sp)); + + // __ andr(len, len, -512); + __ andr(len, len, -16 * 8); // 8 encryptions, 16 bytes per encryption + __ str(len, __ pre(sp, -2 * wordSize)); + + Label DONE; + __ cbz(len, DONE); + + // Compute #rounds for AES based on the length of the key array + __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + + __ aesenc_loadkeys(key, keylen); + __ ld1(v0, __ T16B, counter); // v0 contains the first counter + __ rev32(v16, __ T16B, v0); // v16 contains byte-reversed counter + + // AES/CTR loop + { + Label L_CTR_loop; + __ BIND(L_CTR_loop); + + // Setup the counters + __ movi(v8, __ T4S, 0); + __ movi(v9, __ T4S, 1); + __ ins(v8, __ S, v9, 3, 3); // v8 contains { 0, 0, 0, 1 } + for (FloatRegister f = v0; f < v8; f++) { + __ rev32(f, __ T16B, v16); + __ addv(v16, __ T4S, v16, v8); + } + + __ ld1(v8, v9, v10, v11, __ T16B, __ post(in, 4 * 16)); + + // Encrypt the counters + __ aesecb_encrypt(noreg, noreg, keylen, v0, /*unrolls*/8); + + __ ld1(v12, v13, v14, v15, __ T16B, __ post(in, 4 * 16)); + + // XOR the encrypted counters with the inputs + for (int i = 0; i < 8; i++) { + __ eor(v0 + i, __ T16B, v0 + i, v8 + i); + } + __ st1(v0, v1, v2, v3, __ T16B, __ post(out, 4 * 16)); + __ st1(v4, v5, v6, v7, __ T16B, __ post(out, 4 * 16)); + + __ subw(len, len, 16 * 8); + __ cbnzw(len, L_CTR_loop); + } + + __ rev32(v16, __ T16B, v16); + __ st1(v16, __ T16B, counter); + + __ ldr(len, Address(sp)); + __ lsr(len, len, exact_log2(16)); // We want the count of blocks + + // GHASH/CTR loop + __ ghash_processBlocks_wide(ghash_polynomial, state, subkeyHtbl, ct, + len, /*unrolls*/4); + +#ifdef ASSERT + { Label L; + __ cmp(len, (unsigned char)0); + __ br(Assembler::EQ, L); + __ stop("stubGenerator: abort"); + __ bind(L); + } +#endif + + __ bind(DONE); + // Return the number of bytes processed + __ ldr(r0, __ post(sp, 2 * wordSize)); + + __ ld1(v8, v9, v10, v11, __ T16B, __ post(sp, 4 * 16)); + __ ld1(v12, v13, v14, v15, __ T16B, __ post(sp, 4 * 16)); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(lr); + return start; + } + // Arguments: // // Inputs: @@ -4227,69 +4473,6 @@ class StubGenerator: public StubCodeGenerator { return start; } - void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi, - FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0, - FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, FloatRegister tmp4) { - // Karatsuba multiplication performs a 128*128 -> 256-bit - // multiplication in three 128-bit multiplications and a few - // 
additions. - // - // (C1:C0) = A1*B1, (D1:D0) = A0*B0, (E1:E0) = (A0+A1)(B0+B1) - // (A1:A0)(B1:B0) = C1:(C0+C1+D1+E1):(D1+C0+D0+E0):D0 - // - // Inputs: - // - // A0 in a.d[0] (subkey) - // A1 in a.d[1] - // (A1+A0) in a1_xor_a0.d[0] - // - // B0 in b.d[0] (state) - // B1 in b.d[1] - - __ ext(tmp1, __ T16B, b, b, 0x08); - __ pmull2(result_hi, __ T1Q, b, a, __ T2D); // A1*B1 - __ eor(tmp1, __ T16B, tmp1, b); // (B1+B0) - __ pmull(result_lo, __ T1Q, b, a, __ T1D); // A0*B0 - __ pmull(tmp2, __ T1Q, tmp1, a1_xor_a0, __ T1D); // (A1+A0)(B1+B0) - - __ ext(tmp4, __ T16B, result_lo, result_hi, 0x08); - __ eor(tmp3, __ T16B, result_hi, result_lo); // A1*B1+A0*B0 - __ eor(tmp2, __ T16B, tmp2, tmp4); - __ eor(tmp2, __ T16B, tmp2, tmp3); - - // Register pair holds the result of carry-less multiplication - __ ins(result_hi, __ D, tmp2, 0, 1); - __ ins(result_lo, __ D, tmp2, 1, 0); - } - - void ghash_reduce(FloatRegister result, FloatRegister lo, FloatRegister hi, - FloatRegister p, FloatRegister z, FloatRegister t1) { - const FloatRegister t0 = result; - - // The GCM field polynomial f is z^128 + p(z), where p = - // z^7+z^2+z+1. - // - // z^128 === -p(z) (mod (z^128 + p(z))) - // - // so, given that the product we're reducing is - // a == lo + hi * z^128 - // substituting, - // === lo - hi * p(z) (mod (z^128 + p(z))) - // - // we reduce by multiplying hi by p(z) and subtracting the result - // from (i.e. XORing it with) lo. Because p has no nonzero high - // bits we can do this with two 64-bit multiplications, lo*p and - // hi*p. - - __ pmull2(t0, __ T1Q, hi, p, __ T2D); - __ ext(t1, __ T16B, t0, z, 8); - __ eor(hi, __ T16B, hi, t1); - __ ext(t1, __ T16B, z, t0, 8); - __ eor(lo, __ T16B, lo, t1); - __ pmull(t0, __ T1Q, hi, p, __ T1D); - __ eor(result, __ T16B, lo, t0); - } - address generate_has_negatives(address &has_negatives_long) { const u1 large_loop_size = 64; const uint64_t UPPER_BIT_MASK=0x8080808080808080; @@ -4656,18 +4839,6 @@ class StubGenerator: public StubCodeGenerator { return entry; } - // code for comparing 16 bytes of strings with same encoding - void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) { - Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, tmp1 = r10, tmp2 = r11; - __ ldr(rscratch1, Address(__ post(str1, 8))); - __ eor(rscratch2, tmp1, tmp2); - __ ldr(cnt1, Address(__ post(str2, 8))); - __ cbnz(rscratch2, DIFF1); - __ ldr(tmp1, Address(__ post(str1, 8))); - __ eor(rscratch2, rscratch1, cnt1); - __ ldr(tmp2, Address(__ post(str2, 8))); - __ cbnz(rscratch2, DIFF2); - } // code for comparing 16 characters of strings with Latin1 and Utf16 encoding void compare_string_16_x_LU(Register tmpL, Register tmpU, Label &DIFF1, @@ -4874,15 +5045,18 @@ class StubGenerator: public StubCodeGenerator { : "compare_long_string_same_encoding UU"); address entry = __ pc(); Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4, - tmp1 = r10, tmp2 = r11; - Label SMALL_LOOP, LARGE_LOOP_PREFETCH, CHECK_LAST, DIFF2, TAIL, - LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF, - DIFF_LAST_POSITION, DIFF_LAST_POSITION2; + tmp1 = r10, tmp2 = r11, tmp1h = rscratch1, tmp2h = rscratch2; + + Label LARGE_LOOP_PREFETCH, LOOP_COMPARE16, DIFF, LESS16, LESS8, CAL_DIFFERENCE, LENGTH_DIFF; + // exit from large loop when less than 64 bytes left to read or we're about // to prefetch memory behind array border int largeLoopExitCondition = MAX2(64, SoftwarePrefetchHintDistance)/(isLL ? 1 : 2); - // cnt1/cnt2 contains amount of characters to compare. 
cnt1 can be re-used - // update cnt2 counter with already loaded 8 bytes + + // before jumping to stub, pre-load 8 bytes already, so do comparison directly + __ eor(rscratch2, tmp1, tmp2); + __ cbnz(rscratch2, CAL_DIFFERENCE); + __ sub(cnt2, cnt2, wordSize/(isLL ? 1 : 2)); // update pointers, because of previous read __ add(str1, str1, wordSize); @@ -4891,80 +5065,88 @@ class StubGenerator: public StubCodeGenerator { __ bind(LARGE_LOOP_PREFETCH); __ prfm(Address(str1, SoftwarePrefetchHintDistance)); __ prfm(Address(str2, SoftwarePrefetchHintDistance)); - compare_string_16_bytes_same(DIFF, DIFF2); - compare_string_16_bytes_same(DIFF, DIFF2); + + __ align(OptoLoopAlignment); + for (int i = 0; i < 4; i++) { + __ ldp(tmp1, tmp1h, Address(str1, i * 16)); + __ ldp(tmp2, tmp2h, Address(str2, i * 16)); + __ cmp(tmp1, tmp2); + __ ccmp(tmp1h, tmp2h, 0, Assembler::EQ); + __ br(Assembler::NE, DIFF); + } __ sub(cnt2, cnt2, isLL ? 64 : 32); - compare_string_16_bytes_same(DIFF, DIFF2); + __ add(str1, str1, 64); + __ add(str2, str2, 64); __ subs(rscratch2, cnt2, largeLoopExitCondition); - compare_string_16_bytes_same(DIFF, DIFF2); - __ br(__ GT, LARGE_LOOP_PREFETCH); - __ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left? + __ br(Assembler::GE, LARGE_LOOP_PREFETCH); + __ cbz(cnt2, LENGTH_DIFF); // no more chars left? } - // less than 16 bytes left? - __ subs(cnt2, cnt2, isLL ? 16 : 8); - __ br(__ LT, TAIL); + + __ subs(rscratch1, cnt2, isLL ? 16 : 8); + __ br(Assembler::LE, LESS16); __ align(OptoLoopAlignment); - __ bind(SMALL_LOOP); - compare_string_16_bytes_same(DIFF, DIFF2); - __ subs(cnt2, cnt2, isLL ? 16 : 8); - __ br(__ GE, SMALL_LOOP); - __ bind(TAIL); - __ adds(cnt2, cnt2, isLL ? 16 : 8); - __ br(__ EQ, LAST_CHECK_AND_LENGTH_DIFF); + __ bind(LOOP_COMPARE16); + __ ldp(tmp1, tmp1h, Address(__ post(str1, 16))); + __ ldp(tmp2, tmp2h, Address(__ post(str2, 16))); + __ cmp(tmp1, tmp2); + __ ccmp(tmp1h, tmp2h, 0, Assembler::EQ); + __ br(Assembler::NE, DIFF); + __ sub(cnt2, cnt2, isLL ? 16 : 8); + __ subs(rscratch2, cnt2, isLL ? 16 : 8); + __ br(Assembler::LT, LESS16); + + __ ldp(tmp1, tmp1h, Address(__ post(str1, 16))); + __ ldp(tmp2, tmp2h, Address(__ post(str2, 16))); + __ cmp(tmp1, tmp2); + __ ccmp(tmp1h, tmp2h, 0, Assembler::EQ); + __ br(Assembler::NE, DIFF); + __ sub(cnt2, cnt2, isLL ? 16 : 8); + __ subs(rscratch2, cnt2, isLL ? 16 : 8); + __ br(Assembler::GE, LOOP_COMPARE16); + __ cbz(cnt2, LENGTH_DIFF); + + __ bind(LESS16); + // each 8 compare __ subs(cnt2, cnt2, isLL ? 8 : 4); - __ br(__ LE, CHECK_LAST); - __ eor(rscratch2, tmp1, tmp2); - __ cbnz(rscratch2, DIFF); + __ br(Assembler::LE, LESS8); __ ldr(tmp1, Address(__ post(str1, 8))); __ ldr(tmp2, Address(__ post(str2, 8))); + __ eor(rscratch2, tmp1, tmp2); + __ cbnz(rscratch2, CAL_DIFFERENCE); __ sub(cnt2, cnt2, isLL ? 8 : 4); - __ bind(CHECK_LAST); + + __ bind(LESS8); // directly load last 8 bytes if (!isLL) { - __ add(cnt2, cnt2, cnt2); // now in bytes + __ add(cnt2, cnt2, cnt2); } + __ ldr(tmp1, Address(str1, cnt2)); + __ ldr(tmp2, Address(str2, cnt2)); __ eor(rscratch2, tmp1, tmp2); - __ cbnz(rscratch2, DIFF); - __ ldr(rscratch1, Address(str1, cnt2)); - __ ldr(cnt1, Address(str2, cnt2)); - __ eor(rscratch2, rscratch1, cnt1); __ cbz(rscratch2, LENGTH_DIFF); - // Find the first different characters in the longwords and - // compute their difference. - __ bind(DIFF2); - __ rev(rscratch2, rscratch2); - __ clz(rscratch2, rscratch2); - __ andr(rscratch2, rscratch2, isLL ? 
-8 : -16); - __ lsrv(rscratch1, rscratch1, rscratch2); - if (isLL) { - __ lsrv(cnt1, cnt1, rscratch2); - __ uxtbw(rscratch1, rscratch1); - __ uxtbw(cnt1, cnt1); - } else { - __ lsrv(cnt1, cnt1, rscratch2); - __ uxthw(rscratch1, rscratch1); - __ uxthw(cnt1, cnt1); - } - __ subw(result, rscratch1, cnt1); - __ b(LENGTH_DIFF); + __ b(CAL_DIFFERENCE); + __ bind(DIFF); + __ cmp(tmp1, tmp2); + __ csel(tmp1, tmp1, tmp1h, Assembler::NE); + __ csel(tmp2, tmp2, tmp2h, Assembler::NE); + // reuse rscratch2 register for the result of eor instruction + __ eor(rscratch2, tmp1, tmp2); + + __ bind(CAL_DIFFERENCE); __ rev(rscratch2, rscratch2); __ clz(rscratch2, rscratch2); __ andr(rscratch2, rscratch2, isLL ? -8 : -16); __ lsrv(tmp1, tmp1, rscratch2); + __ lsrv(tmp2, tmp2, rscratch2); if (isLL) { - __ lsrv(tmp2, tmp2, rscratch2); __ uxtbw(tmp1, tmp1); __ uxtbw(tmp2, tmp2); } else { - __ lsrv(tmp2, tmp2, rscratch2); __ uxthw(tmp1, tmp1); __ uxthw(tmp2, tmp2); } __ subw(result, tmp1, tmp2); - __ b(LENGTH_DIFF); - __ bind(LAST_CHECK_AND_LENGTH_DIFF); - __ eor(rscratch2, tmp1, tmp2); - __ cbnz(rscratch2, DIFF); + __ bind(LENGTH_DIFF); __ ret(lr); return entry; @@ -5387,6 +5569,8 @@ class StubGenerator: public StubCodeGenerator { FloatRegister vzr = v30; __ eor(vzr, __ T16B, vzr, vzr); // zero register + __ ldrq(v24, p); // The field polynomial + __ ldrq(v0, Address(state)); __ ldrq(v1, Address(subkeyH)); @@ -5395,10 +5579,8 @@ class StubGenerator: public StubCodeGenerator { __ rev64(v1, __ T16B, v1); __ rbit(v1, __ T16B, v1); - __ ldrq(v26, p); - - __ ext(v16, __ T16B, v1, v1, 0x08); // long-swap subkeyH into v1 - __ eor(v16, __ T16B, v16, v1); // xor subkeyH into subkeyL (Karatsuba: (A1+A0)) + __ ext(v4, __ T16B, v1, v1, 0x08); // long-swap subkeyH into v1 + __ eor(v4, __ T16B, v4, v1); // xor subkeyH into subkeyL (Karatsuba: (A1+A0)) { Label L_ghash_loop; @@ -5410,21 +5592,70 @@ class StubGenerator: public StubCodeGenerator { __ eor(v2, __ T16B, v0, v2); // bit-swapped data ^ bit-swapped state // Multiply state in v2 by subkey in v1 - ghash_multiply(/*result_lo*/v5, /*result_hi*/v7, - /*a*/v1, /*b*/v2, /*a1_xor_a0*/v16, - /*temps*/v6, v20, v18, v21); + __ ghash_multiply(/*result_lo*/v5, /*result_hi*/v7, + /*a*/v1, /*b*/v2, /*a1_xor_a0*/v4, + /*temps*/v6, v3, /*reuse/clobber b*/v2); // Reduce v7:v5 by the field polynomial - ghash_reduce(v0, v5, v7, v26, vzr, v20); + __ ghash_reduce(/*result*/v0, /*lo*/v5, /*hi*/v7, /*p*/v24, vzr, /*temp*/v3); __ sub(blocks, blocks, 1); __ cbnz(blocks, L_ghash_loop); } // The bit-reversed result is at this point in v0 - __ rev64(v1, __ T16B, v0); - __ rbit(v1, __ T16B, v1); + __ rev64(v0, __ T16B, v0); + __ rbit(v0, __ T16B, v0); + + __ st1(v0, __ T16B, state); + __ ret(lr); + + return start; + } + + address generate_ghash_processBlocks_wide() { + address small = generate_ghash_processBlocks(); + + StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks_wide"); + __ align(wordSize * 2); + address p = __ pc(); + __ emit_int64(0x87); // The low-order bits of the field + // polynomial (i.e. 
p = z^7+z^2+z+1) + // repeated in the low and high parts of a + // 128-bit vector + __ emit_int64(0x87); + + __ align(CodeEntryAlignment); + address start = __ pc(); + + Register state = c_rarg0; + Register subkeyH = c_rarg1; + Register data = c_rarg2; + Register blocks = c_rarg3; + + const int unroll = 4; + + __ cmp(blocks, (unsigned char)(unroll * 2)); + __ br(__ LT, small); + + if (unroll > 1) { + // Save state before entering routine + __ sub(sp, sp, 4 * 16); + __ st1(v12, v13, v14, v15, __ T16B, Address(sp)); + __ sub(sp, sp, 4 * 16); + __ st1(v8, v9, v10, v11, __ T16B, Address(sp)); + } + + __ ghash_processBlocks_wide(p, state, subkeyH, data, blocks, unroll); + + if (unroll > 1) { + // And restore state + __ ld1(v8, v9, v10, v11, __ T16B, __ post(sp, 4 * 16)); + __ ld1(v12, v13, v14, v15, __ T16B, __ post(sp, 4 * 16)); + } + + __ cmp(blocks, (unsigned char)0); + __ br(__ GT, small); - __ st1(v1, __ T16B, state); __ ret(lr); return start; @@ -5683,6 +5914,7 @@ class StubGenerator: public StubCodeGenerator { * c_rarg3 - dest_start * c_rarg4 - dest_offset * c_rarg5 - isURL + * c_rarg6 - isMIME * */ address generate_base64_decodeBlock() { @@ -5765,12 +5997,13 @@ class StubGenerator: public StubCodeGenerator { StubCodeMark mark(this, "StubRoutines", "decodeBlock"); address start = __ pc(); - Register src = c_rarg0; // source array - Register soff = c_rarg1; // source start offset - Register send = c_rarg2; // source end offset - Register dst = c_rarg3; // dest array - Register doff = c_rarg4; // position for writing to dest array - Register isURL = c_rarg5; // Base64 or URL character set + Register src = c_rarg0; // source array + Register soff = c_rarg1; // source start offset + Register send = c_rarg2; // source end offset + Register dst = c_rarg3; // dest array + Register doff = c_rarg4; // position for writing to dest array + Register isURL = c_rarg5; // Base64 or URL character set + Register isMIME = c_rarg6; // Decoding MIME block - unused in this implementation Register length = send; // reuse send as length of source data to process @@ -5954,6 +6187,10 @@ class StubGenerator: public StubCodeGenerator { acquire = false; release = false; break; + case memory_order_release: + acquire = false; + release = true; + break; default: acquire = true; release = true; @@ -5972,10 +6209,16 @@ class StubGenerator: public StubCodeGenerator { __ ret(lr); } - void gen_ldaddal_entry(Assembler::operand_size size) { + void gen_ldadd_entry(Assembler::operand_size size, atomic_memory_order order) { Register prev = r2, addr = c_rarg0, incr = c_rarg1; - __ ldaddal(size, incr, prev, addr); - __ membar(Assembler::StoreStore|Assembler::StoreLoad); + // If not relaxed, then default to conservative. Relaxed is the only + // case we use enough to be worth specializing. 
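(gen_ldadd_entry's body follows in the next hunk.) In C++ memory-model terms the two flavors are roughly analogous to the code below; this is an analogy only, since the conservative form (LDADDAL plus a trailing full barrier) is, if anything, stronger than seq_cst:

#include <atomic>
#include <cstdint>

uint32_t fetch_add_relaxed(std::atomic<uint32_t>& a, uint32_t v) {
  return a.fetch_add(v, std::memory_order_relaxed);           // plain LDADD
}

uint32_t fetch_add_conservative(std::atomic<uint32_t>& a, uint32_t v) {
  uint32_t prev = a.fetch_add(v, std::memory_order_seq_cst);  // LDADDAL
  std::atomic_thread_fence(std::memory_order_seq_cst);        // full barrier
  return prev;
}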
+    if (order == memory_order_relaxed) {
+      __ ldadd(size, incr, prev, addr);
+    } else {
+      __ ldaddal(size, incr, prev, addr);
+      __ membar(Assembler::StoreStore|Assembler::StoreLoad);
+    }
     if (size == Assembler::xword) {
       __ mov(r0, prev);
     } else {
@@ -6005,12 +6248,21 @@ class StubGenerator: public StubCodeGenerator {
     StubCodeMark mark(this, "StubRoutines", "atomic entry points");
     address first_entry = __ pc();

-    // All memory_order_conservative
+    // ADD, memory_order_conservative
     AtomicStubMark mark_fetch_add_4(_masm, &aarch64_atomic_fetch_add_4_impl);
-    gen_ldaddal_entry(Assembler::word);
+    gen_ldadd_entry(Assembler::word, memory_order_conservative);
     AtomicStubMark mark_fetch_add_8(_masm, &aarch64_atomic_fetch_add_8_impl);
-    gen_ldaddal_entry(Assembler::xword);
+    gen_ldadd_entry(Assembler::xword, memory_order_conservative);
+    // ADD, memory_order_relaxed
+    AtomicStubMark mark_fetch_add_4_relaxed
+      (_masm, &aarch64_atomic_fetch_add_4_relaxed_impl);
+    gen_ldadd_entry(MacroAssembler::word, memory_order_relaxed);
+    AtomicStubMark mark_fetch_add_8_relaxed
+      (_masm, &aarch64_atomic_fetch_add_8_relaxed_impl);
+    gen_ldadd_entry(MacroAssembler::xword, memory_order_relaxed);
+
+    // XCHG, memory_order_conservative
     AtomicStubMark mark_xchg_4(_masm, &aarch64_atomic_xchg_4_impl);
     gen_swpal_entry(Assembler::word);
     AtomicStubMark mark_xchg_8_impl(_masm, &aarch64_atomic_xchg_8_impl);
@@ -6035,6 +6287,20 @@ class StubGenerator: public StubCodeGenerator {
       (_masm, &aarch64_atomic_cmpxchg_8_relaxed_impl);
     gen_cas_entry(MacroAssembler::xword, memory_order_relaxed);

+    AtomicStubMark mark_cmpxchg_4_release
+      (_masm, &aarch64_atomic_cmpxchg_4_release_impl);
+    gen_cas_entry(MacroAssembler::word, memory_order_release);
+    AtomicStubMark mark_cmpxchg_8_release
+      (_masm, &aarch64_atomic_cmpxchg_8_release_impl);
+    gen_cas_entry(MacroAssembler::xword, memory_order_release);
+
+    AtomicStubMark mark_cmpxchg_4_seq_cst
+      (_masm, &aarch64_atomic_cmpxchg_4_seq_cst_impl);
+    gen_cas_entry(MacroAssembler::word, memory_order_seq_cst);
+    AtomicStubMark mark_cmpxchg_8_seq_cst
+      (_masm, &aarch64_atomic_cmpxchg_8_seq_cst_impl);
+    gen_cas_entry(MacroAssembler::xword, memory_order_seq_cst);
+
     ICache::invalidate_range(first_entry, __ pc() - first_entry);
   }
 #endif // LINUX
@@ -7111,7 +7377,8 @@ class StubGenerator: public StubCodeGenerator {
     // generate GHASH intrinsics code
     if (UseGHASHIntrinsics) {
-      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
+      // The wide stub also generates the small stub and branches to it for short inputs.
+      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks_wide();
     }

     if (UseBASE64Intrinsics) {
@@ -7128,6 +7395,8 @@ class StubGenerator: public StubCodeGenerator {
     StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
     StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
     StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
+    StubRoutines::_galoisCounterMode_AESCrypt = generate_galoisCounterMode_AESCrypt();
+    StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt();
   }

   if (UseSHA1Intrinsics) {
@@ -7193,6 +7462,8 @@ void StubGenerator_generate(CodeBuffer* code, bool all) {

DEFAULT_ATOMIC_OP(fetch_add, 4, )
DEFAULT_ATOMIC_OP(fetch_add, 8, )
+DEFAULT_ATOMIC_OP(fetch_add, 4, _relaxed)
+DEFAULT_ATOMIC_OP(fetch_add, 8, _relaxed)
DEFAULT_ATOMIC_OP(xchg, 4, )
DEFAULT_ATOMIC_OP(xchg, 8, )
DEFAULT_ATOMIC_OP(cmpxchg, 1, )
@@ -7201,6 +7472,10 @@ DEFAULT_ATOMIC_OP(cmpxchg, 8, )
DEFAULT_ATOMIC_OP(cmpxchg, 1, _relaxed) DEFAULT_ATOMIC_OP(cmpxchg, 4, _relaxed) DEFAULT_ATOMIC_OP(cmpxchg, 8, _relaxed) +DEFAULT_ATOMIC_OP(cmpxchg, 4, _release) +DEFAULT_ATOMIC_OP(cmpxchg, 8, _release) +DEFAULT_ATOMIC_OP(cmpxchg, 4, _seq_cst) +DEFAULT_ATOMIC_OP(cmpxchg, 8, _seq_cst) #undef DEFAULT_ATOMIC_OP diff --git a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp index 7578791b5401c6f99ade28887746b07ab0b8f0c3..e3ebc4484470120a4c445054414a638b326e9e92 100644 --- a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp @@ -36,7 +36,7 @@ static bool returns_to_call_stub(address return_pc) { enum platform_dependent_constants { code_size1 = 19000, // simply increase if too small (assembler will crash if too small) - code_size2 = 28000 // simply increase if too small (assembler will crash if too small) + code_size2 = 38000 // simply increase if too small (assembler will crash if too small) }; class aarch64 { diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp index db253fe5c2cd07a3a0cbc894525e5885a75594c0..e20cffd57670b49e7ee7372a5432f1a0c1249e2b 100644 --- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp @@ -1397,11 +1397,12 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ cmp(rscratch1, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); __ br(Assembler::NE, no_reguard); - __ pusha(); // XXX only save smashed registers + __ push_call_clobbered_registers(); __ mov(c_rarg0, rthread); __ mov(rscratch2, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); __ blr(rscratch2); - __ popa(); // XXX only restore smashed registers + __ pop_call_clobbered_registers(); + __ bind(no_reguard); } diff --git a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp index 7cdaa89f2a3a0645328ee6a9e00a131890799f15..aff2d284c5722966316ac940efeb8f5e3ad44e0b 100644 --- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp @@ -3553,11 +3553,7 @@ void TemplateTable::_new() { // initialize object header only. 
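  // (Editorial note: with biased locking removed there is no per-klass
  //  prototype header to load; markWord::prototype() is one static value
  //  for all objects, which is why the UseBiasedLocking branch below
  //  collapses to a single mov.)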
__ bind(initialize_header); - if (UseBiasedLocking) { - __ ldr(rscratch1, Address(r4, Klass::prototype_header_offset())); - } else { - __ mov(rscratch1, (intptr_t)markWord::prototype().value()); - } + __ mov(rscratch1, (intptr_t)markWord::prototype().value()); __ str(rscratch1, Address(r0, oopDesc::mark_offset_in_bytes())); __ store_klass_gap(r0, zr); // zero klass gap for compressed oops __ store_klass(r0, r4); // store klass last diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp index 123c429fa0f9eed50e63012421fa89138061c53f..a3ac94505b09bb6c71bf328fca9243d606216228 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp @@ -237,6 +237,9 @@ void VM_Version::initialize() { warning("UseAESIntrinsics enabled, but UseAES not, enabling"); UseAES = true; } + if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { + FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true); + } } else { if (UseAES) { warning("AES instructions are not available on this CPU"); @@ -246,12 +249,12 @@ void VM_Version::initialize() { warning("AES intrinsics are not available on this CPU"); FLAG_SET_DEFAULT(UseAESIntrinsics, false); } + if (UseAESCTRIntrinsics) { + warning("AES/CTR intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); + } } - if (UseAESCTRIntrinsics) { - warning("AES/CTR intrinsics are not available on this CPU"); - FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); - } if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { UseCRC32Intrinsics = true; diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad index 062670460282597cf4bd577a4391099f7010fa6f..1eb14630e7a7efb8a56ae0b30b4fdcd9fe9d9509 100644 --- a/src/hotspot/cpu/arm/arm.ad +++ b/src/hotspot/cpu/arm/arm.ad @@ -1001,10 +1001,6 @@ OptoRegPair Matcher::vector_return_value(uint ideal_reg) { return OptoRegPair(0, 0); } -const int Matcher::float_pressure(int default_pressure_threshold) { - return default_pressure_threshold; -} - // Vector width in bytes const int Matcher::vector_width_in_bytes(BasicType bt) { return MaxVectorSize; @@ -1055,7 +1051,7 @@ MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, return NULL; } -bool Matcher::is_generic_reg2reg_move(MachNode* m) { +bool Matcher::is_reg2reg_move(MachNode* m) { ShouldNotReachHere(); // generic vector operands not supported return false; } @@ -1100,6 +1096,16 @@ bool Matcher::is_spillable_arg( int reg ) { return can_be_java_arg(reg); } +uint Matcher::int_pressure_limit() +{ + return (INTPRESSURE == -1) ? 12 : INTPRESSURE; +} + +uint Matcher::float_pressure_limit() +{ + return (FLOATPRESSURE == -1) ? 30 : FLOATPRESSURE; +} + bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { return false; } @@ -5454,7 +5460,6 @@ instruct storeXConditional( memoryex mem, iRegX oldval, iRegX newval, iRegX tmp, __ cmp($tmp$$Register, 1, eq); __ b(loop, eq); __ teq($tmp$$Register, 0); - // used by biased locking only. Requires a membar. 
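    // (Editorial note, assumed rationale: only the comment tying this
    //  barrier to biased locking is dropped; the membar itself stays so the
    //  CAS loop keeps its ordering guarantees.)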
__ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadStore | MacroAssembler::LoadLoad), noreg); %} ins_pipe( long_memory_op ); @@ -8954,7 +8959,6 @@ instruct partialSubtypeCheck( R0RegP index, R1RegP sub, R2RegP super, flagsRegP instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch ) %{ match(Set pcc (FastLock object box)); - predicate(!(UseBiasedLocking && !UseOptoBiasInlining)); effect(TEMP scratch, TEMP scratch2); ins_cost(DEFAULT_COST*3); @@ -8966,22 +8970,6 @@ instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRe ins_pipe(long_memory_op); %} -instruct cmpFastLock_noBiasInline(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, - iRegP scratch, iRegP scratch3) %{ - match(Set pcc (FastLock object box)); - predicate(UseBiasedLocking && !UseOptoBiasInlining); - - effect(TEMP scratch, TEMP scratch2, TEMP scratch3); - ins_cost(DEFAULT_COST*5); - - format %{ "FASTLOCK $object, $box; KILL $scratch, $scratch2, $scratch3" %} - ins_encode %{ - __ fast_lock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register, $scratch3$$Register); - %} - ins_pipe(long_memory_op); -%} - - instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch ) %{ match(Set pcc (FastUnlock object box)); effect(TEMP scratch, TEMP scratch2); diff --git a/src/hotspot/cpu/arm/assembler_arm.cpp b/src/hotspot/cpu/arm/assembler_arm.cpp index 3336d9f7d11f88540a0cee3575b89a8a121e55a6..d23207a6e3422c8ce495dab399c9faea5fafb38e 100644 --- a/src/hotspot/cpu/arm/assembler_arm.cpp +++ b/src/hotspot/cpu/arm/assembler_arm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,7 +34,6 @@ #include "memory/resourceArea.hpp" #include "prims/jvm_misc.hpp" #include "prims/methodHandles.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/objectMonitor.hpp" #include "runtime/os.hpp" diff --git a/src/hotspot/cpu/arm/assembler_arm_32.cpp b/src/hotspot/cpu/arm/assembler_arm_32.cpp index 0166d85a21ad0a7395cd93c15442ce7f333af822..5dd9d39392a0e7d595e779c65af863d8eec0a3bf 100644 --- a/src/hotspot/cpu/arm/assembler_arm_32.cpp +++ b/src/hotspot/cpu/arm/assembler_arm_32.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -34,7 +34,6 @@ #include "memory/resourceArea.hpp" #include "prims/jvm_misc.hpp" #include "prims/methodHandles.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/objectMonitor.hpp" #include "runtime/os.hpp" diff --git a/src/hotspot/cpu/arm/c1_Defs_arm.hpp b/src/hotspot/cpu/arm/c1_Defs_arm.hpp index fd165edb6dd168b407224252fa4fd4e081caab06..b8ed431891db886cdcc33bd4811635579ac74f2c 100644 --- a/src/hotspot/cpu/arm/c1_Defs_arm.hpp +++ b/src/hotspot/cpu/arm/c1_Defs_arm.hpp @@ -76,6 +76,5 @@ enum { #define PATCHED_ADDR (204) #define CARDTABLEBARRIERSET_POST_BARRIER_HELPER -#define GENERATE_ADDRESS_IS_PREFERRED #endif // CPU_ARM_C1_DEFS_ARM_HPP diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp index 13a95b26db6fdf442a7b0eade88312f64baa6589..2ee7b68f72037323500dfa5883b02dd9e6e361c6 100644 --- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp @@ -247,7 +247,7 @@ int LIR_Assembler::emit_unwind_handler() { if (method()->is_synchronized()) { monitor_address(0, FrameMap::R0_opr); stub = new MonitorExitStub(FrameMap::R0_opr, true, 0); - __ unlock_object(R2, R1, R0, Rtemp, *stub->entry()); + __ unlock_object(R2, R1, R0, *stub->entry()); __ bind(*stub->continuation()); } @@ -494,8 +494,7 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool po void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, - bool pop_fpu_stack, bool wide, - bool unaligned) { + bool pop_fpu_stack, bool wide) { LIR_Address* to_addr = dest->as_address_ptr(); Register base_reg = to_addr->base()->as_pointer_register(); const bool needs_patching = (patch_code != lir_patch_none); @@ -695,7 +694,7 @@ void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, - bool wide, bool unaligned) { + bool wide) { assert(src->is_address(), "should not call otherwise"); assert(dest->is_register(), "should not call otherwise"); LIR_Address* addr = src->as_address_ptr(); @@ -2429,19 +2428,17 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { Register obj = op->obj_opr()->as_pointer_register(); Register hdr = op->hdr_opr()->as_pointer_register(); Register lock = op->lock_opr()->as_pointer_register(); - Register tmp = op->scratch_opr()->is_illegal() ? 
noreg : - op->scratch_opr()->as_pointer_register(); if (!UseFastLocking) { __ b(*op->stub()->entry()); } else if (op->code() == lir_lock) { assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); - int null_check_offset = __ lock_object(hdr, obj, lock, tmp, *op->stub()->entry()); + int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry()); if (op->info() != NULL) { add_debug_info_for_null_check(null_check_offset, op->info()); } } else if (op->code() == lir_unlock) { - __ unlock_object(hdr, obj, lock, tmp, *op->stub()->entry()); + __ unlock_object(hdr, obj, lock, *op->stub()->entry()); } else { ShouldNotReachHere(); } diff --git a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp index b16986ee2a21aaf2e4796ba4c0ae8817717c37d7..7697934b718efaefab74e560460a5f9d7f853714 100644 --- a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp @@ -412,21 +412,13 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { LIR_Opr lock = new_pointer_register(); LIR_Opr hdr = new_pointer_register(); - // Need a scratch register for biased locking on arm - LIR_Opr scratch = LIR_OprFact::illegalOpr; - if(UseBiasedLocking) { - scratch = new_pointer_register(); - } else { - scratch = atomicLockOpr(); - } - CodeEmitInfo* info_for_exception = NULL; if (x->needs_null_check()) { info_for_exception = state_for(x); } CodeEmitInfo* info = state_for(x, x->state(), true); - monitor_enter(obj.result(), lock, hdr, scratch, + monitor_enter(obj.result(), lock, hdr, LIR_OprFact::illegalOpr, x->monitor_no(), info_for_exception, info); } diff --git a/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp index ac7137ca66d831a5c2bea6e4d29c8faaba8e0e06..7f1d4341872c2dc36af92b4f011a9d6c64cf73a8 100644 --- a/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp @@ -31,7 +31,6 @@ #include "oops/arrayOop.hpp" #include "oops/markWord.hpp" #include "runtime/basicLock.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/os.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -90,11 +89,7 @@ void C1_MacroAssembler::try_allocate(Register obj, Register obj_end, Register tm void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp) { assert_different_registers(obj, klass, len, tmp); - if(UseBiasedLocking && !len->is_valid()) { - ldr(tmp, Address(klass, Klass::prototype_header_offset())); - } else { - mov(tmp, (intptr_t)markWord::prototype().value()); - } + mov(tmp, (intptr_t)markWord::prototype().value()); str(tmp, Address(obj, oopDesc::mark_offset_in_bytes())); str(klass, Address(obj, oopDesc::klass_offset_in_bytes())); @@ -187,14 +182,12 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, initialize_object(obj, tmp1, klass, len, tmp2, tmp3, header_size_in_bytes, -1, /* is_tlab_allocated */ UseTLAB); } -int C1_MacroAssembler::lock_object(Register hdr, Register obj, - Register disp_hdr, Register tmp1, - Label& slow_case) { +int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { Label done, fast_lock, fast_lock_done; int null_check_offset = 0; const Register tmp2 = Rtemp; // Rtemp should be free at c1 LIR level - assert_different_registers(hdr, obj, disp_hdr, tmp1, tmp2); + assert_different_registers(hdr, obj, disp_hdr, tmp2); assert(BasicObjectLock::lock_offset_in_bytes() 
== 0, "ajust this code"); const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); @@ -211,10 +204,6 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, b(slow_case, ne); } - if (UseBiasedLocking) { - biased_locking_enter(obj, hdr/*scratched*/, tmp1, false, tmp2, done, slow_case); - } - assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions"); // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread. @@ -234,8 +223,9 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, // -2- test (hdr - SP) if the low two bits are 0 sub(tmp2, hdr, SP, eq); movs(tmp2, AsmOperand(tmp2, lsr, exact_log2(os::vm_page_size())), eq); - // If 'eq' then OK for recursive fast locking: store 0 into a lock record. - str(tmp2, Address(disp_hdr, mark_offset), eq); + // If still 'eq' then recursive locking OK + // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8267042) + str(tmp2, Address(disp_hdr, mark_offset)); b(fast_lock_done, eq); // else need slow case b(slow_case); @@ -248,23 +238,12 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, cas_for_lock_acquire(hdr, disp_hdr, obj, tmp2, slow_case); bind(fast_lock_done); - -#ifndef PRODUCT - if (PrintBiasedLockingStatistics) { - cond_atomic_inc32(al, BiasedLocking::fast_path_entry_count_addr()); - } -#endif // !PRODUCT - bind(done); return null_check_offset; } -void C1_MacroAssembler::unlock_object(Register hdr, Register obj, - Register disp_hdr, Register tmp, - Label& slow_case) { - // Note: this method is not using its 'tmp' argument - +void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { assert_different_registers(hdr, obj, disp_hdr, Rtemp); Register tmp2 = Rtemp; @@ -273,11 +252,6 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, const int mark_offset = BasicLock::displaced_header_offset_in_bytes(); Label done; - if (UseBiasedLocking) { - // load object - ldr(obj, Address(disp_hdr, obj_offset)); - biased_locking_exit(obj, hdr, done); - } assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions"); @@ -286,10 +260,8 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, // If hdr is NULL, we've got recursive locking and there's nothing more to do cbz(hdr, done); - if(!UseBiasedLocking) { - // load object - ldr(obj, Address(disp_hdr, obj_offset)); - } + // load object + ldr(obj, Address(disp_hdr, obj_offset)); // Restore the object header cas_for_lock_release(disp_hdr, hdr, obj, tmp2, slow_case); diff --git a/src/hotspot/cpu/arm/c1_MacroAssembler_arm.hpp b/src/hotspot/cpu/arm/c1_MacroAssembler_arm.hpp index faf546db698f0dfa77ef8b8f01266193cebeb16f..acf8fb9ef8d7c7cca8037a5a90d811cac9d4e05b 100644 --- a/src/hotspot/cpu/arm/c1_MacroAssembler_arm.hpp +++ b/src/hotspot/cpu/arm/c1_MacroAssembler_arm.hpp @@ -59,9 +59,9 @@ max_array_allocation_length = 0x01000000 }; - int lock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case); + int lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case); - void unlock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case); + void unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case); // This platform only uses signal-based null checks. The Label is not needed. 
void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } diff --git a/src/hotspot/cpu/arm/c1_globals_arm.hpp b/src/hotspot/cpu/arm/c1_globals_arm.hpp index 8f196bc5e6abb6f557157b824a9ddb82fdc70bd1..55917decc30cad6e53ca8c298b8220607960e6dd 100644 --- a/src/hotspot/cpu/arm/c1_globals_arm.hpp +++ b/src/hotspot/cpu/arm/c1_globals_arm.hpp @@ -35,7 +35,7 @@ #ifndef COMPILER2 // avoid duplicated definitions, favoring C2 version define_pd_global(bool, BackgroundCompilation, true ); -define_pd_global(bool, InlineIntrinsics, false); // TODO: ARM +define_pd_global(bool, InlineIntrinsics, true ); define_pd_global(bool, PreferInterpreterNativeStubs, false); define_pd_global(bool, ProfileTraps, false); define_pd_global(bool, UseOnStackReplacement, true ); diff --git a/src/hotspot/cpu/arm/c2_MacroAssembler_arm.cpp b/src/hotspot/cpu/arm/c2_MacroAssembler_arm.cpp index 2211d5c5fa338aa4a8c34287d70e7a0b85540167..07d22a1fce92cb2942a48764d2e79a06ad82e6a7 100644 --- a/src/hotspot/cpu/arm/c2_MacroAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c2_MacroAssembler_arm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -78,7 +78,7 @@ void C2_MacroAssembler::char_arrays_equals(Register ary1, Register ary2, // mov(result_reg, 1); //equal } -void C2_MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2, Register scratch3) { +void C2_MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2) { assert(VM_Version::supports_ldrex(), "unsupported, yet?"); Register Rmark = Rscratch2; @@ -97,14 +97,6 @@ void C2_MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratc b(done, ne); } - if (UseBiasedLocking && !UseOptoBiasInlining) { - assert(scratch3 != noreg, "need extra temporary for -XX:-UseOptoBiasInlining"); - biased_locking_enter(Roop, Rmark, Rscratch, false, scratch3, done, done); - // Fall through if lock not biased otherwise branch to done - } - - // Invariant: Rmark loaded below does not contain biased lock pattern - ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes())); tst(Rmark, markWord::unlocked_value); b(fast_lock, ne); @@ -148,10 +140,6 @@ void C2_MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscra Label done; - if (UseBiasedLocking && !UseOptoBiasInlining) { - biased_locking_exit(Roop, Rscratch, done); - } - ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // If hdr is NULL, we've got recursive locking and there's nothing more to do cmp(Rmark, 0); diff --git a/src/hotspot/cpu/arm/c2_MacroAssembler_arm.hpp b/src/hotspot/cpu/arm/c2_MacroAssembler_arm.hpp index 2f43e6e4e24d7255142e5750cea280f07bf0061f..c2fe811bf32e70612ae46c715ff56b838bf250e1 100644 --- a/src/hotspot/cpu/arm/c2_MacroAssembler_arm.hpp +++ b/src/hotspot/cpu/arm/c2_MacroAssembler_arm.hpp @@ -33,7 +33,7 @@ Register limit, Register result, Register chr1, Register chr2, Label& Ldone); - void fast_lock(Register obj, Register box, Register scratch, Register scratch2, Register scratch3 = noreg); + void fast_lock(Register obj, Register box, Register scratch, Register scratch2); void fast_unlock(Register obj, Register box, Register scratch, Register scratch2); #endif // CPU_ARM_C2_MACROASSEMBLER_ARM_HPP diff --git 
a/src/hotspot/cpu/arm/c2_globals_arm.hpp b/src/hotspot/cpu/arm/c2_globals_arm.hpp index 7754001dd0af8f65f2a8ea45b0a31cbf9d606487..57ed8f11c08b1c5f6ce98a2a67fa0241a1f6c2d7 100644 --- a/src/hotspot/cpu/arm/c2_globals_arm.hpp +++ b/src/hotspot/cpu/arm/c2_globals_arm.hpp @@ -34,7 +34,7 @@ define_pd_global(bool, BackgroundCompilation, true); define_pd_global(bool, CICompileOSR, true); -define_pd_global(bool, InlineIntrinsics, false); +define_pd_global(bool, InlineIntrinsics, true); define_pd_global(bool, PreferInterpreterNativeStubs, false); define_pd_global(bool, ProfileTraps, true); define_pd_global(bool, UseOnStackReplacement, true); @@ -45,9 +45,7 @@ define_pd_global(intx, CompileThreshold, 10000); define_pd_global(intx, OnStackReplacePercentage, 140); define_pd_global(intx, ConditionalMoveLimit, 4); // C2 gets to use all the float/double registers -define_pd_global(intx, FLOATPRESSURE, 30); define_pd_global(intx, FreqInlineSize, 175); -define_pd_global(intx, INTPRESSURE, 12); define_pd_global(intx, InteriorEntryAlignment, 16); // = CodeEntryAlignment define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K)); // The default setting 16/16 seems to work best. diff --git a/src/hotspot/cpu/arm/frame_arm.cpp b/src/hotspot/cpu/arm/frame_arm.cpp index 17841a8cf71522cb1082aa8897034f4cc65827e3..bd85b1b895412f231a5a8e520228d70609debd1e 100644 --- a/src/hotspot/cpu/arm/frame_arm.cpp +++ b/src/hotspot/cpu/arm/frame_arm.cpp @@ -313,6 +313,16 @@ frame frame::sender_for_entry_frame(RegisterMap* map) const { return fr; } +OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { + ShouldNotCallThis(); + return nullptr; +} + +bool frame::optimized_entry_frame_is_first() const { + ShouldNotCallThis(); + return false; +} + //------------------------------------------------------------------------------ // frame::verify_deopt_original_pc // @@ -564,7 +574,6 @@ frame::frame(void* sp, void* fp, void* pc) { init((intptr_t*)sp, (intptr_t*)fp, (address)pc); } -void frame::pd_ps() {} #endif intptr_t *frame::initial_deoptimization_info() { diff --git a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp index b67f39e2e6b65c2854757dcce95d91032bcae53d..63bf45b8db5d38e4e84f0028c4dcce73108032f9 100644 --- a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp @@ -332,7 +332,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier Register pre_val_reg = stub->pre_val()->as_register(); if (stub->do_load()) { - ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/); } __ cbz(pre_val_reg, *stub->continuation()); diff --git a/src/hotspot/cpu/arm/globals_arm.hpp b/src/hotspot/cpu/arm/globals_arm.hpp index bd7ef342944d55286ef155ce5a6e08ea93c106ae..9e135d493424d8c187629882b5aedb598577ee68 100644 --- a/src/hotspot/cpu/arm/globals_arm.hpp +++ b/src/hotspot/cpu/arm/globals_arm.hpp @@ -53,7 +53,6 @@ define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); -define_pd_global(intx, InlineFrequencyCount, 50); #if defined(COMPILER1) || defined(COMPILER2) define_pd_global(intx, InlineSmallCode, 1500); #endif 
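[Editorial note, not part of the original patch] The c2_globals_arm.hpp hunk above deletes the platform-specific define_pd_global entries for FLOATPRESSURE and INTPRESSURE; their defaults move into the Matcher::int_pressure_limit()/float_pressure_limit() hooks added in the arm.ad hunk earlier. A minimal sketch of the resulting pattern, assuming -1 remains the shared C2 flags' "no user override" sentinel:

    uint Matcher::int_pressure_limit()
    {
      // INTPRESSURE left at its default (-1) selects the historical ARM
      // threshold of 12; any other value is an explicit user override.
      return (INTPRESSURE == -1) ? 12 : INTPRESSURE;
    }

    uint Matcher::float_pressure_limit()
    {
      // Same sentinel convention for the FP register pressure threshold.
      return (FLOATPRESSURE == -1) ? 30 : FLOATPRESSURE;
    }

One cross-platform pair of flags thus replaces the per-port constants while preserving each port's tuned defaults.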
diff --git a/src/hotspot/cpu/arm/interp_masm_arm.cpp b/src/hotspot/cpu/arm/interp_masm_arm.cpp
index 48efb6a80364e74d0b4f39473391c98e42abd6b7..2562f51f6fef4ddbe69cf8ab44d6db283db05c93 100644
--- a/src/hotspot/cpu/arm/interp_masm_arm.cpp
+++ b/src/hotspot/cpu/arm/interp_masm_arm.cpp
@@ -40,7 +40,6 @@
 #include "prims/jvmtiExport.hpp"
 #include "prims/jvmtiThreadState.hpp"
 #include "runtime/basicLock.hpp"
-#include "runtime/biasedLocking.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/safepointMechanism.hpp"
 #include "runtime/sharedRuntime.hpp"
@@ -890,11 +889,6 @@ void InterpreterMacroAssembler::lock_object(Register Rlock) {
     b(slow_case, ne);
   }

-  if (UseBiasedLocking) {
-    biased_locking_enter(Robj, Rmark/*scratched*/, R0, false, Rtemp, done, slow_case);
-  }
-
-
   // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
   // That would be acceptable as either CAS or slow case path is taken in that case.
   // Exception to that is if the object is locked by the calling thread, then the recursive test will pass (guaranteed as
@@ -912,12 +906,6 @@ void InterpreterMacroAssembler::lock_object(Register Rlock) {

   cas_for_lock_acquire(Rmark, Rlock, Robj, Rtemp, slow_case);

-#ifndef PRODUCT
-  if (PrintBiasedLockingStatistics) {
-    cond_atomic_inc32(al, BiasedLocking::fast_path_entry_count_addr());
-  }
-#endif //!PRODUCT
-
   b(done);

   // If we got here that means the object is locked by either the calling thread or another thread.
@@ -962,13 +950,6 @@ void InterpreterMacroAssembler::lock_object(Register Rlock) {
   // If still 'eq' then recursive locking OK: store 0 into lock record
   str(R0, Address(Rlock, mark_offset), eq);
-
-#ifndef PRODUCT
-  if (PrintBiasedLockingStatistics) {
-    cond_atomic_inc32(eq, BiasedLocking::fast_path_entry_count_addr());
-  }
-#endif // !PRODUCT
-
   b(done, eq);

   bind(slow_case);
@@ -1010,10 +991,6 @@ void InterpreterMacroAssembler::unlock_object(Register Rlock) {

   // Free entry
   str(Rzero, Address(Rlock, obj_offset));

-  if (UseBiasedLocking) {
-    biased_locking_exit(Robj, Rmark, done);
-  }
-
   // Load the old header from BasicLock structure
   ldr(Rmark, Address(Rlock, mark_offset));
diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.cpp b/src/hotspot/cpu/arm/macroAssembler_arm.cpp
index 1896a940b19e74a9112982901f09399dfce2b602..12dfc1adf0d1c327702c2513ca3ea52a5686b290 100644
--- a/src/hotspot/cpu/arm/macroAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp
@@ -40,7 +40,6 @@
 #include "oops/accessDecorators.hpp"
 #include "oops/klass.inline.hpp"
 #include "prims/methodHandles.hpp"
-#include "runtime/biasedLocking.hpp"
 #include "runtime/interfaceSupport.inline.hpp"
 #include "runtime/jniHandles.hpp"
 #include "runtime/objectMonitor.hpp"
@@ -1288,221 +1287,6 @@ void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
 #endif // !PRODUCT

-
-// Building block for CAS cases of biased locking: makes CAS and records statistics.
-// The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
-void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg, - Register tmp, Label& slow_case, int* counter_addr) { - - cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case); -#ifdef ASSERT - breakpoint(ne); // Fallthrough only on success -#endif -#ifndef PRODUCT - if (counter_addr != NULL) { - cond_atomic_inc32(al, counter_addr); - } -#endif // !PRODUCT -} - -void MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg, - bool swap_reg_contains_mark, - Register tmp2, - Label& done, Label& slow_case, - BiasedLockingCounters* counters) { - // obj_reg must be preserved (at least) if the bias locking fails - // tmp_reg is a temporary register - // swap_reg was used as a temporary but contained a value - // that was used afterwards in some call pathes. Callers - // have been fixed so that swap_reg no longer needs to be - // saved. - // Rtemp in no longer scratched - - assert(UseBiasedLocking, "why call this otherwise?"); - assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2); - guarantee(swap_reg!=tmp_reg, "invariant"); - assert(tmp_reg != noreg, "must supply tmp_reg"); - -#ifndef PRODUCT - if (PrintBiasedLockingStatistics && (counters == NULL)) { - counters = BiasedLocking::counters(); - } -#endif - - assert(markWord::age_shift == markWord::lock_bits + markWord::biased_lock_bits, "biased locking makes assumptions about bit layout"); - Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes()); - - // Biased locking - // See whether the lock is currently biased toward our thread and - // whether the epoch is still valid - // Note that the runtime guarantees sufficient alignment of JavaThread - // pointers to allow age to be placed into low bits - // First check to see whether biasing is even enabled for this object - Label cas_label; - - if (!swap_reg_contains_mark) { - ldr(swap_reg, mark_addr); - } - - // On MP platform loads could return 'stale' values in some cases. - // That is acceptable since either CAS or slow case path is taken in the worst case. - - andr(tmp_reg, swap_reg, markWord::biased_lock_mask_in_place); - cmp(tmp_reg, markWord::biased_lock_pattern); - - b(cas_label, ne); - - // The bias pattern is present in the object's header. Need to check - // whether the bias owner and the epoch are both still current. - load_klass(tmp_reg, obj_reg); - ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); - orr(tmp_reg, tmp_reg, Rthread); - eor(tmp_reg, tmp_reg, swap_reg); - - bics(tmp_reg, tmp_reg, ((int) markWord::age_mask_in_place)); - -#ifndef PRODUCT - if (counters != NULL) { - cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr()); - } -#endif // !PRODUCT - - b(done, eq); - - Label try_revoke_bias; - Label try_rebias; - - // At this point we know that the header has the bias pattern and - // that we are not the bias owner in the current epoch. We need to - // figure out more details about the state of the header in order to - // know what operations can be legally performed on the object's - // header. - - // If the low three bits in the xor result aren't clear, that means - // the prototype header is no longer biased and we have to revoke - // the bias on this object. - tst(tmp_reg, markWord::biased_lock_mask_in_place); - b(try_revoke_bias, ne); - - // Biasing is still enabled for this data type. 
See whether the - // epoch of the current bias is still valid, meaning that the epoch - // bits of the mark word are equal to the epoch bits of the - // prototype header. (Note that the prototype header's epoch bits - // only change at a safepoint.) If not, attempt to rebias the object - // toward the current thread. Note that we must be absolutely sure - // that the current epoch is invalid in order to do this because - // otherwise the manipulations it performs on the mark word are - // illegal. - tst(tmp_reg, markWord::epoch_mask_in_place); - b(try_rebias, ne); - - // tmp_reg has the age, epoch and pattern bits cleared - // The remaining (owner) bits are (Thread ^ current_owner) - - // The epoch of the current bias is still valid but we know nothing - // about the owner; it might be set or it might be clear. Try to - // acquire the bias of the object using an atomic operation. If this - // fails we will go in to the runtime to revoke the object's bias. - // Note that we first construct the presumed unbiased header so we - // don't accidentally blow away another thread's valid bias. - - // Note that we know the owner is not ourself. Hence, success can - // only happen when the owner bits is 0 - - // until the assembler can be made smarter, we need to make some assumptions about the values - // so we can optimize this: - assert((markWord::biased_lock_mask_in_place | markWord::age_mask_in_place | markWord::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed"); - - mov(swap_reg, AsmOperand(swap_reg, lsl, 23)); - mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markWord with thread bits cleared (for CAS) - - orr(tmp_reg, swap_reg, Rthread); // new mark - - biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case, - (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL); - - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - - b(done); - - bind(try_rebias); - - // At this point we know the epoch has expired, meaning that the - // current "bias owner", if any, is actually invalid. Under these - // circumstances _only_, we are allowed to use the current header's - // value as the comparison value when doing the cas to acquire the - // bias in the current epoch. In other words, we allow transfer of - // the bias from one thread to another directly in this situation. - - // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg) - - eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) - - // owner bits 'random'. Set them to Rthread. - mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); - mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); - - orr(tmp_reg, tmp_reg, Rthread); // new mark - - biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case, - (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL); - - // If the biasing toward our thread failed, then another thread - // succeeded in biasing it toward itself and we need to revoke that - // bias. The revocation will occur in the runtime in the slow case. - - b(done); - - bind(try_revoke_bias); - - // The prototype mark in the klass doesn't have the bias bit set any - // more, indicating that objects of this data type are not supposed - // to be biased any more. 
We are going to try to reset the mark of - // this object to the prototype value and fall through to the - // CAS-based locking scheme. Note that if our CAS fails, it means - // that another thread raced us for the privilege of revoking the - // bias of this particular object, so it's okay to continue in the - // normal locking code. - - // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg) - - eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) - - // owner bits 'random'. Clear them - mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); - mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); - - biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label, - (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL); - - // Fall through to the normal CAS-based lock, because no matter what - // the result of the above CAS, some thread must have succeeded in - // removing the bias bit from the object's header. - - bind(cas_label); -} - - -void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) { - assert(UseBiasedLocking, "why call this otherwise?"); - - // Check for biased locking unlock case, which is a no-op - // Note: we do not have to check the thread ID for two reasons. - // First, the interpreter checks for IllegalMonitorStateException at - // a higher level. Second, if the bias was revoked while we held the - // lock, the object could not be rebiased toward another thread, so - // the bias bit would be clear. - ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - - andr(tmp_reg, tmp_reg, markWord::biased_lock_mask_in_place); - cmp(tmp_reg, markWord::biased_lock_pattern); - b(done, eq); -} - - void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) { diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.hpp b/src/hotspot/cpu/arm/macroAssembler_arm.hpp index a27a54e1c7119ece9d8ac08917fa57a6aac7f83f..b07782332dbc620f9a08182f7ac81382d6194027 100644 --- a/src/hotspot/cpu/arm/macroAssembler_arm.hpp +++ b/src/hotspot/cpu/arm/macroAssembler_arm.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,8 +28,6 @@ #include "code/relocInfo.hpp" #include "utilities/powerOfTwo.hpp" -class BiasedLockingCounters; - // Introduced AddressLiteral and its subclasses to ease portability from // x86 and avoid relocation issues class AddressLiteral { @@ -359,29 +357,6 @@ public: ShouldNotReachHere(); } - // Biased locking support - // lock_reg and obj_reg must be loaded up with the appropriate values. - // swap_reg must be supplied. - // tmp_reg must be supplied. - // Done label is branched to with condition code EQ set if the lock is - // biased and we acquired it. Slow case label is branched to with - // condition code NE set if the lock is biased but we failed to acquire - // it. Otherwise fall through. 
- // Notes: - // - swap_reg and tmp_reg are scratched - // - Rtemp was (implicitly) scratched and can now be specified as the tmp2 - void biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg, - bool swap_reg_contains_mark, - Register tmp2, - Label& done, Label& slow_case, - BiasedLockingCounters* counters = NULL); - void biased_locking_exit(Register obj_reg, Register temp_reg, Label& done); - - // Building block for CAS cases of biased locking: makes CAS and records statistics. - // Optional slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set. - void biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg, - Register tmp, Label& slow_case, int* counter_addr); - void resolve_jobject(Register value, Register tmp1, Register tmp2); void nop() { diff --git a/src/hotspot/cpu/arm/matcher_arm.hpp b/src/hotspot/cpu/arm/matcher_arm.hpp index 0d011a620f95d9c91a6de56f63f71ed846c01b7d..6254f4b33991d66069d47cf02aee04e1e7ed4e7d 100644 --- a/src/hotspot/cpu/arm/matcher_arm.hpp +++ b/src/hotspot/cpu/arm/matcher_arm.hpp @@ -56,6 +56,9 @@ // No support for generic vector operands. static const bool supports_generic_vector_operands = false; + // No support for 48 extra htbl entries in aes-gcm intrinsic + static const int htbl_entries = -1; + static constexpr bool isSimpleConstant64(jlong value) { // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. return false; @@ -131,6 +134,11 @@ return false; // not supported } + // Does the CPU supports vector constant rotate instructions? + static constexpr bool supports_vector_constant_rotates(int shift) { + return false; + } + // Does the CPU supports vector unsigned comparison instructions? static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { return false; @@ -147,4 +155,7 @@ return false; } + // Implements a variant of EncodeISOArrayNode that encode ASCII only + static const bool supports_encode_ascii_array = false; + #endif // CPU_ARM_MATCHER_ARM_HPP diff --git a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp index 58cbac7419c15478f3d6d47ff7b2e7d5707b4101..09ae8e0e80461fad65ee6af49a2dddea66e251ff 100644 --- a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp +++ b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp @@ -862,11 +862,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, assert(markWord::unlocked_value == 1, "adjust this code"); __ tbz(Rtemp, exact_log2(markWord::unlocked_value), slow_case); - if (UseBiasedLocking) { - assert(is_power_of_2(markWord::biased_lock_bit_in_place), "adjust this code"); - __ tbnz(Rtemp, exact_log2(markWord::biased_lock_bit_in_place), slow_case); - } - __ bics(Rtemp, Rtemp, ~markWord::hash_mask_in_place); __ mov(R0, AsmOperand(Rtemp, lsr, markWord::hash_shift), ne); __ bx(LR, ne); @@ -1151,17 +1146,13 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, const Register disp_hdr = altFP_7_11; const Register tmp = R8; - Label slow_lock, slow_lock_biased, lock_done, fast_lock; + Label slow_lock, lock_done, fast_lock; if (method->is_synchronized()) { // The first argument is a handle to sync object (a class or an instance) __ ldr(sync_obj, Address(R1)); // Remember the handle for the unlocking code __ mov(sync_handle, R1); - if(UseBiasedLocking) { - __ biased_locking_enter(sync_obj, tmp, disp_hdr/*scratched*/, false, Rtemp, lock_done, slow_lock_biased); - } - const Register mark = tmp; // On MP platforms the next load could return 
a 'stale' value if the memory location has been modified by another thread. // That would be acceptable as either CAS or slow case path is taken in that case @@ -1180,8 +1171,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // -2- test (hdr - SP) if the low two bits are 0 __ sub(Rtemp, mark, SP, eq); __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq); - // If still 'eq' then recursive locking OK: set displaced header to 0 - __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()), eq); + // If still 'eq' then recursive locking OK + // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8267042) + __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); __ b(lock_done, eq); __ b(slow_lock); @@ -1242,12 +1234,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, if (method->is_synchronized()) { __ ldr(sync_obj, Address(sync_handle)); - if(UseBiasedLocking) { - __ biased_locking_exit(sync_obj, Rtemp, unlock_done); - // disp_hdr may not have been saved on entry with biased locking - __ sub(disp_hdr, FP, lock_slot_fp_offset); - } - // See C1_MacroAssembler::unlock_object() for more comments __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); __ cbz(R2, unlock_done); @@ -1303,11 +1289,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, if (method->is_synchronized()) { // Locking slow case - if(UseBiasedLocking) { - __ bind(slow_lock_biased); - __ sub(disp_hdr, FP, lock_slot_fp_offset); - } - __ bind(slow_lock); push_param_registers(masm, fp_regs_in_arguments); diff --git a/src/hotspot/cpu/arm/stubGenerator_arm.cpp b/src/hotspot/cpu/arm/stubGenerator_arm.cpp index e9ebb14a9c9425329806182a752eab3607aea346..a202a7f09490b6a3dddf009753186477f2f1d3dc 100644 --- a/src/hotspot/cpu/arm/stubGenerator_arm.cpp +++ b/src/hotspot/cpu/arm/stubGenerator_arm.cpp @@ -635,17 +635,17 @@ class StubGenerator: public StubCodeGenerator { Register result_hi = R1; Register src = R0; - if (!os::is_MP()) { - __ ldmia(src, RegisterSet(result_lo, result_hi)); - __ bx(LR); - } else if (VM_Version::supports_ldrexd()) { + if (VM_Version::supports_ldrexd()) { __ ldrexd(result_lo, Address(src)); __ clrex(); // FIXME: safe to remove? - __ bx(LR); + } else if (!os::is_MP()) { + // Last-ditch attempt: we are allegedly running on uni-processor. + // Load the thing non-atomically and hope for the best. + __ ldmia(src, RegisterSet(result_lo, result_hi)); } else { __ stop("Atomic load(jlong) unsupported on this platform"); - __ bx(LR); } + __ bx(LR); return start; } @@ -662,10 +662,7 @@ class StubGenerator: public StubCodeGenerator { Register scratch_hi = R3; /* After load from stack */ Register result = R3; - if (!os::is_MP()) { - __ stmia(dest, RegisterSet(newval_lo, newval_hi)); - __ bx(LR); - } else if (VM_Version::supports_ldrexd()) { + if (VM_Version::supports_ldrexd()) { __ mov(Rtemp, dest); // get dest to Rtemp Label retry; __ bind(retry); @@ -673,11 +670,14 @@ class StubGenerator: public StubCodeGenerator { __ strexd(result, R0, Address(Rtemp)); __ rsbs(result, result, 1); __ b(retry, eq); - __ bx(LR); + } else if (!os::is_MP()) { + // Last-ditch attempt: we are allegedly running on uni-processor. + // Store the thing non-atomically and hope for the best. 
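+      // (Editorial note: on a genuine uniprocessor no other core can observe
+      //  a torn value; the two word stores are still not atomic with respect
+      //  to preemption on the same core, hence "hope for the best".)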
+ __ stmia(dest, RegisterSet(newval_lo, newval_hi)); } else { __ stop("Atomic store(jlong) unsupported on this platform"); - __ bx(LR); } + __ bx(LR); return start; } diff --git a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp index 2891532d9bb3e7dd121b01e44bca3c9545b50ab9..fa8d9e7fb587d6eaa91b62eb918350758c46b109 100644 --- a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp +++ b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp @@ -124,14 +124,114 @@ address TemplateInterpreterGenerator::generate_abstract_entry(void) { address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { if (!InlineIntrinsics) return NULL; // Generate a vanilla entry - // TODO: ARM - return NULL; + address entry_point = NULL; + Register continuation = LR; + bool use_runtime_call = false; + switch (kind) { + case Interpreter::java_lang_math_abs: + entry_point = __ pc(); +#ifdef __SOFTFP__ + use_runtime_call = true; + __ ldrd(R0, Address(SP)); +#else // !__SOFTFP__ + __ ldr_double(D0, Address(SP)); + __ abs_double(D0, D0); +#endif // __SOFTFP__ + break; + case Interpreter::java_lang_math_sqrt: + entry_point = __ pc(); +#ifdef __SOFTFP__ + use_runtime_call = true; + __ ldrd(R0, Address(SP)); +#else // !__SOFTFP__ + __ ldr_double(D0, Address(SP)); + __ sqrt_double(D0, D0); +#endif // __SOFTFP__ + break; + case Interpreter::java_lang_math_sin: + case Interpreter::java_lang_math_cos: + case Interpreter::java_lang_math_tan: + case Interpreter::java_lang_math_log: + case Interpreter::java_lang_math_log10: + case Interpreter::java_lang_math_exp: + entry_point = __ pc(); + use_runtime_call = true; +#ifdef __SOFTFP__ + __ ldrd(R0, Address(SP)); +#else // !__SOFTFP__ + __ ldr_double(D0, Address(SP)); +#endif // __SOFTFP__ + break; + case Interpreter::java_lang_math_pow: + entry_point = __ pc(); + use_runtime_call = true; +#ifdef __SOFTFP__ + __ ldrd(R0, Address(SP, 2 * Interpreter::stackElementSize)); + __ ldrd(R2, Address(SP)); +#else // !__SOFTFP__ + __ ldr_double(D0, Address(SP, 2 * Interpreter::stackElementSize)); + __ ldr_double(D1, Address(SP)); +#endif // __SOFTFP__ + break; + case Interpreter::java_lang_math_fmaD: + case Interpreter::java_lang_math_fmaF: + // TODO: Implement intrinsic + break; + default: + ShouldNotReachHere(); + } - address entry_point = __ pc(); - STOP("generate_math_entry"); + if (entry_point != NULL) { + __ mov(SP, Rsender_sp); + if (use_runtime_call) { + __ mov(Rtmp_save0, LR); + continuation = Rtmp_save0; + generate_math_runtime_call(kind); + } + __ ret(continuation); + } return entry_point; } +void TemplateInterpreterGenerator::generate_math_runtime_call(AbstractInterpreter::MethodKind kind) { + address fn; + switch (kind) { +#ifdef __SOFTFP__ + case Interpreter::java_lang_math_abs: + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dabs); + break; + case Interpreter::java_lang_math_sqrt: + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt); + break; +#endif // __SOFTFP__ + case Interpreter::java_lang_math_sin: + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); + break; + case Interpreter::java_lang_math_cos: + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); + break; + case Interpreter::java_lang_math_tan: + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); + break; + case Interpreter::java_lang_math_log: + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); + break; + case Interpreter::java_lang_math_log10: + fn = CAST_FROM_FN_PTR(address, 
SharedRuntime::dlog10); + break; + case Interpreter::java_lang_math_exp: + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); + break; + case Interpreter::java_lang_math_pow: + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); + break; + default: + ShouldNotReachHere(); + fn = NULL; // silence "maybe uninitialized" compiler warnings + } + __ call_VM_leaf(fn); +} + address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { address entry = __ pc(); diff --git a/src/hotspot/cpu/arm/templateTable_arm.cpp b/src/hotspot/cpu/arm/templateTable_arm.cpp index 30649a5e10470241cf9349bd25a326be926ee3dd..85bf0ce812ee6adb55dbae877bbccbb32938bfe1 100644 --- a/src/hotspot/cpu/arm/templateTable_arm.cpp +++ b/src/hotspot/cpu/arm/templateTable_arm.cpp @@ -490,29 +490,30 @@ void TemplateTable::ldc2_w() { __ add(Rtemp, Rtags, tags_offset); __ ldrb(Rtemp, Address(Rtemp, Rindex)); - Label Condy, exit; -#ifdef __ABI_HARD__ - Label NotDouble; + Label Done, NotLong, NotDouble; __ cmp(Rtemp, JVM_CONSTANT_Double); __ b(NotDouble, ne); +#ifdef __SOFTFP__ + __ ldr(R0_tos_lo, Address(Rbase, base_offset + 0 * wordSize)); + __ ldr(R1_tos_hi, Address(Rbase, base_offset + 1 * wordSize)); +#else // !__SOFTFP__ __ ldr_double(D0_tos, Address(Rbase, base_offset)); - +#endif // __SOFTFP__ __ push(dtos); - __ b(exit); + __ b(Done); __ bind(NotDouble); -#endif __ cmp(Rtemp, JVM_CONSTANT_Long); - __ b(Condy, ne); + __ b(NotLong, ne); __ ldr(R0_tos_lo, Address(Rbase, base_offset + 0 * wordSize)); __ ldr(R1_tos_hi, Address(Rbase, base_offset + 1 * wordSize)); __ push(ltos); - __ b(exit); + __ b(Done); + __ bind(NotLong); - __ bind(Condy); - condy_helper(exit); + condy_helper(Done); - __ bind(exit); + __ bind(Done); } @@ -3967,11 +3968,7 @@ void TemplateTable::_new() { // initialize object header only. __ bind(initialize_header); - if (UseBiasedLocking) { - __ ldr(Rtemp, Address(Rklass, Klass::prototype_header_offset())); - } else { - __ mov_slow(Rtemp, (intptr_t)markWord::prototype().value()); - } + __ mov_slow(Rtemp, (intptr_t)markWord::prototype().value()); // mark __ str(Rtemp, Address(Robj, oopDesc::mark_offset_in_bytes())); diff --git a/src/hotspot/cpu/arm/vm_version_arm.hpp b/src/hotspot/cpu/arm/vm_version_arm.hpp index 8b50d6d21e0640b271857ba522bb2e00bd68612b..a6e2dc8b442198115b3f4ad62fa36aefa16c4053 100644 --- a/src/hotspot/cpu/arm/vm_version_arm.hpp +++ b/src/hotspot/cpu/arm/vm_version_arm.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -94,8 +94,6 @@ class VM_Version: public Abstract_VM_Version { static bool supports_compare_and_exchange() { return true; } static bool supports_kuser_cmpxchg32() { return _kuser_helper_version >= KUSER_VERSION_CMPXCHG32; } static bool supports_kuser_cmpxchg64() { return _kuser_helper_version >= KUSER_VERSION_CMPXCHG64; } - // Override Abstract_VM_Version implementation - static bool use_biased_locking(); static bool has_vfp() { return (_features & vfp_m) != 0; } static bool has_vfp3_32() { return (_features & vfp3_32_m) != 0; } diff --git a/src/hotspot/cpu/arm/vm_version_arm_32.cpp b/src/hotspot/cpu/arm/vm_version_arm_32.cpp index e6fd8b986685530d45b720bf803c36916531e144..73d64d02fe790027a3b3db27fcf488258b1c9b7b 100644 --- a/src/hotspot/cpu/arm/vm_version_arm_32.cpp +++ b/src/hotspot/cpu/arm/vm_version_arm_32.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -348,16 +348,3 @@ void VM_Version::initialize() { _is_initialized = true; } - -bool VM_Version::use_biased_locking() { - get_os_cpu_info(); - // The cost of CAS on uniprocessor ARM v6 and later is low compared to the - // overhead related to slightly longer Biased Locking execution path. - // Testing shows no improvement when running with Biased Locking enabled - // on an ARMv6 and higher uniprocessor systems. The situation is different on - // ARMv5 and MP systems. - // - // Therefore the Biased Locking is enabled on ARMv5 and ARM MP only. - // - return (!os::is_MP() && (arm_arch() > 5)) ? false : true; -} diff --git a/src/hotspot/cpu/ppc/assembler_ppc.cpp b/src/hotspot/cpu/ppc/assembler_ppc.cpp index 6a6be870991b846853175d11ec98e77a00177315..ff6788f624115eeeb6b4655b2d294452a14c61b7 100644 --- a/src/hotspot/cpu/ppc/assembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/assembler_ppc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2015 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -30,7 +30,6 @@ #include "interpreter/interpreter.hpp" #include "memory/resourceArea.hpp" #include "prims/methodHandles.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/objectMonitor.hpp" #include "runtime/os.hpp" diff --git a/src/hotspot/cpu/ppc/assembler_ppc.hpp b/src/hotspot/cpu/ppc/assembler_ppc.hpp index ba5c55f6b8a565e360a2388fddb54ff0beb4100e..2f4287a9553d1bbff29a87f77bf94e7bbff9dd12 100644 --- a/src/hotspot/cpu/ppc/assembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/assembler_ppc.hpp @@ -47,6 +47,9 @@ class Address { Address(Register b, address d = 0) : _base(b), _index(noreg), _disp((intptr_t)d) {} + Address(Register b, ByteSize d) + : _base(b), _index(noreg), _disp((intptr_t)d) {} + Address(Register b, intptr_t d) : _base(b), _index(noreg), _disp(d) {} @@ -576,6 +579,7 @@ class Assembler : public AbstractAssembler { XVNMSUBASP_OPCODE=(60u<< OPCODE_SHIFT | 209u << 3), XVNMSUBADP_OPCODE=(60u<< OPCODE_SHIFT | 241u << 3), XVRDPI_OPCODE = (60u << OPCODE_SHIFT | 201u << 2), + XVRDPIC_OPCODE = (60u << OPCODE_SHIFT | 235u << 2), XVRDPIM_OPCODE = (60u << OPCODE_SHIFT | 249u << 2), XVRDPIP_OPCODE = (60u << OPCODE_SHIFT | 233u << 2), @@ -823,6 +827,10 @@ class Assembler : public AbstractAssembler { // Prefixed addi/li PADDI_PREFIX_OPCODE = PREFIX_PRIMARY_OPCODE | (2u << PRE_TYPE_SHIFT), PADDI_SUFFIX_OPCODE = ADDI_OPCODE, + + // xxpermx + XXPERMX_PREFIX_OPCODE = PREFIX_PRIMARY_OPCODE | (1u << PRE_TYPE_SHIFT), + XXPERMX_SUFFIX_OPCODE = (34u << OPCODE_SHIFT), }; // Trap instructions TO bits @@ -2347,6 +2355,7 @@ class Assembler : public AbstractAssembler { inline void mtvrd( VectorRegister d, Register a); inline void mfvrd( Register a, VectorRegister d); inline void xxperm( VectorSRegister d, VectorSRegister a, VectorSRegister b); + inline void xxpermx( VectorSRegister d, VectorSRegister a, VectorSRegister b, VectorSRegister c, int ui3); inline void xxpermdi( VectorSRegister d, VectorSRegister a, VectorSRegister b, int dm); inline void xxmrghw( VectorSRegister d, VectorSRegister a, VectorSRegister b); inline void xxmrglw( VectorSRegister d, VectorSRegister a, VectorSRegister b); @@ -2384,6 +2393,7 @@ class Assembler : public AbstractAssembler { inline void xvnmsubasp(VectorSRegister d, VectorSRegister a, VectorSRegister b); inline void xvnmsubadp(VectorSRegister d, VectorSRegister a, VectorSRegister b); inline void xvrdpi( VectorSRegister d, VectorSRegister b); + inline void xvrdpic( VectorSRegister d, VectorSRegister b); inline void xvrdpim( VectorSRegister d, VectorSRegister b); inline void xvrdpip( VectorSRegister d, VectorSRegister b); diff --git a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp index 79709d01bf19c1174223acf7a9f2faab9369771c..2f28ac067517e825ab7914ba00eb4a7b3538a83e 100644 --- a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp +++ b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp @@ -142,6 +142,11 @@ inline void Assembler::paddi_r0ok(Register d, Register a, long si34, bool r = fa emit_int32(PADDI_SUFFIX_OPCODE | rt(d) | ra(a) | d1_eo(si34)); } +inline void Assembler::xxpermx( VectorSRegister d, VectorSRegister a, VectorSRegister b, VectorSRegister c, int ui3) { + emit_int32(XXPERMX_PREFIX_OPCODE | uimm(ui3, 3)); + emit_int32(XXPERMX_SUFFIX_OPCODE | vsrt(d) | vsra(a) | vsrb(b) | vsrc(c)); +} + // Fixed-Point Arithmetic Instructions with Overflow detection inline void Assembler::addo( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); 
} inline void Assembler::addo_( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } @@ -848,6 +853,7 @@ inline void Assembler::xvmsubadp( VectorSRegister d, VectorSRegister a, VectorSR inline void Assembler::xvnmsubasp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVNMSUBASP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); } inline void Assembler::xvnmsubadp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVNMSUBADP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); } inline void Assembler::xvrdpi( VectorSRegister d, VectorSRegister b) { emit_int32( XVRDPI_OPCODE | vsrt(d) | vsrb(b)); } +inline void Assembler::xvrdpic( VectorSRegister d, VectorSRegister b) { emit_int32( XVRDPIC_OPCODE | vsrt(d) | vsrb(b)); } inline void Assembler::xvrdpim( VectorSRegister d, VectorSRegister b) { emit_int32( XVRDPIM_OPCODE | vsrt(d) | vsrb(b)); } inline void Assembler::xvrdpip( VectorSRegister d, VectorSRegister b) { emit_int32( XVRDPIP_OPCODE | vsrt(d) | vsrb(b)); } diff --git a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp index 8eedca7886cbd729b9a5e0ba45affe46471267b7..65df14c2c8a57f4373cab0038468dfbf5e37a167 100644 --- a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp @@ -491,12 +491,14 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) { ce->verify_oop_map(info()); #ifndef PRODUCT - const address counter = (address)&Runtime1::_arraycopy_slowcase_cnt; - const Register tmp = R3, tmp2 = R4; - int simm16_offs = __ load_const_optimized(tmp, counter, tmp2, true); - __ lwz(tmp2, simm16_offs, tmp); - __ addi(tmp2, tmp2, 1); - __ stw(tmp2, simm16_offs, tmp); + if (PrintC1Statistics) { + const address counter = (address)&Runtime1::_arraycopy_slowcase_cnt; + const Register tmp = R3, tmp2 = R4; + int simm16_offs = __ load_const_optimized(tmp, counter, tmp2, true); + __ lwz(tmp2, simm16_offs, tmp); + __ addi(tmp2, tmp2, 1); + __ stw(tmp2, simm16_offs, tmp); + } #endif __ b(_continuation); diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp index af9a856ae087d56950880cae67147fe768b35768..831e2bdfe930df2de90bf2fdba5905dd9854780f 100644 --- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp @@ -714,7 +714,7 @@ void LIR_Assembler::explicit_null_check(Register addr, CodeEmitInfo* info) { // Attention: caller must encode oop if needed -int LIR_Assembler::store(LIR_Opr from_reg, Register base, int offset, BasicType type, bool wide, bool unaligned) { +int LIR_Assembler::store(LIR_Opr from_reg, Register base, int offset, BasicType type, bool wide) { int store_offset; if (!Assembler::is_simm16(offset)) { // For offsets larger than a simm16 we setup the offset. @@ -794,7 +794,7 @@ int LIR_Assembler::store(LIR_Opr from_reg, Register base, Register disp, BasicTy } -int LIR_Assembler::load(Register base, int offset, LIR_Opr to_reg, BasicType type, bool wide, bool unaligned) { +int LIR_Assembler::load(Register base, int offset, LIR_Opr to_reg, BasicType type, bool wide) { int load_offset; if (!Assembler::is_simm16(offset)) { // For offsets larger than a simm16 we setup the offset. 
@@ -965,7 +965,7 @@ void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmi offset = store(tmp, base, addr->index()->as_pointer_register(), type, wide); } else { assert(Assembler::is_simm16(addr->disp()), "can't handle larger addresses"); - offset = store(tmp, base, addr->disp(), type, wide, false); + offset = store(tmp, base, addr->disp(), type, wide); } if (info != NULL) { @@ -1120,7 +1120,7 @@ Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { void LIR_Assembler::mem2reg(LIR_Opr src_opr, LIR_Opr dest, BasicType type, - LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool unaligned) { + LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide) { assert(type != T_METADATA, "load of metadata ptr not supported"); LIR_Address* addr = src_opr->as_address_ptr(); @@ -1170,9 +1170,8 @@ void LIR_Assembler::mem2reg(LIR_Opr src_opr, LIR_Opr dest, BasicType type, if (disp_reg == noreg) { assert(Assembler::is_simm16(disp_value), "should have set this up"); - offset = load(src, disp_value, to_reg, type, wide, unaligned); + offset = load(src, disp_value, to_reg, type, wide); } else { - assert(!unaligned, "unexpected"); offset = load(src, disp_reg, to_reg, type, wide); } @@ -1193,8 +1192,7 @@ void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { addr = frame_map()->address_for_double_slot(src->double_stack_ix()); } - bool unaligned = addr.disp() % 8 != 0; - load(addr.base(), addr.disp(), dest, dest->type(), true /*wide*/, unaligned); + load(addr.base(), addr.disp(), dest, dest->type(), true /*wide*/); } @@ -1205,8 +1203,8 @@ void LIR_Assembler::reg2stack(LIR_Opr from_reg, LIR_Opr dest, BasicType type, bo } else if (dest->is_double_word()) { addr = frame_map()->address_for_slot(dest->double_stack_ix()); } - bool unaligned = addr.disp() % 8 != 0; - store(from_reg, addr.base(), addr.disp(), from_reg->type(), true /*wide*/, unaligned); + + store(from_reg, addr.base(), addr.disp(), from_reg->type(), true /*wide*/); } @@ -1242,7 +1240,7 @@ void LIR_Assembler::reg2reg(LIR_Opr from_reg, LIR_Opr to_reg) { void LIR_Assembler::reg2mem(LIR_Opr from_reg, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, - bool wide, bool unaligned) { + bool wide) { assert(type != T_METADATA, "store of metadata ptr not supported"); LIR_Address* addr = dest->as_address_ptr(); @@ -1299,9 +1297,8 @@ void LIR_Assembler::reg2mem(LIR_Opr from_reg, LIR_Opr dest, BasicType type, if (disp_reg == noreg) { assert(Assembler::is_simm16(disp_value), "should have set this up"); - offset = store(from_reg, src, disp_value, type, wide, unaligned); + offset = store(from_reg, src, disp_value, type, wide); } else { - assert(!unaligned, "unexpected"); offset = store(from_reg, src, disp_reg, type, wide); } diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp index 861430b79ebad117c28dd84ba7a562c985160c98..1cb40d63fdd13cb20d149559c693f4389aaed226 100644 --- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp @@ -38,10 +38,10 @@ void explicit_null_check(Register addr, CodeEmitInfo* info); - int store(LIR_Opr from_reg, Register base, int offset, BasicType type, bool wide, bool unaligned); + int store(LIR_Opr from_reg, Register base, int offset, BasicType type, bool wide); int store(LIR_Opr from_reg, Register base, Register disp, BasicType type, bool wide); - int load(Register base, int offset, LIR_Opr to_reg, BasicType type, bool wide, bool unaligned); + int 
load(Register base, int offset, LIR_Opr to_reg, BasicType type, bool wide);
   int load(Register base, Register disp, LIR_Opr to_reg, BasicType type, bool wide);

   int shift_amount(BasicType t);
diff --git a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp
index 2c685920367ed6244eb40e685a10e79b9ce10b15..6d71c87d3e73e4a583d0da00e59d9940846b1532 100644
--- a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp
@@ -309,12 +309,7 @@ bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result
 void LIRGenerator::store_stack_parameter(LIR_Opr item, ByteSize offset_from_sp) {
   BasicType t = item->type();
   LIR_Opr sp_opr = FrameMap::SP_opr;
-  if ((t == T_LONG || t == T_DOUBLE) &&
-      (in_bytes(offset_from_sp) % 8 != 0)) {
-    __ unaligned_move(item, new LIR_Address(sp_opr, in_bytes(offset_from_sp), t));
-  } else {
-    __ move(item, new LIR_Address(sp_opr, in_bytes(offset_from_sp), t));
-  }
+  __ move(item, new LIR_Address(sp_opr, in_bytes(offset_from_sp), t));
 }
diff --git a/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
index 47cee69549021913ebf7afc45cbe90e5957edfa7..2e7fb125eb3b6640ddb7587c8654a5bdae14e22c 100644
--- a/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
@@ -33,7 +33,6 @@
 #include "oops/arrayOop.hpp"
 #include "oops/markWord.hpp"
 #include "runtime/basicLock.hpp"
-#include "runtime/biasedLocking.hpp"
 #include "runtime/os.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
@@ -115,10 +114,6 @@ void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox
     bne(CCR0, slow_int);
   }

-  if (UseBiasedLocking) {
-    biased_locking_enter(CCR0, Roop, Rmark, Rscratch, R0, done, &slow_int);
-  }
-
   // ... and mark it unlocked.
   ori(Rmark, Rmark, markWord::unlocked_value);

@@ -164,21 +159,14 @@ void C1_MacroAssembler::unlock_object(Register Rmark, Register Roop, Register Rb
   Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
   assert(mark_addr.disp() == 0, "cas must take a zero displacement");

-  if (UseBiasedLocking) {
-    // Load the object out of the BasicObjectLock.
-    ld(Roop, BasicObjectLock::obj_offset_in_bytes(), Rbox);
-    verify_oop(Roop, FILE_AND_LINE);
-    biased_locking_exit(CCR0, Roop, R0, done);
-  }
   // Test first if it is a fast recursive unlock.
   ld(Rmark, BasicLock::displaced_header_offset_in_bytes(), Rbox);
   cmpdi(CCR0, Rmark, 0);
   beq(CCR0, done);
-  if (!UseBiasedLocking) {
-    // Load object.
-    ld(Roop, BasicObjectLock::obj_offset_in_bytes(), Rbox);
-    verify_oop(Roop, FILE_AND_LINE);
-  }
+
+  // Load object.
+  ld(Roop, BasicObjectLock::obj_offset_in_bytes(), Rbox);
+  verify_oop(Roop, FILE_AND_LINE);

   // Check if it is still a lightweight lock, this is true if we see
   // the stack address of the basicLock in the markWord of the object.
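With biased locking removed, the unlock path above reduces to two simple checks. A hedged plain-C++ sketch of that logic (hypothetical struct and helper names, for illustration only; the authoritative version is the PPC assembly in the hunk above):

#include <cstdint>

// A stack lock is recursive if its displaced header slot is zero; otherwise the
// object is still lightweight-locked only if its mark word holds the stack
// address of this BasicLock slot.
struct BasicLockSketch {
  uintptr_t displaced_header;  // saved mark word, or 0 for a recursive lock
};

inline bool is_recursive_unlock(const BasicLockSketch* lock) {
  return lock->displaced_header == 0;
}

inline bool still_lightweight_locked(uintptr_t mark_word, const BasicLockSketch* lock) {
  return mark_word == reinterpret_cast<uintptr_t>(lock);
}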
@@ -222,11 +210,7 @@ void C1_MacroAssembler::try_allocate(
 void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) {
   assert_different_registers(obj, klass, len, t1, t2);

-  if (UseBiasedLocking && !len->is_valid()) {
-    ld(t1, in_bytes(Klass::prototype_header_offset()), klass);
-  } else {
-    load_const_optimized(t1, (intx)markWord::prototype().value());
-  }
+  load_const_optimized(t1, (intx)markWord::prototype().value());
   std(t1, oopDesc::mark_offset_in_bytes(), obj);
   store_klass(obj, klass);
   if (len->is_valid()) {
diff --git a/src/hotspot/cpu/ppc/c2_globals_ppc.hpp b/src/hotspot/cpu/ppc/c2_globals_ppc.hpp
index bb103cdf6091b8b05f29a9c6ae38efc8083f66b4..00a92ff6b6251dbd57b12d80b517b4cf9d63ac32 100644
--- a/src/hotspot/cpu/ppc/c2_globals_ppc.hpp
+++ b/src/hotspot/cpu/ppc/c2_globals_ppc.hpp
@@ -44,10 +44,8 @@ define_pd_global(intx, CompileThreshold, 10000);
 define_pd_global(intx, OnStackReplacePercentage, 140);
 define_pd_global(intx, ConditionalMoveLimit, 3);
-define_pd_global(intx, FLOATPRESSURE, 28);
 define_pd_global(intx, FreqInlineSize, 175);
 define_pd_global(intx, MinJumpTableSize, 10);
-define_pd_global(intx, INTPRESSURE, 26);
 define_pd_global(intx, InteriorEntryAlignment, 16);
 define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
 define_pd_global(intx, RegisterCostAreaRatio, 16000);
diff --git a/src/hotspot/cpu/ppc/frame_ppc.cpp b/src/hotspot/cpu/ppc/frame_ppc.cpp
index 0f4e34dc9e74ecad5ad166715f21e479163290c9..870345789d6112339549d9964ebe4e7c9da3b531 100644
--- a/src/hotspot/cpu/ppc/frame_ppc.cpp
+++ b/src/hotspot/cpu/ppc/frame_ppc.cpp
@@ -52,7 +52,6 @@ void RegisterMap::check_location_valid() {
 #endif // ASSERT

 bool frame::safe_for_sender(JavaThread *thread) {
-  bool safe = false;
   address sp = (address)_sp;
   address fp = (address)_fp;
   address unextended_sp = (address)_unextended_sp;
@@ -70,28 +69,23 @@ bool frame::safe_for_sender(JavaThread *thread) {
   // An fp must be within the stack and above (but not equal) sp.
   bool fp_safe = thread->is_in_stack_range_excl(fp, sp);

-  // An interpreter fp must be within the stack and above (but not equal) sp.
-  // Moreover, it must be at least the size of the ijava_state structure.
+  // An interpreter fp must be fp_safe.
+  // Moreover, it must be at a distance of at least the size of the ijava_state structure.
   bool fp_interp_safe = fp_safe && ((fp - sp) >= ijava_state_size);

   // We know sp/unextended_sp are safe, only fp is questionable here

   // If the current frame is known to the code cache then we can attempt to
-  // to construct the sender and do some validation of it. This goes a long way
+  // construct the sender and do some validation of it. This goes a long way
   // toward eliminating issues when we get in frame construction code

   if (_cb != NULL ){
-    // Entry frame checks
-    if (is_entry_frame()) {
-      // An entry frame must have a valid fp.
-      return fp_safe && is_entry_frame_valid(thread);
-    }

-    // Now check if the frame is complete and the test is
-    // reliable. Unfortunately we can only check frame completeness for
-    // runtime stubs and nmethods. Other generic buffer blobs are more
-    // problematic so we just assume they are OK. Adapter blobs never have a
-    // complete frame and are never OK
+    // First check if the frame is complete and the test is reliable.
+    // Unfortunately we can only check frame completeness for runtime stubs
+    // and nmethods. Other generic buffer blobs are more problematic
+    // so we just assume they are OK.
+    // Adapter blobs never have a complete frame and are never OK
     if (!_cb->is_frame_complete_at(_pc)) {
       if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) {
         return false;
@@ -103,10 +97,23 @@ bool frame::safe_for_sender(JavaThread *thread) {
       return false;
     }

+    // Entry frame checks
+    if (is_entry_frame()) {
+      // An entry frame must have a valid fp.
+      return fp_safe && is_entry_frame_valid(thread);
+    }
+
     if (is_interpreted_frame() && !fp_interp_safe) {
       return false;
     }

+    // At this point, there still is a chance that fp_safe is false.
+    // In particular, (fp == NULL) might be true. So let's check and
+    // bail out before we actually dereference from fp.
+    if (!fp_safe) {
+      return false;
+    }
+
     abi_minframe* sender_abi = (abi_minframe*) fp;
     intptr_t* sender_sp = (intptr_t*) fp;
     address sender_pc = (address) sender_abi->lr;
@@ -197,6 +204,16 @@ frame frame::sender_for_entry_frame(RegisterMap *map) const {
   return fr;
 }

+OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const {
+  ShouldNotCallThis();
+  return nullptr;
+}
+
+bool frame::optimized_entry_frame_is_first() const {
+  ShouldNotCallThis();
+  return false;
+}
+
 frame frame::sender_for_interpreter_frame(RegisterMap *map) const {
   // Pass callers initial_caller_sp as unextended_sp.
   return frame(sender_sp(), sender_pc(), (intptr_t*)get_ijava_state()->sender_sp);
@@ -370,5 +387,4 @@ frame::frame(void* sp, void* fp, void* pc) : _sp((intptr_t*)sp), _unextended_sp(
   find_codeblob_and_set_pc_and_deopt_state((address)pc); // also sets _fp and adjusts _unextended_sp
 }

-void frame::pd_ps() {}
 #endif
diff --git a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp
index a95197aeed772e37677cfbafb42f0d45fb9c9ad4..a9328e8d616705b0e2caa8e40c254b5e46816a10 100644
--- a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp
@@ -393,7 +393,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
   Register pre_val_reg = stub->pre_val()->as_register();

   if (stub->do_load()) {
-    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
+    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
   }

   __ cmpdi(CCR0, pre_val_reg, 0);
diff --git a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp
index 800b34e4ba73663e206807a0c0ff7b489612c296..3758cc2fcf7627a618571efeb88589494092d0e5 100644
--- a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp
@@ -111,16 +111,28 @@ void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators,
   }
 }

+// Generic implementation. GCs can provide an optimized one.
 void BarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value,
                                           Register tmp1, Register tmp2,
                                           MacroAssembler::PreservationLevel preservation_level) {
-  Label done;
+  Label done, not_weak, verify;
   __ cmpdi(CCR0, value, 0);
   __ beq(CCR0, done);         // Use NULL as-is.

-  __ clrrdi(tmp1, value, JNIHandles::weak_tag_size);
-  __ ld(value, 0, tmp1);      // Resolve (untagged) jobject.
+  __ andi_(tmp1, value, JNIHandles::weak_tag_mask);
+  __ beq(CCR0, not_weak);     // Test for jweak tag.
+  // Resolve (untagged) jobject.
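The jweak test above relies on the usual jobject tagging scheme: the low bit of the handle distinguishes weak from strong references. A minimal plain-C++ sketch, assuming the weak-tag constants of this JDK era (weak_tag_mask == 1); the real code routes the weak case through a load with ON_PHANTOM_OOP_REF semantics, as the hunk continues below:

#include <cstdint>

constexpr uintptr_t kWeakTagMask = 1;  // assumed value of JNIHandles::weak_tag_mask

// NULL passes through, the low tag bit selects weak vs. strong handling,
// and the untagged handle slot is then dereferenced.
inline uintptr_t resolve_jobject_sketch(uintptr_t handle) {
  if (handle == 0) {
    return 0;                                         // use NULL as-is
  }
  const uintptr_t untagged = handle & ~kWeakTagMask;  // strip the tag bit
  if ((handle & kWeakTagMask) != 0) {
    // jweak: needs phantom-reference load semantics in the VM
    return *reinterpret_cast<const uintptr_t*>(untagged);
  }
  // strong handle: a plain native load suffices
  return *reinterpret_cast<const uintptr_t*>(untagged);
}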
+ __ clrrdi(value, value, JNIHandles::weak_tag_size); + load_at(masm, IN_NATIVE | ON_PHANTOM_OOP_REF, T_OBJECT, + value, (intptr_t)0, value, tmp1, tmp2, preservation_level); + __ b(verify); + + __ bind(not_weak); + load_at(masm, IN_NATIVE, T_OBJECT, + value, (intptr_t)0, value, tmp1, tmp2, preservation_level); + + __ bind(verify); __ verify_oop(value, FILE_AND_LINE); __ bind(done); } @@ -178,7 +190,7 @@ void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler *masm, Register tmp1, __ ld(tmp1_class_loader_data, in_bytes(InstanceKlass::class_loader_data_offset()), tmp1); // Fast path: If class loader is strong, the holder cannot be unloaded. - __ ld(tmp2, in_bytes(ClassLoaderData::keep_alive_offset()), tmp1_class_loader_data); + __ lwz(tmp2, in_bytes(ClassLoaderData::keep_alive_offset()), tmp1_class_loader_data); __ cmpdi(CCR0, tmp2, 0); __ bne(CCR0, skip_barrier); diff --git a/src/hotspot/cpu/ppc/gc/shared/modRefBarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shared/modRefBarrierSetAssembler_ppc.cpp index ed66c5f892918383b26305bb0a46bf47170dd373..1d1f923108f2ae96b6bae1e6e0d9a082119c81e5 100644 --- a/src/hotspot/cpu/ppc/gc/shared/modRefBarrierSetAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/gc/shared/modRefBarrierSetAssembler_ppc.cpp @@ -26,6 +26,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.inline.hpp" #include "gc/shared/modRefBarrierSetAssembler.hpp" +#include "runtime/jniHandles.hpp" #define __ masm-> @@ -74,3 +75,17 @@ void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet deco preservation_level); } } + +void ModRefBarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value, + Register tmp1, Register tmp2, + MacroAssembler::PreservationLevel preservation_level) { + Label done; + __ cmpdi(CCR0, value, 0); + __ beq(CCR0, done); // Use NULL as-is. + + __ clrrdi(tmp1, value, JNIHandles::weak_tag_size); + __ ld(value, 0, tmp1); // Resolve (untagged) jobject. + + __ verify_oop(value, FILE_AND_LINE); + __ bind(done); +} diff --git a/src/hotspot/cpu/ppc/gc/shared/modRefBarrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/shared/modRefBarrierSetAssembler_ppc.hpp index eec826212803ca1b9b97e5c094977593d52381f4..5d105f6c0484f7fd0901a23ed6e6e55260796b3b 100644 --- a/src/hotspot/cpu/ppc/gc/shared/modRefBarrierSetAssembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/gc/shared/modRefBarrierSetAssembler_ppc.hpp @@ -57,6 +57,10 @@ public: Register base, RegisterOrConstant ind_or_offs, Register val, Register tmp1, Register tmp2, Register tmp3, MacroAssembler::PreservationLevel preservation_level); + + virtual void resolve_jobject(MacroAssembler* masm, Register value, + Register tmp1, Register tmp2, + MacroAssembler::PreservationLevel preservation_level); }; #endif // CPU_PPC_GC_SHARED_MODREFBARRIERSETASSEMBLER_PPC_HPP diff --git a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..26c3bf371f3fed342304fe676cd8d207fc0758fd --- /dev/null +++ b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp @@ -0,0 +1,567 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "precompiled.hpp"
+#include "asm/register.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/codeBlob.hpp"
+#include "code/vmreg.inline.hpp"
+#include "gc/z/zBarrier.inline.hpp"
+#include "gc/z/zBarrierSet.hpp"
+#include "gc/z/zBarrierSetAssembler.hpp"
+#include "gc/z/zBarrierSetRuntime.hpp"
+#include "gc/z/zThreadLocalData.hpp"
+#include "memory/resourceArea.hpp"
+#include "register_ppc.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+#ifdef COMPILER1
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "gc/z/c1/zBarrierSetC1.hpp"
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/z/c2/zBarrierSetC2.hpp"
+#endif // COMPILER2
+
+#undef __
+#define __ masm->
+
+void ZBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                   Register base, RegisterOrConstant ind_or_offs, Register dst,
+                                   Register tmp1, Register tmp2,
+                                   MacroAssembler::PreservationLevel preservation_level, Label *L_handle_null) {
+  __ block_comment("load_at (zgc) {");
+
+  // Check whether a special gc barrier is required for this particular load
+  // (e.g. whether it's a reference load or not)
+  if (!ZBarrierSet::barrier_needed(decorators, type)) {
+    BarrierSetAssembler::load_at(masm, decorators, type, base, ind_or_offs, dst,
+                                 tmp1, tmp2, preservation_level, L_handle_null);
+    return;
+  }
+
+  if (ind_or_offs.is_register()) {
+    assert_different_registers(base, ind_or_offs.as_register(), tmp1, tmp2, R0, noreg);
+    assert_different_registers(dst, ind_or_offs.as_register(), tmp1, tmp2, R0, noreg);
+  } else {
+    assert_different_registers(base, tmp1, tmp2, R0, noreg);
+    assert_different_registers(dst, tmp1, tmp2, R0, noreg);
+  }
+
+  /* ==== Load the pointer using the standard implementation for the actual heap access
+          and the decompression of compressed pointers ==== */
+  // Result of 'load_at' (standard implementation) will be written back to 'dst'.
+  // As 'base' is required for the C-call, it must be reserved in case of a register clash.
+  Register saved_base = base;
+  if (base == dst) {
+    __ mr(tmp2, base);
+    saved_base = tmp2;
+  }
+
+  BarrierSetAssembler::load_at(masm, decorators, type, base, ind_or_offs, dst,
+                               tmp1, noreg, preservation_level, L_handle_null);
+
+  /* ==== Check whether pointer is dirty ==== */
+  Label skip_barrier;
+
+  // Load bad mask into scratch register.
+  __ ld(tmp1, (intptr_t) ZThreadLocalData::address_bad_mask_offset(), R16_thread);
+
+  // The color bits of the to-be-tested pointer do not have to be equivalent to the 'bad_mask' testing bits.
+  // A pointer is classified as dirty if any of the color bits that also match the bad mask is set.
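The dirty-pointer classification spelled out in this comment (continued just below) boils down to a single AND against a thread-local mask. A standalone plain-C++ sketch, illustrative only:

#include <cstdint>

// A colored pointer needs the load barrier iff any of its metadata bits
// overlap the current bad mask; a clean pointer ANDs to zero, which is what
// the and_/beq pair in the generated code checks.
inline bool needs_load_barrier(uint64_t colored_ptr, uint64_t address_bad_mask) {
  return (colored_ptr & address_bad_mask) != 0;
}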
+  // Conversely, it follows that the logical AND of the bad mask and the pointer must be zero
+  // if the pointer is not dirty.
+  // Only dirty pointers must be processed by this barrier, so we can skip it in case the latter condition holds true.
+  __ and_(tmp1, tmp1, dst);
+  __ beq(CCR0, skip_barrier);
+
+  /* ==== Invoke barrier ==== */
+  int nbytes_save = 0;
+
+  const bool needs_frame = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR;
+  const bool preserve_gp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS;
+  const bool preserve_fp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS;
+
+  const bool preserve_R3 = dst != R3_ARG1;
+
+  if (needs_frame) {
+    if (preserve_gp_registers) {
+      nbytes_save = (preserve_fp_registers
+                     ? MacroAssembler::num_volatile_gp_regs + MacroAssembler::num_volatile_fp_regs
+                     : MacroAssembler::num_volatile_gp_regs) * BytesPerWord;
+      nbytes_save -= preserve_R3 ? 0 : BytesPerWord;
+      __ save_volatile_gprs(R1_SP, -nbytes_save, preserve_fp_registers, preserve_R3);
+    }
+
+    __ save_LR_CR(tmp1);
+    __ push_frame_reg_args(nbytes_save, tmp1);
+  }
+
+  // Setup arguments
+  if (saved_base != R3_ARG1) {
+    __ mr_if_needed(R3_ARG1, dst);
+    __ add(R4_ARG2, ind_or_offs, saved_base);
+  } else if (dst != R4_ARG2) {
+    __ add(R4_ARG2, ind_or_offs, saved_base);
+    __ mr(R3_ARG1, dst);
+  } else {
+    __ add(R0, ind_or_offs, saved_base);
+    __ mr(R3_ARG1, dst);
+    __ mr(R4_ARG2, R0);
+  }
+
+  __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators));
+
+  Register result = R3_RET;
+  if (needs_frame) {
+    __ pop_frame();
+    __ restore_LR_CR(tmp1);
+
+    if (preserve_R3) {
+      __ mr(R0, R3_RET);
+      result = R0;
+    }
+
+    if (preserve_gp_registers) {
+      __ restore_volatile_gprs(R1_SP, -nbytes_save, preserve_fp_registers, preserve_R3);
+    }
+  }
+  __ mr_if_needed(dst, result);
+
+  __ bind(skip_barrier);
+  __ block_comment("} load_at (zgc)");
+}
+
+#ifdef ASSERT
+// The Z store barrier only verifies the pointers it is operating on and is thus purely a debugging measure.
+void ZBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                    Register base, RegisterOrConstant ind_or_offs, Register val,
+                                    Register tmp1, Register tmp2, Register tmp3,
+                                    MacroAssembler::PreservationLevel preservation_level) {
+  __ block_comment("store_at (zgc) {");
+
+  // If the 'val' register is 'noreg', the to-be-stored value is a null pointer.
+  if (is_reference_type(type) && val != noreg) {
+    __ ld(tmp1, in_bytes(ZThreadLocalData::address_bad_mask_offset()), R16_thread);
+    __ and_(tmp1, tmp1, val);
+    __ asm_assert_eq("Detected dirty pointer on the heap in Z store barrier");
+  }
+
+  // Store value
+  BarrierSetAssembler::store_at(masm, decorators, type, base, ind_or_offs, val, tmp1, tmp2, tmp3, preservation_level);
+
+  __ block_comment("} store_at (zgc)");
+}
+#endif // ASSERT
+
+void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler *masm, DecoratorSet decorators, BasicType component_type,
+                                              Register src, Register dst, Register count,
+                                              Register preserve1, Register preserve2) {
+  __ block_comment("arraycopy_prologue (zgc) {");
+
+  /* ==== Check whether a special gc barrier is required for this particular copy ==== */
+  if (!is_reference_type(component_type)) {
+    return;
+  }
+
+  Label skip_barrier;
+
+  // Fast path: Array is of length zero
+  __ cmpdi(CCR0, count, 0);
+  __ beq(CCR0, skip_barrier);
+
+  /* ==== Ensure register sanity ==== */
+  Register tmp_R11 = R11_scratch1;
+
+  assert_different_registers(src, dst, count, tmp_R11, noreg);
+  if (preserve1 != noreg) {
+    // Not technically required, but unlikely to be intended.
+    assert_different_registers(preserve1, preserve2);
+  }
+
+  /* ==== Invoke barrier (slowpath) ==== */
+  int nbytes_save = 0;
+
+  {
+    assert(!noreg->is_volatile(), "sanity");
+
+    if (preserve1->is_volatile()) {
+      __ std(preserve1, -BytesPerWord * ++nbytes_save, R1_SP);
+    }
+
+    if (preserve2->is_volatile() && preserve1 != preserve2) {
+      __ std(preserve2, -BytesPerWord * ++nbytes_save, R1_SP);
+    }
+
+    __ std(src, -BytesPerWord * ++nbytes_save, R1_SP);
+    __ std(dst, -BytesPerWord * ++nbytes_save, R1_SP);
+    __ std(count, -BytesPerWord * ++nbytes_save, R1_SP);
+
+    __ save_LR_CR(tmp_R11);
+    __ push_frame_reg_args(nbytes_save, tmp_R11);
+  }
+
+  // ZBarrierSetRuntime::load_barrier_on_oop_array_addr(src, count)
+  if (count == R3_ARG1) {
+    if (src == R4_ARG2) {
+      // Arguments are provided in reverse order
+      __ mr(tmp_R11, count);
+      __ mr(R3_ARG1, src);
+      __ mr(R4_ARG2, tmp_R11);
+    } else {
+      __ mr(R4_ARG2, count);
+      __ mr(R3_ARG1, src);
+    }
+  } else {
+    __ mr_if_needed(R3_ARG1, src);
+    __ mr_if_needed(R4_ARG2, count);
+  }
+
+  __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr());
+
+  __ pop_frame();
+  __ restore_LR_CR(tmp_R11);
+
+  {
+    __ ld(count, -BytesPerWord * nbytes_save--, R1_SP);
+    __ ld(dst, -BytesPerWord * nbytes_save--, R1_SP);
+    __ ld(src, -BytesPerWord * nbytes_save--, R1_SP);
+
+    if (preserve2->is_volatile() && preserve1 != preserve2) {
+      __ ld(preserve2, -BytesPerWord * nbytes_save--, R1_SP);
+    }
+
+    if (preserve1->is_volatile()) {
+      __ ld(preserve1, -BytesPerWord * nbytes_save--, R1_SP);
+    }
+  }
+
+  __ bind(skip_barrier);
+
+  __ block_comment("} arraycopy_prologue (zgc)");
+}
+
+void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register dst, Register jni_env,
+                                                         Register obj, Register tmp, Label& slowpath) {
+  __ block_comment("try_resolve_jobject_in_native (zgc) {");
+
+  assert_different_registers(jni_env, obj, tmp);
+
+  // Resolve the pointer using the standard implementation for weak tag handling and pointer verification.
+  BarrierSetAssembler::try_resolve_jobject_in_native(masm, dst, jni_env, obj, tmp, slowpath);
+
+  // Check whether pointer is dirty.
+ __ ld(tmp, + in_bytes(ZThreadLocalData::address_bad_mask_offset() - JavaThread::jni_environment_offset()), + jni_env); + + __ and_(tmp, obj, tmp); + __ bne(CCR0, slowpath); + + __ block_comment("} try_resolve_jobject_in_native (zgc)"); +} + +#undef __ + +#ifdef COMPILER1 +#define __ ce->masm()-> + +// Code emitted by LIR node "LIR_OpZLoadBarrierTest" which in turn is emitted by ZBarrierSetC1::load_barrier. +// The actual compare and branch instructions are represented as stand-alone LIR nodes. +void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, + LIR_Opr ref) const { + __ block_comment("load_barrier_test (zgc) {"); + + __ ld(R0, in_bytes(ZThreadLocalData::address_bad_mask_offset()), R16_thread); + __ andr(R0, R0, ref->as_pointer_register()); + __ cmpdi(CCR5 /* as mandated by LIR node */, R0, 0); + + __ block_comment("} load_barrier_test (zgc)"); +} + +// Code emitted by code stub "ZLoadBarrierStubC1" which in turn is emitted by ZBarrierSetC1::load_barrier. +// Invokes the runtime stub which is defined just below. +void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, + ZLoadBarrierStubC1* stub) const { + __ block_comment("c1_load_barrier_stub (zgc) {"); + + __ bind(*stub->entry()); + + /* ==== Determine relevant data registers and ensure register sanity ==== */ + Register ref = stub->ref()->as_register(); + Register ref_addr = noreg; + + // Determine reference address + if (stub->tmp()->is_valid()) { + // 'tmp' register is given, so address might have an index or a displacement. + ce->leal(stub->ref_addr(), stub->tmp()); + ref_addr = stub->tmp()->as_pointer_register(); + } else { + // 'tmp' register is not given, so address must have neither an index nor a displacement. + // The address' base register is thus usable as-is. + assert(stub->ref_addr()->as_address_ptr()->disp() == 0, "illegal displacement"); + assert(!stub->ref_addr()->as_address_ptr()->index()->is_valid(), "illegal index"); + + ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); + } + + assert_different_registers(ref, ref_addr, R0, noreg); + + /* ==== Invoke stub ==== */ + // Pass arguments via stack. The stack pointer will be bumped by the stub. + __ std(ref, (intptr_t) -1 * BytesPerWord, R1_SP); + __ std(ref_addr, (intptr_t) -2 * BytesPerWord, R1_SP); + + __ load_const_optimized(R0, stub->runtime_stub()); + __ call_stub(R0); + + // The runtime stub passes the result via the R0 register, overriding the previously-loaded stub address. + __ mr_if_needed(ref, R0); + __ b(*stub->continuation()); + + __ block_comment("} c1_load_barrier_stub (zgc)"); +} + +#undef __ +#define __ sasm-> + +// Code emitted by runtime code stub which in turn is emitted by ZBarrierSetC1::generate_c1_runtime_stubs. +void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, + DecoratorSet decorators) const { + __ block_comment("c1_load_barrier_runtime_stub (zgc) {"); + + const int stack_parameters = 2; + const int nbytes_save = (MacroAssembler::num_volatile_regs + stack_parameters) * BytesPerWord; + + __ save_volatile_gprs(R1_SP, -nbytes_save); + __ save_LR_CR(R0); + + // Load arguments back again from the stack. 
+  __ ld(R3_ARG1, (intptr_t) -1 * BytesPerWord, R1_SP); // ref
+  __ ld(R4_ARG2, (intptr_t) -2 * BytesPerWord, R1_SP); // ref_addr
+
+  __ push_frame_reg_args(nbytes_save, R0);
+
+  __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators));
+
+  __ verify_oop(R3_RET, "Bad pointer after barrier invocation");
+  __ mr(R0, R3_RET);
+
+  __ pop_frame();
+  __ restore_LR_CR(R3_RET);
+  __ restore_volatile_gprs(R1_SP, -nbytes_save);
+
+  __ blr();
+
+  __ block_comment("} c1_load_barrier_runtime_stub (zgc)");
+}
+
+#undef __
+#endif // COMPILER1
+
+#ifdef COMPILER2
+
+OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) const {
+  if (!OptoReg::is_reg(opto_reg)) {
+    return OptoReg::Bad;
+  }
+
+  VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+  if ((vm_reg->is_Register() || vm_reg->is_FloatRegister()) && (opto_reg & 1) != 0) {
+    return OptoReg::Bad;
+  }
+
+  return opto_reg;
+}
+
+#define __ _masm->
+
+class ZSaveLiveRegisters {
+
+  private:
+    MacroAssembler* _masm;
+    RegMask _reg_mask;
+    Register _result_reg;
+
+  public:
+    ZSaveLiveRegisters(MacroAssembler *masm, ZLoadBarrierStubC2 *stub)
+      : _masm(masm), _reg_mask(stub->live()), _result_reg(stub->ref()) {
+
+      const int total_regs_amount = iterate_over_register_mask(ACTION_SAVE);
+
+      __ save_LR_CR(R0);
+      __ push_frame_reg_args(total_regs_amount * BytesPerWord, R0);
+    }
+
+    ~ZSaveLiveRegisters() {
+      __ pop_frame();
+      __ restore_LR_CR(R0);
+
+      iterate_over_register_mask(ACTION_RESTORE);
+    }
+
+  private:
+    enum IterationAction : int {
+      ACTION_SAVE = 0,
+      ACTION_RESTORE = 1
+    };
+
+    int iterate_over_register_mask(IterationAction action) {
+      int reg_save_index = 0;
+      RegMaskIterator live_regs_iterator(_reg_mask);
+
+      while (live_regs_iterator.has_next()) {
+        const OptoReg::Name opto_reg = live_regs_iterator.next();
+
+        // Filter out stack slots (spilled registers, i.e., stack-allocated registers).
+        if (!OptoReg::is_reg(opto_reg)) {
+          continue;
+        }
+
+        const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+        if (vm_reg->is_Register()) {
+          Register std_reg = vm_reg->as_Register();
+
+          // '_result_reg' will hold the end result of the operation. Its content must thus not be preserved.
+          if (std_reg == _result_reg) {
+            continue;
+          }
+
+          if (std_reg->encoding() >= R2->encoding() && std_reg->encoding() <= R12->encoding()) {
+            reg_save_index++;
+
+            if (action == ACTION_SAVE) {
+              _masm->std(std_reg, (intptr_t) -reg_save_index * BytesPerWord, R1_SP);
+            } else if (action == ACTION_RESTORE) {
+              _masm->ld(std_reg, (intptr_t) -reg_save_index * BytesPerWord, R1_SP);
+            } else {
+              fatal("Sanity");
+            }
+          }
+        } else if (vm_reg->is_FloatRegister()) {
+          FloatRegister fp_reg = vm_reg->as_FloatRegister();
+          if (fp_reg->encoding() >= F0->encoding() && fp_reg->encoding() <= F13->encoding()) {
+            reg_save_index++;
+
+            if (action == ACTION_SAVE) {
+              _masm->stfd(fp_reg, (intptr_t) -reg_save_index * BytesPerWord, R1_SP);
+            } else if (action == ACTION_RESTORE) {
+              _masm->lfd(fp_reg, (intptr_t) -reg_save_index * BytesPerWord, R1_SP);
+            } else {
+              fatal("Sanity");
+            }
+          }
+        } else if (vm_reg->is_ConditionRegister()) {
+          // NOP. Condition registers are covered by save_LR_CR
+        } else {
+          if (vm_reg->is_VectorRegister()) {
+            fatal("Vector registers are unsupported. Found register %s", vm_reg->name());
+          } else if (vm_reg->is_SpecialRegister()) {
+            fatal("Special registers are unsupported. 
Found register %s", vm_reg->name());
+          } else {
+            fatal("Register type is not known");
+          }
+        }
+      }
+
+      return reg_save_index;
+    }
+};
+
+#undef __
+#define __ _masm->
+
+class ZSetupArguments {
+  private:
+    MacroAssembler* const _masm;
+    const Register _ref;
+    const Address _ref_addr;
+
+  public:
+    ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
+        _masm(masm),
+        _ref(stub->ref()),
+        _ref_addr(stub->ref_addr()) {
+
+      // Desired register/argument configuration:
+      // _ref: R3_ARG1
+      // _ref_addr: R4_ARG2
+
+      // '_ref_addr' can be unspecified. In that case, the barrier will not heal the reference.
+      if (_ref_addr.base() == noreg) {
+        assert_different_registers(_ref, R0, noreg);
+
+        __ mr_if_needed(R3_ARG1, _ref);
+        __ li(R4_ARG2, 0);
+      } else {
+        assert_different_registers(_ref, _ref_addr.base(), R0, noreg);
+        assert(!_ref_addr.index()->is_valid(), "reference addresses must not contain an index component");
+
+        if (_ref != R4_ARG2) {
+          // Calculate address first as the address' base register might clash with R4_ARG2
+          __ add(R4_ARG2, (intptr_t) _ref_addr.disp(), _ref_addr.base());
+          __ mr_if_needed(R3_ARG1, _ref);
+        } else if (_ref_addr.base() != R3_ARG1) {
+          __ mr(R3_ARG1, _ref);
+          __ add(R4_ARG2, (intptr_t) _ref_addr.disp(), _ref_addr.base()); // Clobbering _ref
+        } else {
+          // Arguments are provided in inverse order (i.e. _ref == R4_ARG2, _ref_addr == R3_ARG1)
+          __ mr(R0, _ref);
+          __ add(R4_ARG2, (intptr_t) _ref_addr.disp(), _ref_addr.base());
+          __ mr(R3_ARG1, R0);
+        }
+      }
+    }
+};
+
+#undef __
+#define __ masm->
+
+void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const {
+  __ block_comment("generate_c2_load_barrier_stub (zgc) {");
+
+  __ bind(*stub->entry());
+
+  Register ref = stub->ref();
+  Address ref_addr = stub->ref_addr();
+
+  assert_different_registers(ref, ref_addr.base());
+
+  {
+    ZSaveLiveRegisters save_live_registers(masm, stub);
+    ZSetupArguments setup_arguments(masm, stub);
+
+    __ call_VM_leaf(stub->slow_path());
+    __ mr_if_needed(ref, R3_RET);
+  }
+
+  __ b(*stub->continuation());
+
+  __ block_comment("} generate_c2_load_barrier_stub (zgc)");
+}
+
+#undef __
+#endif // COMPILER2
diff --git a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e2ff1bf53ae8052310663a8b18b44c7313f161e5
--- /dev/null
+++ b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef CPU_PPC_GC_Z_ZBARRIERSETASSEMBLER_PPC_HPP
+#define CPU_PPC_GC_Z_ZBARRIERSETASSEMBLER_PPC_HPP
+
+#include "code/vmreg.hpp"
+#include "oops/accessDecorators.hpp"
+#ifdef COMPILER2
+#include "opto/optoreg.hpp"
+#endif // COMPILER2
+
+#ifdef COMPILER1
+class LIR_Assembler;
+class LIR_OprDesc;
+typedef LIR_OprDesc* LIR_Opr;
+class StubAssembler;
+class ZLoadBarrierStubC1;
+#endif // COMPILER1
+
+#ifdef COMPILER2
+class Node;
+class ZLoadBarrierStubC2;
+#endif // COMPILER2
+
+class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase {
+public:
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                       Register base, RegisterOrConstant ind_or_offs, Register dst,
+                       Register tmp1, Register tmp2,
+                       MacroAssembler::PreservationLevel preservation_level, Label *L_handle_null = NULL);
+
+#ifdef ASSERT
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        Register base, RegisterOrConstant ind_or_offs, Register val,
+                        Register tmp1, Register tmp2, Register tmp3,
+                        MacroAssembler::PreservationLevel preservation_level);
+#endif // ASSERT
+
+  virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                  Register src, Register dst, Register count,
+                                  Register preserve1, Register preserve2);
+
+  virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register dst, Register jni_env,
+                                             Register obj, Register tmp, Label& slowpath);
+
+#ifdef COMPILER1
+  void generate_c1_load_barrier_test(LIR_Assembler* ce,
+                                     LIR_Opr ref) const;
+
+  void generate_c1_load_barrier_stub(LIR_Assembler* ce,
+                                     ZLoadBarrierStubC1* stub) const;
+
+  void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm,
+                                             DecoratorSet decorators) const;
+#endif // COMPILER1
+
+#ifdef COMPILER2
+  OptoReg::Name refine_register(const Node* node, OptoReg::Name opto_reg) const;
+
+  void generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const;
+#endif // COMPILER2
+};
+
+#endif // CPU_PPC_GC_Z_ZBARRIERSETASSEMBLER_PPC_HPP
diff --git a/src/hotspot/cpu/ppc/gc/z/zGlobals_ppc.cpp b/src/hotspot/cpu/ppc/gc/z/zGlobals_ppc.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..93c2f9b4dc44e5b1b41c2a60ad3b51879f44302d
--- /dev/null
+++ b/src/hotspot/cpu/ppc/gc/z/zGlobals_ppc.cpp
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "precompiled.hpp"
+#include "gc/shared/gcLogPrecious.hpp"
+#include "gc/shared/gc_globals.hpp"
+#include "gc/z/zGlobals.hpp"
+#include "runtime/globals.hpp"
+#include "runtime/os.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/powerOfTwo.hpp"
+#include <cstddef>
+
+#ifdef LINUX
+#include <sys/mman.h>
+#endif // LINUX
+
+//
+// The overall memory layouts across different power platforms are similar and only differ with regards to
+// the position of the highest addressable bit; the position of the metadata bits and the size of the actual
+// addressable heap address space are adjusted accordingly.
+//
+// The following memory schema shows an exemplary layout in which bit '45' is the highest addressable bit.
+// It is assumed that this virtual memory address space layout is predominant on the Power platform.
+//
+// Standard Address Space & Pointer Layout
+// ---------------------------------------
+//
+// +--------------------------------+ 0x00007FFFFFFFFFFF (128 TiB - 1)
+// .                                .
+// .                                .
+// .                                .
+// +--------------------------------+ 0x0000140000000000 (20 TiB)
+// |         Remapped View          |
+// +--------------------------------+ 0x0000100000000000 (16 TiB)
+// .                                .
+// +--------------------------------+ 0x00000c0000000000 (12 TiB)
+// |         Marked1 View           |
+// +--------------------------------+ 0x0000080000000000 (8 TiB)
+// |         Marked0 View           |
+// +--------------------------------+ 0x0000040000000000 (4 TiB)
+// .                                .
+// +--------------------------------+ 0x0000000000000000
+//
+//  6                  4 4  4 4
+//  3                  6 5  2 1                                             0
+// +--------------------+----+-----------------------------------------------+
+// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111|
+// +--------------------+----+-----------------------------------------------+
+// |                    |    |
+// |                    |    * 41-0 Object Offset (42-bits, 4TB address space)
+// |                    |
+// |                    * 45-42 Metadata Bits (4-bits)  0001 = Marked0      (Address view 4-8TB)
+// |                                                    0010 = Marked1      (Address view 8-12TB)
+// |                                                    0100 = Remapped     (Address view 16-20TB)
+// |                                                    1000 = Finalizable  (Address view N/A)
+// |
+// * 63-46 Fixed (18-bits, always zero)
+//
+
+// Maximum value as per spec (Power ISA v2.07): 2 ^ 60 bytes, i.e. 1 EiB (exbibyte)
+static const unsigned int MAXIMUM_MAX_ADDRESS_BIT = 60;
+
+// Most modern Power processors provide an address space with no more than 45 addressable bits,
+// that is an address space of 32 TiB in size.
+static const unsigned int DEFAULT_MAX_ADDRESS_BIT = 45;
+
+// Minimum value returned, if probing fails: 64 GiB
+static const unsigned int MINIMUM_MAX_ADDRESS_BIT = 36;
+
+// Determines the highest addressable bit of the virtual address space (depends on platform)
+// by trying to interact with memory in that address range,
+// i.e. by syncing existing mappings (msync) or by temporarily mapping the memory area (mmap).
+// If one of those operations succeeds, it is proven that the targeted memory area is within the virtual address space.
+//
+// To reduce the number of required system calls to a bare minimum, the DEFAULT_MAX_ADDRESS_BIT is intentionally set
+// lower than what the ABI would theoretically permit.
+// Such an avoidance strategy, however, might impose unnecessary limits on processors that exceed this limit.
+// If DEFAULT_MAX_ADDRESS_BIT is addressable, the next higher bit will be tested as well to ensure that
+// the assumption made does not artificially restrict memory availability.
+static unsigned int probe_valid_max_address_bit(size_t init_bit, size_t min_bit) {
+  assert(init_bit >= min_bit, "Sanity");
+  assert(init_bit <= MAXIMUM_MAX_ADDRESS_BIT, "Test bit is outside the assumed address space range");
+
+#ifdef LINUX
+  unsigned int max_valid_address_bit = 0;
+  void* last_allocatable_address = nullptr;
+
+  const unsigned int page_size = os::vm_page_size();
+
+  for (size_t i = init_bit; i >= min_bit; --i) {
+    void* base_addr = (void*) (((unsigned long) 1U) << i);
+
+    /* ==== Try msync-ing already mapped memory page ==== */
+    if (msync(base_addr, page_size, MS_ASYNC) == 0) {
+      // The page of the given address was synced by the Linux kernel and must thus be both mapped and valid.
+      max_valid_address_bit = i;
+      break;
+    }
+    if (errno != ENOMEM) {
+      // An unexpected error occurred, i.e. an error not indicating that the targeted memory page is unmapped,
+      // but pointing out another type of issue.
+      // Even though this should never happen, those issues may come up due to undefined behavior.
+#ifdef ASSERT
+      fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno));
+#else // ASSERT
+      log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno));
+#endif // ASSERT
+      continue;
+    }
+
+    /* ==== Try mapping memory page on our own ==== */
+    last_allocatable_address = mmap(base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0);
+    if (last_allocatable_address != MAP_FAILED) {
+      munmap(last_allocatable_address, page_size);
+    }
+
+    if (last_allocatable_address == base_addr) {
+      // As the Linux kernel mapped exactly the page we have requested, the address must be valid.
+      max_valid_address_bit = i;
+      break;
+    }
+
+    log_info_p(gc, init)("Probe failed for bit '%zu'", i);
+  }
+
+  if (max_valid_address_bit == 0) {
+    // Probing did not bring up any usable address bit.
+    // As an alternative, the VM evaluates the address returned by mmap as it is expected that the reserved page
+    // will be close to the probed address that was out-of-range.
+    // As per mmap(2), "the kernel [will take] [the address] as a hint about where to
+    // place the mapping; on Linux, the mapping will be created at a nearby page boundary".
+    // It should thus be a "close enough" approximation to the real virtual memory address space limit.
+    //
+    // This recovery strategy is only applied in production builds.
+    // In debug builds, an assertion in 'ZPlatformAddressOffsetBits' will bail out the VM to indicate that
+    // the assumed address space is no longer up-to-date.
+    if (last_allocatable_address != MAP_FAILED) {
+      const unsigned int bitpos = BitsPerSize_t - count_leading_zeros((size_t) last_allocatable_address) - 1;
+      log_info_p(gc, init)("Did not find any valid addresses within the range, using address '%u' instead", bitpos);
+      return bitpos;
+    }
+
+#ifdef ASSERT
+    fatal("Available address space can not be determined");
+#else // ASSERT
+    log_warning_p(gc)("Cannot determine available address space. Falling back to default value.");
+    return DEFAULT_MAX_ADDRESS_BIT;
+#endif // ASSERT
+  } else {
+    if (max_valid_address_bit == init_bit) {
+      // A usable address bit has been found immediately.
+      // To ensure that the entire virtual address space is exploited, the next highest bit will be tested as well.
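The probing result feeds the offset-bit computation in ZPlatformAddressOffsetBits, defined a little further below. A hedged numeric walk-through of its clamp arithmetic, assuming probing found bit 45 (the default) and a heap reservation that rounds up to 2^42 bytes; the values here are illustrative, not normative:

#include <algorithm>
#include <cstddef>

// valid_max_address_offset_bits = 45 + 1 = 46
// max_address_offset_bits       = 46 - 3 = 43   (the -3 is taken from the code below)
// min_address_offset_bits       = 43 - 2 = 41
// requested bits                = log2(2^42) = 42  ->  clamp(42, 41, 43) == 42
inline size_t example_offset_bits() {
  const size_t max_bits = 46 - 3;
  const size_t min_bits = max_bits - 2;
  const size_t requested = 42;  // assumed log2 of the rounded-up reservation
  return std::min(std::max(requested, min_bits), max_bits);
}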
+ log_info_p(gc, init)("Hit valid address '%u' on first try, retrying with next higher bit", max_valid_address_bit); + return MAX2(max_valid_address_bit, probe_valid_max_address_bit(init_bit + 1, init_bit + 1)); + } + } + + log_info_p(gc, init)("Found valid address '%u'", max_valid_address_bit); + return max_valid_address_bit; +#else // LINUX + return DEFAULT_MAX_ADDRESS_BIT; +#endif // LINUX +} + +size_t ZPlatformAddressOffsetBits() { + const static unsigned int valid_max_address_offset_bits = + probe_valid_max_address_bit(DEFAULT_MAX_ADDRESS_BIT, MINIMUM_MAX_ADDRESS_BIT) + 1; + assert(valid_max_address_offset_bits >= MINIMUM_MAX_ADDRESS_BIT, + "Highest addressable bit is outside the assumed address space range"); + + const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; + const size_t min_address_offset_bits = max_address_offset_bits - 2; + const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); + const size_t address_offset_bits = log2i_exact(address_offset); + + return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); +} + +size_t ZPlatformAddressMetadataShift() { + return ZPlatformAddressOffsetBits(); +} diff --git a/src/hotspot/cpu/ppc/gc/z/zGlobals_ppc.hpp b/src/hotspot/cpu/ppc/gc/z/zGlobals_ppc.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3657b16fc1aa6b39410ae5193bf4fd742978ade4 --- /dev/null +++ b/src/hotspot/cpu/ppc/gc/z/zGlobals_ppc.hpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#ifndef CPU_PPC_GC_Z_ZGLOBALS_PPC_HPP +#define CPU_PPC_GC_Z_ZGLOBALS_PPC_HPP + +#include "globalDefinitions_ppc.hpp" +const size_t ZPlatformGranuleSizeShift = 21; // 2MB +const size_t ZPlatformHeapViews = 3; +const size_t ZPlatformCacheLineSize = DEFAULT_CACHE_LINE_SIZE; + +size_t ZPlatformAddressOffsetBits(); +size_t ZPlatformAddressMetadataShift(); + +#endif // CPU_PPC_GC_Z_ZGLOBALS_PPC_HPP diff --git a/src/hotspot/cpu/ppc/gc/z/z_ppc.ad b/src/hotspot/cpu/ppc/gc/z/z_ppc.ad new file mode 100644 index 0000000000000000000000000000000000000000..a8ce64ed1d9c1913c360ebb60e311c5b583933e1 --- /dev/null +++ b/src/hotspot/cpu/ppc/gc/z/z_ppc.ad @@ -0,0 +1,298 @@ +// +// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2021 SAP SE. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// + +source_hpp %{ + +#include "gc/shared/gc_globals.hpp" +#include "gc/z/c2/zBarrierSetC2.hpp" +#include "gc/z/zThreadLocalData.hpp" + +%} + +source %{ + +static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, + Register tmp, uint8_t barrier_data) { + if (barrier_data == ZLoadBarrierElided) { + return; + } + + ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); + __ ld(tmp, in_bytes(ZThreadLocalData::address_bad_mask_offset()), R16_thread); + __ and_(tmp, tmp, ref); + __ bne_far(CCR0, *stub->entry(), MacroAssembler::bc_far_optimize_on_relocate); + __ bind(*stub->continuation()); +} + +static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, + Register tmp) { + ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); + __ b(*stub->entry()); + __ bind(*stub->continuation()); +} + +static void z_compare_and_swap(MacroAssembler& _masm, const MachNode* node, + Register res, Register mem, Register oldval, Register newval, + Register tmp_xchg, Register tmp_mask, + bool weak, bool acquire) { + // z-specific load barrier requires strong CAS operations. + // Weak CAS operations are thus only emitted if the barrier is elided. + __ cmpxchgd(CCR0, tmp_xchg, oldval, newval, mem, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), res, NULL, true, + weak && node->barrier_data() == ZLoadBarrierElided); + + if (node->barrier_data() != ZLoadBarrierElided) { + Label skip_barrier; + + __ ld(tmp_mask, in_bytes(ZThreadLocalData::address_bad_mask_offset()), R16_thread); + __ and_(tmp_mask, tmp_mask, tmp_xchg); + __ beq(CCR0, skip_barrier); + + // CAS must have failed because pointer in memory is bad. + z_load_barrier_slow_path(_masm, node, Address(mem), tmp_xchg, res /* used as tmp */); + + __ cmpxchgd(CCR0, tmp_xchg, oldval, newval, mem, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), res, NULL, true, weak); + + __ bind(skip_barrier); + } + + if (acquire) { + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + // Uses the isync instruction as an acquire barrier. + // This exploits the compare and the branch in the z load barrier (load, compare and branch, isync). 
+ __ isync(); + } else { + __ sync(); + } + } +} + +static void z_compare_and_exchange(MacroAssembler& _masm, const MachNode* node, + Register res, Register mem, Register oldval, Register newval, Register tmp, + bool weak, bool acquire) { + // z-specific load barrier requires strong CAS operations. + // Weak CAS operations are thus only emitted if the barrier is elided. + __ cmpxchgd(CCR0, res, oldval, newval, mem, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), noreg, NULL, true, + weak && node->barrier_data() == ZLoadBarrierElided); + + if (node->barrier_data() != ZLoadBarrierElided) { + Label skip_barrier; + __ ld(tmp, in_bytes(ZThreadLocalData::address_bad_mask_offset()), R16_thread); + __ and_(tmp, tmp, res); + __ beq(CCR0, skip_barrier); + + z_load_barrier_slow_path(_masm, node, Address(mem), res, tmp); + + __ cmpxchgd(CCR0, res, oldval, newval, mem, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), noreg, NULL, true, weak); + + __ bind(skip_barrier); + } + + if (acquire) { + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + // Uses the isync instruction as an acquire barrier. + // This exploits the compare and the branch in the z load barrier (load, compare and branch, isync). + __ isync(); + } else { + __ sync(); + } + } +} + +%} + +instruct zLoadP(iRegPdst dst, memoryAlg4 mem, iRegPdst tmp, flagsRegCR0 cr0) +%{ + match(Set dst (LoadP mem)); + effect(TEMP_DEF dst, TEMP tmp, KILL cr0); + ins_cost(MEMORY_REF_COST); + + predicate((UseZGC && n->as_Load()->barrier_data() != 0) + && (n->as_Load()->is_unordered() || followed_by_acquire(n))); + + format %{ "LD $dst, $mem" %} + ins_encode %{ + assert($mem$$index == 0, "sanity"); + __ ld($dst$$Register, $mem$$disp, $mem$$base$$Register); + z_load_barrier(_masm, this, Address($mem$$base$$Register, $mem$$disp), $dst$$Register, $tmp$$Register, barrier_data()); + %} + ins_pipe(pipe_class_default); +%} + +// Load Pointer Volatile +instruct zLoadP_acq(iRegPdst dst, memoryAlg4 mem, iRegPdst tmp, flagsRegCR0 cr0) +%{ + match(Set dst (LoadP mem)); + effect(TEMP_DEF dst, TEMP tmp, KILL cr0); + ins_cost(3 * MEMORY_REF_COST); + + // Predicate on instruction order is implicitly present due to the predicate of the cheaper zLoadP operation + predicate(UseZGC && n->as_Load()->barrier_data() != 0); + + format %{ "LD acq $dst, $mem" %} + ins_encode %{ + __ ld($dst$$Register, $mem$$disp, $mem$$base$$Register); + z_load_barrier(_masm, this, Address($mem$$base$$Register, $mem$$disp), $dst$$Register, $tmp$$Register, barrier_data()); + + // Uses the isync instruction as an acquire barrier. + // This exploits the compare and the branch in the z load barrier (load, compare and branch, isync). 
+ __ isync(); + %} + ins_pipe(pipe_class_default); +%} + +instruct zCompareAndSwapP(iRegIdst res, iRegPdst mem, iRegPsrc oldval, iRegPsrc newval, + iRegPdst tmp_xchg, iRegPdst tmp_mask, flagsRegCR0 cr0) %{ + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp_xchg, TEMP tmp_mask, KILL cr0); + + predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong) + && (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*) n)->order() != MemNode::seqcst)); + + format %{ "CMPXCHG $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + z_compare_and_swap(_masm, this, + $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, + $tmp_xchg$$Register, $tmp_mask$$Register, + false /* weak */, false /* acquire */); + %} + ins_pipe(pipe_class_default); +%} + +instruct zCompareAndSwapP_acq(iRegIdst res, iRegPdst mem, iRegPsrc oldval, iRegPsrc newval, + iRegPdst tmp_xchg, iRegPdst tmp_mask, flagsRegCR0 cr0) %{ + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp_xchg, TEMP tmp_mask, KILL cr0); + + predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong) + && (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*) n)->order() == MemNode::seqcst)); + + format %{ "CMPXCHG acq $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + z_compare_and_swap(_masm, this, + $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, + $tmp_xchg$$Register, $tmp_mask$$Register, + false /* weak */, true /* acquire */); + %} + ins_pipe(pipe_class_default); +%} + +instruct zCompareAndSwapPWeak(iRegIdst res, iRegPdst mem, iRegPsrc oldval, iRegPsrc newval, + iRegPdst tmp_xchg, iRegPdst tmp_mask, flagsRegCR0 cr0) %{ + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp_xchg, TEMP tmp_mask, KILL cr0); + + predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong) + && ((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*) n)->order() != MemNode::seqcst); + + format %{ "weak CMPXCHG $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + z_compare_and_swap(_masm, this, + $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, + $tmp_xchg$$Register, $tmp_mask$$Register, + true /* weak */, false /* acquire */); + %} + ins_pipe(pipe_class_default); +%} + +instruct zCompareAndSwapPWeak_acq(iRegIdst res, iRegPdst mem, iRegPsrc oldval, iRegPsrc newval, + iRegPdst tmp_xchg, iRegPdst tmp_mask, flagsRegCR0 cr0) %{ + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp_xchg, TEMP tmp_mask, KILL cr0); + + predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong) + && (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*) n)->order() == MemNode::seqcst)); + + format %{ "weak CMPXCHG acq $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + z_compare_and_swap(_masm, this, + $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, + $tmp_xchg$$Register, $tmp_mask$$Register, + true /* weak */, true /* acquire */); + %} + ins_pipe(pipe_class_default); +%} + +instruct zCompareAndExchangeP(iRegPdst res, iRegPdst mem, iRegPsrc oldval, iRegPsrc newval, + iRegPdst tmp, flagsRegCR0 cr0) %{ + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + + predicate((UseZGC && 
n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong) + && ( + ((CompareAndSwapNode*)n)->order() != MemNode::acquire + && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst + )); + + format %{ "CMPXCHG $res, $mem, $oldval, $newval; as ptr; ptr" %} + ins_encode %{ + z_compare_and_exchange(_masm, this, + $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, $tmp$$Register, + false /* weak */, false /* acquire */); + %} + ins_pipe(pipe_class_default); +%} + +instruct zCompareAndExchangeP_acq(iRegPdst res, iRegPdst mem, iRegPsrc oldval, iRegPsrc newval, + iRegPdst tmp, flagsRegCR0 cr0) %{ + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + + predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong) + && ( + ((CompareAndSwapNode*)n)->order() == MemNode::acquire + || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst + )); + + format %{ "CMPXCHG acq $res, $mem, $oldval, $newval; as ptr; ptr" %} + ins_encode %{ + z_compare_and_exchange(_masm, this, + $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, $tmp$$Register, + false /* weak */, true /* acquire */); + %} + ins_pipe(pipe_class_default); +%} + +instruct zGetAndSetP(iRegPdst res, iRegPdst mem, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0) %{ + match(Set res (GetAndSetP mem newval)); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + + predicate(UseZGC && n->as_LoadStore()->barrier_data() != 0); + + format %{ "GetAndSetP $res, $mem, $newval" %} + ins_encode %{ + __ getandsetd($res$$Register, $newval$$Register, $mem$$Register, MacroAssembler::cmpxchgx_hint_atomic_update()); + z_load_barrier(_masm, this, Address(noreg, (intptr_t) 0), $res$$Register, $tmp$$Register, barrier_data()); + + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} diff --git a/src/hotspot/cpu/ppc/globals_ppc.hpp b/src/hotspot/cpu/ppc/globals_ppc.hpp index 0625c34a97d88343c3d273d68f3bcaea51809cd2..0bab7663feb32d167510de63489ce55a2d4f79a4 100644 --- a/src/hotspot/cpu/ppc/globals_ppc.hpp +++ b/src/hotspot/cpu/ppc/globals_ppc.hpp @@ -58,7 +58,6 @@ define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); define_pd_global(uintx, CodeCacheSegmentSize, 128); define_pd_global(intx, CodeEntryAlignment, 128); define_pd_global(intx, OptoLoopAlignment, 16); -define_pd_global(intx, InlineFrequencyCount, 100); define_pd_global(intx, InlineSmallCode, 1500); // Flags for template interpreter. diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp index 70466fdf3c33f95eac79c7fb40baf9b6b4926cbe..1a416314d707ae6befb09a2f7fe8416012993304 100644 --- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp @@ -943,10 +943,6 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) { bne(CCR0, slow_case); } - if (UseBiasedLocking) { - biased_locking_enter(CCR0, object, displaced_header, tmp, current_header, done, &slow_case); - } - // Set displaced_header to be (markWord of object | UNLOCK_VALUE). ori(displaced_header, displaced_header, markWord::unlocked_value); @@ -1048,13 +1044,6 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) { assert_different_registers(object, displaced_header, object_mark_addr, current_header); - if (UseBiasedLocking) { - // The object address from the monitor is in object. 
- ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor); - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); - biased_locking_exit(CCR0, object, displaced_header, free_slot); - } - // Test first if we are in the fast recursive case. ld(displaced_header, BasicObjectLock::lock_offset_in_bytes() + BasicLock::displaced_header_offset_in_bytes(), monitor); @@ -1070,7 +1059,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) { // If we still have a lightweight lock, unlock the object and be done. // The object address from the monitor is in object. - if (!UseBiasedLocking) { ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor); } + ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor); addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes()); // We have the displaced header in displaced_header. If the lock is still diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index 918ad1d7c897432191d108177d229ecb291b4ddf..98565003691a45fd9c35ea0609d3c8ec2717cd60 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -35,7 +35,6 @@ #include "oops/klass.inline.hpp" #include "oops/methodData.hpp" #include "prims/methodHandles.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/icache.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/objectMonitor.hpp" @@ -106,6 +105,10 @@ void MacroAssembler::align(int modulus, int max, int rem) { for (int c = (padding >> 2); c > 0; --c) { nop(); } } +void MacroAssembler::align_prefix() { + if (is_aligned(offset() + BytesPerInstWord, 64)) { nop(); } +} + // Issue instructions that calculate given TOC from global TOC. void MacroAssembler::calculate_address_from_global_toc(Register dst, address addr, bool hi16, bool lo16, bool add_relocation, bool emit_dummy_addr) { @@ -2073,218 +2076,6 @@ RegisterOrConstant MacroAssembler::argument_offset(RegisterOrConstant arg_slot, } } -// Supports temp2_reg = R0. -void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj_reg, - Register mark_reg, Register temp_reg, - Register temp2_reg, Label& done, Label* slow_case) { - assert(UseBiasedLocking, "why call this otherwise?"); - -#ifdef ASSERT - assert_different_registers(obj_reg, mark_reg, temp_reg, temp2_reg); -#endif - - Label cas_label; - - // Branch to done if fast path fails and no slow_case provided. - Label *slow_case_int = (slow_case != NULL) ? 
slow_case : &done; - - // Biased locking - // See whether the lock is currently biased toward our thread and - // whether the epoch is still valid - // Note that the runtime guarantees sufficient alignment of JavaThread - // pointers to allow age to be placed into low bits - assert(markWord::age_shift == markWord::lock_bits + markWord::biased_lock_bits, - "biased locking makes assumptions about bit layout"); - - if (PrintBiasedLockingStatistics) { - load_const(temp2_reg, (address) BiasedLocking::total_entry_count_addr(), temp_reg); - lwzx(temp_reg, temp2_reg); - addi(temp_reg, temp_reg, 1); - stwx(temp_reg, temp2_reg); - } - - andi(temp_reg, mark_reg, markWord::biased_lock_mask_in_place); - cmpwi(cr_reg, temp_reg, markWord::biased_lock_pattern); - bne(cr_reg, cas_label); - - load_klass(temp_reg, obj_reg); - - load_const_optimized(temp2_reg, ~((int) markWord::age_mask_in_place)); - ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg); - orr(temp_reg, R16_thread, temp_reg); - xorr(temp_reg, mark_reg, temp_reg); - andr(temp_reg, temp_reg, temp2_reg); - cmpdi(cr_reg, temp_reg, 0); - if (PrintBiasedLockingStatistics) { - Label l; - bne(cr_reg, l); - load_const(temp2_reg, (address) BiasedLocking::biased_lock_entry_count_addr()); - lwzx(mark_reg, temp2_reg); - addi(mark_reg, mark_reg, 1); - stwx(mark_reg, temp2_reg); - // restore mark_reg - ld(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg); - bind(l); - } - beq(cr_reg, done); - - Label try_revoke_bias; - Label try_rebias; - - // At this point we know that the header has the bias pattern and - // that we are not the bias owner in the current epoch. We need to - // figure out more details about the state of the header in order to - // know what operations can be legally performed on the object's - // header. - - // If the low three bits in the xor result aren't clear, that means - // the prototype header is no longer biased and we have to revoke - // the bias on this object. - andi(temp2_reg, temp_reg, markWord::biased_lock_mask_in_place); - cmpwi(cr_reg, temp2_reg, 0); - bne(cr_reg, try_revoke_bias); - - // Biasing is still enabled for this data type. See whether the - // epoch of the current bias is still valid, meaning that the epoch - // bits of the mark word are equal to the epoch bits of the - // prototype header. (Note that the prototype header's epoch bits - // only change at a safepoint.) If not, attempt to rebias the object - // toward the current thread. Note that we must be absolutely sure - // that the current epoch is invalid in order to do this because - // otherwise the manipulations it performs on the mark word are - // illegal. - - int shift_amount = 64 - markWord::epoch_shift; - // rotate epoch bits to right (little) end and set other bits to 0 - // [ big part | epoch | little part ] -> [ 0..0 | epoch ] - rldicl_(temp2_reg, temp_reg, shift_amount, 64 - markWord::epoch_bits); - // branch if epoch bits are != 0, i.e. they differ, because the epoch has been incremented - bne(CCR0, try_rebias); - - // The epoch of the current bias is still valid but we know nothing - // about the owner; it might be set or it might be clear. Try to - // acquire the bias of the object using an atomic operation. If this - // fails we will go in to the runtime to revoke the object's bias. - // Note that we first construct the presumed unbiased header so we - // don't accidentally blow away another thread's valid bias. 
- andi(mark_reg, mark_reg, (markWord::biased_lock_mask_in_place | - markWord::age_mask_in_place | - markWord::epoch_mask_in_place)); - orr(temp_reg, R16_thread, mark_reg); - - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); - - // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). - cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, - /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, - /*where=*/obj_reg, - MacroAssembler::MemBarAcq, - MacroAssembler::cmpxchgx_hint_acquire_lock(), - noreg, slow_case_int); // bail out if failed - - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - if (PrintBiasedLockingStatistics) { - load_const(temp2_reg, (address) BiasedLocking::anonymously_biased_lock_entry_count_addr(), temp_reg); - lwzx(temp_reg, temp2_reg); - addi(temp_reg, temp_reg, 1); - stwx(temp_reg, temp2_reg); - } - b(done); - - bind(try_rebias); - // At this point we know the epoch has expired, meaning that the - // current "bias owner", if any, is actually invalid. Under these - // circumstances _only_, we are allowed to use the current header's - // value as the comparison value when doing the cas to acquire the - // bias in the current epoch. In other words, we allow transfer of - // the bias from one thread to another directly in this situation. - load_klass(temp_reg, obj_reg); - andi(temp2_reg, mark_reg, markWord::age_mask_in_place); - orr(temp2_reg, R16_thread, temp2_reg); - ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg); - orr(temp_reg, temp2_reg, temp_reg); - - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); - - cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, - /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, - /*where=*/obj_reg, - MacroAssembler::MemBarAcq, - MacroAssembler::cmpxchgx_hint_acquire_lock(), - noreg, slow_case_int); // bail out if failed - - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - if (PrintBiasedLockingStatistics) { - load_const(temp2_reg, (address) BiasedLocking::rebiased_lock_entry_count_addr(), temp_reg); - lwzx(temp_reg, temp2_reg); - addi(temp_reg, temp_reg, 1); - stwx(temp_reg, temp2_reg); - } - b(done); - - bind(try_revoke_bias); - // The prototype mark in the klass doesn't have the bias bit set any - // more, indicating that objects of this data type are not supposed - // to be biased any more. We are going to try to reset the mark of - // this object to the prototype value and fall through to the - // CAS-based locking scheme. Note that if our CAS fails, it means - // that another thread raced us for the privilege of revoking the - // bias of this particular object, so it's okay to continue in the - // normal locking code. - load_klass(temp_reg, obj_reg); - ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg); - andi(temp2_reg, mark_reg, markWord::age_mask_in_place); - orr(temp_reg, temp_reg, temp2_reg); - - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); - - // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). 
- cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, - /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, - /*where=*/obj_reg, - MacroAssembler::MemBarAcq, - MacroAssembler::cmpxchgx_hint_acquire_lock()); - - // reload markWord in mark_reg before continuing with lightweight locking - ld(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg); - - // Fall through to the normal CAS-based lock, because no matter what - // the result of the above CAS, some thread must have succeeded in - // removing the bias bit from the object's header. - if (PrintBiasedLockingStatistics) { - Label l; - bne(cr_reg, l); - load_const(temp2_reg, (address) BiasedLocking::revoked_lock_entry_count_addr(), temp_reg); - lwzx(temp_reg, temp2_reg); - addi(temp_reg, temp_reg, 1); - stwx(temp_reg, temp2_reg); - bind(l); - } - - bind(cas_label); -} - -void MacroAssembler::biased_locking_exit (ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done) { - // Check for biased locking unlock case, which is a no-op - // Note: we do not have to check the thread ID for two reasons. - // First, the interpreter checks for IllegalMonitorStateException at - // a higher level. Second, if the bias was revoked while we held the - // lock, the object could not be rebiased toward another thread, so - // the bias bit would be clear. - - ld(temp_reg, 0, mark_addr); - andi(temp_reg, temp_reg, markWord::biased_lock_mask_in_place); - - cmpwi(cr_reg, temp_reg, markWord::biased_lock_pattern); - beq(cr_reg, done); -} - // allocation (for C1) void MacroAssembler::eden_allocate( Register obj, // result: pointer to object after successful allocation @@ -2695,14 +2486,13 @@ void MacroAssembler::rtm_stack_locking(ConditionRegister flag, Metadata* method_data, bool profile_rtm, Label& DONE_LABEL, Label& IsInflated) { assert(UseRTMForStackLocks, "why call this otherwise?"); - assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking"); Label L_rtm_retry, L_decrement_retry, L_on_abort; if (RTMRetryCount > 0) { load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort bind(L_rtm_retry); } - andi_(R0, mark_word, markWord::monitor_value); // inflated vs stack-locked|neutral|biased + andi_(R0, mark_word, markWord::monitor_value); // inflated vs stack-locked|neutral bne(CCR0, IsInflated); if (PrintPreciseRTMLockingStatistics || profile_rtm) { @@ -2720,10 +2510,10 @@ void MacroAssembler::rtm_stack_locking(ConditionRegister flag, } tbegin_(); beq(CCR0, L_on_abort); - ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // Reload in transaction, conflicts need to be tracked. - andi(R0, mark_word, markWord::biased_lock_mask_in_place); // look at 3 lock bits - cmpwi(flag, R0, markWord::unlocked_value); // bits = 001 unlocked - beq(flag, DONE_LABEL); // all done if unlocked + ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // Reload in transaction, conflicts need to be tracked. + andi(R0, mark_word, markWord::lock_mask_in_place); // look at 2 lock bits + cmpwi(flag, R0, markWord::unlocked_value); // bits = 01 unlocked + beq(flag, DONE_LABEL); // all done if unlocked if (UseRTMXendForLockBusy) { tend_(); @@ -2837,7 +2627,6 @@ void MacroAssembler::rtm_inflated_locking(ConditionRegister flag, // "The box" is the space on the stack where we copy the object mark. 
void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box, Register temp, Register displaced_header, Register current_header, - bool try_bias, RTMLockingCounters* rtm_counters, RTMLockingCounters* stack_rtm_counters, Metadata* method_data, @@ -2858,10 +2647,6 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register bne(flag, cont); } - if (try_bias) { - biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont); - } - #if INCLUDE_RTM_OPT if (UseRTMForStackLocks && use_rtm) { rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header, @@ -2964,26 +2749,21 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, Register temp, Register displaced_header, Register current_header, - bool try_bias, bool use_rtm) { + bool use_rtm) { assert_different_registers(oop, box, temp, displaced_header, current_header); assert(flag != CCR0, "bad condition register"); Label cont; Label object_has_monitor; - if (try_bias) { - biased_locking_exit(flag, oop, current_header, cont); - } - #if INCLUDE_RTM_OPT if (UseRTMForStackLocks && use_rtm) { - assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking"); Label L_regular_unlock; - ld(current_header, oopDesc::mark_offset_in_bytes(), oop); // fetch markword - andi(R0, current_header, markWord::biased_lock_mask_in_place); // look at 3 lock bits - cmpwi(flag, R0, markWord::unlocked_value); // bits = 001 unlocked - bne(flag, L_regular_unlock); // else RegularLock - tend_(); // otherwise end... - b(cont); // ... and we're done + ld(current_header, oopDesc::mark_offset_in_bytes(), oop); // fetch markword + andi(R0, current_header, markWord::lock_mask_in_place); // look at 2 lock bits + cmpwi(flag, R0, markWord::unlocked_value); // bits = 01 unlocked + bne(flag, L_regular_unlock); // else RegularLock + tend_(); // otherwise end... + b(cont); // ... and we're done bind(L_regular_unlock); } #endif diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp index 55cefd284360abceb4d014ea130fa71cd9bc33a0..5c53beede16859ac09b19d5f9ba34314f8747841 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp @@ -88,6 +88,16 @@ class MacroAssembler: public Assembler { // nop padding void align(int modulus, int max = 252, int rem = 0); + // Align prefix opcode to make sure it's not on the last word of a + // 64-byte block. + // + // Note: do not call align_prefix() in a .ad file (e.g. ppc.ad). Instead + // add ins_alignment(2) to the instruct definition and implement the + // compute_padding() method of the instruct node to use + // compute_prefix_padding(). See loadConI32Node::compute_padding() in + // ppc.ad for an example. + void align_prefix(); + // // Constants, loading constants, TOC support // @@ -589,24 +599,6 @@ class MacroAssembler: public Assembler { // Method handle support (JSR 292). RegisterOrConstant argument_offset(RegisterOrConstant arg_slot, Register temp_reg, int extra_slot_offset = 0); - // Biased locking support - // Upon entry,obj_reg must contain the target object, and mark_reg - // must contain the target object's header. - // Destroys mark_reg if an attempt is made to bias an anonymously - // biased lock. 
In this case a failure will go either to the slow
-  case or fall through with the notEqual condition code set with
-  the expectation that the slow case in the runtime will be called.
-  In the fall-through case where the CAS-based lock is done,
-  mark_reg is not destroyed.
-  void biased_locking_enter(ConditionRegister cr_reg, Register obj_reg, Register mark_reg, Register temp_reg,
-                            Register temp2_reg, Label& done, Label* slow_case = NULL);
-  // Upon entry, the base register of mark_addr must contain the oop.
-  // Destroys temp_reg.
-  // If allow_delay_slot_filling is set to true, the next instruction
-  // emitted after this one will go in an annulled delay slot if the
-  // biased locking exit case failed.
-  void biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done);
-
   // allocation (for C1)
   void eden_allocate(
     Register obj,      // result: pointer to object after successful allocation
@@ -655,7 +647,6 @@ class MacroAssembler: public Assembler {
   void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
                                  Register tmp1, Register tmp2, Register tmp3,
-                                 bool try_bias = UseBiasedLocking,
                                  RTMLockingCounters* rtm_counters = NULL,
                                  RTMLockingCounters* stack_rtm_counters = NULL,
                                  Metadata* method_data = NULL,
@@ -663,7 +654,7 @@ class MacroAssembler: public Assembler {
   void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
                                    Register tmp1, Register tmp2, Register tmp3,
-                                   bool try_bias = UseBiasedLocking, bool use_rtm = false);
+                                   bool use_rtm = false);
   // Check if safepoint requested and if so branch
   void safepoint_poll(Label& slow_path, Register temp, bool at_return, bool in_nmethod);
diff --git a/src/hotspot/cpu/ppc/matcher_ppc.hpp b/src/hotspot/cpu/ppc/matcher_ppc.hpp
index cbcebc23ddc9241c62623a2cd6a764fb8dc19792..877f0be33c4413b644f76aef049c5707d6c0ec04 100644
--- a/src/hotspot/cpu/ppc/matcher_ppc.hpp
+++ b/src/hotspot/cpu/ppc/matcher_ppc.hpp
@@ -57,6 +57,9 @@
   // No support for generic vector operands.
   static const bool supports_generic_vector_operands = false;
 
+  // No support for 48 extra htbl entries in aes-gcm intrinsic
+  static const int htbl_entries = -1;
+
   static constexpr bool isSimpleConstant64(jlong value) {
     // Probably always true, even if a temp register is required.
     return true;
   }
@@ -138,6 +141,11 @@
     return false;
   }
 
+  // Does the CPU support vector constant rotate instructions?
+  static constexpr bool supports_vector_constant_rotates(int shift) {
+    return false;
+  }
+
   // Does the CPU support vector unsigned comparison instructions?
  static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
    return false;
@@ -156,5 +164,7 @@
    return VM_Version::has_fcfids();
  }
 
+  // Implements a variant of EncodeISOArrayNode that encodes ASCII only
+  static const bool supports_encode_ascii_array = false;
 
 #endif // CPU_PPC_MATCHER_PPC_HPP
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
index 0b66ce7489aeb442012435e5cfd9431fad879ca7..4a43aa2f12a7e17221efe232298657d061e669d7 100644
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@@ -2108,6 +2108,8 @@ const bool Matcher::match_rule_supported(int opcode) {
   switch (opcode) {
   case Op_SqrtD:
     return VM_Version::has_fsqrt();
+  case Op_RoundDoubleMode:
+    return VM_Version::has_vsx();
   case Op_CountLeadingZerosI:
   case Op_CountLeadingZerosL:
     return UseCountLeadingZerosInstructionsPPC64;
@@ -2193,10 +2195,6 @@ OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
   return OptoRegPair(0, 0);
 }
 
-const int Matcher::float_pressure(int default_pressure_threshold) {
-  return default_pressure_threshold;
-}
-
 // Vector width in bytes.
 const int Matcher::vector_width_in_bytes(BasicType bt) {
   if (SuperwordUseVSX) {
@@ -2272,7 +2270,7 @@ MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd,
   return NULL;
 }
 
-bool Matcher::is_generic_reg2reg_move(MachNode* m) {
+bool Matcher::is_reg2reg_move(MachNode* m) {
   ShouldNotReachHere(); // generic vector operands not supported
   return false;
 }
@@ -2363,6 +2361,16 @@ bool Matcher::is_spillable_arg(int reg) {
   return can_be_java_arg(reg);
 }
 
+uint Matcher::int_pressure_limit()
+{
+  return (INTPRESSURE == -1) ? 26 : INTPRESSURE;
+}
+
+uint Matcher::float_pressure_limit()
+{
+  return (FLOATPRESSURE == -1) ? 28 : FLOATPRESSURE;
+}
+
 bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
   return false;
 }
@@ -5523,7 +5531,7 @@ instruct loadN2P_klass_unscaled(iRegPdst dst, memory mem) %{
 // Load Pointer
 instruct loadP(iRegPdst dst, memoryAlg4 mem) %{
   match(Set dst (LoadP mem));
-  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
+  predicate((n->as_Load()->is_unordered() || followed_by_acquire(n)) && n->as_Load()->barrier_data() == 0);
   ins_cost(MEMORY_REF_COST);
 
   format %{ "LD      $dst, $mem \t// ptr" %}
@@ -5537,6 +5545,8 @@ instruct loadP_ac(iRegPdst dst, memoryAlg4 mem) %{
   match(Set dst (LoadP mem));
   ins_cost(3*MEMORY_REF_COST);
 
+  predicate(n->as_Load()->barrier_data() == 0);
+
   format %{ "LD      $dst, $mem \t// ptr acquire\n\t"
             "TWI     $dst\n\t"
             "ISYNC" %}
@@ -5548,7 +5558,7 @@ instruct loadP_ac(iRegPdst dst, memoryAlg4 mem) %{
 // LoadP + CastP2L
 instruct loadP2X(iRegLdst dst, memoryAlg4 mem) %{
   match(Set dst (CastP2X (LoadP mem)));
-  predicate(_kids[0]->_leaf->as_Load()->is_unordered());
+  predicate(_kids[0]->_leaf->as_Load()->is_unordered() && _kids[0]->_leaf->as_Load()->barrier_data() == 0);
   ins_cost(MEMORY_REF_COST);
 
   format %{ "LD      $dst, $mem \t// ptr + p2x" %}
@@ -7470,6 +7480,7 @@ instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLs
 instruct storePConditional_regP_regP_regP(flagsRegCR0 cr0, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{
   match(Set cr0 (StorePConditional mem_ptr (Binary oldVal newVal)));
   ins_cost(2*MEMORY_REF_COST);
+  predicate(n->as_LoadStore()->barrier_data() == 0);
 
   format %{ "STDCX_  if ($cr0 = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
   ins_encode %{
@@ -7634,6 +7645,7 @@ instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc
 instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst
mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2))); effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump + predicate(n->as_LoadStore()->barrier_data() == 0); format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %} ins_encode %{ // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. @@ -7856,7 +7868,7 @@ instruct weakCompareAndSwapL_acq_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, instruct weakCompareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{ match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst); + predicate((((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0); effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump format %{ "weak CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %} ins_encode %{ @@ -7870,7 +7882,7 @@ instruct weakCompareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iReg instruct weakCompareAndSwapP_acq_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{ match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst); + predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0); effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump format %{ "weak CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as bool; ptr" %} ins_encode %{ @@ -8126,7 +8138,8 @@ instruct compareAndExchangeL_acq_regP_regL_regL(iRegLdst res, iRegPdst mem_ptr, instruct compareAndExchangeP_regP_regP_regP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndExchangeP mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst); + predicate((((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst) + && n->as_LoadStore()->barrier_data() == 0); effect(TEMP_DEF res, TEMP cr0); format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as ptr; ptr" %} ins_encode %{ @@ -8140,7 +8153,8 @@ instruct compareAndExchangeP_regP_regP_regP(iRegPdst res, iRegPdst mem_ptr, iReg instruct compareAndExchangeP_acq_regP_regP_regP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndExchangeP mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst); + predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) + && n->as_LoadStore()->barrier_data() == 0); effect(TEMP_DEF res, TEMP cr0); format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as ptr; ptr" %} ins_encode %{ @@ -8362,6 +8376,7 @@ instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetP mem_ptr src)); + predicate(n->as_LoadStore()->barrier_data() == 0); effect(TEMP_DEF 
res, TEMP cr0);
 
   format %{ "GetAndSetP $res, $mem_ptr, $src" %}
   ins_encode %{
@@ -12117,8 +12132,7 @@ instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iR
   format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2" %}
   ins_encode %{
     __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
-                                 $tmp1$$Register, $tmp2$$Register, /*tmp3*/ R0,
-                                 UseBiasedLocking && !UseOptoBiasInlining);
+                                 $tmp1$$Register, $tmp2$$Register, /*tmp3*/ R0);
     // If locking was successful, crx should indicate 'EQ'.
     // The compiler generates a branch to the runtime call to
     // _complete_monitor_locking_Java for the case where crx is 'NE'.
@@ -12136,10 +12150,9 @@ instruct cmpFastLock_tm(flagsReg crx, iRegPdst oop, rarg2RegP box, iRegPdst tmp1
   ins_encode %{
     __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
                                  $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
-                                 /*Biased Locking*/ false,
                                  _rtm_counters, _stack_rtm_counters,
                                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
-                                 /*TM*/ true, ra_->C->profile_rtm());
+                                 /*RTM*/ true, ra_->C->profile_rtm());
     // If locking was successful, crx should indicate 'EQ'.
     // The compiler generates a branch to the runtime call to
     // _complete_monitor_locking_Java for the case where crx is 'NE'.
@@ -12156,7 +12169,6 @@ instruct cmpFastUnlock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1,
   ins_encode %{
     __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
                                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
-                                   UseBiasedLocking && !UseOptoBiasInlining,
                                    false);
     // If unlocking was successful, crx should indicate 'EQ'.
     // The compiler generates a branch to the runtime call to
@@ -12174,7 +12186,7 @@ instruct cmpFastUnlock_tm(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp
   ins_encode %{
     __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
                                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
-                                   /*Biased Locking*/ false, /*TM*/ true);
+                                   /*RTM*/ true);
     // If unlocking was successful, crx should indicate 'EQ'.
     // The compiler generates a branch to the runtime call to
     // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
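
Aside: once the biased-locking argument is gone from these fast paths, only the two low lock bits of the mark word remain significant (cf. the "look at 2 lock bits" comments in the rtm_stack_locking and compiler_fast_unlock_object hunks above). The standalone C++ sketch below models the mask-and-compare that the rewritten andi_/cmpwi sequences perform; the constants mirror HotSpot's markWord layout and are restated here for illustration, not taken from this patch.

// Standalone illustration (not HotSpot code) of the two-bit lock encoding
// left in the mark word after biased locking is removed.
#include <cstdint>
#include <cstdio>

static const uintptr_t lock_mask_in_place = 0b11; // low two bits of the mark word
static const uintptr_t unlocked_value     = 0b01; // neutral, i.e. not locked
static const uintptr_t monitor_value      = 0b10; // inflated: mark points to an ObjectMonitor

// Mirrors the RTM fast path: transactional stack locking may only be
// attempted while the object is neutral (bits = 01).
static bool is_unlocked(uintptr_t mark) {
  return (mark & lock_mask_in_place) == unlocked_value;
}

// Mirrors the "inflated vs stack-locked|neutral" test: a set monitor bit
// means the mark word points to an inflated monitor.
static bool has_monitor(uintptr_t mark) {
  return (mark & monitor_value) != 0;
}

int main() {
  uintptr_t neutral  = (0x7f0000001000 & ~lock_mask_in_place) | unlocked_value; // hypothetical mark words
  uintptr_t inflated = (0x7f0000002000 & ~lock_mask_in_place) | monitor_value;
  printf("neutral:  unlocked=%d monitor=%d\n", is_unlocked(neutral), has_monitor(neutral));
  printf("inflated: unlocked=%d monitor=%d\n", is_unlocked(inflated), has_monitor(inflated));
  return 0;
}

The same test is what andi_ (mask) followed by cmpwi (compare against the 01 pattern) computes into CCR0 in the assembly above.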
@@ -12784,6 +12796,7 @@ instruct has_negatives(rarg1RegP ary1, iRegIsrc len, iRegIdst result, iRegLdst t
 // encode char[] to byte[] in ISO_8859_1
 instruct encode_iso_array(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
                           iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
+  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
   match(Set result (EncodeISOArray src (Binary dst len)));
   effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
          USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
@@ -13961,7 +13974,7 @@ instruct roundD_reg(regD dst, regD src, immI8 rmode) %{
   ins_encode %{
     switch ($rmode$$constant) {
       case RoundDoubleModeNode::rmode_rint:
-        __ frin($dst$$FloatRegister, $src$$FloatRegister);
+        __ xvrdpic($dst$$FloatRegister->to_vsr(), $src$$FloatRegister->to_vsr());
         break;
       case RoundDoubleModeNode::rmode_floor:
        __ frim($dst$$FloatRegister, $src$$FloatRegister);
@@ -13985,7 +13998,7 @@ instruct vround2D_reg(vecX dst, vecX src, immI8 rmode) %{
   ins_encode %{
     switch ($rmode$$constant) {
       case RoundDoubleModeNode::rmode_rint:
-        __ xvrdpi($dst$$VectorSRegister, $src$$VectorSRegister);
+        __ xvrdpic($dst$$VectorSRegister, $src$$VectorSRegister);
         break;
       case RoundDoubleModeNode::rmode_floor:
         __ xvrdpim($dst$$VectorSRegister, $src$$VectorSRegister);
diff --git a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
index 94869ae7ca2dd76bd8cbfab22ff790a315ed1fba..377df777511573212b9ea114e848d3bb8fef7942 100644
--- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
+++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
@@ -2154,14 +2154,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
     // Get the lock box slot's address.
     __ addi(r_box, R1_SP, lock_offset);
 
-#   ifdef ASSERT
-    if (UseBiasedLocking) {
-      // Making the box point to itself will make it clear it went unused
-      // but also be obviously invalid.
-      __ std(r_box, 0, r_box);
-    }
-#   endif // ASSERT
-
     // Try fastpath for locking.
     // fast_lock kills r_temp_1, r_temp_2, r_temp_3.
     __ compiler_fast_lock_object(r_flag, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
diff --git a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
index c4e60e9a513ec988402d7860177fa54e7439af1a..0e68a38dbaf9a453e6ff197e21bb5dd3fb74b9bb 100644
--- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
@@ -3643,8 +3643,14 @@ class StubGenerator: public StubCodeGenerator {
 // Underscore (URL = 1)
 #define US (signed char)((-'_' + 63) & 0xff)
 
+// For P10 (or later) only
+#define VALID_B64 0x80
+#define VB64(x) (VALID_B64 | x)
+
 #define VEC_ALIGN __attribute__ ((aligned(16)))
 
+#define BLK_OFFSETOF(x) (offsetof(constant_block, x))
+
 // In little-endian mode, the lxv instruction loads the element at EA into
 // element 15 of the vector register, EA+1 goes into element 14, and so
 // on.
@@ -3660,95 +3666,123 @@ class StubGenerator: public StubCodeGenerator {
     StubCodeMark mark(this, "StubRoutines", "base64_decodeBlock");
     address start = __ function_entry();
 
-    static const signed char VEC_ALIGN offsetLUT_val[16] = {
-      ARRAY_TO_LXV_ORDER(
-      0,   0, PLS, DIG,  UC,  UC,  LC,  LC,
-      0,   0,   0,   0,   0,   0,   0,   0 ) };
-
-    static const signed char VEC_ALIGN offsetLUT_URL_val[16] = {
-      ARRAY_TO_LXV_ORDER(
-      0,   0, HYP, DIG,  UC,  UC,  LC,  LC,
-      0,   0,   0,   0,   0,   0,   0,   0 ) };
-
-    static const unsigned char VEC_ALIGN maskLUT_val[16] = {
-      ARRAY_TO_LXV_ORDER(
-      /* 0        */ (unsigned char)0b10101000,
-      /* 1 ..
9 */ (unsigned char)0b11111000, (unsigned char)0b11111000, (unsigned char)0b11111000, (unsigned char)0b11111000, - (unsigned char)0b11111000, (unsigned char)0b11111000, (unsigned char)0b11111000, (unsigned char)0b11111000, - (unsigned char)0b11111000, - /* 10 */ (unsigned char)0b11110000, - /* 11 */ (unsigned char)0b01010100, - /* 12 .. 14 */ (unsigned char)0b01010000, (unsigned char)0b01010000, (unsigned char)0b01010000, - /* 15 */ (unsigned char)0b01010100 ) }; - - static const unsigned char VEC_ALIGN maskLUT_URL_val[16] = { - ARRAY_TO_LXV_ORDER( - /* 0 */ (unsigned char)0b10101000, - /* 1 .. 9 */ (unsigned char)0b11111000, (unsigned char)0b11111000, (unsigned char)0b11111000, (unsigned char)0b11111000, - (unsigned char)0b11111000, (unsigned char)0b11111000, (unsigned char)0b11111000, (unsigned char)0b11111000, - (unsigned char)0b11111000, - /* 10 */ (unsigned char)0b11110000, - /* 11 .. 12 */ (unsigned char)0b01010000, (unsigned char)0b01010000, - /* 13 */ (unsigned char)0b01010100, - /* 14 */ (unsigned char)0b01010000, - /* 15 */ (unsigned char)0b01110000 ) }; - - static const unsigned char VEC_ALIGN bitposLUT_val[16] = { - ARRAY_TO_LXV_ORDER( - 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, (unsigned char)0x80, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ) }; - - static const unsigned char VEC_ALIGN pack_lshift_val[16] = { - ARRAY_TO_LXV_ORDER( - 0, 6, 4, 2, 0, 6, 4, 2, 0, 6, 4, 2, 0, 6, 4, 2 ) }; - - static const unsigned char VEC_ALIGN pack_rshift_val[16] = { - ARRAY_TO_LXV_ORDER( - 0, 2, 4, 0, 0, 2, 4, 0, 0, 2, 4, 0, 0, 2, 4, 0 ) }; - - // The first 4 index values are "don't care" because - // we only use the first 12 bytes of the vector, - // which are decoded from 16 bytes of Base64 characters. - static const unsigned char VEC_ALIGN pack_permute_val[16] = { - ARRAY_TO_LXV_ORDER( - 0, 0, 0, 0, - 0, 1, 2, - 4, 5, 6, - 8, 9, 10, - 12, 13, 14 ) }; - - static const unsigned char VEC_ALIGN p10_pack_permute_val[16] = { - ARRAY_TO_LXV_ORDER( - 0, 0, 0, 0, 7, 6, 5, 4, - 3, 2, 15, 14, 13, 12, 11, 10 ) }; - - // loop_unrolls needs to be a power of two so that the rounding can be - // done using a mask. - // - // The amount of loop unrolling was determined by running a benchmark - // that decodes a 20k block of Base64 data on a Power9 machine: - // loop_unrolls = 1 : - // (min, avg, max) = (108639.215, 110530.479, 110779.920), stdev = 568.437 - // loop_unrolls = 2 : - // (min, avg, max) = (108259.029, 110174.202, 110399.642), stdev = 561.729 - // loop_unrolls = 4 : - // (min, avg, max) = (106514.175, 108373.110, 108514.786), stdev = 392.237 - // loop_unrolls = 8 : - // (min, avg, max) = (106281.283, 108316.668, 108539.953), stdev = 553.938 - // loop_unrolls = 16 : - // (min, avg, max) = (108580.768, 110631.161, 110766.237), stdev = 430.510 - // - // Comparing only the max values, there's no reason to go past - // loop_unrolls = 1. Performance at loop_unrolls = 16 is similar but - // has the disadvantage of requiring a larger minimum block of data to - // work with. A value of 1 gives a minimum of (16 + 12) = 28 bytes - // before the intrinsic will decode any data. See the reason for the - // +12 in the following logic. 
- const unsigned loop_unrolls = 1; - - const unsigned vec_size = 16; // size of vector registers in bytes - const unsigned block_size = vec_size * loop_unrolls; // number of bytes to process in each pass through the loop - const unsigned block_size_shift = exact_log2(block_size); + typedef struct { + signed char offsetLUT_val[16]; + signed char offsetLUT_URL_val[16]; + unsigned char maskLUT_val[16]; + unsigned char maskLUT_URL_val[16]; + unsigned char bitposLUT_val[16]; + unsigned char table_32_47_val[16]; + unsigned char table_32_47_URL_val[16]; + unsigned char table_48_63_val[16]; + unsigned char table_64_79_val[16]; + unsigned char table_80_95_val[16]; + unsigned char table_80_95_URL_val[16]; + unsigned char table_96_111_val[16]; + unsigned char table_112_127_val[16]; + unsigned char pack_lshift_val[16]; + unsigned char pack_rshift_val[16]; + unsigned char pack_permute_val[16]; + } constant_block; + + static const constant_block VEC_ALIGN const_block = { + + .offsetLUT_val = { + ARRAY_TO_LXV_ORDER( + 0, 0, PLS, DIG, UC, UC, LC, LC, + 0, 0, 0, 0, 0, 0, 0, 0 ) }, + + .offsetLUT_URL_val = { + ARRAY_TO_LXV_ORDER( + 0, 0, HYP, DIG, UC, UC, LC, LC, + 0, 0, 0, 0, 0, 0, 0, 0 ) }, + + .maskLUT_val = { + ARRAY_TO_LXV_ORDER( + /* 0 */ (unsigned char)0b10101000, + /* 1 .. 9 */ (unsigned char)0b11111000, (unsigned char)0b11111000, (unsigned char)0b11111000, (unsigned char)0b11111000, + (unsigned char)0b11111000, (unsigned char)0b11111000, (unsigned char)0b11111000, (unsigned char)0b11111000, + (unsigned char)0b11111000, + /* 10 */ (unsigned char)0b11110000, + /* 11 */ (unsigned char)0b01010100, + /* 12 .. 14 */ (unsigned char)0b01010000, (unsigned char)0b01010000, (unsigned char)0b01010000, + /* 15 */ (unsigned char)0b01010100 ) }, + + .maskLUT_URL_val = { + ARRAY_TO_LXV_ORDER( + /* 0 */ (unsigned char)0b10101000, + /* 1 .. 9 */ (unsigned char)0b11111000, (unsigned char)0b11111000, (unsigned char)0b11111000, (unsigned char)0b11111000, + (unsigned char)0b11111000, (unsigned char)0b11111000, (unsigned char)0b11111000, (unsigned char)0b11111000, + (unsigned char)0b11111000, + /* 10 */ (unsigned char)0b11110000, + /* 11 .. 12 */ (unsigned char)0b01010000, (unsigned char)0b01010000, + /* 13 */ (unsigned char)0b01010100, + /* 14 */ (unsigned char)0b01010000, + /* 15 */ (unsigned char)0b01110000 ) }, + + .bitposLUT_val = { + ARRAY_TO_LXV_ORDER( + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, (unsigned char)0x80, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ) }, + + // In the following table_*_val constants, a 0 value means the + // character is not in the Base64 character set + .table_32_47_val = { + ARRAY_TO_LXV_ORDER ( + /* space .. '*' = 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* '+' = 62 */ VB64(62), /* ',' .. '.' = 0 */ 0, 0, 0, /* '/' = 63 */ VB64(63) ) }, + + .table_32_47_URL_val = { + ARRAY_TO_LXV_ORDER( + /* space .. ',' = 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* '-' = 62 */ VB64(62), /* '.' .. '/' */ 0, 0 ) }, + + .table_48_63_val = { + ARRAY_TO_LXV_ORDER( + /* '0' .. '9' = 52 .. 61 */ VB64(52), VB64(53), VB64(54), VB64(55), VB64(56), VB64(57), VB64(58), VB64(59), VB64(60), VB64(61), + /* ':' .. '?' = 0 */ 0, 0, 0, 0, 0, 0 ) }, + + .table_64_79_val = { + ARRAY_TO_LXV_ORDER( + /* '@' = 0 */ 0, /* 'A' .. 'O' = 0 .. 14 */ VB64(0), VB64(1), VB64(2), VB64(3), VB64(4), VB64(5), VB64(6), VB64(7), VB64(8), + VB64(9), VB64(10), VB64(11), VB64(12), VB64(13), VB64(14) ) }, + + .table_80_95_val = { + ARRAY_TO_LXV_ORDER(/* 'P' .. 'Z' = 15 .. 
25 */ VB64(15), VB64(16), VB64(17), VB64(18), VB64(19), VB64(20), VB64(21), VB64(22), + VB64(23), VB64(24), VB64(25), /* '[' .. '_' = 0 */ 0, 0, 0, 0, 0 ) }, + + .table_80_95_URL_val = { + ARRAY_TO_LXV_ORDER(/* 'P' .. 'Z' = 15 .. 25 */ VB64(15), VB64(16), VB64(17), VB64(18), VB64(19), VB64(20), VB64(21), VB64(22), + VB64(23), VB64(24), VB64(25), /* '[' .. '^' = 0 */ 0, 0, 0, 0, /* '_' = 63 */ VB64(63) ) }, + + .table_96_111_val = { + ARRAY_TO_LXV_ORDER(/* '`' = 0 */ 0, /* 'a' .. 'o' = 26 .. 40 */ VB64(26), VB64(27), VB64(28), VB64(29), VB64(30), VB64(31), + VB64(32), VB64(33), VB64(34), VB64(35), VB64(36), VB64(37), VB64(38), VB64(39), VB64(40) ) }, + + .table_112_127_val = { + ARRAY_TO_LXV_ORDER(/* 'p' .. 'z' = 41 .. 51 */ VB64(41), VB64(42), VB64(43), VB64(44), VB64(45), VB64(46), VB64(47), VB64(48), + VB64(49), VB64(50), VB64(51), /* '{' .. DEL = 0 */ 0, 0, 0, 0, 0 ) }, + + .pack_lshift_val = { + ARRAY_TO_LXV_ORDER( + 0, 6, 4, 2, 0, 6, 4, 2, 0, 6, 4, 2, 0, 6, 4, 2 ) }, + + .pack_rshift_val = { + ARRAY_TO_LXV_ORDER( + 0, 2, 4, 0, 0, 2, 4, 0, 0, 2, 4, 0, 0, 2, 4, 0 ) }, + + // The first 4 index values are "don't care" because + // we only use the first 12 bytes of the vector, + // which are decoded from 16 bytes of Base64 characters. + .pack_permute_val = { + ARRAY_TO_LXV_ORDER( + 0, 0, 0, 0, + 0, 1, 2, + 4, 5, 6, + 8, 9, 10, + 12, 13, 14 ) } + }; + + const unsigned block_size = 16; // number of bytes to process in each pass through the loop + const unsigned block_size_shift = 4; // According to the ELF V2 ABI, registers r3-r12 are volatile and available for use without save/restore Register s = R3_ARG1; // source starting address of Base64 characters @@ -3757,6 +3791,7 @@ class StubGenerator: public StubCodeGenerator { Register d = R6_ARG4; // destination address Register dp = R7_ARG5; // destination offset Register isURL = R8_ARG6; // boolean, if non-zero indicates use of RFC 4648 base64url encoding + Register isMIME = R9_ARG7; // boolean, if non-zero indicates use of RFC 2045 MIME encoding - not used // Local variables Register const_ptr = R9; // used for loading constants @@ -3774,8 +3809,6 @@ class StubGenerator: public StubCodeGenerator { VectorRegister vec_special_case_char = VR3; VectorRegister pack_rshift = VR4; VectorRegister pack_lshift = VR5; - // P10+ - VectorRegister vec_0x3fs = VR4; // safe to reuse pack_rshift's register // VSR Constants VectorSRegister offsetLUT = VSR0; @@ -3785,26 +3818,40 @@ class StubGenerator: public StubCodeGenerator { VectorSRegister vec_special_case_offset = VSR4; VectorSRegister pack_permute = VSR5; - // Variables for lookup - // VR + // P10 (or later) VSR lookup constants + VectorSRegister table_32_47 = VSR0; + VectorSRegister table_48_63 = VSR1; + VectorSRegister table_64_79 = VSR2; + VectorSRegister table_80_95 = VSR3; + VectorSRegister table_96_111 = VSR4; + VectorSRegister table_112_127 = VSR6; + + // Data read in and later converted VectorRegister input = VR6; + // Variable for testing Base64 validity + VectorRegister non_match = VR10; + + // P9 VR Variables for lookup VectorRegister higher_nibble = VR7; VectorRegister eq_special_case_char = VR8; VectorRegister offsets = VR9; - VectorRegister non_match = VR10; - // VSR + // P9 VSR lookup variables VectorSRegister bit = VSR6; VectorSRegister lower_nibble = VSR7; VectorSRegister M = VSR8; + // P10 (or later) VSR lookup variables + VectorSRegister xlate_a = VSR7; + VectorSRegister xlate_b = VSR8; + // Variables for pack // VR VectorRegister l = VR7; // reuse higher_nibble's register 
  VectorRegister r = VR8;         // reuse eq_special_case_char's register
-  VectorRegister gathered = VR9;  // reuse offsets's register
+  VectorRegister gathered = VR10; // reuse non_match's register
 
-  Label not_URL, calculate_size, unrolled_loop_start, unrolled_loop_exit, return_zero;
+  Label not_URL, calculate_size, loop_start, loop_exit, return_zero;
 
   // The upper 32 bits of the non-pointer parameter registers are not
   // guaranteed to be zero, so mask off those upper bits.
@@ -3823,7 +3870,7 @@ class StubGenerator: public StubCodeGenerator {
   __ sub(sl, sl, sp);
   __ subi(sl, sl, 12);
 
-  // Load CTR with the number of passes through the unrolled loop
+  // Load CTR with the number of passes through the loop
   // = sl >> block_size_shift. After the shift, if sl <= 0, there's too
   // little data to be processed by this intrinsic.
   __ srawi_(sl, sl, block_size_shift);
@@ -3835,26 +3882,33 @@ class StubGenerator: public StubCodeGenerator {
   __ clrldi(dp, dp, 32);
 
   // Load constant vec registers that need to be loaded from memory
-  __ load_const_optimized(const_ptr, (address)&bitposLUT_val, tmp_reg);
-  __ lxv(bitposLUT, 0, const_ptr);
-  if (PowerArchitecturePPC64 >= 10) {
-    __ load_const_optimized(const_ptr, (address)&p10_pack_permute_val, tmp_reg);
-  } else {
-    __ load_const_optimized(const_ptr, (address)&pack_rshift_val, tmp_reg);
-    __ lxv(pack_rshift->to_vsr(), 0, const_ptr);
-    __ load_const_optimized(const_ptr, (address)&pack_lshift_val, tmp_reg);
-    __ lxv(pack_lshift->to_vsr(), 0, const_ptr);
-    __ load_const_optimized(const_ptr, (address)&pack_permute_val, tmp_reg);
-  }
-  __ lxv(pack_permute, 0, const_ptr);
+  __ load_const_optimized(const_ptr, (address)&const_block, tmp_reg);
+  __ lxv(bitposLUT, BLK_OFFSETOF(bitposLUT_val), const_ptr);
+  __ lxv(pack_rshift->to_vsr(), BLK_OFFSETOF(pack_rshift_val), const_ptr);
+  __ lxv(pack_lshift->to_vsr(), BLK_OFFSETOF(pack_lshift_val), const_ptr);
+  __ lxv(pack_permute, BLK_OFFSETOF(pack_permute_val), const_ptr);
 
   // Splat the constants that can use xxspltib
   __ xxspltib(vec_0s->to_vsr(), 0);
-  __ xxspltib(vec_4s->to_vsr(), 4);
   __ xxspltib(vec_8s->to_vsr(), 8);
-  __ xxspltib(vec_0xfs, 0xf);
   if (PowerArchitecturePPC64 >= 10) {
-    __ xxspltib(vec_0x3fs->to_vsr(), 0x3f);
+    // Using VALID_B64 for the offsets effectively strips the upper bit
+    // of each byte that was selected from the table. Setting the upper
+    // bit gives us a way to distinguish the 6-bit value of 0
+    // from an error code of 0, which will happen if the character is
+    // outside the range of the lookup, or is an illegal Base64
+    // character, such as %.
+ __ xxspltib(offsets->to_vsr(), VALID_B64); + + __ lxv(table_48_63, BLK_OFFSETOF(table_48_63_val), const_ptr); + __ lxv(table_64_79, BLK_OFFSETOF(table_64_79_val), const_ptr); + __ lxv(table_80_95, BLK_OFFSETOF(table_80_95_val), const_ptr); + __ lxv(table_96_111, BLK_OFFSETOF(table_96_111_val), const_ptr); + __ lxv(table_112_127, BLK_OFFSETOF(table_112_127_val), const_ptr); + } else { + __ xxspltib(vec_4s->to_vsr(), 4); + __ xxspltib(vec_0xfs, 0xf); + __ lxv(bitposLUT, BLK_OFFSETOF(bitposLUT_val), const_ptr); } // The rest of the constants use different values depending on the @@ -3863,22 +3917,28 @@ class StubGenerator: public StubCodeGenerator { __ beq(CCR0, not_URL); // isURL != 0 (true) - __ load_const_optimized(const_ptr, (address)&offsetLUT_URL_val, tmp_reg); - __ lxv(offsetLUT, 0, const_ptr); - __ load_const_optimized(const_ptr, (address)&maskLUT_URL_val, tmp_reg); - __ lxv(maskLUT, 0, const_ptr); - __ xxspltib(vec_special_case_char->to_vsr(), '_'); - __ xxspltib(vec_special_case_offset, (unsigned char)US); + if (PowerArchitecturePPC64 >= 10) { + __ lxv(table_32_47, BLK_OFFSETOF(table_32_47_URL_val), const_ptr); + __ lxv(table_80_95, BLK_OFFSETOF(table_80_95_URL_val), const_ptr); + } else { + __ lxv(offsetLUT, BLK_OFFSETOF(offsetLUT_URL_val), const_ptr); + __ lxv(maskLUT, BLK_OFFSETOF(maskLUT_URL_val), const_ptr); + __ xxspltib(vec_special_case_char->to_vsr(), '_'); + __ xxspltib(vec_special_case_offset, (unsigned char)US); + } __ b(calculate_size); // isURL = 0 (false) __ bind(not_URL); - __ load_const_optimized(const_ptr, (address)&offsetLUT_val, tmp_reg); - __ lxv(offsetLUT, 0, const_ptr); - __ load_const_optimized(const_ptr, (address)&maskLUT_val, tmp_reg); - __ lxv(maskLUT, 0, const_ptr); - __ xxspltib(vec_special_case_char->to_vsr(), '/'); - __ xxspltib(vec_special_case_offset, (unsigned char)SLS); + if (PowerArchitecturePPC64 >= 10) { + __ lxv(table_32_47, BLK_OFFSETOF(table_32_47_val), const_ptr); + __ lxv(table_80_95, BLK_OFFSETOF(table_80_95_val), const_ptr); + } else { + __ lxv(offsetLUT, BLK_OFFSETOF(offsetLUT_val), const_ptr); + __ lxv(maskLUT, BLK_OFFSETOF(maskLUT_val), const_ptr); + __ xxspltib(vec_special_case_char->to_vsr(), '/'); + __ xxspltib(vec_special_case_offset, (unsigned char)SLS); + } __ bind(calculate_size); @@ -3889,177 +3949,156 @@ class StubGenerator: public StubCodeGenerator { __ add(in, s, sp); __ align(32); - __ bind(unrolled_loop_start); - for (unsigned unroll_cnt=0; unroll_cnt < loop_unrolls; unroll_cnt++) { - // We can use a static displacement in the load since it's always a - // multiple of 16, which is a requirement of lxv/stxv. This saves - // an addi instruction. - __ lxv(input->to_vsr(), unroll_cnt * 16, in); - // - // Lookup - // - // Isolate the upper 4 bits of each character by shifting it right 4 bits - __ vsrb(higher_nibble, input, vec_4s); - // Isolate the lower 4 bits by masking - __ xxland(lower_nibble, input->to_vsr(), vec_0xfs); - - // Get the offset (the value to subtract from the byte) by using - // a lookup table indexed by the upper 4 bits of the character - __ xxperm(offsets->to_vsr(), offsetLUT, higher_nibble->to_vsr()); - - // Find out which elements are the special case character (isURL ? '/' : '-') - __ vcmpequb(eq_special_case_char, input, vec_special_case_char); - - // For each character in the input which is a special case - // character, replace its offset with one that is special for that - // character. 
- __ xxsel(offsets->to_vsr(), offsets->to_vsr(), vec_special_case_offset, eq_special_case_char->to_vsr()); - - // Use the lower_nibble to select a mask "M" from the lookup table. - __ xxperm(M, maskLUT, lower_nibble); - - // "bit" is used to isolate which of the bits in M is relevant. - __ xxperm(bit, bitposLUT, higher_nibble->to_vsr()); - - // Each element of non_match correspond to one each of the 16 input - // characters. Those elements that become 0x00 after the xxland - // instuction are invalid Base64 characters. - __ xxland(non_match->to_vsr(), M, bit); - - // Compare each element to zero - // - // vmcmpequb_ sets the EQ bit of CCR6 if no elements compare equal. - // Any element comparing equal to zero means there is an error in - // that element. Note that the comparison result register - // non_match is not referenced again. Only CCR6-EQ matters. - __ vcmpequb_(non_match, non_match, vec_0s); - __ bne_predict_not_taken(CCR6, unrolled_loop_exit); - - // The Base64 characters had no errors, so add the offsets - __ vaddubm(input, input, offsets); - - // Pack - // - // In the tables below, b0, b1, .. b15 are the bytes of decoded - // binary data, the first line of each of the cells (except for - // the constants) uses the bit-field nomenclature from the - // above-linked paper, whereas the second line is more specific - // about which exact bits are present, and is constructed using the - // Power ISA 3.x document style, where: - // - // * The specifier after the colon depicts which bits are there. - // * The bit numbering is big endian style (bit 0 is the most - // significant). - // * || is a concatenate operator. - // * Strings of 0's are a field of zeros with the shown length, and - // likewise for strings of 1's. - - if (PowerArchitecturePPC64 >= 10) { - // Note that only e8..e15 are shown here because the extract bit - // pattern is the same in e0..e7. 
- // - // +===============+=============+======================+======================+=============+=============+======================+======================+=============+ - // | Vector | e8 | e9 | e10 | e11 | e12 | e13 | e14 | e15 | - // | Element | | | | | | | | | - // +===============+=============+======================+======================+=============+=============+======================+======================+=============+ - // | after vaddudb | 00hhhhhh | 00gggggg | 00ffffff | 00eeeeee | 00dddddd | 00cccccc | 00bbbbbb | 00aaaaaa | - // | | 00||b5:2..7 | 00||b4:4..7||b5:0..1 | 00||b3:6..7||b4:0..3 | 00||b3:0..5 | 00||b2:2..7 | 00||b1:4..7||b2:0..1 | 00||b0:6..7||b1:0..3 | 00||b0:0..5 | - // +---------------+-------------+----------------------+----------------------+-------------+-------------+----------------------+----------------------+-------------+ - // | after xxbrd | 00aaaaaa | 00bbbbbb | 00cccccc | 00dddddd | 00eeeeee | 00ffffff | 00gggggg | 00hhhhhh | - // | | 00||b0:0..5 | 00||b0:6..7||b1:0..3 | 00||b1:4..7||b2:0..1 | 00||b2:2..7 | 00||b3:0..5 | 00||b3:6..7||b4:0..3 | 00||b4:4..7||b5:0..1 | 00||b5:2..7 | - // +---------------+-------------+----------------------+----------------------+-------------+-------------+----------------------+----------------------+-------------+ - // | vec_0x3fs | 00111111 | 00111111 | 00111111 | 00111111 | 00111111 | 00111111 | 00111111 | 00111111 | - // +---------------+-------------+----------------------+----------------------+-------------+-------------+----------------------+----------------------+-------------+ - // | after vpextd | 00000000 | 00000000 | aaaaaabb | bbbbcccc | ccdddddd | eeeeeeff | ffffgggg | gghhhhhh | - // | | 00000000 | 00000000 | b0:0..7 | b1:0..7 | b2:0..7 | b3:0..7 | b4:0..7 | b5:0..7 | - // +===============+=============+======================+======================+=============+=============+======================+======================+=============+ - - __ xxbrd(input->to_vsr(), input->to_vsr()); - __ vpextd(gathered, input, vec_0x3fs); - - // Final rearrangement of bytes into their correct positions. - // +==================+====+====+====+====+=====+=====+=====+=====+====+====+=====+=====+=====+=====+=====+=====+ - // | Vector | e0 | e1 | e2 | e3 | e4 | e5 | e6 | e7 | e8 | e9 | e10 | e11 | e12 | e13 | e14 | e15 | - // | Elements | | | | | | | | | | | | | | | | | - // +==================+====+====+====+====+=====+=====+=====+=====+====+====+=====+=====+=====+=====+=====+=====+ - // | after vpextd | 0 | 0 | b6 | b7 | b8 | b9 | b10 | b11 | 0 | 0 | b0 | b1 | b2 | b3 | b4 | b5 | - // +------------------+----+----+----+----+-----+-----+-----+-----+----+----+-----+-----+-----+-----+-----+-----+ - // | p10_pack_permute | 0 | 0 | 0 | 0 | 7 | 6 | 5 | 4 | 3 | 2 | 15 | 14 | 13 | 12 | 11 | 10 | - // +------------------+----+----+----+----+-----+-----+-----+-----+----+----+-----+-----+-----+-----+-----+-----+ - // | after xxperm | 0 | 0 | 0 | 0 | b11 | b10 | b9 | b8 | b7 | b6 | b5 | b4 | b3 | b2 | b1 | b0 | - // +==================+====+====+====+====+=====+=====+=====+=====+====+====+=====+=====+=====+=====+=====+=====+ - - } else { - // Note that only e12..e15 are shown here because the shifting - // and OR'ing pattern replicates for e8..e11, e4..7, and - // e0..e3. 
- // - // +======================+=================+======================+======================+=============+ - // | Vector | e12 | e13 | e14 | e15 | - // | Element | | | | | - // +======================+=================+======================+======================+=============+ - // | after vaddubm | 00dddddd | 00cccccc | 00bbbbbb | 00aaaaaa | - // | | 00||b2:2..7 | 00||b1:4..7||b2:0..1 | 00||b0:6..7||b1:0..3 | 00||b0:0..5 | - // +----------------------+-----------------+----------------------+----------------------+-------------+ - // | pack_lshift | | << 6 | << 4 | << 2 | - // +----------------------+-----------------+----------------------+----------------------+-------------+ - // | l after vslb | 00dddddd | cc000000 | bbbb0000 | aaaaaa00 | - // | | 00||b2:2..7 | b2:0..1||000000 | b1:0..3||0000 | b0:0..5||00 | - // +----------------------+-----------------+----------------------+----------------------+-------------+ - // | l after vslo | cc000000 | bbbb0000 | aaaaaa00 | 00000000 | - // | | b2:0..1||000000 | b1:0..3||0000 | b0:0..5||00 | 00000000 | - // +----------------------+-----------------+----------------------+----------------------+-------------+ - // | pack_rshift | | >> 2 | >> 4 | | - // +----------------------+-----------------+----------------------+----------------------+-------------+ - // | r after vsrb | 00dddddd | 0000cccc | 000000bb | 00aaaaaa | - // | | 00||b2:2..7 | 0000||b1:4..7 | 000000||b0:6..7 | 00||b0:0..5 | - // +----------------------+-----------------+----------------------+----------------------+-------------+ - // | gathered after xxlor | ccdddddd | bbbbcccc | aaaaaabb | 00aaaaaa | - // | | b2:0..7 | b1:0..7 | b0:0..7 | 00||b0:0..5 | - // +======================+=================+======================+======================+=============+ - // - // Note: there is a typo in the above-linked paper that shows the result of the gathering process is: - // [ddddddcc|bbbbcccc|aaaaaabb] - // but should be: - // [ccdddddd|bbbbcccc|aaaaaabb] - // - __ vslb(l, input, pack_lshift); - // vslo of vec_8s shifts the vector by one octet toward lower - // element numbers, discarding element 0. This means it actually - // shifts to the right (not left) according to the order of the - // table above. - __ vslo(l, l, vec_8s); - __ vsrb(r, input, pack_rshift); - __ xxlor(gathered->to_vsr(), l->to_vsr(), r->to_vsr()); - - // Final rearrangement of bytes into their correct positions. 
- // +==============+======+======+======+======+=====+=====+====+====+====+====+=====+=====+=====+=====+=====+=====+ - // | Vector | e0 | e1 | e2 | e3 | e4 | e5 | e6 | e7 | e8 | e9 | e10 | e11 | e12 | e13 | e14 | e15 | - // | Elements | | | | | | | | | | | | | | | | | - // +==============+======+======+======+======+=====+=====+====+====+====+====+=====+=====+=====+=====+=====+=====+ - // | after xxlor | b11 | b10 | b9 | xx | b8 | b7 | b6 | xx | b5 | b4 | b3 | xx | b2 | b1 | b0 | xx | - // +--------------+------+------+------+------+-----+-----+----+----+----+----+-----+-----+-----+-----+-----+-----+ - // | pack_permute | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 4 | 5 | 6 | 8 | 9 | 10 | 12 | 13 | 14 | - // +--------------+------+------+------+------+-----+-----+----+----+----+----+-----+-----+-----+-----+-----+-----+ - // | after xxperm | b11* | b11* | b11* | b11* | b11 | b10 | b9 | b8 | b7 | b6 | b5 | b4 | b3 | b2 | b1 | b0 | - // +==============+======+======+======+======+=====+=====+====+====+====+====+=====+=====+=====+=====+=====+=====+ - // xx bytes are not used to form the final data - // b0..b15 are the decoded and reassembled 8-bit bytes of data - // b11 with asterisk is a "don't care", because these bytes will be - // overwritten on the next iteration. - } - __ xxperm(gathered->to_vsr(), gathered->to_vsr(), pack_permute); + __ bind(loop_start); + __ lxv(input->to_vsr(), 0, in); // offset=0 - // We cannot use a static displacement on the store, since it's a - // multiple of 12, not 16. Note that this stxv instruction actually - // writes 16 bytes, even though only the first 12 are valid data. - __ stxv(gathered->to_vsr(), 0, out); - __ addi(out, out, 12); + // + // Lookup + // + if (PowerArchitecturePPC64 >= 10) { + // Use xxpermx to do a lookup of each Base64 character in the + // input vector and translate it to a 6-bit value + 0x80. + // Characters which are not valid Base64 characters will result + // in a zero in the corresponding byte. + // + // Note that due to the align(32) call above, the xxpermx instructions do + // not require align_prefix() calls, since the final xxpermx + // prefix+opcode is at byte 24. + __ xxpermx(xlate_a, table_32_47, table_48_63, input->to_vsr(), 1); // offset=4 + __ xxpermx(xlate_b, table_64_79, table_80_95, input->to_vsr(), 2); // offset=12 + __ xxlor(xlate_b, xlate_a, xlate_b); // offset=20 + __ xxpermx(xlate_a, table_96_111, table_112_127, input->to_vsr(), 3); // offset=24 + __ xxlor(input->to_vsr(), xlate_a, xlate_b); + // Check for non-Base64 characters by comparing each byte to zero. + __ vcmpequb_(non_match, input, vec_0s); + } else { + // Isolate the upper 4 bits of each character by shifting it right 4 bits + __ vsrb(higher_nibble, input, vec_4s); + // Isolate the lower 4 bits by masking + __ xxland(lower_nibble, input->to_vsr(), vec_0xfs); + + // Get the offset (the value to subtract from the byte) by using + // a lookup table indexed by the upper 4 bits of the character + __ xxperm(offsets->to_vsr(), offsetLUT, higher_nibble->to_vsr()); + + // Find out which elements are the special case character (isURL ? '_' : '/') + __ vcmpequb(eq_special_case_char, input, vec_special_case_char); + + // For each character in the input which is a special case + // character, replace its offset with one that is special for that + // character. + __ xxsel(offsets->to_vsr(), offsets->to_vsr(), vec_special_case_offset, eq_special_case_char->to_vsr()); + + // Use the lower_nibble to select a mask "M" from the lookup table.
+ __ xxperm(M, maskLUT, lower_nibble); + + // "bit" is used to isolate which of the bits in M is relevant. + __ xxperm(bit, bitposLUT, higher_nibble->to_vsr()); + + // Each element of non_match corresponds to one of the 16 input + // characters. Those elements that become 0x00 after the xxland + // instruction are invalid Base64 characters. + __ xxland(non_match->to_vsr(), M, bit); + + // Compare each element to zero + // + __ vcmpequb_(non_match, non_match, vec_0s); } - __ addi(in, in, 16 * loop_unrolls); - __ bdnz(unrolled_loop_start); + // vcmpequb_ sets the EQ bit of CCR6 if no elements compare equal. + // Any element comparing equal to zero means there is an error in + // that element. Note that the comparison result register + // non_match is not referenced again. Only CCR6-EQ matters. + __ bne_predict_not_taken(CCR6, loop_exit); + + // The Base64 characters had no errors, so add the offsets, which in + // the case of Power10 is a constant vector of all 0x80's (see earlier + // comment where the offsets register is loaded). + __ vaddubm(input, input, offsets); + + // Pack + // + // In the tables below, b0, b1, .. b15 are the bytes of decoded + // binary data, the first line of each of the cells (except for + // the constants) uses the bit-field nomenclature from the + // above-linked paper, whereas the second line is more specific + // about which exact bits are present, and is constructed using the + // Power ISA 3.x document style, where: + // + // * The specifier after the colon depicts which bits are there. + // * The bit numbering is big endian style (bit 0 is the most + // significant). + // * || is a concatenate operator. + // * Strings of 0's are a field of zeros with the shown length, and + // likewise for strings of 1's. + + // Note that only e12..e15 are shown here because the shifting + // and OR'ing pattern replicates for e8..e11, e4..e7, and + // e0..e3.
+ // + // +======================+=================+======================+======================+=============+ + // | Vector | e12 | e13 | e14 | e15 | + // | Element | | | | | + // +======================+=================+======================+======================+=============+ + // | after vaddubm | 00dddddd | 00cccccc | 00bbbbbb | 00aaaaaa | + // | | 00||b2:2..7 | 00||b1:4..7||b2:0..1 | 00||b0:6..7||b1:0..3 | 00||b0:0..5 | + // +----------------------+-----------------+----------------------+----------------------+-------------+ + // | pack_lshift | | << 6 | << 4 | << 2 | + // +----------------------+-----------------+----------------------+----------------------+-------------+ + // | l after vslb | 00dddddd | cc000000 | bbbb0000 | aaaaaa00 | + // | | 00||b2:2..7 | b2:0..1||000000 | b1:0..3||0000 | b0:0..5||00 | + // +----------------------+-----------------+----------------------+----------------------+-------------+ + // | l after vslo | cc000000 | bbbb0000 | aaaaaa00 | 00000000 | + // | | b2:0..1||000000 | b1:0..3||0000 | b0:0..5||00 | 00000000 | + // +----------------------+-----------------+----------------------+----------------------+-------------+ + // | pack_rshift | | >> 2 | >> 4 | | + // +----------------------+-----------------+----------------------+----------------------+-------------+ + // | r after vsrb | 00dddddd | 0000cccc | 000000bb | 00aaaaaa | + // | | 00||b2:2..7 | 0000||b1:4..7 | 000000||b0:6..7 | 00||b0:0..5 | + // +----------------------+-----------------+----------------------+----------------------+-------------+ + // | gathered after xxlor | ccdddddd | bbbbcccc | aaaaaabb | 00aaaaaa | + // | | b2:0..7 | b1:0..7 | b0:0..7 | 00||b0:0..5 | + // +======================+=================+======================+======================+=============+ + // + // Note: there is a typo in the above-linked paper that shows the result of the gathering process is: + // [ddddddcc|bbbbcccc|aaaaaabb] + // but should be: + // [ccdddddd|bbbbcccc|aaaaaabb] + // + __ vslb(l, input, pack_lshift); + // vslo of vec_8s shifts the vector by one octet toward lower + // element numbers, discarding element 0. This means it actually + // shifts to the right (not left) according to the order of the + // table above. + __ vslo(l, l, vec_8s); + __ vsrb(r, input, pack_rshift); + __ xxlor(gathered->to_vsr(), l->to_vsr(), r->to_vsr()); + + // Final rearrangement of bytes into their correct positions. 
+ // +==============+======+======+======+======+=====+=====+====+====+====+====+=====+=====+=====+=====+=====+=====+ + // | Vector | e0 | e1 | e2 | e3 | e4 | e5 | e6 | e7 | e8 | e9 | e10 | e11 | e12 | e13 | e14 | e15 | + // | Elements | | | | | | | | | | | | | | | | | + // +==============+======+======+======+======+=====+=====+====+====+====+====+=====+=====+=====+=====+=====+=====+ + // | after xxlor | b11 | b10 | b9 | xx | b8 | b7 | b6 | xx | b5 | b4 | b3 | xx | b2 | b1 | b0 | xx | + // +--------------+------+------+------+------+-----+-----+----+----+----+----+-----+-----+-----+-----+-----+-----+ + // | pack_permute | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 4 | 5 | 6 | 8 | 9 | 10 | 12 | 13 | 14 | + // +--------------+------+------+------+------+-----+-----+----+----+----+----+-----+-----+-----+-----+-----+-----+ + // | after xxperm | b11* | b11* | b11* | b11* | b11 | b10 | b9 | b8 | b7 | b6 | b5 | b4 | b3 | b2 | b1 | b0 | + // +==============+======+======+======+======+=====+=====+====+====+====+====+=====+=====+=====+=====+=====+=====+ + // xx bytes are not used to form the final data + // b0..b15 are the decoded and reassembled 8-bit bytes of data + // b11 with asterisk is a "don't care", because these bytes will be + // overwritten on the next iteration. + __ xxperm(gathered->to_vsr(), gathered->to_vsr(), pack_permute); + + // We cannot use a static displacement on the store, since it's a + // multiple of 12, not 16. Note that this stxv instruction actually + // writes 16 bytes, even though only the first 12 are valid data. + __ stxv(gathered->to_vsr(), 0, out); + __ addi(out, out, 12); + __ addi(in, in, 16); + __ bdnz(loop_start); - __ bind(unrolled_loop_exit); + __ bind(loop_exit); // Return the number of out bytes produced, which is (out - (d + dp)) == out - d - dp; __ sub(R3_RET, out, d); @@ -4187,10 +4226,12 @@ class StubGenerator: public StubCodeGenerator { // at each location, all values in expanded are compared to 31. Using // vsel, values higher than 31 use the results from the upper 32 bytes of // the lookup operation, while values less than or equal to 31 use the -// lower 32 bytes of the lookup operation. Power10 and beyond can save the -// compare instruction, because the comparison is done within xxpermx -// itself. TODO: use xxpermx,xxpermx,vor on P10 when instruction prefixes are -// available in assembler_ppc.* +// lower 32 bytes of the lookup operation. +// +// Note: it's tempting to use a xxpermx,xxpermx,vor sequence here on +// Power10 (or later), but experiments doing so on Power10 yielded a slight +// performance drop, perhaps due to the need for xxpermx instruction +// prefixes. #define ENCODE_CORE \ __ xxperm(input->to_vsr(), input->to_vsr(), expand_permute); \ @@ -4282,7 +4323,6 @@ class StubGenerator: public StubCodeGenerator { ARRAY_TO_LXV_ORDER( 'w','x','y','z','0','1','2','3','4','5','6','7','8','9','-','_' ) } }; - #define BLK_OFFSETOF(x) (offsetof(constant_block, x)) // Number of bytes to process in each pass through the main loop. // 12 of the 16 bytes from each lxv are encoded to 16 Base64 bytes. 
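Looking back at the pre-Power10 path of the decode loop above, its per-character logic can be modeled in scalar form. This is a hedged sketch: the LUT contents stay abstract, and the function shape is illustrative, not the stub's interface:

#include <cstdint>

// P9-path sketch: validity = (maskLUT[lo] & bitposLUT[hi]) != 0; the decoded
// value is the character plus an offset chosen by its upper nibble, with a
// special-case offset for '/' (or '_' in the URL alphabet).
static bool decode_p9(uint8_t c, bool is_url,
                      const uint8_t offsetLUT[16], const uint8_t maskLUT[16],
                      const uint8_t bitposLUT[16],
                      uint8_t special_offset, uint8_t* out) {
  uint8_t hi = c >> 4;                        // vsrb by 4
  uint8_t lo = c & 0xf;                       // xxland with 0xf
  if ((maskLUT[lo] & bitposLUT[hi]) == 0) {   // non_match element == 0
    return false;                             // invalid Base64 character
  }
  uint8_t off = offsetLUT[hi];                // xxperm on offsetLUT
  if (c == (is_url ? '_' : '/')) off = special_offset;  // the xxsel override
  *out = (uint8_t)(c + off);                  // vaddubm (mod-256 add)
  return true;
}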
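On the encode side, the 12-in/16-out geometry described above is the standard 3-bytes-to-4-characters expansion. A minimal scalar sketch, with lut standing for whichever 64-entry alphabet (base64 or base64url) is selected:

#include <cstdint>

// Scalar sketch of one encode pass: 12 payload bytes -> 16 Base64 chars.
static void encode_block(const uint8_t in[12], const char lut[64], char out[16]) {
  for (int g = 0; g < 4; g++) {
    const uint8_t* b = in + 3 * g;
    char* o = out + 4 * g;
    o[0] = lut[b[0] >> 2];
    o[1] = lut[((b[0] & 0x03) << 4) | (b[1] >> 4)];
    o[2] = lut[((b[1] & 0x0f) << 2) | (b[2] >> 6)];
    o[3] = lut[b[2] & 0x3f];
  }
}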
@@ -4305,7 +4345,7 @@ class StubGenerator: public StubCodeGenerator { Register block_modulo = R12; // == block_size (reuse const_ptr) Register remaining = R12; // bytes remaining to process after the blocks are completed (reuse block_modulo's reg) Register in = R4; // current input (source) pointer (reuse sp's register) - Register num_blocks = R11; // number of blocks to be processed by the unrolled loop + Register num_blocks = R11; // number of blocks to be processed by the loop Register out = R8; // current output (destination) pointer (reuse const_ptr's register) Register three = R9; // constant divisor (reuse size's register) Register bytes_to_write = R10; // number of bytes to write with the stxvl instr (reused blocked_size's register) diff --git a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp index 2210e05a410f941a42d413165c0315892251b963..124cb66cc5d2eeebeb7a8062ebde44bf770ebdcc 100644 --- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp @@ -3793,11 +3793,7 @@ void TemplateTable::_new() { // -------------------------------------------------------------------------- // Init2: Initialize the header: mark, klass // Init mark. - if (UseBiasedLocking) { - __ ld(Rscratch, in_bytes(Klass::prototype_header_offset()), RinstanceKlass); - } else { - __ load_const_optimized(Rscratch, markWord::prototype().value(), R0); - } + __ load_const_optimized(Rscratch, markWord::prototype().value(), R0); __ std(Rscratch, oopDesc::mark_offset_in_bytes(), RallocatedObject); // Init klass. diff --git a/src/hotspot/cpu/ppc/vm_version_ppc.cpp b/src/hotspot/cpu/ppc/vm_version_ppc.cpp index 43b655cf9d653462df959b4e24428290fafd95a6..0b2aa63bceadcd833d1d992d53ceceba323ad683 100644 --- a/src/hotspot/cpu/ppc/vm_version_ppc.cpp +++ b/src/hotspot/cpu/ppc/vm_version_ppc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2020 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -378,11 +378,7 @@ void VM_Version::initialize() { // Adjust RTM (Restricted Transactional Memory) flags. if (UseRTMLocking) { - // If CPU or OS do not support TM: - // Can't continue because UseRTMLocking affects UseBiasedLocking flag - // setting during arguments processing. See use_biased_locking(). - // VM_Version_init() is executed after UseBiasedLocking is used - // in Thread::allocate(). + // If CPU or OS do not support RTM: if (PowerArchitecturePPC64 < 8) { vm_exit_during_initialization("RTM instructions are not available on this CPU."); } @@ -399,8 +395,6 @@ void VM_Version::initialize() { } #else // Only C2 does RTM locking optimization. - // Can't continue because UseRTMLocking affects UseBiasedLocking flag - // setting during arguments processing. See use_biased_locking(). vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); #endif } else { // !UseRTMLocking @@ -544,27 +538,6 @@ void VM_Version::print_platform_virtualization_info(outputStream* st) { #endif } -bool VM_Version::use_biased_locking() { -#if INCLUDE_RTM_OPT - // RTM locking is most useful when there is high lock contention and - // low data contention. With high lock contention the lock is usually - // inflated and biased locking is not suitable for that case. - // RTM locking code requires that biased locking is off. 
- // Note: we can't switch off UseBiasedLocking in get_processor_features() - // because it is used by Thread::allocate() which is called before - // VM_Version::initialize(). - if (UseRTMLocking && UseBiasedLocking) { - if (FLAG_IS_DEFAULT(UseBiasedLocking)) { - FLAG_SET_DEFAULT(UseBiasedLocking, false); - } else { - warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." ); - UseBiasedLocking = false; - } - } -#endif - return UseBiasedLocking; -} - void VM_Version::print_features() { tty->print_cr("Version: %s L1_data_cache_line_size=%d", features_string(), L1_data_cache_line_size()); diff --git a/src/hotspot/cpu/ppc/vm_version_ppc.hpp b/src/hotspot/cpu/ppc/vm_version_ppc.hpp index 439be6cc8d2046b2e2b3e1ad8af6475ae728e557..89352192615c53b082d94802832ce213f35eeb3b 100644 --- a/src/hotspot/cpu/ppc/vm_version_ppc.hpp +++ b/src/hotspot/cpu/ppc/vm_version_ppc.hpp @@ -93,9 +93,6 @@ public: // Override Abstract_VM_Version implementation static void print_platform_virtualization_info(outputStream*); - // Override Abstract_VM_Version implementation - static bool use_biased_locking(); - // PPC64 supports fast class initialization checks for static methods. static bool supports_fast_class_init_checks() { return true; } constexpr static bool supports_stack_watermark_barrier() { return true; } diff --git a/src/hotspot/cpu/ppc/vmreg_ppc.hpp b/src/hotspot/cpu/ppc/vmreg_ppc.hpp index 090fe1d72a2a07a328a9e880437c710d6fc13ba6..16f6799d04643622d5e063c4779399c87edc73b1 100644 --- a/src/hotspot/cpu/ppc/vmreg_ppc.hpp +++ b/src/hotspot/cpu/ppc/vmreg_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2013 SAP SE. All rights reserved. + * Copyright (c) 2001, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2021 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,6 +35,21 @@ inline bool is_FloatRegister() { value() < ConcreteRegisterImpl::max_fpr; } +inline bool is_VectorRegister() { + return value() >= ConcreteRegisterImpl::max_fpr && + value() < ConcreteRegisterImpl::max_vsr; +} + +inline bool is_ConditionRegister() { + return value() >= ConcreteRegisterImpl::max_vsr && + value() < ConcreteRegisterImpl::max_cnd; +} + +inline bool is_SpecialRegister() { + return value() >= ConcreteRegisterImpl::max_cnd && + value() < ConcreteRegisterImpl::max_spr; +} + inline Register as_Register() { assert(is_Register() && is_even(value()), "even-aligned GPR name"); return ::as_Register(value()>>1); diff --git a/src/hotspot/cpu/s390/assembler_s390.cpp b/src/hotspot/cpu/s390/assembler_s390.cpp index d587c9d659bb3d2ad82d441de39cfe1dbb429f03..4defc9596184a6f8f3b93a9a8b0be61ade35e15d 100644 --- a/src/hotspot/cpu/s390/assembler_s390.cpp +++ b/src/hotspot/cpu/s390/assembler_s390.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -31,7 +31,6 @@ #include "gc/shared/cardTableBarrierSet.hpp" #include "memory/resourceArea.hpp" #include "prims/methodHandles.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/objectMonitor.hpp" #include "runtime/os.hpp" diff --git a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp index 329c163f313e60561b4dca6a06ba80aef68493cf..ccc2364007b8d168031497a0522e1c34482c6735 100644 --- a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp +++ b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp @@ -454,8 +454,10 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) { ce->verify_oop_map(info()); #ifndef PRODUCT - __ load_const_optimized(Z_R1_scratch, (address)&Runtime1::_arraycopy_slowcase_cnt); - __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch); + if (PrintC1Statistics) { + __ load_const_optimized(Z_R1_scratch, (address)&Runtime1::_arraycopy_slowcase_cnt); + __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch); + } #endif __ branch_optimized(Assembler::bcondAlways, _continuation); diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp index a7d5a4a1c428421fc2ec5f9a5097ee1313db7180..6971490e0681cacf8ae196ae7079ad3bbd5328da 100644 --- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp @@ -879,7 +879,7 @@ Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { } void LIR_Assembler::mem2reg(LIR_Opr src_opr, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, - CodeEmitInfo* info, bool wide, bool unaligned) { + CodeEmitInfo* info, bool wide) { assert(type != T_METADATA, "load of metadata ptr not supported"); LIR_Address* addr = src_opr->as_address_ptr(); @@ -1079,7 +1079,7 @@ void LIR_Assembler::reg2reg(LIR_Opr from_reg, LIR_Opr to_reg) { void LIR_Assembler::reg2mem(LIR_Opr from, LIR_Opr dest_opr, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, - bool wide, bool unaligned) { + bool wide) { assert(type != T_METADATA, "store of metadata ptr not supported"); LIR_Address* addr = dest_opr->as_address_ptr(); diff --git a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp index 89cea9598a4fd8ab1f2a62bf4f5d7dc98232c84e..aaad1575a82a345e23ff33b528f1e90178b51711 100644 --- a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp @@ -33,7 +33,6 @@ #include "oops/arrayOop.hpp" #include "oops/markWord.hpp" #include "runtime/basicLock.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/os.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -97,10 +96,6 @@ void C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hd z_btrue(slow_case); } - if (UseBiasedLocking) { - biased_locking_enter(obj, hdr, Z_R1_scratch, Z_R0_scratch, done, &slow_case); - } - // and mark it as unlocked. z_oill(hdr, markWord::unlocked_value); // Save unlocked object header into the displaced header location on the stack. @@ -110,13 +105,6 @@ void C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hd // object header instead. z_csg(hdr, disp_hdr, hdr_offset, obj); // If the object header was the same, we're done. 
- if (PrintBiasedLockingStatistics) { - Unimplemented(); -#if 0 - cond_inc32(Assembler::equal, - ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); -#endif - } branch_optimized(Assembler::bcondEqual, done); // If the object header was not the same, it is now in the hdr register. // => Test if it is a stack pointer into the same stack (recursive locking), i.e.: @@ -150,20 +138,12 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ assert_different_registers(hdr, obj, disp_hdr); NearLabel done; - if (UseBiasedLocking) { - // Load object. - z_lg(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); - biased_locking_exit(obj, hdr, done); - } - // Load displaced header. z_ltg(hdr, Address(disp_hdr, (intptr_t)0)); // If the loaded hdr is NULL we had recursive locking, and we are done. z_bre(done); - if (!UseBiasedLocking) { - // Load object. - z_lg(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); - } + // Load object. + z_lg(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); verify_oop(obj, FILE_AND_LINE); // Test if object header is pointing to the displaced header, and if so, restore // the displaced header in the object. If the object header is not pointing to @@ -193,13 +173,8 @@ void C1_MacroAssembler::try_allocate( void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register Rzero, Register t1) { assert_different_registers(obj, klass, len, t1, Rzero); - if (UseBiasedLocking && !len->is_valid()) { - assert_different_registers(obj, klass, len, t1); - z_lg(t1, Address(klass, Klass::prototype_header_offset())); - } else { - // This assumes that all prototype bits fit in an int32_t. - load_const_optimized(t1, (intx)markWord::prototype().value()); - } + // This assumes that all prototype bits fit in an int32_t. + load_const_optimized(t1, (intx)markWord::prototype().value()); z_stg(t1, Address(obj, oopDesc::mark_offset_in_bytes())); if (len->is_valid()) { diff --git a/src/hotspot/cpu/s390/c2_globals_s390.hpp b/src/hotspot/cpu/s390/c2_globals_s390.hpp index e747f6c8c517905a8aef397dac93ffb575360b6c..0192cb716baab4f58ee889257ff7438ff4f52e83 100644 --- a/src/hotspot/cpu/s390/c2_globals_s390.hpp +++ b/src/hotspot/cpu/s390/c2_globals_s390.hpp @@ -44,10 +44,8 @@ define_pd_global(intx, CompileThreshold, 10000); define_pd_global(intx, OnStackReplacePercentage, 140); define_pd_global(intx, ConditionalMoveLimit, 4); -define_pd_global(intx, FLOATPRESSURE, 15); define_pd_global(intx, FreqInlineSize, 175); // 10 prevents spill-split-recycle sanity check in JVM2008.xml.transform. -define_pd_global(intx, INTPRESSURE, 10); // Medium size register set, 6 special purpose regs, 3 SOE regs. 
define_pd_global(intx, InteriorEntryAlignment, 2); define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K)); define_pd_global(intx, RegisterCostAreaRatio, 12000); diff --git a/src/hotspot/cpu/s390/frame_s390.cpp b/src/hotspot/cpu/s390/frame_s390.cpp index 2c1155e6de561a304d0feba4fc89ab634941a545..d29227bb32effd34af833a9dbbd128fee57ac2eb 100644 --- a/src/hotspot/cpu/s390/frame_s390.cpp +++ b/src/hotspot/cpu/s390/frame_s390.cpp @@ -55,7 +55,6 @@ void RegisterMap::check_location_valid() { // Profiling/safepoint support bool frame::safe_for_sender(JavaThread *thread) { - bool safe = false; address sp = (address)_sp; address fp = (address)_fp; address unextended_sp = (address)_unextended_sp; @@ -73,28 +72,23 @@ bool frame::safe_for_sender(JavaThread *thread) { // An fp must be within the stack and above (but not equal) sp. bool fp_safe = thread->is_in_stack_range_excl(fp, sp); - // An interpreter fp must be within the stack and above (but not equal) sp. - // Moreover, it must be at least the size of the z_ijava_state structure. + // An interpreter fp must be fp_safe. + // Moreover, it must be at a distance of at least the size of the z_ijava_state structure. bool fp_interp_safe = fp_safe && ((fp - sp) >= z_ijava_state_size); // We know sp/unextended_sp are safe, only fp is questionable here // If the current frame is known to the code cache then we can attempt to - // to construct the sender and do some validation of it. This goes a long way + // construct the sender and do some validation of it. This goes a long way toward eliminating issues when we get in frame construction code if (_cb != NULL ) { - // Entry frame checks - if (is_entry_frame()) { - // An entry frame must have a valid fp. - return fp_safe && is_entry_frame_valid(thread); - } - // Now check if the frame is complete and the test is - // reliable. Unfortunately we can only check frame completeness for - // runtime stubs. Other generic buffer blobs are more - // problematic so we just assume they are OK. Adapter blobs never have a - // complete frame and are never OK. nmethods should be OK on s390. + // First check if the frame is complete and the test is reliable. + // Unfortunately we can only check frame completeness for runtime stubs. + // Other generic buffer blobs are more problematic so we just assume they are OK. + // Adapter blobs never have a complete frame and are never OK. + // nmethods should be OK on s390. if (!_cb->is_frame_complete_at(_pc)) { if (_cb->is_adapter_blob() || _cb->is_runtime_stub()) { return false; @@ -106,13 +100,26 @@ return false; } + // Entry frame checks + if (is_entry_frame()) { + // An entry frame must have a valid fp. + return fp_safe && is_entry_frame_valid(thread); + } + if (is_interpreted_frame() && !fp_interp_safe) { return false; } + // At this point, there still is a chance that fp_safe is false. + // In particular, (fp == NULL) might be true. So let's check and + // bail out before we actually dereference fp. + if (!fp_safe) { + return false; + } + z_abi_160* sender_abi = (z_abi_160*) fp; intptr_t* sender_sp = (intptr_t*) sender_abi->callers_sp; - address sender_pc = (address) sender_abi->return_pc; + address sender_pc = (address) sender_abi->return_pc; // We must always be able to find a recognizable pc.
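The reordered checks in safe_for_sender reduce to two predicates plus an early bail-out before fp is dereferenced. A standalone sketch under assumed stack bounds (the parameters stand in for the real JavaThread and interpreter queries):

#include <cstddef>

// Sketch of the fp sanity checks; stack_base and z_ijava_state_size are
// illustrative parameters, not the real thread/interpreter accessors.
static bool fp_checks(const char* fp, const char* sp, const char* stack_base,
                      size_t z_ijava_state_size, bool is_interpreted) {
  bool fp_safe = (fp > sp) && (fp < stack_base);  // within stack, above sp
  bool fp_interp_safe = fp_safe && (size_t)(fp - sp) >= z_ijava_state_size;
  if (is_interpreted && !fp_interp_safe) return false;
  // Never dereference fp (it may be NULL) unless fp_safe holds.
  return fp_safe;
}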
CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); @@ -208,6 +215,16 @@ frame frame::sender_for_entry_frame(RegisterMap *map) const { return fr; } +OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { + ShouldNotCallThis(); + return nullptr; +} + +bool frame::optimized_entry_frame_is_first() const { + ShouldNotCallThis(); + return false; +} + frame frame::sender_for_interpreter_frame(RegisterMap *map) const { // Pass callers sender_sp as unextended_sp. return frame(sender_sp(), sender_pc(), (intptr_t*)(ijava_state()->sender_sp)); @@ -611,8 +628,6 @@ void frame::describe_pd(FrameValues& values, int frame_no) { } } - -void frame::pd_ps() {} #endif // !PRODUCT intptr_t *frame::initial_deoptimization_info() { diff --git a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp index ba4bfdc986449d2ba96538471ca5347b8f6de691..7258630bb0b9d30dfbd4691d675ddd7c27449733 100644 --- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp @@ -427,7 +427,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier Register pre_val_reg = stub->pre_val()->as_register(); if (stub->do_load()) { - ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/); } __ z_ltgr(Z_R1_scratch, pre_val_reg); // Pass oop in Z_R1_scratch to Runtime1::g1_pre_barrier_slow_id. diff --git a/src/hotspot/cpu/s390/globals_s390.hpp b/src/hotspot/cpu/s390/globals_s390.hpp index 80e7baca71d5321434de7a52335a0c07d3b52486..8e8dabf484038ce36d6b2de6e8861e9b3ab7800c 100644 --- a/src/hotspot/cpu/s390/globals_s390.hpp +++ b/src/hotspot/cpu/s390/globals_s390.hpp @@ -43,7 +43,6 @@ define_pd_global(uintx, CodeCacheSegmentSize, 256); // code size significantly by padding nops between IVC and second UEP. define_pd_global(intx, CodeEntryAlignment, 64); define_pd_global(intx, OptoLoopAlignment, 2); -define_pd_global(intx, InlineFrequencyCount, 100); define_pd_global(intx, InlineSmallCode, 2000); #define DEFAULT_STACK_YELLOW_PAGES (2) diff --git a/src/hotspot/cpu/s390/interp_masm_s390.cpp b/src/hotspot/cpu/s390/interp_masm_s390.cpp index 51faebad4de1ec7d142ed25edf43749dc6b30973..764d4002cb65eed697bf2bacc9bf3ab128d45b1f 100644 --- a/src/hotspot/cpu/s390/interp_masm_s390.cpp +++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp @@ -38,7 +38,6 @@ #include "prims/jvmtiExport.hpp" #include "prims/jvmtiThreadState.hpp" #include "runtime/basicLock.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/frame.inline.hpp" #include "runtime/safepointMechanism.hpp" #include "runtime/sharedRuntime.hpp" @@ -1005,10 +1004,6 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) { z_btrue(slow_case); } - if (UseBiasedLocking) { - biased_locking_enter(object, displaced_header, Z_R1, Z_R0, done, &slow_case); - } - // Set displaced_header to be (markWord of object | UNLOCK_VALUE). z_oill(displaced_header, markWord::unlocked_value); @@ -1116,12 +1111,6 @@ void InterpreterMacroAssembler::unlock_object(Register monitor, Register object) clear_mem(obj_entry, sizeof(oop)); - if (UseBiasedLocking) { - // The object address from the monitor is in object. 
- assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); - biased_locking_exit(object, displaced_header, done); - } - // Test first if we are in the fast recursive case. MacroAssembler::load_and_test_long(displaced_header, Address(monitor, BasicObjectLock::lock_offset_in_bytes() + diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index c098cf09604b1d90ac677d105bf62a30eaf47852..06567a511ed62e6caf44a4bd86784eb548a38539 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -39,7 +39,6 @@ #include "oops/klass.inline.hpp" #include "prims/methodHandles.hpp" #include "registerSaver_s390.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/icache.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/objectMonitor.hpp" @@ -3128,194 +3127,7 @@ void MacroAssembler::increment_counter_eq(address counter_address, Register tmp1 bind(l); } -// Semantics are dependent on the slow_case label: -// If the slow_case label is not NULL, failure to biased-lock the object -// transfers control to the location of the slow_case label. If the -// object could be biased-locked, control is transferred to the done label. -// The condition code is unpredictable. -// -// If the slow_case label is NULL, failure to biased-lock the object results -// in a transfer of control to the done label with a condition code of not_equal. -// If the biased-lock could be successfully obtained, control is transfered to -// the done label with a condition code of equal. -// It is mandatory to react on the condition code At the done label. -// -void MacroAssembler::biased_locking_enter(Register obj_reg, - Register mark_reg, - Register temp_reg, - Register temp2_reg, // May be Z_RO! - Label &done, - Label *slow_case) { - assert(UseBiasedLocking, "why call this otherwise?"); - assert_different_registers(obj_reg, mark_reg, temp_reg, temp2_reg); - - Label cas_label; // Try, if implemented, CAS locking. Fall thru to slow path otherwise. - - BLOCK_COMMENT("biased_locking_enter {"); - - // Biased locking - // See whether the lock is currently biased toward our thread and - // whether the epoch is still valid. - // Note that the runtime guarantees sufficient alignment of JavaThread - // pointers to allow age to be placed into low bits. - assert(markWord::age_shift == markWord::lock_bits + markWord::biased_lock_bits, - "biased locking makes assumptions about bit layout"); - z_lr(temp_reg, mark_reg); - z_nilf(temp_reg, markWord::biased_lock_mask_in_place); - z_chi(temp_reg, markWord::biased_lock_pattern); - z_brne(cas_label); // Try cas if object is not biased, i.e. cannot be biased locked. - - load_prototype_header(temp_reg, obj_reg); - load_const_optimized(temp2_reg, ~((int) markWord::age_mask_in_place)); - - z_ogr(temp_reg, Z_thread); - z_xgr(temp_reg, mark_reg); - z_ngr(temp_reg, temp2_reg); - if (PrintBiasedLockingStatistics) { - increment_counter_eq((address) BiasedLocking::biased_lock_entry_count_addr(), mark_reg, temp2_reg); - // Restore mark_reg. - z_lg(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg); - } - branch_optimized(Assembler::bcondEqual, done); // Biased lock obtained, return success. 
- - Label try_revoke_bias; - Label try_rebias; - Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes()); - - //---------------------------------------------------------------------------- - // At this point we know that the header has the bias pattern and - // that we are not the bias owner in the current epoch. We need to - // figure out more details about the state of the header in order to - // know what operations can be legally performed on the object's - // header. - - // If the low three bits in the xor result aren't clear, that means - // the prototype header is no longer biased and we have to revoke - // the bias on this object. - z_tmll(temp_reg, markWord::biased_lock_mask_in_place); - z_brnaz(try_revoke_bias); - - // Biasing is still enabled for this data type. See whether the - // epoch of the current bias is still valid, meaning that the epoch - // bits of the mark word are equal to the epoch bits of the - // prototype header. (Note that the prototype header's epoch bits - // only change at a safepoint.) If not, attempt to rebias the object - // toward the current thread. Note that we must be absolutely sure - // that the current epoch is invalid in order to do this because - // otherwise the manipulations it performs on the mark word are - // illegal. - z_tmll(temp_reg, markWord::epoch_mask_in_place); - z_brnaz(try_rebias); - - //---------------------------------------------------------------------------- - // The epoch of the current bias is still valid but we know nothing - // about the owner; it might be set or it might be clear. Try to - // acquire the bias of the object using an atomic operation. If this - // fails we will go in to the runtime to revoke the object's bias. - // Note that we first construct the presumed unbiased header so we - // don't accidentally blow away another thread's valid bias. - z_nilf(mark_reg, markWord::biased_lock_mask_in_place | markWord::age_mask_in_place | - markWord::epoch_mask_in_place); - z_lgr(temp_reg, Z_thread); - z_llgfr(mark_reg, mark_reg); - z_ogr(temp_reg, mark_reg); - - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); - - z_csg(mark_reg, temp_reg, 0, obj_reg); - - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - - if (PrintBiasedLockingStatistics) { - increment_counter_eq((address) BiasedLocking::anonymously_biased_lock_entry_count_addr(), - temp_reg, temp2_reg); - } - if (slow_case != NULL) { - branch_optimized(Assembler::bcondNotEqual, *slow_case); // Biased lock not obtained, need to go the long way. - } - branch_optimized(Assembler::bcondAlways, done); // Biased lock status given in condition code. - - //---------------------------------------------------------------------------- - bind(try_rebias); - // At this point we know the epoch has expired, meaning that the - // current "bias owner", if any, is actually invalid. Under these - // circumstances _only_, we are allowed to use the current header's - // value as the comparison value when doing the cas to acquire the - // bias in the current epoch. In other words, we allow transfer of - // the bias from one thread to another directly in this situation. 
- - z_nilf(mark_reg, markWord::biased_lock_mask_in_place | markWord::age_mask_in_place | markWord::epoch_mask_in_place); - load_prototype_header(temp_reg, obj_reg); - z_llgfr(mark_reg, mark_reg); - - z_ogr(temp_reg, Z_thread); - - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); - - z_csg(mark_reg, temp_reg, 0, obj_reg); - - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - - if (PrintBiasedLockingStatistics) { - increment_counter_eq((address) BiasedLocking::rebiased_lock_entry_count_addr(), temp_reg, temp2_reg); - } - if (slow_case != NULL) { - branch_optimized(Assembler::bcondNotEqual, *slow_case); // Biased lock not obtained, need to go the long way. - } - z_bru(done); // Biased lock status given in condition code. - - //---------------------------------------------------------------------------- - bind(try_revoke_bias); - // The prototype mark in the klass doesn't have the bias bit set any - // more, indicating that objects of this data type are not supposed - // to be biased any more. We are going to try to reset the mark of - // this object to the prototype value and fall through to the - // CAS-based locking scheme. Note that if our CAS fails, it means - // that another thread raced us for the privilege of revoking the - // bias of this particular object, so it's okay to continue in the - // normal locking code. - load_prototype_header(temp_reg, obj_reg); - - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); - - z_csg(mark_reg, temp_reg, 0, obj_reg); - - // Fall through to the normal CAS-based lock, because no matter what - // the result of the above CAS, some thread must have succeeded in - // removing the bias bit from the object's header. - if (PrintBiasedLockingStatistics) { - // z_cgr(mark_reg, temp2_reg); - increment_counter_eq((address) BiasedLocking::revoked_lock_entry_count_addr(), temp_reg, temp2_reg); - } - - bind(cas_label); - BLOCK_COMMENT("} biased_locking_enter"); -} - -void MacroAssembler::biased_locking_exit(Register mark_addr, Register temp_reg, Label& done) { - // Check for biased locking unlock case, which is a no-op - // Note: we do not have to check the thread ID for two reasons. - // First, the interpreter checks for IllegalMonitorStateException at - // a higher level. Second, if the bias was revoked while we held the - // lock, the object could not be rebiased toward another thread, so - // the bias bit would be clear. - BLOCK_COMMENT("biased_locking_exit {"); - - z_lg(temp_reg, 0, mark_addr); - z_nilf(temp_reg, markWord::biased_lock_mask_in_place); - - z_chi(temp_reg, markWord::biased_lock_pattern); - z_bre(done); - BLOCK_COMMENT("} biased_locking_exit"); -} - -void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias) { +void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2) { Register displacedHeader = temp1; Register currentHeader = temp1; Register temp = temp2; @@ -3334,10 +3146,6 @@ void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Regis z_brne(done); } - if (try_bias) { - biased_locking_enter(oop, displacedHeader, temp, Z_R0, done); - } - // Handle existing monitor. // The object has an existing monitor iff (mark & monitor_value) != 0. 
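The monitor test named in that comment is a plain bit probe on the mark word. A small sketch; the value 2 for monitor_value is an assumption about the usual HotSpot lock-bit encoding, not taken from this patch:

#include <cstdint>

// Sketch: the low two mark-word bits encode the lock state; an object is
// inflated (owns an ObjectMonitor) iff the monitor bit pattern is present.
static bool has_monitor(uint64_t mark) {
  const uint64_t monitor_value = 2;       // assumed lock-bits pattern '10'
  return (mark & monitor_value) != 0;     // matches the comment above
}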
guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word"); @@ -3402,7 +3210,7 @@ void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Regis // _complete_monitor_locking_Java. } -void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias) { +void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2) { Register displacedHeader = temp1; Register currentHeader = temp2; Register temp = temp1; @@ -3412,10 +3220,6 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg BLOCK_COMMENT("compiler_fast_unlock_object {"); - if (try_bias) { - biased_locking_exit(oop, currentHeader, done); - } - // Find the lock address and load the displaced header from the stack. // if the displaced header is zero, we have a recursive unlock. load_and_test_long(displacedHeader, Address(box, BasicLock::displaced_header_offset_in_bytes())); @@ -3833,12 +3637,6 @@ void MacroAssembler::load_klass(Register klass, Register src_oop) { } } -void MacroAssembler::load_prototype_header(Register Rheader, Register Rsrc_oop) { - assert_different_registers(Rheader, Rsrc_oop); - load_klass(Rheader, Rsrc_oop); - z_lg(Rheader, Address(Rheader, Klass::prototype_header_offset())); -} - void MacroAssembler::store_klass(Register klass, Register dst_oop, Register ck) { if (UseCompressedClassPointers) { assert_different_registers(dst_oop, klass, Z_R0); diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.hpp b/src/hotspot/cpu/s390/macroAssembler_s390.hpp index 113a1a3db2afc9bb7d3570c8e5c06fcdc9d00ae7..72cfbe02355080dbba97eb260cc7e976f22f9b22 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.hpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2019 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -718,26 +718,9 @@ class MacroAssembler: public Assembler { // Increment a counter at counter_address when the eq condition code is set. // Kills registers tmp1_reg and tmp2_reg and preserves the condition code. void increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg); - // Biased locking support - // Upon entry,obj_reg must contain the target object, and mark_reg - // must contain the target object's header. - // Destroys mark_reg if an attempt is made to bias an anonymously - // biased lock. In this case a failure will go either to the slow - // case or fall through with the notEqual condition code set with - // the expectation that the slow case in the runtime will be called. - // In the fall-through case where the CAS-based lock is done, - // mark_reg is not destroyed. - void biased_locking_enter(Register obj_reg, Register mark_reg, Register temp_reg, - Register temp2_reg, Label& done, Label* slow_case = NULL); - // Upon entry, the base register of mark_addr must contain the oop. - // Destroys temp_reg. - // If allow_delay_slot_filling is set to true, the next instruction - // emitted after this one will go in an annulled delay slot if the - // biased locking exit case failed. 
- void biased_locking_exit(Register mark_addr, Register temp_reg, Label& done); - - void compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias = UseBiasedLocking); - void compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias = UseBiasedLocking); + + void compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2); + void compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2); void resolve_jobject(Register value, Register tmp1, Register tmp2); @@ -782,7 +765,6 @@ void decode_klass_not_null(Register dst); void load_klass(Register klass, Address mem); void load_klass(Register klass, Register src_oop); - void load_prototype_header(Register Rheader, Register Rsrc_oop); void store_klass(Register klass, Register dst_oop, Register ck = noreg); // Klass will get compressed if ck not provided. void store_klass_gap(Register s, Register dst_oop); diff --git a/src/hotspot/cpu/s390/matcher_s390.hpp b/src/hotspot/cpu/s390/matcher_s390.hpp index 2906f584a317670edc1460dd36a852c01703e7ef..09cb819a6414ab29b5f1a8c540767858ed27603a 100644 --- a/src/hotspot/cpu/s390/matcher_s390.hpp +++ b/src/hotspot/cpu/s390/matcher_s390.hpp @@ -57,6 +57,9 @@ // No support for generic vector operands. static const bool supports_generic_vector_operands = false; + // No support for 48 extra htbl entries in aes-gcm intrinsic + static const int htbl_entries = -1; + static constexpr bool isSimpleConstant64(jlong value) { // Probably always true, even if a temp register is required. return true; @@ -128,6 +131,11 @@ return false; } + // Does the CPU support vector constant rotate instructions? + static constexpr bool supports_vector_constant_rotates(int shift) { + return false; + } + // Does the CPU supports vector unsigned comparison instructions? static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { return false; @@ -144,4 +152,7 @@ return true; } + // Implements a variant of EncodeISOArrayNode that encodes ASCII only + static const bool supports_encode_ascii_array = false; + #endif // CPU_S390_MATCHER_S390_HPP diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad index acd601b4929ae401f0cab5b1f4877f7fa1909ee2..63004f8e2634349b1d801cfb002637b7e1502704 100644 --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -1554,10 +1554,6 @@ OptoRegPair Matcher::vector_return_value(uint ideal_reg) { return OptoRegPair(0, 0); } -const int Matcher::float_pressure(int default_pressure_threshold) { - return default_pressure_threshold; -} //----------SUPERWORD HELPERS---------------------------------------- // Vector width in bytes. @@ -1609,7 +1605,7 @@ MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, return NULL; } -bool Matcher::is_generic_reg2reg_move(MachNode* m) { +bool Matcher::is_reg2reg_move(MachNode* m) { ShouldNotReachHere(); // generic vector operands not supported return false; } @@ -1665,6 +1661,17 @@ bool Matcher::is_spillable_arg(int reg) { return can_be_java_arg(reg); } +uint Matcher::int_pressure_limit() +{ + // Medium size register set, 6 special purpose regs, 3 SOE regs. + return (INTPRESSURE == -1) ? 10 : INTPRESSURE; +} + +uint Matcher::float_pressure_limit() +{ + return (FLOATPRESSURE == -1) ?
15 : FLOATPRESSURE; +} + bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { return false; } @@ -9809,8 +9816,7 @@ instruct cmpFastLock(flagsReg pcc, iRegP_N2P oop, iRegP_N2P box, iRegP tmp1, iRe ins_cost(100); // TODO: s390 port size(VARIABLE_SIZE); // Uses load_const_optimized. format %{ "FASTLOCK $oop, $box; KILL Z_ARG4, Z_ARG5" %} - ins_encode %{ __ compiler_fast_lock_object($oop$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register, - UseBiasedLocking && !UseOptoBiasInlining); %} + ins_encode %{ __ compiler_fast_lock_object($oop$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register); %} ins_pipe(pipe_class_dummy); %} @@ -9818,10 +9824,9 @@ instruct cmpFastUnlock(flagsReg pcc, iRegP_N2P oop, iRegP_N2P box, iRegP tmp1, i match(Set pcc (FastUnlock oop box)); effect(TEMP tmp1, TEMP tmp2); ins_cost(100); - // TODO: s390 port size(FIXED_SIZE); // emitted code depends on UseBiasedLocking being on/off. + // TODO: s390 port size(FIXED_SIZE); format %{ "FASTUNLOCK $oop, $box; KILL Z_ARG4, Z_ARG5" %} - ins_encode %{ __ compiler_fast_unlock_object($oop$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register, - UseBiasedLocking && !UseOptoBiasInlining); %} + ins_encode %{ __ compiler_fast_unlock_object($oop$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register); %} ins_pipe(pipe_class_dummy); %} @@ -10277,6 +10282,7 @@ instruct has_negatives(rarg5RegP ary1, iRegI len, iRegI result, roddRegI oddReg, // encode char[] to byte[] in ISO_8859_1 instruct encode_iso_array(iRegP src, iRegP dst, iRegI result, iRegI len, iRegI tmp, flagsReg cr) %{ + predicate(!((EncodeISOArrayNode*)n)->is_ascii()); match(Set result (EncodeISOArray src (Binary dst len))); effect(TEMP_DEF result, TEMP tmp, KILL cr); // R0, R1 are killed, too. ins_cost(300); diff --git a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp index 95facb3a2efa7fbd42b700e69e30d1f72246707d..949f3d206e63fef79e08d5d02a55d15f4ecfb818 100644 --- a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp +++ b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp @@ -1873,13 +1873,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // Get the lock box slot's address. __ add2reg(r_box, lock_offset, Z_SP); -#ifdef ASSERT - if (UseBiasedLocking) - // Making the box point to itself will make it clear it went unused - // but also be obviously invalid. - __ z_stg(r_box, 0, r_box); -#endif // ASSERT - // Try fastpath for locking. // Fast_lock kills r_temp_1, r_temp_2. (Don't use R1 as temp, won't work!) __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2); diff --git a/src/hotspot/cpu/s390/templateTable_s390.cpp b/src/hotspot/cpu/s390/templateTable_s390.cpp index e28481ddd86d01d19c4e66ce7b8dd47e837bd38d..49eab73a198bc5701ee43b19e14372976a292195 100644 --- a/src/hotspot/cpu/s390/templateTable_s390.cpp +++ b/src/hotspot/cpu/s390/templateTable_s390.cpp @@ -3768,9 +3768,8 @@ void TemplateTable::_new() { // Get instance_size in InstanceKlass (scaled to a count of bytes). Register Rsize = offset; - const int mask = 1 << Klass::_lh_instance_slow_path_bit; __ z_llgf(Rsize, Address(iklass, Klass::layout_helper_offset())); - __ z_tmll(Rsize, mask); + __ z_tmll(Rsize, Klass::_lh_instance_slow_path_bit); __ z_btrue(slow_case); // Allocate the instance @@ -3813,14 +3812,8 @@ void TemplateTable::_new() { // Initialize object header only. 
__ bind(initialize_header); - if (UseBiasedLocking) { - Register prototype = RobjectFields; - __ z_lg(prototype, Address(iklass, Klass::prototype_header_offset())); - __ z_stg(prototype, Address(RallocatedObject, oopDesc::mark_offset_in_bytes())); - } else { - __ store_const(Address(RallocatedObject, oopDesc::mark_offset_in_bytes()), - (long)markWord::prototype().value()); - } + __ store_const(Address(RallocatedObject, oopDesc::mark_offset_in_bytes()), + (long)markWord::prototype().value()); __ store_klass_gap(Rzero, RallocatedObject); // Zero klass gap for compressed oops. __ store_klass(iklass, RallocatedObject); // Store klass last. diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 7c6bbc37eec112019914686c11666d22010d6ff2..aee326133e0a7af7ade25262fc983b95a06e7525 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -30,7 +30,6 @@ #include "interpreter/interpreter.hpp" #include "memory/resourceArea.hpp" #include "prims/methodHandles.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/objectMonitor.hpp" #include "runtime/os.hpp" #include "runtime/sharedRuntime.hpp" @@ -3758,6 +3757,15 @@ void Assembler::vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve emit_int16((unsigned char)0x8D, (0xC0 | encode)); } +void Assembler::vpermb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(VM_Version::supports_avx512_vbmi(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x8D); + emit_operand(dst, src); +} + void Assembler::vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(vector_len == AVX_128bit ? VM_Version::supports_avx512vlbw() : vector_len == AVX_256bit ? 
VM_Version::supports_avx512vlbw() : @@ -3830,6 +3838,22 @@ void Assembler::evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int emit_int16(0x76, (0xC0 | encode)); } +void Assembler::evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx512_vbmi(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x7D, (0xC0 | encode)); +} + +void Assembler::evpmultishiftqb(XMMRegister dst, XMMRegister ctl, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx512_vbmi(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), ctl->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16((unsigned char)0x83, (unsigned char)(0xC0 | encode)); +} + void Assembler::pause() { emit_int16((unsigned char)0xF3, (unsigned char)0x90); } @@ -4128,6 +4152,15 @@ void Assembler::vpmovmskb(Register dst, XMMRegister src, int vec_enc) { emit_int16((unsigned char)0xD7, (0xC0 | encode)); } +void Assembler::vpmaskmovd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert((VM_Version::supports_avx2() && vector_len == AVX_256bit), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true); + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x8C); + emit_operand(dst, src); +} + void Assembler::pextrd(Register dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); @@ -4549,6 +4582,15 @@ void Assembler::vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int emit_int16((unsigned char)0xF5, (0xC0 | encode)); } +void Assembler::vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) { +assert(vector_len == AVX_128bit? VM_Version::supports_avx() : + vector_len == AVX_256bit? VM_Version::supports_avx2() : + vector_len == AVX_512bit? 
VM_Version::supports_avx512bw() : 0, ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, src1, src2, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x04, (0xC0 | encode)); +} + void Assembler::evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_evex(), ""); assert(VM_Version::supports_avx512_vnni(), "must support vnni"); @@ -4857,6 +4899,15 @@ void Assembler::vptest(XMMRegister dst, XMMRegister src, int vector_len) { emit_int16(0x17, (0xC0 | encode)); } +void Assembler::evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx512vlbw(), ""); + // Encoding: EVEX.NDS.XXX.66.0F.W0 DB /r + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16((unsigned char)0x26, (0xC0 | encode)); +} + void Assembler::punpcklbw(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); @@ -6539,6 +6590,13 @@ void Assembler::psubq(XMMRegister dst, XMMRegister src) { emit_int8((0xC0 | encode)); } +void Assembler::vpsubusb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xD8, (0xC0 | encode)); +} + void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); @@ -6630,6 +6688,15 @@ void Assembler::pmuludq(XMMRegister dst, XMMRegister src) { emit_int16((unsigned char)0xF4, (0xC0 | encode)); } +void Assembler::vpmulhuw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert((vector_len == AVX_128bit && VM_Version::supports_avx()) || + (vector_len == AVX_256bit && VM_Version::supports_avx2()) || + (vector_len == AVX_512bit && VM_Version::supports_avx512bw()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xE4, (0xC0 | encode)); +} + void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); @@ -7402,6 +7469,10 @@ void Assembler::evprolq(XMMRegister dst, XMMRegister src, int shift, int vector_ emit_int24(0x72, (0xC0 | encode), shift & 0xFF); } +// Register is a class, but it would be assigned numerical value. +// "0" is assigned for xmm0. 
Thus we need to ignore -Wnonnull. +PRAGMA_DIAG_PUSH +PRAGMA_NONNULL_IGNORED void Assembler::evprord(XMMRegister dst, XMMRegister src, int shift, int vector_len) { assert(VM_Version::supports_evex(), "requires EVEX support"); assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support"); @@ -7419,6 +7490,7 @@ void Assembler::evprorq(XMMRegister dst, XMMRegister src, int shift, int vector_ int encode = vex_prefix_and_encode(xmm0->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int24(0x72, (0xC0 | encode), shift & 0xFF); } +PRAGMA_DIAG_POP void Assembler::evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { assert(VM_Version::supports_evex(), "requires EVEX support"); @@ -9404,6 +9476,13 @@ void Assembler::shlxq(Register dst, Register src1, Register src2) { emit_int16((unsigned char)0xF7, (0xC0 | encode)); } +void Assembler::shrxl(Register dst, Register src1, Register src2) { + assert(VM_Version::supports_bmi2(), ""); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes); + emit_int16((unsigned char)0xF7, (0xC0 | encode)); +} + void Assembler::shrxq(Register dst, Register src1, Register src2) { assert(VM_Version::supports_bmi2(), ""); InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true); diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 09849fc49f8770b04807fc22bb51acf6313a852a..5976597019b160de6f46ca7e866db2b39431457c 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -28,8 +28,6 @@ #include "asm/register.hpp" #include "utilities/powerOfTwo.hpp" -class BiasedLockingCounters; - // Contains all the definitions needed for x86 assembly code generation. 
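The PRAGMA_DIAG_PUSH / PRAGMA_NONNULL_IGNORED bracketing added around evprord/evprorq works around a GCC quirk: xmm0 encodes as register number 0, and the optimizer can fold that constant through a nonnull-annotated path and raise -Wnonnull. A minimal sketch of the pattern, assuming GCC/Clang pragma semantics; the macro names are HotSpot's, but these expansions are illustrative, not the real definitions:

    // Hypothetical stand-ins for HotSpot's macros, assuming GCC semantics.
    #define PRAGMA_DIAG_PUSH       _Pragma("GCC diagnostic push")
    #define PRAGMA_DIAG_POP        _Pragma("GCC diagnostic pop")
    #define PRAGMA_NONNULL_IGNORED _Pragma("GCC diagnostic ignored \"-Wnonnull\"")

    PRAGMA_DIAG_PUSH
    PRAGMA_NONNULL_IGNORED
    // Code that feeds xmm0's encoding (0) through a pointer-like parameter
    // goes here; the warning is restored immediately afterwards.
    int encode_xmm0() { return 0; }
    PRAGMA_DIAG_POP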
// Calling convention @@ -1690,6 +1688,7 @@ private: void vpermq(XMMRegister dst, XMMRegister src, int imm8); void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpermb(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); @@ -1699,6 +1698,8 @@ private: void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len); void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len); void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void evpmultishiftqb(XMMRegister dst, XMMRegister ctl, XMMRegister src, int vector_len); void pause(); @@ -1747,6 +1748,7 @@ private: void pmovmskb(Register dst, XMMRegister src); void vpmovmskb(Register dst, XMMRegister src, int vec_enc); + void vpmaskmovd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); // SSE 4.1 extract void pextrd(Register dst, XMMRegister src, int imm8); @@ -1812,6 +1814,8 @@ private: // Multiply add void pmaddwd(XMMRegister dst, XMMRegister src); void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + // Multiply add accumulate void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); @@ -1879,6 +1883,8 @@ private: void vptest(XMMRegister dst, XMMRegister src); void vptest(XMMRegister dst, Address src); + void evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + // Vector compare void vptest(XMMRegister dst, XMMRegister src, int vector_len); @@ -2140,6 +2146,7 @@ private: void shlxl(Register dst, Register src1, Register src2); void shlxq(Register dst, Register src1, Register src2); + void shrxl(Register dst, Register src1, Register src2); void shrxq(Register dst, Register src1, Register src2); void bzhiq(Register dst, Register src1, Register src2); @@ -2244,6 +2251,7 @@ private: void psubw(XMMRegister dst, XMMRegister src); void psubd(XMMRegister dst, XMMRegister src); void psubq(XMMRegister dst, XMMRegister src); + void vpsubusb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); @@ -2264,6 +2272,7 @@ private: void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + void vpmulhuw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); // Minimum of packed integers void pminsb(XMMRegister dst, XMMRegister src); diff --git a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp index 1ba5061a574817dc88cbdc4501ce3be61bef8534..0656b204f2565b827fd568fd82d88bc215803a10 100644 --- a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp +++ b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp @@ -543,7 +543,9 @@ void 
ArrayCopyStub::emit_code(LIR_Assembler* ce) { ce->add_call_info_here(info()); #ifndef PRODUCT - __ incrementl(ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); + if (PrintC1Statistics) { + __ incrementl(ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); + } #endif __ jmp(_continuation); diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index e2454f32481c9a214203e9fdd523be8b3f7d7e3e..971c2515017df2c9d49d55713802729eb5037a33 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -953,7 +953,7 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool po } -void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { +void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) { LIR_Address* to_addr = dest->as_address_ptr(); PatchingStub* patch = NULL; Register compressed_src = rscratch1; @@ -1178,7 +1178,7 @@ void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { } -void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { +void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide) { assert(src->is_address(), "should not call otherwise"); assert(dest->is_register(), "should not call otherwise"); @@ -3512,13 +3512,9 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { if (!UseFastLocking) { __ jmp(*op->stub()->entry()); } else if (op->code() == lir_lock) { - Register scratch = noreg; - if (UseBiasedLocking) { - scratch = op->scratch_opr()->as_register(); - } assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); // add debug info for NullPointerException only if one is possible - int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); + int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry()); if (op->info() != NULL) { add_debug_info_for_null_check(null_check_offset, op->info()); } diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index ba18ce30cfa9168d385264e5a8fe74d7cba9ef0f..0c7428e456d0dcf3853851d5580fca42c17eff38 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -288,11 +288,6 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { // "lock" stores the address of the monitor stack slot, so this is not an oop LIR_Opr lock = new_register(T_INT); - // Need a scratch register for biased locking on x86 - LIR_Opr scratch = LIR_OprFact::illegalOpr; - if (UseBiasedLocking) { - scratch = new_register(T_INT); - } CodeEmitInfo* info_for_exception = NULL; if (x->needs_null_check()) { @@ -301,7 +296,7 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { // this CodeEmitInfo must not have the xhandlers because here the // object is already locked (xhandlers expect object to be unlocked) CodeEmitInfo* info = state_for(x, x->state(), true); - monitor_enter(obj.result(), lock, syncTempOpr(), scratch, + monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no(), info_for_exception, info); } diff --git 
a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index 9301f5d604a45dca5972e60a11efe0fdcd7d6615..80d0ca4dd181d338720e5a3d9b8b80a93527e580 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -33,12 +33,11 @@ #include "oops/arrayOop.hpp" #include "oops/markWord.hpp" #include "runtime/basicLock.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/os.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" -int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { +int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { const Register rklass_decode_tmp = LP64_ONLY(rscratch1) NOT_LP64(noreg); const int aligned_mask = BytesPerWord -1; const int hdr_offset = oopDesc::mark_offset_in_bytes(); @@ -61,11 +60,6 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr jcc(Assembler::notZero, slow_case); } - if (UseBiasedLocking) { - assert(scratch != noreg, "should have scratch register at this point"); - biased_locking_enter(disp_hdr, obj, hdr, scratch, rklass_decode_tmp, false, done, &slow_case); - } - // Load object header movptr(hdr, Address(obj, hdr_offset)); // and mark it as unlocked @@ -78,10 +72,6 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr MacroAssembler::lock(); // must be immediately before cmpxchg! cmpxchgptr(disp_hdr, Address(obj, hdr_offset)); // if the object header was the same, we're done - if (PrintBiasedLockingStatistics) { - cond_inc32(Assembler::equal, - ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); - } jcc(Assembler::equal, done); // if the object header was not the same, it is now in the hdr register // => test if it is a stack pointer into the same stack (recursive locking), i.e.: @@ -116,22 +106,15 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); Label done; - if (UseBiasedLocking) { - // load object - movptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); - biased_locking_exit(obj, hdr, done); - } - // load displaced header movptr(hdr, Address(disp_hdr, 0)); // if the loaded hdr is NULL we had recursive locking testptr(hdr, hdr); // if we had recursive locking, we are done jcc(Assembler::zero, done); - if (!UseBiasedLocking) { - // load object - movptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); - } + // load object + movptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + verify_oop(obj); // test if object header is pointing to the displaced header, and if so, restore // the displaced header in the object - if the object header is not pointing to @@ -159,14 +142,8 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) { assert_different_registers(obj, klass, len); Register tmp_encode_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg); - if (UseBiasedLocking && !len->is_valid()) { - assert_different_registers(obj, klass, len, t1, t2); - movptr(t1, Address(klass, Klass::prototype_header_offset())); - movptr(Address(obj, oopDesc::mark_offset_in_bytes()), t1); - } else { - // This assumes that all prototype bits fit in an int32_t - 
movptr(Address(obj, oopDesc::mark_offset_in_bytes ()), (int32_t)(intptr_t)markWord::prototype().value()); - } + // This assumes that all prototype bits fit in an int32_t + movptr(Address(obj, oopDesc::mark_offset_in_bytes ()), (int32_t)(intptr_t)markWord::prototype().value()); #ifdef _LP64 if (UseCompressedClassPointers) { // Take care not to kill klass movptr(t1, klass); diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp index 77d5fc6eccdf12eac2a590935fe1744f308c9f80..374b1ac3be34c27e130ab8329c83d58462b87563 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp @@ -49,9 +49,8 @@ // hdr : must be rax, contents destroyed // obj : must point to the object to lock, contents preserved // disp_hdr: must point to the displaced header location, contents preserved - // scratch : scratch register, contents destroyed // returns code offset at which to add null check debug information - int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + int lock_object (Register swap, Register obj, Register disp_hdr, Label& slow_case); // unlocking // hdr : contents destroyed diff --git a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp index 42c46496d47b91b9dedb2501eb2355db42723b08..d0c356c462605583acd2c446d15c3eef4fae1713 100644 --- a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp +++ b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp @@ -318,7 +318,11 @@ enum reg_save_layout { // expensive. The deopt blob is the only thing which needs to // describe FPU registers. In all other cases it should be sufficient // to simply save their current value. - +// +// Register is a class, but it would be assigned numerical value. +// "0" is assigned for rax. Thus we need to ignore -Wnonnull. 
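The C1 initialize_header hunk above leans on its comment that "all prototype bits fit in an int32_t": with the per-klass biased prototype gone, every new object starts from the constant markWord::prototype(), assumed here (per the markWord encoding: no hash, age 0, lock bits 01) to be just 0x1, so the sign-extending 32-bit movptr immediate is lossless. A quick check of that assumption:

    #include <cassert>
    #include <cstdint>

    // Assumed mirror of markWord::prototype(): no hash, age 0, unlocked (01).
    constexpr uintptr_t prototype_value = 0x1;

    int main() {
      // movptr with a 32-bit immediate sign-extends to 64 bits; the single
      // store is only correct because the round-trip through int32_t is lossless.
      assert((uintptr_t)(intptr_t)(int32_t)prototype_value == prototype_value);
      return 0;
    }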
+PRAGMA_DIAG_PUSH +PRAGMA_NONNULL_IGNORED static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args, bool save_fpu_registers = true) { @@ -418,6 +422,7 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args, return map; } +PRAGMA_DIAG_POP #define __ this-> diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 3e61c86c6d47ffbfb6c3d0ae71e3c5728dfde0f9..9b891ed727bd9b44027c1c9c0dc374e08d1f7735 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -30,7 +30,6 @@ #include "opto/intrinsicnode.hpp" #include "opto/opcodes.hpp" #include "opto/subnode.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/objectMonitor.hpp" #include "runtime/stubRoutines.hpp" @@ -234,7 +233,6 @@ void C2_MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Regi Metadata* method_data, bool profile_rtm, Label& DONE_LABEL, Label& IsInflated) { assert(UseRTMForStackLocks, "why call this otherwise?"); - assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking"); assert(tmpReg == rax, ""); assert(scrReg == rdx, ""); Label L_rtm_retry, L_decrement_retry, L_on_abort; @@ -244,7 +242,7 @@ void C2_MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Regi bind(L_rtm_retry); } movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); - testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral|biased + testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral jcc(Assembler::notZero, IsInflated); if (PrintPreciseRTMLockingStatistics || profile_rtm) { @@ -259,8 +257,8 @@ void C2_MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Regi } xbegin(L_on_abort); movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword - andptr(tmpReg, markWord::biased_lock_mask_in_place); // look at 3 lock bits - cmpptr(tmpReg, markWord::unlocked_value); // bits = 001 unlocked + andptr(tmpReg, markWord::lock_mask_in_place); // look at 2 lock bits + cmpptr(tmpReg, markWord::unlocked_value); // bits = 01 unlocked jcc(Assembler::equal, DONE_LABEL); // all done if unlocked Register abort_status_Reg = tmpReg; // status of abort is stored in RAX @@ -447,7 +445,6 @@ void C2_MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, R // scr: tmp -- KILLED void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg, Register cx1Reg, Register cx2Reg, - BiasedLockingCounters* counters, RTMLockingCounters* rtm_counters, RTMLockingCounters* stack_rtm_counters, Metadata* method_data, @@ -462,10 +459,6 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp assert_different_registers(objReg, boxReg, tmpReg, scrReg); } - if (counters != NULL) { - atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg); - } - // Possible cases that we'll encounter in fast_lock // ------------------------------------------------ // * Inflated @@ -473,9 +466,6 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp // -- Locked // = by self // = by other - // * biased - // -- by Self - // -- by other // * neutral // * stack-locked // -- by self @@ -493,16 +483,6 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp jcc(Assembler::notZero, DONE_LABEL); } - // it's stack-locked, biased or neutral - // TODO: optimize away redundant LDs of 
obj->mark and improve the markword triage - // order to reduce the number of conditional branches in the most common cases. - // Beware -- there's a subtle invariant that fetch of the markword - // at [FETCH], below, will never observe a biased encoding (*101b). - // If this invariant is not held we risk exclusion (safety) failure. - if (UseBiasedLocking && !UseOptoBiasInlining) { - biased_locking_enter(boxReg, objReg, tmpReg, scrReg, cx1Reg, false, DONE_LABEL, NULL, counters); - } - #if INCLUDE_RTM_OPT if (UseRTMForStackLocks && use_rtm) { rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg, @@ -512,7 +492,7 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp #endif // INCLUDE_RTM_OPT movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH] - testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral|biased + testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral jccb(Assembler::notZero, IsInflated); // Attempt stack-locking ... @@ -520,10 +500,6 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS lock(); cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg - if (counters != NULL) { - cond_inc32(Assembler::equal, - ExternalAddress((address)counters->fast_path_entry_count_addr())); - } jcc(Assembler::equal, DONE_LABEL); // Success // Recursive locking. @@ -533,10 +509,6 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp // Next instruction set ZFlag == 1 (Success) if difference is less then one page. andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) ); movptr(Address(boxReg, 0), tmpReg); - if (counters != NULL) { - cond_inc32(Assembler::equal, - ExternalAddress((address)counters->fast_path_entry_count_addr())); - } jmp(DONE_LABEL); bind(IsInflated); @@ -659,19 +631,12 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t Label DONE_LABEL, Stacked, CheckSucc; - // Critically, the biased locking test must have precedence over - // and appear before the (box->dhw == 0) recursive stack-lock test. - if (UseBiasedLocking && !UseOptoBiasInlining) { - biased_locking_exit(objReg, tmpReg, DONE_LABEL); - } - #if INCLUDE_RTM_OPT if (UseRTMForStackLocks && use_rtm) { - assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking"); Label L_regular_unlock; movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword - andptr(tmpReg, markWord::biased_lock_mask_in_place); // look at 3 lock bits - cmpptr(tmpReg, markWord::unlocked_value); // bits = 001 unlocked + andptr(tmpReg, markWord::lock_mask_in_place); // look at 2 lock bits + cmpptr(tmpReg, markWord::unlocked_value); // bits = 01 unlocked jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock xend(); // otherwise end... jmp(DONE_LABEL); // ... and we're done @@ -738,7 +703,7 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t jmpb (DONE_LABEL); bind (Stacked); - // It's not inflated and it's not recursively stack-locked and it's not biased. + // It's not inflated and it's not recursively stack-locked. // It must be stack-locked. // Try to reset the header to displaced header. 
// The "box" value on the stack is stable, so we can reload @@ -1462,7 +1427,7 @@ void C2_MacroAssembler::evscatter(BasicType typ, Register base, XMMRegister idx, } } -void C2_MacroAssembler::load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt) { +void C2_MacroAssembler::load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy) { if (vlen_in_bytes <= 16) { pxor (dst, dst); psubb(dst, src); @@ -1477,10 +1442,12 @@ void C2_MacroAssembler::load_vector_mask(XMMRegister dst, XMMRegister src, int v default: assert(false, "%s", type2name(elem_bt)); } } else { + assert(!is_legacy || !is_subword_type(elem_bt) || vlen_in_bytes < 64, ""); int vlen_enc = vector_length_encoding(vlen_in_bytes); vpxor (dst, dst, dst, vlen_enc); - vpsubb(dst, dst, src, vlen_enc); + vpsubb(dst, dst, src, is_legacy ? AVX_256bit : vlen_enc); + switch (elem_bt) { case T_BYTE: /* nothing to do */ break; case T_SHORT: vpmovsxbw(dst, dst, vlen_enc); break; @@ -1496,7 +1463,11 @@ void C2_MacroAssembler::load_vector_mask(XMMRegister dst, XMMRegister src, int v void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes) { ExternalAddress addr(StubRoutines::x86::vector_iota_indices()); - if (vlen_in_bytes <= 16) { + if (vlen_in_bytes == 4) { + movdl(dst, addr); + } else if (vlen_in_bytes == 8) { + movq(dst, addr); + } else if (vlen_in_bytes == 16) { movdqu(dst, addr, scratch); } else if (vlen_in_bytes == 32) { vmovdqu(dst, addr, scratch); @@ -1505,6 +1476,7 @@ void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int evmovdqub(dst, k0, addr, false /*merge*/, Assembler::AVX_512bit, scratch); } } + // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles. void C2_MacroAssembler::reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src) { @@ -3887,6 +3859,9 @@ void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister vpxor(xtmp, xtmp, xtmp, vec_enc); vpsubb(xtmp, xtmp, mask, vec_enc); vpmovmskb(tmp, xtmp, vec_enc); + if (masklen < 64) { + andq(tmp, (((jlong)1 << masklen) - 1)); + } switch(opc) { case Op_VectorMaskTrueCount: popcntq(dst, tmp); diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index 0a61be37bd1d3d865d53f86c9932b199db7c2580..2ef8e27cadce2fd08778cfef37cb490527c70384 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -38,7 +38,6 @@ public: // See full desription in macroAssembler_x86.cpp. 
void fast_lock(Register obj, Register box, Register tmp, Register scr, Register cx1, Register cx2, - BiasedLockingCounters* counters, RTMLockingCounters* rtm_counters, RTMLockingCounters* stack_rtm_counters, Metadata* method_data, @@ -142,7 +141,7 @@ public: void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len); void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len); - void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt); + void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy); void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes); // vector compare diff --git a/src/hotspot/cpu/x86/c2_globals_x86.hpp b/src/hotspot/cpu/x86/c2_globals_x86.hpp index 776caa30cf9a55b104136aaf2e138d3d00bd09c4..7e5128d7f2d13c574c4d704abf2a4ae60bd87e68 100644 --- a/src/hotspot/cpu/x86/c2_globals_x86.hpp +++ b/src/hotspot/cpu/x86/c2_globals_x86.hpp @@ -46,8 +46,6 @@ define_pd_global(intx, FreqInlineSize, 325); define_pd_global(intx, MinJumpTableSize, 10); define_pd_global(intx, LoopPercentProfileLimit, 30); #ifdef AMD64 -define_pd_global(intx, INTPRESSURE, 13); -define_pd_global(intx, FLOATPRESSURE, 14); define_pd_global(intx, InteriorEntryAlignment, 16); define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K)); define_pd_global(intx, LoopUnrollLimit, 60); @@ -58,8 +56,6 @@ define_pd_global(uintx, CodeCacheExpansionSize, 64*K); // Ergonomics related flags define_pd_global(uint64_t, MaxRAM, 128ULL*G); #else -define_pd_global(intx, INTPRESSURE, 6); -define_pd_global(intx, FLOATPRESSURE, 6); define_pd_global(intx, InteriorEntryAlignment, 4); define_pd_global(size_t, NewSizeThreadIncrease, 4*K); define_pd_global(intx, LoopUnrollLimit, 50); // Design center runs on 1.3.1 diff --git a/src/hotspot/cpu/x86/frame_x86.cpp b/src/hotspot/cpu/x86/frame_x86.cpp index 7c9804974d302e0fb336f55e4b4800aab5cecbc0..0f8cd45104002da7036e60956eb08c5e1a86c6ae 100644 --- a/src/hotspot/cpu/x86/frame_x86.cpp +++ b/src/hotspot/cpu/x86/frame_x86.cpp @@ -353,9 +353,18 @@ frame frame::sender_for_entry_frame(RegisterMap* map) const { return fr; } -JavaFrameAnchor* OptimizedEntryBlob::jfa_for_frame(const frame& frame) const { +OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { + assert(frame.is_optimized_entry_frame(), "wrong frame"); // need unextended_sp here, since normal sp is wrong for interpreter callees - return reinterpret_cast<JavaFrameAnchor*>(reinterpret_cast<char*>(frame.unextended_sp()) + in_bytes(jfa_sp_offset())); + return reinterpret_cast<OptimizedEntryBlob::FrameData*>( + reinterpret_cast<char*>(frame.unextended_sp()) + in_bytes(_frame_data_offset)); +} + +bool frame::optimized_entry_frame_is_first() const { + assert(is_optimized_entry_frame(), "must be optimized entry frame"); + OptimizedEntryBlob* blob = _cb->as_optimized_entry_blob(); + JavaFrameAnchor* jfa = blob->jfa_for_frame(*this); + return jfa->last_Java_sp() == NULL; } frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { @@ -364,6 +373,7 @@ frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { // Java frame called from C; skip all C frames and return top C // frame of that chunk as the sender JavaFrameAnchor* jfa = blob->jfa_for_frame(*this); + assert(!optimized_entry_frame_is_first(), "must have a frame anchor to go back to"); assert(jfa->last_Java_sp() > sp(), "must be
above this frame on stack"); // Since we are walking the stack now this nested anchor is obviously walkable // even if it wasn't when it was stacked. @@ -708,7 +718,6 @@ frame::frame(void* sp, void* fp, void* pc) { init((intptr_t*)sp, (intptr_t*)fp, (address)pc); } -void frame::pd_ps() {} #endif void JavaFrameAnchor::make_walkable(JavaThread* thread) { diff --git a/src/hotspot/cpu/x86/frame_x86.inline.hpp b/src/hotspot/cpu/x86/frame_x86.inline.hpp index 60847fbf39179e937e034fca9114faccabd0fe32..733a357d5fe3e7fe9a9e445b64b1ebd4b7c381ea 100644 --- a/src/hotspot/cpu/x86/frame_x86.inline.hpp +++ b/src/hotspot/cpu/x86/frame_x86.inline.hpp @@ -227,6 +227,10 @@ inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { // Compiled frames +// Register is a class, but it would be assigned numerical value. +// "0" is assigned for rax. Thus we need to ignore -Wnonnull. +PRAGMA_DIAG_PUSH +PRAGMA_NONNULL_IGNORED inline oop frame::saved_oop_result(RegisterMap* map) const { oop* result_adr = (oop *)map->location(rax->as_VMReg()); guarantee(result_adr != NULL, "bad register save location"); @@ -240,5 +244,6 @@ inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { *result_adr = obj; } +PRAGMA_DIAG_POP #endif // CPU_X86_FRAME_X86_INLINE_HPP diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp index 8316df3644bb5d26f9189815de1ef48d720579f6..645c2fec8bd9e9a49c7ae77293cf680ddf1ac8cd 100644 --- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp @@ -420,7 +420,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier Register pre_val_reg = stub->pre_val()->as_register(); if (stub->do_load()) { - ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/); } __ cmpptr(pre_val_reg, (int32_t)NULL_WORD); diff --git a/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp b/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp index d96ea768914e71197e4a5c88974df26d1cebd071..9da848f65bc0e186b5d8bafca41641afb49e7e90 100644 --- a/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -132,7 +132,7 @@ void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { ResourceMark mark; log_trace(nmethod, barrier)("deoptimize(nmethod: %p, return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", nm, (address *) return_address_ptr, nm->is_osr_method(), jth, - jth->get_thread_name(), callers_rsp, nm->verified_entry_point()); + jth->name(), callers_rsp, nm->verified_entry_point()); } assert(nm->frame_size() >= 3, "invariant"); diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp index 32d9050fcc87b1a122287df8b994b7bc8d483b46..28c295d6139cf69dd5c4fdf0f194ae460759c202 100644 --- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp @@ -682,13 +682,14 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, // // Try to CAS with given arguments. If successful, then we are done. - if (os::is_MP()) __ lock(); #ifdef _LP64 if (UseCompressedOops) { + __ lock(); __ cmpxchgl(newval, addr); } else #endif { + __ lock(); __ cmpxchgptr(newval, addr); } __ jcc(Assembler::equal, L_success); @@ -765,13 +766,14 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, } #endif - if (os::is_MP()) __ lock(); #ifdef _LP64 if (UseCompressedOops) { + __ lock(); __ cmpxchgl(tmp2, addr); } else #endif { + __ lock(); __ cmpxchgptr(tmp2, addr); } @@ -791,13 +793,14 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, __ movptr(oldval, tmp2); } - if (os::is_MP()) __ lock(); #ifdef _LP64 if (UseCompressedOops) { + __ lock(); __ cmpxchgl(newval, addr); } else #endif { + __ lock(); __ cmpxchgptr(newval, addr); } if (!exchange) { @@ -844,7 +847,7 @@ void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, Shen Register pre_val_reg = stub->pre_val()->as_register(); if (stub->do_load()) { - ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/); } __ cmpptr(pre_val_reg, (int32_t)NULL_WORD); diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp index a929908d003bcef896578ae6b081c3054a83bfb7..3ffd3a2a85f4062164a413b982774574c81a46e5 100644 --- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp @@ -465,6 +465,10 @@ private: _spill_offset += 8; } +// Register is a class, but it would be assigned numerical value. +// "0" is assigned for rax. Thus we need to ignore -Wnonnull. 
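In the Shenandoah cmpxchg_oop hunks above, the lock prefix is now emitted unconditionally and sits textually next to each cmpxchg variant; the surrounding logic is a resolve-and-retry scheme in which a failed CAS only counts as a real failure if the expected and witnessed values still differ after both are resolved to their to-space copies. A hedged C++ shape of that scheme, where resolve() is an illustrative identity stub standing in for the forwarding-pointer lookup, not the real barrier-set API:

    #include <atomic>
    #include <cstdint>

    using oop = uintptr_t;

    // Illustrative stub: Shenandoah would chase the forwarding word here.
    static oop resolve(oop o) { return o; }

    bool cas_oop(std::atomic<oop>* addr, oop expected, oop newval) {
      oop witnessed = expected;
      if (addr->compare_exchange_strong(witnessed, newval)) {
        return true;                       // the lock cmpxchg won on the first try
      }
      if (resolve(witnessed) == resolve(expected)) {
        // Same object behind a stale from-space pointer: retry against the
        // value actually observed in memory.
        return addr->compare_exchange_strong(witnessed, newval);
      }
      return false;                        // genuinely a different object
    }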
+PRAGMA_DIAG_PUSH +PRAGMA_NONNULL_IGNORED void initialize(ZLoadBarrierStubC2* stub) { // Create mask of caller saved registers that need to // be saved/restored if live @@ -540,6 +544,7 @@ private: // Stack pointer must be 16 bytes aligned for the call _spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size + opmask_spill_size + arg_spill_size, 16); } +PRAGMA_DIAG_POP public: ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : diff --git a/src/hotspot/cpu/x86/globals_x86.hpp b/src/hotspot/cpu/x86/globals_x86.hpp index 7142e0910c2cd8bea1d9ee58b9d91f6c3812c05b..f17f62a915d5e62c6a50f72374b4c520fca234d2 100644 --- a/src/hotspot/cpu/x86/globals_x86.hpp +++ b/src/hotspot/cpu/x86/globals_x86.hpp @@ -49,7 +49,6 @@ define_pd_global(intx, CodeEntryAlignment, 32); define_pd_global(intx, CodeEntryAlignment, 16); #endif // COMPILER2_OR_JVMCI define_pd_global(intx, OptoLoopAlignment, 16); -define_pd_global(intx, InlineFrequencyCount, 100); define_pd_global(intx, InlineSmallCode, 1000); #define DEFAULT_STACK_YELLOW_PAGES (NOT_WINDOWS(2) WINDOWS_ONLY(3)) @@ -109,6 +108,9 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); "Highest supported AVX instructions set on x86/x64") \ range(0, 99) \ \ + product(bool, UseKNLSetting, false, DIAGNOSTIC, \ + "Control whether Knights platform setting should be used") \ + \ product(bool, UseCLMUL, false, \ "Control whether CLMUL instructions can be used on x86/x64") \ \ diff --git a/src/hotspot/cpu/x86/interp_masm_x86.cpp b/src/hotspot/cpu/x86/interp_masm_x86.cpp index b332f8ed621fd058479b3140bcbc1cf60e65b66f..bf8b94a6319dbacfe7a14d0228fb3fb50689a3f8 100644 --- a/src/hotspot/cpu/x86/interp_masm_x86.cpp +++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp @@ -35,7 +35,6 @@ #include "prims/jvmtiExport.hpp" #include "prims/jvmtiThreadState.hpp" #include "runtime/basicLock.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/frame.inline.hpp" #include "runtime/safepointMechanism.hpp" #include "runtime/sharedRuntime.hpp" @@ -1205,8 +1204,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) { Label done; const Register swap_reg = rax; // Must use rax for cmpxchg instruction - const Register tmp_reg = rbx; // Will be passed to biased_locking_enter to avoid a - // problematic case where tmp_reg = no_reg. 
+ const Register tmp_reg = rbx; const Register obj_reg = LP64_ONLY(c_rarg3) NOT_LP64(rcx); // Will contain the oop const Register rklass_decode_tmp = LP64_ONLY(rscratch1) NOT_LP64(noreg); @@ -1227,10 +1225,6 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) { jcc(Assembler::notZero, slow_case); } - if (UseBiasedLocking) { - biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp_reg, rklass_decode_tmp, false, done, &slow_case); - } - // Load immediate 1 into swap_reg %rax movl(swap_reg, (int32_t)1); @@ -1245,10 +1239,6 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) { lock(); cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - if (PrintBiasedLockingStatistics) { - cond_inc32(Assembler::zero, - ExternalAddress((address) BiasedLocking::fast_path_entry_count_addr())); - } jcc(Assembler::zero, done); const int zero_bits = LP64_ONLY(7) NOT_LP64(3); @@ -1285,11 +1275,6 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) { // Save the test result, for recursive case, the result is zero movptr(Address(lock_reg, mark_offset), swap_reg); - - if (PrintBiasedLockingStatistics) { - cond_inc32(Assembler::zero, - ExternalAddress((address) BiasedLocking::fast_path_entry_count_addr())); - } jcc(Assembler::zero, done); bind(slow_case); @@ -1341,10 +1326,6 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) { // Free entry movptr(Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()), (int32_t)NULL_WORD); - if (UseBiasedLocking) { - biased_locking_exit(obj_reg, header_reg, done); - } - // Load the old header from BasicLock structure movptr(header_reg, Address(swap_reg, BasicLock::displaced_header_offset_in_bytes())); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index e94a71c44b42dad0f3bda827aa976574977dfdfa..072e7962b8b8b2f338729ec212e868c59b5a68b3 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -40,7 +40,6 @@ #include "oops/compressedOops.inline.hpp" #include "oops/klass.inline.hpp" #include "prims/methodHandles.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/flags/flagSetting.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/jniHandles.hpp" @@ -1171,7 +1170,20 @@ void MacroAssembler::addpd(XMMRegister dst, AddressLiteral src) { } } +// See 8273459. Function for ensuring 64-byte alignment, intended for stubs only. +// Stub code is generated once and never copied. +// NMethods can't use this because they get copied and we can't force alignment > 32 bytes. 
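The alignment helpers defined next boil down to one piece of arithmetic: pad the current position with nops until it is a multiple of the requested modulus. A self-contained sketch of that math; pad_to() is a hypothetical name, and the point of align64() passing the absolute pc() is that this computation must run against the final address rather than a section offset:

    #include <cassert>
    #include <cstdint>

    // Pad `pos` (an absolute pc here) up to a multiple of `modulus` bytes.
    uint64_t pad_to(uint64_t pos, uint64_t modulus) {
      uint64_t pad = (modulus - pos % modulus) % modulus;  // nop bytes to emit
      return pos + pad;
    }

    int main() {
      assert(pad_to(0x40, 64) == 0x40);  // already on a 64-byte boundary
      assert(pad_to(0x41, 64) == 0x80);  // 63 bytes of padding
      return 0;
    }

The new assert in align(int modulus) enforces the matching precondition: padding an offset only yields an aligned final address when the code's base alignment (CodeEntryAlignment) is at least the requested modulus.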
+void MacroAssembler::align64() { + align(64, (unsigned long long) pc()); +} + +void MacroAssembler::align32() { + align(32, (unsigned long long) pc()); +} + void MacroAssembler::align(int modulus) { + // 8273459: Ensure alignment is possible with current segment alignment + assert(modulus <= CodeEntryAlignment, "Alignment must be <= CodeEntryAlignment"); align(modulus, offset()); } @@ -1280,200 +1292,6 @@ void MacroAssembler::reserved_stack_check() { bind(no_reserved_zone_enabling); } -void MacroAssembler::biased_locking_enter(Register lock_reg, - Register obj_reg, - Register swap_reg, - Register tmp_reg, - Register tmp_reg2, - bool swap_reg_contains_mark, - Label& done, - Label* slow_case, - BiasedLockingCounters* counters) { - assert(UseBiasedLocking, "why call this otherwise?"); - assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); - assert(tmp_reg != noreg, "tmp_reg must be supplied"); - assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); - assert(markWord::age_shift == markWord::lock_bits + markWord::biased_lock_bits, "biased locking makes assumptions about bit layout"); - Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); - NOT_LP64( Address saved_mark_addr(lock_reg, 0); ) - - if (PrintBiasedLockingStatistics && counters == NULL) { - counters = BiasedLocking::counters(); - } - // Biased locking - // See whether the lock is currently biased toward our thread and - // whether the epoch is still valid - // Note that the runtime guarantees sufficient alignment of JavaThread - // pointers to allow age to be placed into low bits - // First check to see whether biasing is even enabled for this object - Label cas_label; - if (!swap_reg_contains_mark) { - movptr(swap_reg, mark_addr); - } - movptr(tmp_reg, swap_reg); - andptr(tmp_reg, markWord::biased_lock_mask_in_place); - cmpptr(tmp_reg, markWord::biased_lock_pattern); - jcc(Assembler::notEqual, cas_label); - // The bias pattern is present in the object's header. Need to check - // whether the bias owner and the epoch are both still current. -#ifndef _LP64 - // Note that because there is no current thread register on x86_32 we - // need to store off the mark word we read out of the object to - // avoid reloading it and needing to recheck invariants below. This - // store is unfortunate but it makes the overall code shorter and - // simpler. - movptr(saved_mark_addr, swap_reg); -#endif - load_prototype_header(tmp_reg, obj_reg, tmp_reg2); -#ifdef _LP64 - orptr(tmp_reg, r15_thread); - xorptr(tmp_reg, swap_reg); - Register header_reg = tmp_reg; -#else - xorptr(tmp_reg, swap_reg); - get_thread(swap_reg); - xorptr(swap_reg, tmp_reg); - Register header_reg = swap_reg; -#endif - andptr(header_reg, ~((int) markWord::age_mask_in_place)); - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address) counters->biased_lock_entry_count_addr())); - } - jcc(Assembler::equal, done); - - Label try_revoke_bias; - Label try_rebias; - - // At this point we know that the header has the bias pattern and - // that we are not the bias owner in the current epoch. We need to - // figure out more details about the state of the header in order to - // know what operations can be legally performed on the object's - // header. - - // If the low three bits in the xor result aren't clear, that means - // the prototype header is no longer biased and we have to revoke - // the bias on this object. 
- testptr(header_reg, markWord::biased_lock_mask_in_place); - jcc(Assembler::notZero, try_revoke_bias); - - // Biasing is still enabled for this data type. See whether the - // epoch of the current bias is still valid, meaning that the epoch - // bits of the mark word are equal to the epoch bits of the - // prototype header. (Note that the prototype header's epoch bits - // only change at a safepoint.) If not, attempt to rebias the object - // toward the current thread. Note that we must be absolutely sure - // that the current epoch is invalid in order to do this because - // otherwise the manipulations it performs on the mark word are - // illegal. - testptr(header_reg, markWord::epoch_mask_in_place); - jccb(Assembler::notZero, try_rebias); - - // The epoch of the current bias is still valid but we know nothing - // about the owner; it might be set or it might be clear. Try to - // acquire the bias of the object using an atomic operation. If this - // fails we will go in to the runtime to revoke the object's bias. - // Note that we first construct the presumed unbiased header so we - // don't accidentally blow away another thread's valid bias. - NOT_LP64( movptr(swap_reg, saved_mark_addr); ) - andptr(swap_reg, - markWord::biased_lock_mask_in_place | markWord::age_mask_in_place | markWord::epoch_mask_in_place); -#ifdef _LP64 - movptr(tmp_reg, swap_reg); - orptr(tmp_reg, r15_thread); -#else - get_thread(tmp_reg); - orptr(tmp_reg, swap_reg); -#endif - lock(); - cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); - } - if (slow_case != NULL) { - jcc(Assembler::notZero, *slow_case); - } - jmp(done); - - bind(try_rebias); - // At this point we know the epoch has expired, meaning that the - // current "bias owner", if any, is actually invalid. Under these - // circumstances _only_, we are allowed to use the current header's - // value as the comparison value when doing the cas to acquire the - // bias in the current epoch. In other words, we allow transfer of - // the bias from one thread to another directly in this situation. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - load_prototype_header(tmp_reg, obj_reg, tmp_reg2); -#ifdef _LP64 - orptr(tmp_reg, r15_thread); -#else - get_thread(swap_reg); - orptr(tmp_reg, swap_reg); - movptr(swap_reg, saved_mark_addr); -#endif - lock(); - cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg - // If the biasing toward our thread failed, then another thread - // succeeded in biasing it toward itself and we need to revoke that - // bias. The revocation will occur in the runtime in the slow case. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); - } - if (slow_case != NULL) { - jcc(Assembler::notZero, *slow_case); - } - jmp(done); - - bind(try_revoke_bias); - // The prototype mark in the klass doesn't have the bias bit set any - // more, indicating that objects of this data type are not supposed - // to be biased any more. 
We are going to try to reset the mark of - // this object to the prototype value and fall through to the - // CAS-based locking scheme. Note that if our CAS fails, it means - // that another thread raced us for the privilege of revoking the - // bias of this particular object, so it's okay to continue in the - // normal locking code. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - NOT_LP64( movptr(swap_reg, saved_mark_addr); ) - load_prototype_header(tmp_reg, obj_reg, tmp_reg2); - lock(); - cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg - // Fall through to the normal CAS-based lock, because no matter what - // the result of the above CAS, some thread must have succeeded in - // removing the bias bit from the object's header. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address) counters->revoked_lock_entry_count_addr())); - } - - bind(cas_label); -} - -void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { - assert(UseBiasedLocking, "why call this otherwise?"); - - // Check for biased locking unlock case, which is a no-op - // Note: we do not have to check the thread ID for two reasons. - // First, the interpreter checks for IllegalMonitorStateException at - // a higher level. Second, if the bias was revoked while we held the - // lock, the object could not be rebiased toward another thread, so - // the bias bit would be clear. - movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - andptr(temp_reg, markWord::biased_lock_mask_in_place); - cmpptr(temp_reg, markWord::biased_lock_pattern); - jcc(Assembler::equal, done); -} - void MacroAssembler::c2bool(Register x) { // implements x == 0 ? 
0 : 1 // note: must only look at least-significant byte of x @@ -4732,11 +4550,6 @@ void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); } -void MacroAssembler::load_prototype_header(Register dst, Register src, Register tmp) { - load_klass(dst, src, tmp); - movptr(dst, Address(dst, Klass::prototype_header_offset())); -} - void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { assert_different_registers(src, tmp); assert_different_registers(dst, tmp); @@ -5614,7 +5427,7 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned, BIND(L_exit); } -// encode char[] to byte[] in ISO_8859_1 +// encode char[] to byte[] in ISO_8859_1 or ASCII //@IntrinsicCandidate //private static int implEncodeISOArray(byte[] sa, int sp, //byte[] da, int dp, int len) { @@ -5627,10 +5440,23 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned, // } // return i; //} + // + //@IntrinsicCandidate + //private static int implEncodeAsciiArray(char[] sa, int sp, + // byte[] da, int dp, int len) { + // int i = 0; + // for (; i < len; i++) { + // char c = sa[sp++]; + // if (c >= '\u0080') + // break; + // da[dp++] = (byte)c; + // } + // return i; + //} void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, XMMRegister tmp1Reg, XMMRegister tmp2Reg, XMMRegister tmp3Reg, XMMRegister tmp4Reg, - Register tmp5, Register result) { + Register tmp5, Register result, bool ascii) { // rsi: src // rdi: dst @@ -5641,6 +5467,9 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, assert_different_registers(src, dst, len, tmp5, result); Label L_done, L_copy_1_char, L_copy_1_char_exit; + int mask = ascii ? 0xff80ff80 : 0xff00ff00; + int short_mask = ascii ? 
0xff80 : 0xff00; + // set result xorl(result, result); // check for zero length @@ -5660,7 +5489,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, if (UseAVX >= 2) { Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit; - movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vector + movl(tmp5, mask); // create mask to test for Unicode or non-ASCII chars in vector movdl(tmp1Reg, tmp5); vpbroadcastd(tmp1Reg, tmp1Reg, Assembler::AVX_256bit); jmp(L_chars_32_check); @@ -5669,7 +5498,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64)); vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32)); vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1); - vptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector + vptest(tmp2Reg, tmp1Reg); // check for Unicode or non-ASCII chars in vector jccb(Assembler::notZero, L_copy_32_chars_exit); vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1); vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector_len */ 1); @@ -5684,7 +5513,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, jccb(Assembler::greater, L_copy_16_chars_exit); } else if (UseSSE42Intrinsics) { - movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vector + movl(tmp5, mask); // create mask to test for Unicode or non-ASCII chars in vector movdl(tmp1Reg, tmp5); pshufd(tmp1Reg, tmp1Reg, 0); jmpb(L_chars_16_check); @@ -5708,7 +5537,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, movdqu(tmp4Reg, Address(src, len, Address::times_2, -16)); por(tmp2Reg, tmp4Reg); } - ptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector + ptest(tmp2Reg, tmp1Reg); // check for Unicode or non-ASCII chars in vector jccb(Assembler::notZero, L_copy_16_chars_exit); packuswb(tmp3Reg, tmp4Reg); } @@ -5746,7 +5575,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, bind(L_copy_1_char); load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0)); - testl(tmp5, 0xff00); // check if Unicode char + testl(tmp5, short_mask); // check if Unicode or non-ASCII char jccb(Assembler::notZero, L_copy_1_char_exit); movb(Address(dst, len, Address::times_1, 0), tmp5); addptr(len, 1); @@ -7080,7 +6909,7 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Regi // 128 bits per each of 4 parallel streams. movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 32)); - align(32); + align32(); BIND(L_fold_512b_loop); fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0); fold_128bit_crc32(xmm2, xmm0, xmm5, buf, 16); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 55ed32c969cb7f19567a7f5a7352d30999a6f263..9b9e85643b817369aaaedf2264ec9778c5d5d3ec 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -194,6 +194,8 @@ class MacroAssembler: public Assembler { void incrementq(AddressLiteral dst); // Alignment + void align32(); + void align64(); void align(int modulus); void align(int modulus, int target); @@ -356,8 +358,6 @@ class MacroAssembler: public Assembler { // stored using routines that take a jobject. 
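An aside on the encode_iso_array change a few hunks up (the header listing resumes below): the new ascii flag only swaps the rejection mask, and the whole intrinsic is the following scalar loop, vectorized. A minimal sketch, assuming plain standalone C++ (encode_chars is an illustrative name, not a HotSpot function):

#include <cstddef>
#include <cstdint>

// Copy 16-bit chars to bytes until one fails the charset test.
// 0xff80 rejects anything outside ASCII; 0xff00 rejects anything
// outside ISO-8859-1. Returns the number of chars encoded, which is
// what the intrinsic reports back in 'result'.
static size_t encode_chars(const uint16_t* src, uint8_t* dst,
                           size_t len, bool ascii) {
  const uint16_t short_mask = ascii ? 0xff80 : 0xff00;
  size_t i = 0;
  for (; i < len; i++) {
    if (src[i] & short_mask) break;  // first unencodable char
    dst[i] = (uint8_t)src[i];
  }
  return i;
}

The vector paths above test 16 or 32 chars at once against the broadcast mask and fall back to this one-char tail loop on the first hit.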
void store_heap_oop_null(Address dst); - void load_prototype_header(Register dst, Register src, Register tmp); - #ifdef _LP64 void store_klass_gap(Register dst, Register src); @@ -673,21 +673,6 @@ class MacroAssembler: public Assembler { void verify_tlab(); - // Biased locking support - // lock_reg and obj_reg must be loaded up with the appropriate values. - // swap_reg must be rax, and is killed. - // tmp_reg is optional. If it is supplied (i.e., != noreg) it will - // be killed; if not supplied, push/pop will be used internally to - // allocate a temporary (inefficient, avoid if possible). - // Optional slow case is for implementations (interpreter and C1) which branch to - // slow case directly. Leaves condition codes set for C2's Fast_Lock node. - void biased_locking_enter(Register lock_reg, Register obj_reg, - Register swap_reg, Register tmp_reg, - Register tmp_reg2, bool swap_reg_contains_mark, - Label& done, Label* slow_case = NULL, - BiasedLockingCounters* counters = NULL); - void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); - Condition negate_condition(Condition cond); // Instructions that use AddressLiteral operands. These instruction can handle 32bit/64bit @@ -962,12 +947,19 @@ private: void roundDec(XMMRegister key, int rnum); void lastroundDec(XMMRegister key, int rnum); void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask); - + void gfmul_avx512(XMMRegister ghash, XMMRegister hkey); + void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl); + void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx, + XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction, + XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos, + bool final_reduction, int index, XMMRegister counter_inc_mask); public: void aesecb_encrypt(Register source_addr, Register dest_addr, Register key, Register len); void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len); void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter, Register len_reg, Register used, Register used_addr, Register saved_encCounter_start); + void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key, + Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter); #endif @@ -1734,7 +1726,7 @@ public: void encode_iso_array(Register src, Register dst, Register len, XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3, - XMMRegister tmp4, Register tmp5, Register result); + XMMRegister tmp4, Register tmp5, Register result, bool ascii); #ifdef _LP64 void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_adler.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_adler.cpp index 2c1904c391a2efc53814336d61faa01eba8e2382..12e0e5ee7d3b2db8682610df6bd41bb9d4ddfa5b 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86_adler.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86_adler.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2021, Intel Corporation. +* Copyright (c) 2021, Intel Corporation. All rights reserved. * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -80,7 +80,7 @@ void MacroAssembler::updateBytesAdler32(Register init_d, Register data, Register cmpptr(data, end); jcc(Assembler::aboveEqual, SKIP_LOOP_1A); - align(32); + align32(); bind(SLOOP1A); vbroadcastf128(ydata, Address(data, 0), Assembler::AVX_256bit); addptr(data, CHUNKSIZE); @@ -178,7 +178,7 @@ void MacroAssembler::updateBytesAdler32(Register init_d, Register data, Register movdl(rax, xb); addl(b_d, rax); - align(32); + align32(); bind(FINAL_LOOP); movzbl(rax, Address(data, 0)); //movzx eax, byte[data] addl(a_d, rax); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_aes.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_aes.cpp index 778dd1a1b7e390dc76534503e563d174969ec408..776eebaa684a67859dd7646e6a0b720129b023c3 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86_aes.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86_aes.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2019, Intel Corporation. +* Copyright (c) 2019, 2021, Intel Corporation. All rights reserved. * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -1267,4 +1267,626 @@ void MacroAssembler::aesctr_encrypt(Register src_addr, Register dest_addr, Regis bind(EXIT); } +void MacroAssembler::gfmul_avx512(XMMRegister GH, XMMRegister HK) { + const XMMRegister TMP1 = xmm0; + const XMMRegister TMP2 = xmm1; + const XMMRegister TMP3 = xmm2; + + evpclmulqdq(TMP1, GH, HK, 0x11, Assembler::AVX_512bit); + evpclmulqdq(TMP2, GH, HK, 0x00, Assembler::AVX_512bit); + evpclmulqdq(TMP3, GH, HK, 0x01, Assembler::AVX_512bit); + evpclmulqdq(GH, GH, HK, 0x10, Assembler::AVX_512bit); + evpxorq(GH, GH, TMP3, Assembler::AVX_512bit); + vpsrldq(TMP3, GH, 8, Assembler::AVX_512bit); + vpslldq(GH, GH, 8, Assembler::AVX_512bit); + evpxorq(TMP1, TMP1, TMP3, Assembler::AVX_512bit); + evpxorq(GH, GH, TMP2, Assembler::AVX_512bit); + + evmovdquq(TMP3, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr()), Assembler::AVX_512bit, r15); + evpclmulqdq(TMP2, TMP3, GH, 0x01, Assembler::AVX_512bit); + vpslldq(TMP2, TMP2, 8, Assembler::AVX_512bit); + evpxorq(GH, GH, TMP2, Assembler::AVX_512bit); + evpclmulqdq(TMP2, TMP3, GH, 0x00, Assembler::AVX_512bit); + vpsrldq(TMP2, TMP2, 4, Assembler::AVX_512bit); + evpclmulqdq(GH, TMP3, GH, 0x10, Assembler::AVX_512bit); + vpslldq(GH, GH, 4, Assembler::AVX_512bit); + vpternlogq(GH, 0x96, TMP1, TMP2, Assembler::AVX_512bit); +} + +void MacroAssembler::generateHtbl_48_block_zmm(Register htbl, Register avx512_htbl) { + const XMMRegister HK = xmm6; + const XMMRegister ZT5 = xmm4; + const XMMRegister ZT7 = xmm7; + const XMMRegister ZT8 = xmm8; + + Label GFMUL_AVX512; + + movdqu(HK, Address(htbl, 0)); + movdqu(xmm10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); + vpshufb(HK, HK, xmm10, Assembler::AVX_128bit); + + movdqu(xmm11, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr() + 64)); // Poly + movdqu(xmm12, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr() + 80)); // Twoone + // Compute H ^ 2 from the input subkeyH + movdqu(xmm2, xmm6); + vpsllq(xmm6, xmm6, 1, Assembler::AVX_128bit); + vpsrlq(xmm2, xmm2, 63, Assembler::AVX_128bit); + movdqu(xmm1, xmm2); + vpslldq(xmm2, xmm2, 8, Assembler::AVX_128bit); + vpsrldq(xmm1, xmm1, 8, Assembler::AVX_128bit); + vpor(xmm6, xmm6, xmm2, Assembler::AVX_128bit); + + vpshufd(xmm2, xmm1, 0x24, Assembler::AVX_128bit); + vpcmpeqd(xmm2, xmm2, xmm12, AVX_128bit); + vpand(xmm2, xmm2, xmm11, Assembler::AVX_128bit); + vpxor(xmm6, xmm6, xmm2, Assembler::AVX_128bit); + movdqu(Address(avx512_htbl, 16 * 47), xmm6); // H ^ 2 + // Compute the 
remaining three powers of H using XMM registers and all following powers using ZMM + movdqu(ZT5, HK); + vinserti32x4(ZT7, ZT7, HK, 3); + + gfmul_avx512(ZT5, HK); + movdqu(Address(avx512_htbl, 16 * 46), ZT5); // H ^ 2 * 2 + vinserti32x4(ZT7, ZT7, ZT5, 2); + + gfmul_avx512(ZT5, HK); + movdqu(Address(avx512_htbl, 16 * 45), ZT5); // H ^ 2 * 3 + vinserti32x4(ZT7, ZT7, ZT5, 1); + + gfmul_avx512(ZT5, HK); + movdqu(Address(avx512_htbl, 16 * 44), ZT5); // H ^ 2 * 4 + vinserti32x4(ZT7, ZT7, ZT5, 0); + + evshufi64x2(ZT5, ZT5, ZT5, 0x00, Assembler::AVX_512bit); + evmovdquq(ZT8, ZT7, Assembler::AVX_512bit); + gfmul_avx512(ZT7, ZT5); + evmovdquq(Address(avx512_htbl, 16 * 40), ZT7, Assembler::AVX_512bit); + evshufi64x2(ZT5, ZT7, ZT7, 0x00, Assembler::AVX_512bit); + gfmul_avx512(ZT8, ZT5); + evmovdquq(Address(avx512_htbl, 16 * 36), ZT8, Assembler::AVX_512bit); + gfmul_avx512(ZT7, ZT5); + evmovdquq(Address(avx512_htbl, 16 * 32), ZT7, Assembler::AVX_512bit); + gfmul_avx512(ZT8, ZT5); + evmovdquq(Address(avx512_htbl, 16 * 28), ZT8, Assembler::AVX_512bit); + gfmul_avx512(ZT7, ZT5); + evmovdquq(Address(avx512_htbl, 16 * 24), ZT7, Assembler::AVX_512bit); + gfmul_avx512(ZT8, ZT5); + evmovdquq(Address(avx512_htbl, 16 * 20), ZT8, Assembler::AVX_512bit); + gfmul_avx512(ZT7, ZT5); + evmovdquq(Address(avx512_htbl, 16 * 16), ZT7, Assembler::AVX_512bit); + gfmul_avx512(ZT8, ZT5); + evmovdquq(Address(avx512_htbl, 16 * 12), ZT8, Assembler::AVX_512bit); + gfmul_avx512(ZT7, ZT5); + evmovdquq(Address(avx512_htbl, 16 * 8), ZT7, Assembler::AVX_512bit); + gfmul_avx512(ZT8, ZT5); + evmovdquq(Address(avx512_htbl, 16 * 4), ZT8, Assembler::AVX_512bit); + gfmul_avx512(ZT7, ZT5); + evmovdquq(Address(avx512_htbl, 16 * 0), ZT7, Assembler::AVX_512bit); + ret(0); +} + +#define vclmul_reduce(out, poly, hi128, lo128, tmp0, tmp1) \ +evpclmulqdq(tmp0, poly, lo128, 0x01, Assembler::AVX_512bit); \ +vpslldq(tmp0, tmp0, 8, Assembler::AVX_512bit); \ +evpxorq(tmp0, lo128, tmp0, Assembler::AVX_512bit); \ +evpclmulqdq(tmp1, poly, tmp0, 0x00, Assembler::AVX_512bit); \ +vpsrldq(tmp1, tmp1, 4, Assembler::AVX_512bit); \ +evpclmulqdq(out, poly, tmp0, 0x10, Assembler::AVX_512bit); \ +vpslldq(out, out, 4, Assembler::AVX_512bit); \ +vpternlogq(out, 0x96, tmp1, hi128, Assembler::AVX_512bit); \ + +#define vhpxori4x128(reg, tmp) \ +vextracti64x4(tmp, reg, 1); \ +evpxorq(reg, reg, tmp, Assembler::AVX_256bit); \ +vextracti32x4(tmp, reg, 1); \ +evpxorq(reg, reg, tmp, Assembler::AVX_128bit); \ + +#define roundEncode(key, dst1, dst2, dst3, dst4) \ +vaesenc(dst1, dst1, key, Assembler::AVX_512bit); \ +vaesenc(dst2, dst2, key, Assembler::AVX_512bit); \ +vaesenc(dst3, dst3, key, Assembler::AVX_512bit); \ +vaesenc(dst4, dst4, key, Assembler::AVX_512bit); \ + +#define lastroundEncode(key, dst1, dst2, dst3, dst4) \ +vaesenclast(dst1, dst1, key, Assembler::AVX_512bit); \ +vaesenclast(dst2, dst2, key, Assembler::AVX_512bit); \ +vaesenclast(dst3, dst3, key, Assembler::AVX_512bit); \ +vaesenclast(dst4, dst4, key, Assembler::AVX_512bit); \ + +#define storeData(dst, position, src1, src2, src3, src4) \ +evmovdquq(Address(dst, position, Address::times_1, 0 * 64), src1, Assembler::AVX_512bit); \ +evmovdquq(Address(dst, position, Address::times_1, 1 * 64), src2, Assembler::AVX_512bit); \ +evmovdquq(Address(dst, position, Address::times_1, 2 * 64), src3, Assembler::AVX_512bit); \ +evmovdquq(Address(dst, position, Address::times_1, 3 * 64), src4, Assembler::AVX_512bit); \ + +#define loadData(src, position, dst1, dst2, dst3, dst4) \ +evmovdquq(dst1, Address(src, position, 
Address::times_1, 0 * 64), Assembler::AVX_512bit); \ +evmovdquq(dst2, Address(src, position, Address::times_1, 1 * 64), Assembler::AVX_512bit); \ +evmovdquq(dst3, Address(src, position, Address::times_1, 2 * 64), Assembler::AVX_512bit); \ +evmovdquq(dst4, Address(src, position, Address::times_1, 3 * 64), Assembler::AVX_512bit); \ + +#define carrylessMultiply(dst00, dst01, dst10, dst11, ghdata, hkey) \ +evpclmulqdq(dst00, ghdata, hkey, 0x00, Assembler::AVX_512bit); \ +evpclmulqdq(dst01, ghdata, hkey, 0x01, Assembler::AVX_512bit); \ +evpclmulqdq(dst10, ghdata, hkey, 0x10, Assembler::AVX_512bit); \ +evpclmulqdq(dst11, ghdata, hkey, 0x11, Assembler::AVX_512bit); \ + +#define shuffleExorRnd1Key(dst0, dst1, dst2, dst3, shufmask, rndkey) \ +vpshufb(dst0, dst0, shufmask, Assembler::AVX_512bit); \ +evpxorq(dst0, dst0, rndkey, Assembler::AVX_512bit); \ +vpshufb(dst1, dst1, shufmask, Assembler::AVX_512bit); \ +evpxorq(dst1, dst1, rndkey, Assembler::AVX_512bit); \ +vpshufb(dst2, dst2, shufmask, Assembler::AVX_512bit); \ +evpxorq(dst2, dst2, rndkey, Assembler::AVX_512bit); \ +vpshufb(dst3, dst3, shufmask, Assembler::AVX_512bit); \ +evpxorq(dst3, dst3, rndkey, Assembler::AVX_512bit); \ + +#define xorBeforeStore(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \ +evpxorq(dst0, dst0, src0, Assembler::AVX_512bit); \ +evpxorq(dst1, dst1, src1, Assembler::AVX_512bit); \ +evpxorq(dst2, dst2, src2, Assembler::AVX_512bit); \ +evpxorq(dst3, dst3, src3, Assembler::AVX_512bit); \ + +#define xorGHASH(dst0, dst1, dst2, dst3, src02, src03, src12, src13, src22, src23, src32, src33) \ +vpternlogq(dst0, 0x96, src02, src03, Assembler::AVX_512bit); \ +vpternlogq(dst1, 0x96, src12, src13, Assembler::AVX_512bit); \ +vpternlogq(dst2, 0x96, src22, src23, Assembler::AVX_512bit); \ +vpternlogq(dst3, 0x96, src32, src33, Assembler::AVX_512bit); \ + +void MacroAssembler::ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx, XMMRegister aad_hashx, + Register in, Register out, Register data, Register pos, bool first_time_reduction, XMMRegister addmask, bool ghash_input, Register rounds, + Register ghash_pos, bool final_reduction, int i, XMMRegister counter_inc_mask) { + + Label AES_192, AES_256, LAST_AES_RND; + const XMMRegister ZTMP0 = xmm0; + const XMMRegister ZTMP1 = xmm3; + const XMMRegister ZTMP2 = xmm4; + const XMMRegister ZTMP3 = xmm5; + const XMMRegister ZTMP5 = xmm7; + const XMMRegister ZTMP6 = xmm10; + const XMMRegister ZTMP7 = xmm11; + const XMMRegister ZTMP8 = xmm12; + const XMMRegister ZTMP9 = xmm13; + const XMMRegister ZTMP10 = xmm15; + const XMMRegister ZTMP11 = xmm16; + const XMMRegister ZTMP12 = xmm17; + + const XMMRegister ZTMP13 = xmm19; + const XMMRegister ZTMP14 = xmm20; + const XMMRegister ZTMP15 = xmm21; + const XMMRegister ZTMP16 = xmm30; + const XMMRegister ZTMP17 = xmm31; + const XMMRegister ZTMP18 = xmm1; + const XMMRegister ZTMP19 = xmm2; + const XMMRegister ZTMP20 = xmm8; + const XMMRegister ZTMP21 = xmm22; + const XMMRegister ZTMP22 = xmm23; + + // Pre increment counters + vpaddd(ZTMP0, ctr_blockx, counter_inc_mask, Assembler::AVX_512bit); + vpaddd(ZTMP1, ZTMP0, counter_inc_mask, Assembler::AVX_512bit); + vpaddd(ZTMP2, ZTMP1, counter_inc_mask, Assembler::AVX_512bit); + vpaddd(ZTMP3, ZTMP2, counter_inc_mask, Assembler::AVX_512bit); + // Save counter value + evmovdquq(ctr_blockx, ZTMP3, Assembler::AVX_512bit); + + // Reuse ZTMP17 / ZTMP18 for loading AES Keys + // Pre-load AES round keys + ev_load_key(ZTMP17, key, 0, xmm29); + ev_load_key(ZTMP18, key, 1 * 16, xmm29); + + // 
ZTMP19 & ZTMP20 used for loading hash key + // Pre-load hash key + evmovdquq(ZTMP19, Address(subkeyHtbl, i * 64), Assembler::AVX_512bit); + evmovdquq(ZTMP20, Address(subkeyHtbl, ++i * 64), Assembler::AVX_512bit); + // Load data for computing ghash + evmovdquq(ZTMP21, Address(data, ghash_pos, Address::times_1, 0 * 64), Assembler::AVX_512bit); + vpshufb(ZTMP21, ZTMP21, xmm24, Assembler::AVX_512bit); + + // Xor cipher block 0 with input ghash, if available + if (ghash_input) { + evpxorq(ZTMP21, ZTMP21, aad_hashx, Assembler::AVX_512bit); + } + // Load data for computing ghash + evmovdquq(ZTMP22, Address(data, ghash_pos, Address::times_1, 1 * 64), Assembler::AVX_512bit); + vpshufb(ZTMP22, ZTMP22, xmm24, Assembler::AVX_512bit); + + // stitch AES rounds with GHASH + // AES round 0, xmm24 has shuffle mask + shuffleExorRnd1Key(ZTMP0, ZTMP1, ZTMP2, ZTMP3, xmm24, ZTMP17); + // Reuse ZTMP17 / ZTMP18 for loading remaining AES Keys + ev_load_key(ZTMP17, key, 2 * 16, xmm29); + // GHASH 4 blocks + carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP21, ZTMP19); + // Load the next hkey and Ghash data + evmovdquq(ZTMP19, Address(subkeyHtbl, ++i * 64), Assembler::AVX_512bit); + evmovdquq(ZTMP21, Address(data, ghash_pos, Address::times_1, 2 * 64), Assembler::AVX_512bit); + vpshufb(ZTMP21, ZTMP21, xmm24, Assembler::AVX_512bit); + + // AES round 1 + roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3); + ev_load_key(ZTMP18, key, 3 * 16, xmm29); + + // GHASH 4 blocks(11 to 8) + carrylessMultiply(ZTMP10, ZTMP12, ZTMP11, ZTMP9, ZTMP22, ZTMP20); + // Load the next hkey and GDATA + evmovdquq(ZTMP20, Address(subkeyHtbl, ++i * 64), Assembler::AVX_512bit); + evmovdquq(ZTMP22, Address(data, ghash_pos, Address::times_1, 3 * 64), Assembler::AVX_512bit); + vpshufb(ZTMP22, ZTMP22, xmm24, Assembler::AVX_512bit); + + // AES round 2 + roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3); + ev_load_key(ZTMP17, key, 4 * 16, xmm29); + + // GHASH 4 blocks(7 to 4) + carrylessMultiply(ZTMP14, ZTMP16, ZTMP15, ZTMP13, ZTMP21, ZTMP19); + // AES rounds 3 + roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3); + ev_load_key(ZTMP18, key, 5 * 16, xmm29); + + // Gather(XOR) GHASH for 12 blocks + xorGHASH(ZTMP5, ZTMP6, ZTMP8, ZTMP7, ZTMP9, ZTMP13, ZTMP10, ZTMP14, ZTMP12, ZTMP16, ZTMP11, ZTMP15); + + // AES rounds 4 + roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3); + ev_load_key(ZTMP17, key, 6 * 16, xmm29); + + // load plain / cipher text(recycle registers) + loadData(in, pos, ZTMP13, ZTMP14, ZTMP15, ZTMP16); + + // AES rounds 5 + roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3); + ev_load_key(ZTMP18, key, 7 * 16, xmm29); + // GHASH 4 blocks(3 to 0) + carrylessMultiply(ZTMP10, ZTMP12, ZTMP11, ZTMP9, ZTMP22, ZTMP20); + + // AES round 6 + roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3); + ev_load_key(ZTMP17, key, 8 * 16, xmm29); + + // gather GHASH in ZTMP6(low) and ZTMP5(high) + if (first_time_reduction) { + vpternlogq(ZTMP7, 0x96, ZTMP8, ZTMP12, Assembler::AVX_512bit); + evpxorq(xmm25, ZTMP7, ZTMP11, Assembler::AVX_512bit); + evpxorq(xmm27, ZTMP5, ZTMP9, Assembler::AVX_512bit); + evpxorq(xmm26, ZTMP6, ZTMP10, Assembler::AVX_512bit); + } + else if (!first_time_reduction && !final_reduction) { + xorGHASH(ZTMP7, xmm25, xmm27, xmm26, ZTMP8, ZTMP12, ZTMP7, ZTMP11, ZTMP5, ZTMP9, ZTMP6, ZTMP10); + } + + if (final_reduction) { + // Phase one: Add mid products together + // Also load polynomial constant for reduction + vpternlogq(ZTMP7, 0x96, ZTMP8, ZTMP12, Assembler::AVX_512bit); + vpternlogq(ZTMP7, 0x96, xmm25, ZTMP11, Assembler::AVX_512bit); + vpsrldq(ZTMP11, 
ZTMP7, 8, Assembler::AVX_512bit); + vpslldq(ZTMP7, ZTMP7, 8, Assembler::AVX_512bit); + evmovdquq(ZTMP12, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr()), Assembler::AVX_512bit, rbx); + } + // AES round 7 + roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3); + ev_load_key(ZTMP18, key, 9 * 16, xmm29); + if (final_reduction) { + vpternlogq(ZTMP5, 0x96, ZTMP9, ZTMP11, Assembler::AVX_512bit); + evpxorq(ZTMP5, ZTMP5, xmm27, Assembler::AVX_512bit); + vpternlogq(ZTMP6, 0x96, ZTMP10, ZTMP7, Assembler::AVX_512bit); + evpxorq(ZTMP6, ZTMP6, xmm26, Assembler::AVX_512bit); + } + // AES round 8 + roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3); + ev_load_key(ZTMP17, key, 10 * 16, xmm29); + + // Horizontal xor of low and high 4*128 + if (final_reduction) { + vhpxori4x128(ZTMP5, ZTMP9); + vhpxori4x128(ZTMP6, ZTMP10); + } + // AES round 9 + roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3); + // First phase of reduction + if (final_reduction) { + evpclmulqdq(ZTMP10, ZTMP12, ZTMP6, 0x01, Assembler::AVX_128bit); + vpslldq(ZTMP10, ZTMP10, 8, Assembler::AVX_128bit); + evpxorq(ZTMP10, ZTMP6, ZTMP10, Assembler::AVX_128bit); + } + cmpl(rounds, 52); + jcc(Assembler::greaterEqual, AES_192); + jmp(LAST_AES_RND); + // AES rounds upto 11 (AES192) or 13 (AES256) + bind(AES_192); + roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3); + ev_load_key(ZTMP18, key, 11 * 16, xmm29); + roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3); + ev_load_key(ZTMP17, key, 12 * 16, xmm29); + cmpl(rounds, 60); + jcc(Assembler::aboveEqual, AES_256); + jmp(LAST_AES_RND); + + bind(AES_256); + roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3); + ev_load_key(ZTMP18, key, 13 * 16, xmm29); + roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3); + ev_load_key(ZTMP17, key, 14 * 16, xmm29); + + bind(LAST_AES_RND); + // Second phase of reduction + if (final_reduction) { + evpclmulqdq(ZTMP9, ZTMP12, ZTMP10, 0x00, Assembler::AVX_128bit); + vpsrldq(ZTMP9, ZTMP9, 4, Assembler::AVX_128bit); // Shift-R 1-DW to obtain 2-DWs shift-R + evpclmulqdq(ZTMP11, ZTMP12, ZTMP10, 0x10, Assembler::AVX_128bit); + vpslldq(ZTMP11, ZTMP11, 4, Assembler::AVX_128bit); // Shift-L 1-DW for result + // ZTMP5 = ZTMP5 X ZTMP11 X ZTMP9 + vpternlogq(ZTMP5, 0x96, ZTMP11, ZTMP9, Assembler::AVX_128bit); + } + // Last AES round + lastroundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3); + // XOR against plain / cipher text + xorBeforeStore(ZTMP0, ZTMP1, ZTMP2, ZTMP3, ZTMP13, ZTMP14, ZTMP15, ZTMP16); + // store cipher / plain text + storeData(out, pos, ZTMP0, ZTMP1, ZTMP2, ZTMP3); +} + +void MacroAssembler::aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key, + Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter) { + Label ENC_DEC_DONE, GENERATE_HTBL_48_BLKS, AES_192, AES_256, STORE_CT, GHASH_LAST_32, + AES_32_BLOCKS, GHASH_AES_PARALLEL, LOOP, ACCUMULATE, GHASH_16_AES_16; + const XMMRegister CTR_BLOCKx = xmm9; + const XMMRegister AAD_HASHx = xmm14; + const Register pos = rax; + const Register rounds = r15; + Register ghash_pos; +#ifndef _WIN64 + ghash_pos = r14; +#else + ghash_pos = r11; +#endif // !_WIN64 + const XMMRegister ZTMP0 = xmm0; + const XMMRegister ZTMP1 = xmm3; + const XMMRegister ZTMP2 = xmm4; + const XMMRegister ZTMP3 = xmm5; + const XMMRegister ZTMP4 = xmm6; + const XMMRegister ZTMP5 = xmm7; + const XMMRegister ZTMP6 = xmm10; + const XMMRegister ZTMP7 = xmm11; + const XMMRegister ZTMP8 = xmm12; + const XMMRegister ZTMP9 = xmm13; + const XMMRegister ZTMP10 = xmm15; + const XMMRegister ZTMP11 = xmm16; + const XMMRegister 
ZTMP12 = xmm17;
+  const XMMRegister ZTMP13 = xmm19;
+  const XMMRegister ZTMP14 = xmm20;
+  const XMMRegister ZTMP15 = xmm21;
+  const XMMRegister ZTMP16 = xmm30;
+  const XMMRegister COUNTER_INC_MASK = xmm18;
+
+  movl(pos, 0); // Total length processed
+  // Min data size processed = 768 bytes
+  cmpl(len, 768);
+  jcc(Assembler::less, ENC_DEC_DONE);
+
+  // Generate 48 constants for htbl
+  call(GENERATE_HTBL_48_BLKS, relocInfo::none);
+  int index = 0; // Index for choosing subkeyHtbl entry
+  movl(ghash_pos, 0); // Pointer for ghash read and store operations
+
+  // Move initial counter value and STATE value into variables
+  movdqu(CTR_BLOCKx, Address(counter, 0));
+  movdqu(AAD_HASHx, Address(state, 0));
+  // Load lswap mask for ghash
+  movdqu(xmm24, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()), rbx);
+  // Shuffle input state using lswap mask
+  vpshufb(AAD_HASHx, AAD_HASHx, xmm24, Assembler::AVX_128bit);
+
+  // Compute #rounds for AES based on the length of the key array
+  movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+  // Broadcast counter value to 512 bit register
+  evshufi64x2(CTR_BLOCKx, CTR_BLOCKx, CTR_BLOCKx, 0, Assembler::AVX_512bit);
+  // Load counter shuffle mask
+  evmovdquq(xmm24, ExternalAddress(StubRoutines::x86::counter_mask_addr()), Assembler::AVX_512bit, rbx);
+  // Shuffle counter
+  vpshufb(CTR_BLOCKx, CTR_BLOCKx, xmm24, Assembler::AVX_512bit);
+
+  // Load mask for incrementing counter
+  evmovdquq(COUNTER_INC_MASK, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, rbx);
+  // Pre-increment counter
+  vpaddd(ZTMP5, CTR_BLOCKx, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 64), Assembler::AVX_512bit, rbx);
+  vpaddd(ZTMP6, ZTMP5, COUNTER_INC_MASK, Assembler::AVX_512bit);
+  vpaddd(ZTMP7, ZTMP6, COUNTER_INC_MASK, Assembler::AVX_512bit);
+  vpaddd(ZTMP8, ZTMP7, COUNTER_INC_MASK, Assembler::AVX_512bit);
+
+  // Begin 32 blocks of AES processing
+  bind(AES_32_BLOCKS);
+  // Save incremented counter before overwriting it with AES data
+  evmovdquq(CTR_BLOCKx, ZTMP8, Assembler::AVX_512bit);
+
+  // Move 256 bytes of data
+  loadData(in, pos, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
+  // Load key shuffle mask
+  movdqu(xmm29, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()), rbx);
+  // Load 0th AES round key
+  ev_load_key(ZTMP4, key, 0, xmm29);
+  // AES-ROUND0, xmm24 has the shuffle mask
+  shuffleExorRnd1Key(ZTMP5, ZTMP6, ZTMP7, ZTMP8, xmm24, ZTMP4);
+
+  for (int j = 1; j < 10; j++) {
+    ev_load_key(ZTMP4, key, j * 16, xmm29);
+    roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
+  }
+  ev_load_key(ZTMP4, key, 10 * 16, xmm29);
+  // AES rounds up to 11 (AES192) or 13 (AES256)
+  cmpl(rounds, 52);
+  jcc(Assembler::greaterEqual, AES_192);
+  lastroundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
+  jmp(STORE_CT);
+
+  bind(AES_192);
+  roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
+  ev_load_key(ZTMP4, key, 11 * 16, xmm29);
+  roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
+  cmpl(rounds, 60);
+  jcc(Assembler::aboveEqual, AES_256);
+  ev_load_key(ZTMP4, key, 12 * 16, xmm29);
+  lastroundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
+  jmp(STORE_CT);
+
+  bind(AES_256);
+  ev_load_key(ZTMP4, key, 12 * 16, xmm29);
+  roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
+  ev_load_key(ZTMP4, key, 13 * 16, xmm29);
+  roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
+  ev_load_key(ZTMP4, key, 14 * 16, xmm29);
+  // Last AES round
+  lastroundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
+
+  bind(STORE_CT);
+  // Xor the encrypted key with PT to obtain CT
+  xorBeforeStore(ZTMP5, ZTMP6, ZTMP7, ZTMP8, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
+  storeData(out, pos, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
+  // 16 blocks encryption completed
+  addl(pos, 256);
+  cmpl(pos, 512);
+  jcc(Assembler::aboveEqual, GHASH_AES_PARALLEL);
+  vpaddd(ZTMP5, CTR_BLOCKx, COUNTER_INC_MASK, Assembler::AVX_512bit);
+  vpaddd(ZTMP6, ZTMP5, COUNTER_INC_MASK, Assembler::AVX_512bit);
+  vpaddd(ZTMP7, ZTMP6, COUNTER_INC_MASK, Assembler::AVX_512bit);
+  vpaddd(ZTMP8, ZTMP7, COUNTER_INC_MASK, Assembler::AVX_512bit);
+  jmp(AES_32_BLOCKS);
+
+  bind(GHASH_AES_PARALLEL);
+  // ghash16_encrypt16_parallel is called with one of three reduction modes:
+  // 1) First time -> cipher xor input ghash
+  // 2) No reduction -> accumulate multiplication values
+  // 3) Final reduction post 48 blocks -> new ghash value is computed for the next round
+  // Reduction value = first time
+  ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, true, xmm24, true, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
+  addl(pos, 256);
+  addl(ghash_pos, 256);
+  index += 4;
+
+  // At this point we have processed 768 bytes of AES and 256 bytes of GHASH.
+  // If the remaining length is less than 768, process the remaining 512 bytes of ghash in the GHASH_LAST_32 code
+  subl(len, 768);
+  cmpl(len, 768);
+  jcc(Assembler::less, GHASH_LAST_32);
+
+  // AES 16 blocks and GHASH 16 blocks in parallel
+  // For multiples of 48 blocks we will do ghash16_encrypt16 interleaved multiple times
+  // Reduction value = no reduction means that the carryless multiplication values are accumulated for further calculations
+  // Each call uses 4 subkeyHtbl values, so increment the index by 4.
+  bind(GHASH_16_AES_16);
+  // Reduction value = no reduction
+  ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, false, xmm24, false, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
+  addl(pos, 256);
+  addl(ghash_pos, 256);
+  index += 4;
+  // Reduction value = final reduction means that the accumulated values have to be reduced as we have completed 48 blocks of ghash
+  ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, false, xmm24, false, rounds, ghash_pos, true, index, COUNTER_INC_MASK);
+  addl(pos, 256);
+  addl(ghash_pos, 256);
+  // The calculated ghash value needs to be moved to AAD_HASHx so that we can restart the ghash16-aes16 pipeline
+  movdqu(AAD_HASHx, ZTMP5);
+  index = 0; // Reset subkeyHtbl index
+
+  // Restart the pipeline
+  // Reduction value = first time
+  ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, true, xmm24, true, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
+  addl(pos, 256);
+  addl(ghash_pos, 256);
+  index += 4;
+
+  subl(len, 768);
+  cmpl(len, 768);
+  jcc(Assembler::greaterEqual, GHASH_16_AES_16);
+
+  // GHASH last 32 blocks processed here
+  // GHASH products accumulated in ZMM27, ZMM25 and ZMM26 during the GHASH16-AES16 operation are used
+  bind(GHASH_LAST_32);
+  // Use rbx as a pointer to the htbl; for the last 32 blocks of GHASH, use key# 4-11 entry in subkeyHtbl
+  movl(rbx, 256);
+  // Load cipher blocks
+  evmovdquq(ZTMP13, Address(ct, ghash_pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
+  evmovdquq(ZTMP14, Address(ct, ghash_pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
+  vpshufb(ZTMP13, ZTMP13, xmm24, Assembler::AVX_512bit);
+  vpshufb(ZTMP14, ZTMP14, xmm24, Assembler::AVX_512bit);
+  // Load ghash keys
+  evmovdquq(ZTMP15,
Address(avx512_subkeyHtbl, rbx, Address::times_1, 0 * 64), Assembler::AVX_512bit); + evmovdquq(ZTMP16, Address(avx512_subkeyHtbl, rbx, Address::times_1, 1 * 64), Assembler::AVX_512bit); + + // Ghash blocks 0 - 3 + carrylessMultiply(ZTMP2, ZTMP3, ZTMP4, ZTMP1, ZTMP13, ZTMP15); + // Ghash blocks 4 - 7 + carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP14, ZTMP16); + + vpternlogq(ZTMP1, 0x96, ZTMP5, xmm27, Assembler::AVX_512bit); // ZTMP1 = ZTMP1 + ZTMP5 + zmm27 + vpternlogq(ZTMP2, 0x96, ZTMP6, xmm26, Assembler::AVX_512bit); // ZTMP2 = ZTMP2 + ZTMP6 + zmm26 + vpternlogq(ZTMP3, 0x96, ZTMP7, xmm25, Assembler::AVX_512bit); // ZTMP3 = ZTMP3 + ZTMP7 + zmm25 + evpxorq(ZTMP4, ZTMP4, ZTMP8, Assembler::AVX_512bit); // ZTMP4 = ZTMP4 + ZTMP8 + + addl(ghash_pos, 128); + addl(rbx, 128); + + // Ghash remaining blocks + bind(LOOP); + cmpl(ghash_pos, pos); + jcc(Assembler::aboveEqual, ACCUMULATE); + // Load next cipher blocks and corresponding ghash keys + evmovdquq(ZTMP13, Address(ct, ghash_pos, Address::times_1, 0 * 64), Assembler::AVX_512bit); + evmovdquq(ZTMP14, Address(ct, ghash_pos, Address::times_1, 1 * 64), Assembler::AVX_512bit); + vpshufb(ZTMP13, ZTMP13, xmm24, Assembler::AVX_512bit); + vpshufb(ZTMP14, ZTMP14, xmm24, Assembler::AVX_512bit); + evmovdquq(ZTMP15, Address(avx512_subkeyHtbl, rbx, Address::times_1, 0 * 64), Assembler::AVX_512bit); + evmovdquq(ZTMP16, Address(avx512_subkeyHtbl, rbx, Address::times_1, 1 * 64), Assembler::AVX_512bit); + + // ghash blocks 0 - 3 + carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP13, ZTMP15); + + // ghash blocks 4 - 7 + carrylessMultiply(ZTMP10, ZTMP11, ZTMP12, ZTMP9, ZTMP14, ZTMP16); + + // update sums + // ZTMP1 = ZTMP1 + ZTMP5 + ZTMP9 + // ZTMP2 = ZTMP2 + ZTMP6 + ZTMP10 + // ZTMP3 = ZTMP3 + ZTMP7 xor ZTMP11 + // ZTMP4 = ZTMP4 + ZTMP8 xor ZTMP12 + xorGHASH(ZTMP1, ZTMP2, ZTMP3, ZTMP4, ZTMP5, ZTMP9, ZTMP6, ZTMP10, ZTMP7, ZTMP11, ZTMP8, ZTMP12); + addl(ghash_pos, 128); + addl(rbx, 128); + jmp(LOOP); + + // Integrate ZTMP3/ZTMP4 into ZTMP1 and ZTMP2 + bind(ACCUMULATE); + evpxorq(ZTMP3, ZTMP3, ZTMP4, Assembler::AVX_512bit); + vpsrldq(ZTMP7, ZTMP3, 8, Assembler::AVX_512bit); + vpslldq(ZTMP8, ZTMP3, 8, Assembler::AVX_512bit); + evpxorq(ZTMP1, ZTMP1, ZTMP7, Assembler::AVX_512bit); + evpxorq(ZTMP2, ZTMP2, ZTMP8, Assembler::AVX_512bit); + + // Add ZTMP1 and ZTMP2 128 - bit words horizontally + vhpxori4x128(ZTMP1, ZTMP11); + vhpxori4x128(ZTMP2, ZTMP12); + // Load reduction polynomial and compute final reduction + evmovdquq(ZTMP15, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr()), Assembler::AVX_512bit, rbx); + vclmul_reduce(AAD_HASHx, ZTMP15, ZTMP1, ZTMP2, ZTMP3, ZTMP4); + + // Pre-increment counter for next operation + vpaddd(CTR_BLOCKx, CTR_BLOCKx, xmm18, Assembler::AVX_128bit); + // Shuffle counter and save the updated value + vpshufb(CTR_BLOCKx, CTR_BLOCKx, xmm24, Assembler::AVX_512bit); + movdqu(Address(counter, 0), CTR_BLOCKx); + // Load ghash lswap mask + movdqu(xmm24, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); + // Shuffle ghash using lbswap_mask and store it + vpshufb(AAD_HASHx, AAD_HASHx, xmm24, Assembler::AVX_128bit); + movdqu(Address(state, 0), AAD_HASHx); + jmp(ENC_DEC_DONE); + + bind(GENERATE_HTBL_48_BLKS); + generateHtbl_48_block_zmm(subkeyHtbl, avx512_subkeyHtbl); + + bind(ENC_DEC_DONE); + movq(rax, pos); +} + #endif // _LP64 diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp index 
ff92b5647ba84538a3786fb0920b9a49c5a4fc4e..cbf4db8966b643afea4d6881bdd54168eddcd9c4 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2020, Intel Corporation. +* Copyright (c) 2020, 2021, Intel Corporation. All rights reserved. * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_cos.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_cos.cpp index 38a4366607420ca82b0851bc25088d615564db10..f72318897a15b53b8b7a0081066fad5280cd7ce0 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86_cos.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86_cos.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2016, Intel Corporation. +* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. * Intel Math Library (LIBM) Source Code * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_exp.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_exp.cpp index 313c5b514b5c38c24aba979d4e95f20d49414156..b3022607ecc264be8f8eb6a88b5b7e951295cfe4 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86_exp.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86_exp.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2016, Intel Corporation. +* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. * Intel Math Library (LIBM) Source Code * diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_log.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_log.cpp index 9c28d9e510d472dc2c7dff4b5888d421314c84f2..95b8cc245df8259d49c13706d113effee38256fa 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86_log.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86_log.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2016, Intel Corporation. +* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. * Intel Math Library (LIBM) Source Code * diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_log10.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_log10.cpp index 1478da29a491f86593d9277e0461d91b8a9b4ce3..c2bd61e6c2ebf6972b3629f55d51cc6d64296bc0 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86_log10.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86_log10.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2016, Intel Corporation. +* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. * Intel Math Library (LIBM) Source Code * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_pow.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_pow.cpp index d1c405ba3c8d9e0c71ac246919b10740b063a41a..66bdc7194f93c2b9b8fb28828778ce0a6809e900 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86_pow.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86_pow.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2016, Intel Corporation. +* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. 
 * Intel Math Library (LIBM) Source Code
 *
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp
index 10935345f70c77a33cf817d69b515e11aa4a92c8..c51b6755cee04f22f8781e9fd0b909d705d68530 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp
@@ -1,5 +1,5 @@
 /*
-* Copyright (c) 2016, Intel Corporation.
+* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_sin.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_sin.cpp
index 4ba4bf6eddbedb9f49041146a01251921dec5549..8e59182b94e3d57ff67160ae3015e611157b5d95 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86_sin.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86_sin.cpp
@@ -1,5 +1,5 @@
 /*
-* Copyright (c) 2016, Intel Corporation.
+* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
 * Intel Math Library (LIBM) Source Code
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_tan.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_tan.cpp
index 2822eaa7ebf08534f73a263e6d7833cdec53db44..6d3fe8e19943904670332b3a05ae388cc28882ef 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86_tan.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86_tan.cpp
@@ -1,5 +1,5 @@
 /*
-* Copyright (c) 2016, Intel Corporation.
+* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
 * Intel Math Library (LIBM) Source Code
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
diff --git a/src/hotspot/cpu/x86/matcher_x86.hpp b/src/hotspot/cpu/x86/matcher_x86.hpp
index f0c7aff73f96a0a32181097e52e7f8d23cb05979..2dcd1e6e7a94adacf4494dfa848dd884ef2e10a4 100644
--- a/src/hotspot/cpu/x86/matcher_x86.hpp
+++ b/src/hotspot/cpu/x86/matcher_x86.hpp
@@ -148,6 +148,9 @@
   static const bool int_in_long = false;
 #endif
 
+  // Number of htbl entries for aes-gcm intrinsic
+  static const int htbl_entries = 96;
+
   // Does the CPU support vector variable shift instructions?
   static bool supports_vector_variable_shifts(void) {
     return (UseAVX >= 2);
@@ -158,6 +161,11 @@
     return true;
   }
 
+  // Does the CPU support vector constant rotate instructions?
+  static constexpr bool supports_vector_constant_rotates(int shift) {
+    return -0x80 <= shift && shift < 0x80;
+  }
+
   // Does the CPU support vector unsigned comparison instructions?
   static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
     int vlen_in_bytes = vlen * type2aelembytes(bt);
@@ -178,11 +186,7 @@
   // Some microarchitectures have mask registers used on vectors
   static const bool has_predicated_vectors(void) {
-    bool ret_value = false;
-    if (UseAVX > 2) {
-      ret_value = VM_Version::supports_avx512vl();
-    }
-    return ret_value;
+    return VM_Version::supports_evex();
   }
 
   // true means we have fast l2f conversion
@@ -191,4 +195,7 @@
     return true;
   }
 
+  // Implements a variant of EncodeISOArrayNode that encodes ASCII only
+  static const bool supports_encode_ascii_array = true;
+
 #endif // CPU_X86_MATCHER_X86_HPP
diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86.cpp
index d067820fdc1e9f04725681e118b9956c96e97c0e..8a0e8e6cf245656f9543e49f2681b086e0d14320 100644
--- a/src/hotspot/cpu/x86/sharedRuntime_x86.cpp
+++ b/src/hotspot/cpu/x86/sharedRuntime_x86.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -62,12 +62,6 @@ void SharedRuntime::inline_check_hashcode_from_object_header(MacroAssembler* mas __ testptr(result, markWord::unlocked_value); __ jcc(Assembler::zero, slowCase); - if (UseBiasedLocking) { - // Check if biased and fall through to runtime if so - __ testptr(result, markWord::biased_lock_bit_in_place); - __ jcc(Assembler::notZero, slowCase); - } - // get hash #ifdef _LP64 // Read the header and build a mask to get its hash field. diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp index 492ec5962d0975a565f8e9a16d873a8afae6372a..cad8207e5b9f3bdacc1385627dc6dd39dffd61b8 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp @@ -1823,11 +1823,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Load the oop from the handle __ movptr(obj_reg, Address(oop_handle_reg, 0)); - if (UseBiasedLocking) { - // Note that oop_handle_reg is trashed during this call - __ biased_locking_enter(lock_reg, obj_reg, swap_reg, oop_handle_reg, noreg, false, lock_done, &slow_path_lock); - } - // Load immediate 1 into swap_reg %rax, __ movptr(swap_reg, 1); @@ -1860,11 +1855,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ jcc(Assembler::notEqual, slow_path_lock); // Slow path will re-enter here __ bind(lock_done); - - if (UseBiasedLocking) { - // Re-fetch oop_handle_reg as we trashed it above - __ movptr(oop_handle_reg, Address(rsp, wordSize)); - } } @@ -1993,10 +1983,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Get locked oop from the handle we passed to jni __ movptr(obj_reg, Address(oop_handle_reg, 0)); - if (UseBiasedLocking) { - __ biased_locking_exit(obj_reg, rbx, done); - } - // Simple recursive lock? __ cmpptr(Address(rbp, lock_slot_rbp_offset), (int32_t)NULL_WORD); diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp index fb4e3f544009f6b06bde515ac52581f6323af595..8dcb557fd9384e8b70b7f4b016cabdbd09d9bafc 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp @@ -168,6 +168,10 @@ class RegisterSaver { static void restore_result_registers(MacroAssembler* masm); }; +// Register is a class, but it would be assigned numerical value. +// "0" is assigned for rax. Thus we need to ignore -Wnonnull. 
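A note on the PRAGMA_DIAG_PUSH / PRAGMA_NONNULL_IGNORED pair added just below: on GCC these HotSpot macros expand to the usual diagnostic push/ignore/pop window, roughly as sketched here (the guarded declaration is an illustrative placeholder, not the real code):

// Suppress -Wnonnull for a bounded region, then restore the previous
// diagnostic state. The warning fires because Register is a class
// type whose value for rax is formed from the literal 0.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wnonnull"
void code_the_compiler_would_flag();  // hypothetical placeholder
#pragma GCC diagnostic pop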
+PRAGMA_DIAG_PUSH +PRAGMA_NONNULL_IGNORED OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) { int off = 0; int num_xmm_regs = XMMRegisterImpl::number_of_registers; @@ -360,6 +364,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ return map; } +PRAGMA_DIAG_POP void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { int num_xmm_regs = XMMRegisterImpl::number_of_registers; @@ -2070,10 +2075,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Load the oop from the handle __ movptr(obj_reg, Address(oop_handle_reg, 0)); - if (UseBiasedLocking) { - __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch1, rscratch2, false, lock_done, &slow_path_lock); - } - // Load immediate 1 into swap_reg %rax __ movl(swap_reg, 1); @@ -2224,11 +2225,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ movptr(obj_reg, Address(oop_handle_reg, 0)); Label done; - - if (UseBiasedLocking) { - __ biased_locking_exit(obj_reg, old_hdr, done); - } - // Simple recursive lock? __ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), (int32_t)NULL_WORD); diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp index f289ad0a7a7c3dea1986dcff60189c00f1edeab1..654066ac87262194a4e87e9ee68b7ad699a41f07 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp @@ -2998,7 +2998,7 @@ class StubGenerator: public StubCodeGenerator { } address generate_upper_word_mask() { - __ align(64); + __ align64(); StubCodeMark mark(this, "StubRoutines", "upper_word_mask"); address start = __ pc(); __ emit_data(0x00000000, relocInfo::none, 0); @@ -3009,7 +3009,7 @@ class StubGenerator: public StubCodeGenerator { } address generate_shuffle_byte_flip_mask() { - __ align(64); + __ align64(); StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask"); address start = __ pc(); __ emit_data(0x0c0d0e0f, relocInfo::none, 0); @@ -3068,7 +3068,7 @@ class StubGenerator: public StubCodeGenerator { } address generate_pshuffle_byte_flip_mask() { - __ align(64); + __ align64(); StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask"); address start = __ pc(); __ emit_data(0x00010203, relocInfo::none, 0); diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp index 24c2439bf8762ab676f9c5f5d5e28ec80560ec46..70fd2e67da1c2c15192a236adc9c4631de610d68 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp @@ -39,6 +39,7 @@ #include "oops/objArrayKlass.hpp" #include "oops/oop.inline.hpp" #include "prims/methodHandles.hpp" +#include "runtime/arguments.hpp" #include "runtime/frame.inline.hpp" #include "runtime/handles.inline.hpp" #include "runtime/sharedRuntime.hpp" @@ -1483,7 +1484,7 @@ class StubGenerator: public StubCodeGenerator { __ subq(temp1, loop_size[shift]); // Main loop with aligned copy block size of 192 bytes at 32 byte granularity. - __ align(32); + __ align32(); __ BIND(L_main_loop); __ copy64_avx(to, from, temp4, xmm1, false, shift, 0); __ copy64_avx(to, from, temp4, xmm1, false, shift, 64); @@ -1550,7 +1551,7 @@ class StubGenerator: public StubCodeGenerator { // Main loop with aligned copy block size of 192 bytes at // 64 byte copy granularity. 
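The align(32) → align32() substitutions running through these stub-generator hunks, including the one directly after this comment, are behavior-preserving: both pad the code buffer so the next instruction starts on a 32-byte boundary, keeping hot loop entries within one fetch line. A standalone sketch of the padding arithmetic (plain C++, not the Assembler API):

#include <cassert>
#include <cstdint>

// Bytes of padding needed to advance pc to the next multiple of
// modulus, where modulus is a power of two; align32() corresponds to
// modulus == 32.
static uint64_t padding_for(uint64_t pc, uint64_t modulus) {
  assert((modulus & (modulus - 1)) == 0);  // power of two only
  return (modulus - (pc & (modulus - 1))) & (modulus - 1);
}
// padding_for(0x1001, 32) == 31; padding_for(0x1020, 32) == 0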
- __ align(32); + __ align32(); __ BIND(L_main_loop_64bytes); __ copy64_avx(to, from, temp4, xmm1, false, shift, 0 , true); __ copy64_avx(to, from, temp4, xmm1, false, shift, 64, true); @@ -1690,7 +1691,7 @@ class StubGenerator: public StubCodeGenerator { __ BIND(L_main_pre_loop); // Main loop with aligned copy block size of 192 bytes at 32 byte granularity. - __ align(32); + __ align32(); __ BIND(L_main_loop); __ copy64_avx(to, from, temp1, xmm1, true, shift, -64); __ copy64_avx(to, from, temp1, xmm1, true, shift, -128); @@ -1723,7 +1724,7 @@ class StubGenerator: public StubCodeGenerator { // Main loop with aligned copy block size of 192 bytes at // 64 byte copy granularity. - __ align(32); + __ align32(); __ BIND(L_main_loop_64bytes); __ copy64_avx(to, from, temp1, xmm1, true, shift, -64 , true); __ copy64_avx(to, from, temp1, xmm1, true, shift, -128, true); @@ -4193,7 +4194,7 @@ class StubGenerator: public StubCodeGenerator { } address generate_upper_word_mask() { - __ align(64); + __ align64(); StubCodeMark mark(this, "StubRoutines", "upper_word_mask"); address start = __ pc(); __ emit_data64(0x0000000000000000, relocInfo::none); @@ -4202,7 +4203,7 @@ class StubGenerator: public StubCodeGenerator { } address generate_shuffle_byte_flip_mask() { - __ align(64); + __ align64(); StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask"); address start = __ pc(); __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none); @@ -4247,7 +4248,7 @@ class StubGenerator: public StubCodeGenerator { } address generate_pshuffle_byte_flip_mask() { - __ align(64); + __ align64(); StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask"); address start = __ pc(); __ emit_data64(0x0405060700010203, relocInfo::none); @@ -4273,7 +4274,7 @@ class StubGenerator: public StubCodeGenerator { //Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. address generate_pshuffle_byte_flip_mask_sha512() { - __ align(32); + __ align32(); StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask_sha512"); address start = __ pc(); if (VM_Version::supports_avx2()) { @@ -4367,9 +4368,103 @@ class StubGenerator: public StubCodeGenerator { return start; } + address ghash_polynomial512_addr() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "_ghash_poly512_addr"); + address start = __ pc(); + __ emit_data64(0x00000001C2000000, relocInfo::none); // POLY for reduction + __ emit_data64(0xC200000000000000, relocInfo::none); + __ emit_data64(0x00000001C2000000, relocInfo::none); + __ emit_data64(0xC200000000000000, relocInfo::none); + __ emit_data64(0x00000001C2000000, relocInfo::none); + __ emit_data64(0xC200000000000000, relocInfo::none); + __ emit_data64(0x00000001C2000000, relocInfo::none); + __ emit_data64(0xC200000000000000, relocInfo::none); + __ emit_data64(0x0000000000000001, relocInfo::none); // POLY + __ emit_data64(0xC200000000000000, relocInfo::none); + __ emit_data64(0x0000000000000001, relocInfo::none); // TWOONE + __ emit_data64(0x0000000100000000, relocInfo::none); + return start; +} + + // Vector AES Galois Counter Mode implementation. 
Parameters: + // Windows regs | Linux regs + // in = c_rarg0 (rcx) | c_rarg0 (rsi) + // len = c_rarg1 (rdx) | c_rarg1 (rdi) + // ct = c_rarg2 (r8) | c_rarg2 (rdx) + // out = c_rarg3 (r9) | c_rarg3 (rcx) + // key = r10 | c_rarg4 (r8) + // state = r13 | c_rarg5 (r9) + // subkeyHtbl = r14 | r11 + // counter = rsi | r12 + // return - number of processed bytes + address generate_galoisCounterMode_AESCrypt() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "galoisCounterMode_AESCrypt"); + address start = __ pc(); + const Register in = c_rarg0; + const Register len = c_rarg1; + const Register ct = c_rarg2; + const Register out = c_rarg3; + // and updated with the incremented counter in the end +#ifndef _WIN64 + const Register key = c_rarg4; + const Register state = c_rarg5; + const Address subkeyH_mem(rbp, 2 * wordSize); + const Register subkeyHtbl = r11; + const Address avx512_subkeyH_mem(rbp, 3 * wordSize); + const Register avx512_subkeyHtbl = r13; + const Address counter_mem(rbp, 4 * wordSize); + const Register counter = r12; +#else + const Address key_mem(rbp, 6 * wordSize); + const Register key = r10; + const Address state_mem(rbp, 7 * wordSize); + const Register state = r13; + const Address subkeyH_mem(rbp, 8 * wordSize); + const Register subkeyHtbl = r14; + const Address avx512_subkeyH_mem(rbp, 9 * wordSize); + const Register avx512_subkeyHtbl = r12; + const Address counter_mem(rbp, 10 * wordSize); + const Register counter = rsi; +#endif + __ enter(); + // Save state before entering routine + __ push(r12); + __ push(r13); + __ push(r14); + __ push(r15); + __ push(rbx); +#ifdef _WIN64 + // on win64, fill len_reg from stack position + __ push(rsi); + __ movptr(key, key_mem); + __ movptr(state, state_mem); +#endif + __ movptr(subkeyHtbl, subkeyH_mem); + __ movptr(avx512_subkeyHtbl, avx512_subkeyH_mem); + __ movptr(counter, counter_mem); + + __ aesgcm_encrypt(in, len, ct, out, key, state, subkeyHtbl, avx512_subkeyHtbl, counter); + + // Restore state before leaving routine +#ifdef _WIN64 + __ pop(rsi); +#endif + __ pop(rbx); + __ pop(r15); + __ pop(r14); + __ pop(r13); + __ pop(r12); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + return start; + } + // This mask is used for incrementing counter value(linc0, linc4, etc.) 
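A brief aside before the counter_mask_addr() table that the comment above introduces: the 0xC200000000000000 / 0x00000001 quadwords emitted in ghash_polynomial512_addr() are the bit-reflected GHASH reduction polynomial, and every evpclmulqdq in this patch is a carry-less multiply over GF(2)[x]. A portable scalar sketch of that primitive, assuming standalone C++ (one lane of vpclmulqdq, nowhere near the instruction's speed):

#include <cstdint>

struct U128 { uint64_t lo, hi; };

// Carry-less 64x64 -> 128-bit multiply: partial products are combined
// with XOR instead of ADD, i.e. polynomial multiplication over GF(2).
static U128 clmul64(uint64_t a, uint64_t b) {
  U128 r = {0, 0};
  for (int i = 0; i < 64; i++) {
    if ((b >> i) & 1) {
      r.lo ^= a << i;
      if (i != 0) r.hi ^= a >> (64 - i);
    }
  }
  return r;
}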
address counter_mask_addr() { - __ align(64); + __ align64(); StubCodeMark mark(this, "StubRoutines", "counter_mask_addr"); address start = __ pc(); __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);//lbswapmask @@ -5286,164 +5381,691 @@ address generate_avx_ghash_processBlocks() { return start; } - //base64 character set - address base64_charset_addr() { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "base64_charset"); + address base64_shuffle_addr() + { + __ align64(); + StubCodeMark mark(this, "StubRoutines", "shuffle_base64"); address start = __ pc(); - __ emit_data64(0x0000004200000041, relocInfo::none); - __ emit_data64(0x0000004400000043, relocInfo::none); - __ emit_data64(0x0000004600000045, relocInfo::none); - __ emit_data64(0x0000004800000047, relocInfo::none); - __ emit_data64(0x0000004a00000049, relocInfo::none); - __ emit_data64(0x0000004c0000004b, relocInfo::none); - __ emit_data64(0x0000004e0000004d, relocInfo::none); - __ emit_data64(0x000000500000004f, relocInfo::none); - __ emit_data64(0x0000005200000051, relocInfo::none); - __ emit_data64(0x0000005400000053, relocInfo::none); - __ emit_data64(0x0000005600000055, relocInfo::none); - __ emit_data64(0x0000005800000057, relocInfo::none); - __ emit_data64(0x0000005a00000059, relocInfo::none); - __ emit_data64(0x0000006200000061, relocInfo::none); - __ emit_data64(0x0000006400000063, relocInfo::none); - __ emit_data64(0x0000006600000065, relocInfo::none); - __ emit_data64(0x0000006800000067, relocInfo::none); - __ emit_data64(0x0000006a00000069, relocInfo::none); - __ emit_data64(0x0000006c0000006b, relocInfo::none); - __ emit_data64(0x0000006e0000006d, relocInfo::none); - __ emit_data64(0x000000700000006f, relocInfo::none); - __ emit_data64(0x0000007200000071, relocInfo::none); - __ emit_data64(0x0000007400000073, relocInfo::none); - __ emit_data64(0x0000007600000075, relocInfo::none); - __ emit_data64(0x0000007800000077, relocInfo::none); - __ emit_data64(0x0000007a00000079, relocInfo::none); - __ emit_data64(0x0000003100000030, relocInfo::none); - __ emit_data64(0x0000003300000032, relocInfo::none); - __ emit_data64(0x0000003500000034, relocInfo::none); - __ emit_data64(0x0000003700000036, relocInfo::none); - __ emit_data64(0x0000003900000038, relocInfo::none); - __ emit_data64(0x0000002f0000002b, relocInfo::none); + assert(((unsigned long long)start & 0x3f) == 0, + "Alignment problem (0x%08llx)", (unsigned long long)start); + __ emit_data64(0x0405030401020001, relocInfo::none); + __ emit_data64(0x0a0b090a07080607, relocInfo::none); + __ emit_data64(0x10110f100d0e0c0d, relocInfo::none); + __ emit_data64(0x1617151613141213, relocInfo::none); + __ emit_data64(0x1c1d1b1c191a1819, relocInfo::none); + __ emit_data64(0x222321221f201e1f, relocInfo::none); + __ emit_data64(0x2829272825262425, relocInfo::none); + __ emit_data64(0x2e2f2d2e2b2c2a2b, relocInfo::none); return start; } - //base64 url character set - address base64url_charset_addr() { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "base64url_charset"); + address base64_avx2_shuffle_addr() + { + __ align32(); + StubCodeMark mark(this, "StubRoutines", "avx2_shuffle_base64"); address start = __ pc(); - __ emit_data64(0x0000004200000041, relocInfo::none); - __ emit_data64(0x0000004400000043, relocInfo::none); - __ emit_data64(0x0000004600000045, relocInfo::none); - __ emit_data64(0x0000004800000047, relocInfo::none); - __ emit_data64(0x0000004a00000049, relocInfo::none); - __ emit_data64(0x0000004c0000004b, 
relocInfo::none); - __ emit_data64(0x0000004e0000004d, relocInfo::none); - __ emit_data64(0x000000500000004f, relocInfo::none); - __ emit_data64(0x0000005200000051, relocInfo::none); - __ emit_data64(0x0000005400000053, relocInfo::none); - __ emit_data64(0x0000005600000055, relocInfo::none); - __ emit_data64(0x0000005800000057, relocInfo::none); - __ emit_data64(0x0000005a00000059, relocInfo::none); - __ emit_data64(0x0000006200000061, relocInfo::none); - __ emit_data64(0x0000006400000063, relocInfo::none); - __ emit_data64(0x0000006600000065, relocInfo::none); - __ emit_data64(0x0000006800000067, relocInfo::none); - __ emit_data64(0x0000006a00000069, relocInfo::none); - __ emit_data64(0x0000006c0000006b, relocInfo::none); - __ emit_data64(0x0000006e0000006d, relocInfo::none); - __ emit_data64(0x000000700000006f, relocInfo::none); - __ emit_data64(0x0000007200000071, relocInfo::none); - __ emit_data64(0x0000007400000073, relocInfo::none); - __ emit_data64(0x0000007600000075, relocInfo::none); - __ emit_data64(0x0000007800000077, relocInfo::none); - __ emit_data64(0x0000007a00000079, relocInfo::none); - __ emit_data64(0x0000003100000030, relocInfo::none); - __ emit_data64(0x0000003300000032, relocInfo::none); - __ emit_data64(0x0000003500000034, relocInfo::none); - __ emit_data64(0x0000003700000036, relocInfo::none); - __ emit_data64(0x0000003900000038, relocInfo::none); - __ emit_data64(0x0000005f0000002d, relocInfo::none); + __ emit_data64(0x0809070805060405, relocInfo::none); + __ emit_data64(0x0e0f0d0e0b0c0a0b, relocInfo::none); + __ emit_data64(0x0405030401020001, relocInfo::none); + __ emit_data64(0x0a0b090a07080607, relocInfo::none); + return start; + } + address base64_avx2_input_mask_addr() + { + __ align32(); + StubCodeMark mark(this, "StubRoutines", "avx2_input_mask_base64"); + address start = __ pc(); + __ emit_data64(0x8000000000000000, relocInfo::none); + __ emit_data64(0x8000000080000000, relocInfo::none); + __ emit_data64(0x8000000080000000, relocInfo::none); + __ emit_data64(0x8000000080000000, relocInfo::none); return start; } - address base64_bswap_mask_addr() { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "bswap_mask_base64"); + address base64_avx2_lut_addr() + { + __ align32(); + StubCodeMark mark(this, "StubRoutines", "avx2_lut_base64"); address start = __ pc(); - __ emit_data64(0x0504038002010080, relocInfo::none); - __ emit_data64(0x0b0a098008070680, relocInfo::none); - __ emit_data64(0x0908078006050480, relocInfo::none); - __ emit_data64(0x0f0e0d800c0b0a80, relocInfo::none); - __ emit_data64(0x0605048003020180, relocInfo::none); - __ emit_data64(0x0c0b0a8009080780, relocInfo::none); - __ emit_data64(0x0504038002010080, relocInfo::none); - __ emit_data64(0x0b0a098008070680, relocInfo::none); + __ emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none); + __ emit_data64(0x0000f0edfcfcfcfc, relocInfo::none); + __ emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none); + __ emit_data64(0x0000f0edfcfcfcfc, relocInfo::none); + + // URL LUT + __ emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none); + __ emit_data64(0x000020effcfcfcfc, relocInfo::none); + __ emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none); + __ emit_data64(0x000020effcfcfcfc, relocInfo::none); + return start; + } + address base64_encoding_table_addr() + { + __ align64(); + StubCodeMark mark(this, "StubRoutines", "encoding_table_base64"); + address start = __ pc(); + assert(((unsigned long long)start & 0x3f) == 0, "Alignment problem (0x%08llx)", (unsigned long long)start); + __ 
emit_data64(0x4847464544434241, relocInfo::none);
+ __ emit_data64(0x504f4e4d4c4b4a49, relocInfo::none);
+ __ emit_data64(0x5857565554535251, relocInfo::none);
+ __ emit_data64(0x6665646362615a59, relocInfo::none);
+ __ emit_data64(0x6e6d6c6b6a696867, relocInfo::none);
+ __ emit_data64(0x767574737271706f, relocInfo::none);
+ __ emit_data64(0x333231307a797877, relocInfo::none);
+ __ emit_data64(0x2f2b393837363534, relocInfo::none);
+
+ // URL table
+ __ emit_data64(0x4847464544434241, relocInfo::none);
+ __ emit_data64(0x504f4e4d4c4b4a49, relocInfo::none);
+ __ emit_data64(0x5857565554535251, relocInfo::none);
+ __ emit_data64(0x6665646362615a59, relocInfo::none);
+ __ emit_data64(0x6e6d6c6b6a696867, relocInfo::none);
+ __ emit_data64(0x767574737271706f, relocInfo::none);
+ __ emit_data64(0x333231307a797877, relocInfo::none);
+ __ emit_data64(0x5f2d393837363534, relocInfo::none);
return start;
}
- address base64_right_shift_mask_addr() {
+ // Code for generating Base64 encoding.
+ // Intrinsic function prototype in Base64.java:
+ // private void encodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp,
+ // boolean isURL) {
+ address generate_base64_encodeBlock()
+ {
__ align(CodeEntryAlignment);
- StubCodeMark mark(this, "StubRoutines", "right_shift_mask");
+ StubCodeMark mark(this, "StubRoutines", "implEncode");
+ address start = __ pc();
+ __ enter();
+
+ // Save callee-saved registers before using them
+ __ push(r12);
+ __ push(r13);
+ __ push(r14);
+ __ push(r15);
+
+ // arguments
+ const Register source = c_rarg0; // Source Array
+ const Register start_offset = c_rarg1; // start offset
+ const Register end_offset = c_rarg2; // end offset
+ const Register dest = c_rarg3; // destination array
+
+#ifndef _WIN64
+ const Register dp = c_rarg4; // Position for writing to dest array
+ const Register isURL = c_rarg5; // Base64 or URL character set
+#else
+ const Address dp_mem(rbp, 6 * wordSize); // dp is passed on the stack on Win64
+ const Address isURL_mem(rbp, 7 * wordSize);
+ const Register isURL = r10; // pick the volatile windows register
+ const Register dp = r12;
+ __ movl(dp, dp_mem);
+ __ movl(isURL, isURL_mem);
+#endif
+
+ const Register length = r14;
+ const Register encode_table = r13;
+ Label L_process3, L_exit, L_processdata, L_vbmiLoop, L_not512, L_32byteLoop;
+
+ // calculate length from offsets
+ __ movl(length, end_offset);
+ __ subl(length, start_offset);
+ __ cmpl(length, 0);
+ __ jcc(Assembler::lessEqual, L_exit);
+
+ // Code for 512-bit VBMI encoding. Encodes 48 input bytes into 64
+ // output bytes. We read 64 input bytes and ignore the last 16, so be
+ // sure not to read past the end of the input buffer.
+ if (VM_Version::supports_avx512_vbmi()) {
+ __ cmpl(length, 64); // Do not overrun input buffer.
+ __ jcc(Assembler::below, L_not512);
+
+ __ shll(isURL, 6); // index into encoding table based on isURL
+ __ lea(encode_table, ExternalAddress(StubRoutines::x86::base64_encoding_table_addr()));
+ __ addptr(encode_table, isURL);
+ __ shrl(isURL, 6); // restore isURL
+
+ __ mov64(rax, 0x3036242a1016040aull); // Shifts
+ __ evmovdquq(xmm3, ExternalAddress(StubRoutines::x86::base64_shuffle_addr()), Assembler::AVX_512bit, r15);
+ __ evmovdquq(xmm2, Address(encode_table, 0), Assembler::AVX_512bit);
+ __ evpbroadcastq(xmm1, rax, Assembler::AVX_512bit);
+
+ __ align32();
+ __ BIND(L_vbmiLoop);
+
+ __ vpermb(xmm0, xmm3, Address(source, start_offset), Assembler::AVX_512bit);
+ __ subl(length, 48);
+
+ // Put the input bytes into the proper lanes for writing, then
+ // encode them.
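+ // (Editor's aside, not part of the original patch: a scalar model of the
+ // next two instructions. The shift constant 0x3036242a1016040a loaded into
+ // xmm1 above has byte values {10, 4, 22, 16, 42, 36, 54, 48}, so for each
+ // 64-bit lane q holding two 3-byte groups, evpmultishiftqb computes
+ //
+ //   for (int i = 0; i < 8; i++)
+ //     out_byte[i] = (uint8_t)(q >> shifts[i]); // 8-bit field, may wrap
+ //
+ // which lines the four 6-bit values of each group up in output order.
+ // The vpermb that follows indexes the 64-entry encoding table with the
+ // low 6 bits of each byte, so the extra high bits are ignored.)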
+ __ evpmultishiftqb(xmm0, xmm1, xmm0, Assembler::AVX_512bit);
+ __ vpermb(xmm0, xmm0, xmm2, Assembler::AVX_512bit);
+
+ // Write to destination
+ __ evmovdquq(Address(dest, dp), xmm0, Assembler::AVX_512bit);
+
+ __ addptr(dest, 64);
+ __ addptr(source, 48);
+ __ cmpl(length, 64);
+ __ jcc(Assembler::aboveEqual, L_vbmiLoop);
+
+ __ vzeroupper();
+ }
+
+ __ BIND(L_not512);
+ if (VM_Version::supports_avx2()
+ && VM_Version::supports_avx512vlbw()) {
+ /*
+ ** This AVX2 encoder is based on the paper at:
+ ** https://dl.acm.org/doi/10.1145/3132709
+ **
+ ** We use AVX2 SIMD instructions to encode 24 bytes into 32
+ ** output bytes.
+ **
+ */
+ // Lengths under 32 bytes are handled by the scalar routine
+ __ cmpl(length, 31);
+ __ jcc(Assembler::belowEqual, L_process3);
+
+ // Set up supporting constant table data
+ __ vmovdqu(xmm9, ExternalAddress(StubRoutines::x86::base64_avx2_shuffle_addr()), rax);
+ // 6-bit mask for 2nd and 4th (and multiples) 6-bit values
+ __ movl(rax, 0x0fc0fc00);
+ __ vmovdqu(xmm1, ExternalAddress(StubRoutines::x86::base64_avx2_input_mask_addr()), rax);
+ __ evpbroadcastd(xmm8, rax, Assembler::AVX_256bit);
+
+ // Multiplication constant for "shifting" right by 6 and 10
+ // bits
+ __ movl(rax, 0x04000040);
+
+ __ subl(length, 24);
+ __ evpbroadcastd(xmm7, rax, Assembler::AVX_256bit);
+
+ // For the first load, we mask off reading of the first 4
+ // bytes into the register. This is so we can get 4 3-byte
+ // chunks into each lane of the register, avoiding having to
+ // handle end conditions. We then shuffle these bytes into a
+ // specific order so that manipulation is easier.
+ //
+ // The initial read loads the XMM register like this:
+ //
+ // Lower 128-bit lane:
+ // +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
+ // | XX | XX | XX | XX | A0 | A1 | A2 | B0 | B1 | B2 | C0 | C1
+ // | C2 | D0 | D1 | D2 |
+ // +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
+ //
+ // Upper 128-bit lane:
+ // +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
+ // | E0 | E1 | E2 | F0 | F1 | F2 | G0 | G1 | G2 | H0 | H1 | H2
+ // | XX | XX | XX | XX |
+ // +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
+ //
+ // Where A0 is the first input byte, B0 is the fourth, etc.
+ // The alphabetical significance denotes the 3 bytes to be
+ // consumed and encoded into 4 bytes.
+ //
+ // We then shuffle the register so each 32-bit word contains
+ // the sequence:
+ // A1 A0 A2 A1, B1 B0 B2 B1, etc.
+ // Each of these byte sequences is then manipulated into 4
+ // 6-bit values ready for encoding.
+ //
+ // If we focus on one set of 3-byte chunks, changing the
+ // nomenclature such that A0 => a, A1 => b, and A2 => c, we
+ // shuffle such that each 24-bit chunk contains:
+ //
+ // b7 b6 b5 b4 b3 b2 b1 b0 | a7 a6 a5 a4 a3 a2 a1 a0 | c7 c6
+ // c5 c4 c3 c2 c1 c0 | b7 b6 b5 b4 b3 b2 b1 b0
+ // Reinterpreting those same bits in terms of the four 6-bit
+ // output values a..d that we want to produce, they sit at:
+ // b3 b2 b1 b0 c5 c4 c3 c2 | c1 c0 d5 d4 d3 d2 d1 d0 | a5 a4
+ // a3 a2 a1 a0 b5 b4 | b3 b2 b1 b0 c5 c4 c3 c2
+ //
+ // We first AND off all but bits 4-9 and 16-21 (c5..c0 and
+ // a5..a0) and shift them using a vector multiplication
+ // operation (vpmulhuw) which effectively shifts c right by 6
+ // bits and a right by 10 bits. We similarly mask bits 10-15
+ // (d5..d0) and 22-27 (b5..b0) and shift them left by 8 and 4
+ // bits respectively. This is done using vpmullw.
We end up + // with 4 6-bit values, thus splitting the 3 input bytes, + // ready for encoding: + // 0 0 d5..d0 0 0 c5..c0 0 0 b5..b0 0 0 a5..a0 + // + // For translation, we recognize that there are 5 distinct + // ranges of legal Base64 characters as below: + // + // +-------------+-------------+------------+ + // | 6-bit value | ASCII range | offset | + // +-------------+-------------+------------+ + // | 0..25 | A..Z | 65 | + // | 26..51 | a..z | 71 | + // | 52..61 | 0..9 | -4 | + // | 62 | + or - | -19 or -17 | + // | 63 | / or _ | -16 or 32 | + // +-------------+-------------+------------+ + // + // We note that vpshufb does a parallel lookup in a + // destination register using the lower 4 bits of bytes from a + // source register. If we use a saturated subtraction and + // subtract 51 from each 6-bit value, bytes from [0,51] + // saturate to 0, and [52,63] map to a range of [1,12]. We + // distinguish the [0,25] and [26,51] ranges by assigning a + // value of 13 for all 6-bit values less than 26. We end up + // with: + // + // +-------------+-------------+------------+ + // | 6-bit value | Reduced | offset | + // +-------------+-------------+------------+ + // | 0..25 | 13 | 65 | + // | 26..51 | 0 | 71 | + // | 52..61 | 0..9 | -4 | + // | 62 | 11 | -19 or -17 | + // | 63 | 12 | -16 or 32 | + // +-------------+-------------+------------+ + // + // We then use a final vpshufb to add the appropriate offset, + // translating the bytes. + // + // Load input bytes - only 28 bytes. Mask the first load to + // not load into the full register. + __ vpmaskmovd(xmm1, xmm1, Address(source, start_offset, Address::times_1, -4), Assembler::AVX_256bit); + + // Move 3-byte chunks of input (12 bytes) into 16 bytes, + // ordering by: + // 1, 0, 2, 1; 4, 3, 5, 4; etc. This groups 6-bit chunks + // for easy masking + __ vpshufb(xmm1, xmm1, xmm9, Assembler::AVX_256bit); + + __ addl(start_offset, 24); + + // Load masking register for first and third (and multiples) + // 6-bit values. + __ movl(rax, 0x003f03f0); + __ evpbroadcastd(xmm6, rax, Assembler::AVX_256bit); + // Multiplication constant for "shifting" left by 4 and 8 bits + __ movl(rax, 0x01000010); + __ evpbroadcastd(xmm5, rax, Assembler::AVX_256bit); + + // Isolate 6-bit chunks of interest + __ vpand(xmm0, xmm8, xmm1, Assembler::AVX_256bit); + + // Load constants for encoding + __ movl(rax, 0x19191919); + __ evpbroadcastd(xmm3, rax, Assembler::AVX_256bit); + __ movl(rax, 0x33333333); + __ evpbroadcastd(xmm4, rax, Assembler::AVX_256bit); + + // Shift output bytes 0 and 2 into proper lanes + __ vpmulhuw(xmm2, xmm0, xmm7, Assembler::AVX_256bit); + + // Mask and shift output bytes 1 and 3 into proper lanes and + // combine + __ vpand(xmm0, xmm6, xmm1, Assembler::AVX_256bit); + __ vpmullw(xmm0, xmm5, xmm0, Assembler::AVX_256bit); + __ vpor(xmm0, xmm0, xmm2, Assembler::AVX_256bit); + + // Find out which are 0..25. This indicates which input + // values fall in the range of 'A'-'Z', which require an + // additional offset (see comments above) + __ vpcmpgtb(xmm2, xmm0, xmm3, Assembler::AVX_256bit); + __ vpsubusb(xmm1, xmm0, xmm4, Assembler::AVX_256bit); + __ vpsubb(xmm1, xmm1, xmm2, Assembler::AVX_256bit); + + // Load the proper lookup table + __ lea(r11, ExternalAddress(StubRoutines::x86::base64_avx2_lut_addr())); + __ movl(r15, isURL); + __ shll(r15, 5); + __ vmovdqu(xmm2, Address(r11, r15)); + + // Shuffle the offsets based on the range calculation done + // above. 
This allows us to add the correct offset to the
+ // 6-bit value corresponding to the range documented above.
+ __ vpshufb(xmm1, xmm2, xmm1, Assembler::AVX_256bit);
+ __ vpaddb(xmm0, xmm1, xmm0, Assembler::AVX_256bit);
+
+ // Store the encoded bytes
+ __ vmovdqu(Address(dest, dp), xmm0);
+ __ addl(dp, 32);
+
+ __ cmpl(length, 31);
+ __ jcc(Assembler::belowEqual, L_process3);
+
+ __ align32();
+ __ BIND(L_32byteLoop);
+
+ // Get next 32 bytes
+ __ vmovdqu(xmm1, Address(source, start_offset, Address::times_1, -4));
+
+ __ subl(length, 24);
+ __ addl(start_offset, 24);
+
+ // This logic is identical to the above, with only constant
+ // register loads removed. Shuffle the input, mask off 6-bit
+ // chunks, shift them into place, then add the offset to
+ // encode.
+ __ vpshufb(xmm1, xmm1, xmm9, Assembler::AVX_256bit);
+
+ __ vpand(xmm0, xmm8, xmm1, Assembler::AVX_256bit);
+ __ vpmulhuw(xmm10, xmm0, xmm7, Assembler::AVX_256bit);
+ __ vpand(xmm0, xmm6, xmm1, Assembler::AVX_256bit);
+ __ vpmullw(xmm0, xmm5, xmm0, Assembler::AVX_256bit);
+ __ vpor(xmm0, xmm0, xmm10, Assembler::AVX_256bit);
+ __ vpcmpgtb(xmm10, xmm0, xmm3, Assembler::AVX_256bit);
+ __ vpsubusb(xmm1, xmm0, xmm4, Assembler::AVX_256bit);
+ __ vpsubb(xmm1, xmm1, xmm10, Assembler::AVX_256bit);
+ __ vpshufb(xmm1, xmm2, xmm1, Assembler::AVX_256bit);
+ __ vpaddb(xmm0, xmm1, xmm0, Assembler::AVX_256bit);
+
+ // Store the encoded bytes
+ __ vmovdqu(Address(dest, dp), xmm0);
+ __ addl(dp, 32);
+
+ __ cmpl(length, 31);
+ __ jcc(Assembler::above, L_32byteLoop);
+
+ __ BIND(L_process3);
+ __ vzeroupper();
+ } else {
+ __ BIND(L_process3);
+ }
+
+ __ cmpl(length, 3);
+ __ jcc(Assembler::below, L_exit);
+
+ // Load the encoding table based on isURL
+ __ lea(r11, ExternalAddress(StubRoutines::x86::base64_encoding_table_addr()));
+ __ movl(r15, isURL);
+ __ shll(r15, 6);
+ __ addptr(r11, r15);
+
+ __ BIND(L_processdata);
+
+ // Load 3 bytes
+ __ load_unsigned_byte(r15, Address(source, start_offset));
+ __ load_unsigned_byte(r10, Address(source, start_offset, Address::times_1, 1));
+ __ load_unsigned_byte(r13, Address(source, start_offset, Address::times_1, 2));
+
+ // Build a 32-bit word with bytes 1, 2, 0, 1
+ __ movl(rax, r10);
+ __ shll(r10, 24);
+ __ orl(rax, r10);
+
+ __ subl(length, 3);
+
+ __ shll(r15, 8);
+ __ shll(r13, 16);
+ __ orl(rax, r15);
+
+ __ addl(start_offset, 3);
+
+ __ orl(rax, r13);
+ // At this point, rax contains | byte1 | byte2 | byte0 | byte1
+ // r13 has byte2 << 16 - need low-order 6 bits to translate.
+ // This translated byte is the fourth output byte.
+ __ shrl(r13, 16);
+ __ andl(r13, 0x3f);
+
+ // The high-order 6 bits of r15 (byte0) are translated.
+ // The translated byte is the first output byte.
+ __ shrl(r15, 10);
+
+ __ load_unsigned_byte(r13, Address(r11, r13));
+ __ load_unsigned_byte(r15, Address(r11, r15));
+
+ __ movb(Address(dest, dp, Address::times_1, 3), r13);
+
+ // Extract high-order 4 bits of byte1 and low-order 2 bits of byte0.
+ // This translated byte is the second output byte.
+ __ shrl(rax, 4);
+ __ movl(r10, rax);
+ __ andl(rax, 0x3f);
+
+ __ movb(Address(dest, dp, Address::times_1, 0), r15);
+
+ __ load_unsigned_byte(rax, Address(r11, rax));
+
+ // Extract low-order 4 bits of byte1 and high-order 2 bits of byte2.
+ // This translated byte is the third output byte.
+ __ shrl(r10, 18); + __ andl(r10, 0x3f); + + __ load_unsigned_byte(r10, Address(r11, r10)); + + __ movb(Address(dest, dp, Address::times_1, 1), rax); + __ movb(Address(dest, dp, Address::times_1, 2), r10); + + __ addl(dp, 4); + __ cmpl(length, 3); + __ jcc(Assembler::aboveEqual, L_processdata); + + __ BIND(L_exit); + __ pop(r15); + __ pop(r14); + __ pop(r13); + __ pop(r12); + __ leave(); + __ ret(0); + return start; + } + + // base64 AVX512vbmi tables + address base64_vbmi_lookup_lo_addr() { + __ align64(); + StubCodeMark mark(this, "StubRoutines", "lookup_lo_base64"); address start = __ pc(); - __ emit_data64(0x0006000400020000, relocInfo::none); - __ emit_data64(0x0006000400020000, relocInfo::none); - __ emit_data64(0x0006000400020000, relocInfo::none); - __ emit_data64(0x0006000400020000, relocInfo::none); - __ emit_data64(0x0006000400020000, relocInfo::none); - __ emit_data64(0x0006000400020000, relocInfo::none); - __ emit_data64(0x0006000400020000, relocInfo::none); - __ emit_data64(0x0006000400020000, relocInfo::none); + assert(((unsigned long long)start & 0x3f) == 0, + "Alignment problem (0x%08llx)", (unsigned long long)start); + __ emit_data64(0x8080808080808080, relocInfo::none); + __ emit_data64(0x8080808080808080, relocInfo::none); + __ emit_data64(0x8080808080808080, relocInfo::none); + __ emit_data64(0x8080808080808080, relocInfo::none); + __ emit_data64(0x8080808080808080, relocInfo::none); + __ emit_data64(0x3f8080803e808080, relocInfo::none); + __ emit_data64(0x3b3a393837363534, relocInfo::none); + __ emit_data64(0x8080808080803d3c, relocInfo::none); + return start; + } + address base64_vbmi_lookup_hi_addr() { + __ align64(); + StubCodeMark mark(this, "StubRoutines", "lookup_hi_base64"); + address start = __ pc(); + assert(((unsigned long long)start & 0x3f) == 0, + "Alignment problem (0x%08llx)", (unsigned long long)start); + __ emit_data64(0x0605040302010080, relocInfo::none); + __ emit_data64(0x0e0d0c0b0a090807, relocInfo::none); + __ emit_data64(0x161514131211100f, relocInfo::none); + __ emit_data64(0x8080808080191817, relocInfo::none); + __ emit_data64(0x201f1e1d1c1b1a80, relocInfo::none); + __ emit_data64(0x2827262524232221, relocInfo::none); + __ emit_data64(0x302f2e2d2c2b2a29, relocInfo::none); + __ emit_data64(0x8080808080333231, relocInfo::none); + return start; + } + address base64_vbmi_lookup_lo_url_addr() { + __ align64(); + StubCodeMark mark(this, "StubRoutines", "lookup_lo_base64url"); + address start = __ pc(); + assert(((unsigned long long)start & 0x3f) == 0, + "Alignment problem (0x%08llx)", (unsigned long long)start); + __ emit_data64(0x8080808080808080, relocInfo::none); + __ emit_data64(0x8080808080808080, relocInfo::none); + __ emit_data64(0x8080808080808080, relocInfo::none); + __ emit_data64(0x8080808080808080, relocInfo::none); + __ emit_data64(0x8080808080808080, relocInfo::none); + __ emit_data64(0x80803e8080808080, relocInfo::none); + __ emit_data64(0x3b3a393837363534, relocInfo::none); + __ emit_data64(0x8080808080803d3c, relocInfo::none); return start; } - address base64_left_shift_mask_addr() { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "left_shift_mask"); + address base64_vbmi_lookup_hi_url_addr() { + __ align64(); + StubCodeMark mark(this, "StubRoutines", "lookup_hi_base64url"); address start = __ pc(); - __ emit_data64(0x0000000200040000, relocInfo::none); - __ emit_data64(0x0000000200040000, relocInfo::none); - __ emit_data64(0x0000000200040000, relocInfo::none); - __ emit_data64(0x0000000200040000, 
relocInfo::none); - __ emit_data64(0x0000000200040000, relocInfo::none); - __ emit_data64(0x0000000200040000, relocInfo::none); - __ emit_data64(0x0000000200040000, relocInfo::none); - __ emit_data64(0x0000000200040000, relocInfo::none); + assert(((unsigned long long)start & 0x3f) == 0, + "Alignment problem (0x%08llx)", (unsigned long long)start); + __ emit_data64(0x0605040302010080, relocInfo::none); + __ emit_data64(0x0e0d0c0b0a090807, relocInfo::none); + __ emit_data64(0x161514131211100f, relocInfo::none); + __ emit_data64(0x3f80808080191817, relocInfo::none); + __ emit_data64(0x201f1e1d1c1b1a80, relocInfo::none); + __ emit_data64(0x2827262524232221, relocInfo::none); + __ emit_data64(0x302f2e2d2c2b2a29, relocInfo::none); + __ emit_data64(0x8080808080333231, relocInfo::none); + return start; + } + address base64_vbmi_pack_vec_addr() { + __ align64(); + StubCodeMark mark(this, "StubRoutines", "pack_vec_base64"); + address start = __ pc(); + assert(((unsigned long long)start & 0x3f) == 0, + "Alignment problem (0x%08llx)", (unsigned long long)start); + __ emit_data64(0x090a040506000102, relocInfo::none); + __ emit_data64(0x161011120c0d0e08, relocInfo::none); + __ emit_data64(0x1c1d1e18191a1415, relocInfo::none); + __ emit_data64(0x292a242526202122, relocInfo::none); + __ emit_data64(0x363031322c2d2e28, relocInfo::none); + __ emit_data64(0x3c3d3e38393a3435, relocInfo::none); + __ emit_data64(0x0000000000000000, relocInfo::none); + __ emit_data64(0x0000000000000000, relocInfo::none); return start; } - address base64_and_mask_addr() { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "and_mask"); + address base64_vbmi_join_0_1_addr() { + __ align64(); + StubCodeMark mark(this, "StubRoutines", "join_0_1_base64"); address start = __ pc(); - __ emit_data64(0x3f003f003f000000, relocInfo::none); - __ emit_data64(0x3f003f003f000000, relocInfo::none); - __ emit_data64(0x3f003f003f000000, relocInfo::none); - __ emit_data64(0x3f003f003f000000, relocInfo::none); - __ emit_data64(0x3f003f003f000000, relocInfo::none); - __ emit_data64(0x3f003f003f000000, relocInfo::none); - __ emit_data64(0x3f003f003f000000, relocInfo::none); - __ emit_data64(0x3f003f003f000000, relocInfo::none); + assert(((unsigned long long)start & 0x3f) == 0, + "Alignment problem (0x%08llx)", (unsigned long long)start); + __ emit_data64(0x090a040506000102, relocInfo::none); + __ emit_data64(0x161011120c0d0e08, relocInfo::none); + __ emit_data64(0x1c1d1e18191a1415, relocInfo::none); + __ emit_data64(0x292a242526202122, relocInfo::none); + __ emit_data64(0x363031322c2d2e28, relocInfo::none); + __ emit_data64(0x3c3d3e38393a3435, relocInfo::none); + __ emit_data64(0x494a444546404142, relocInfo::none); + __ emit_data64(0x565051524c4d4e48, relocInfo::none); return start; } - address base64_gather_mask_addr() { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "gather_mask"); + address base64_vbmi_join_1_2_addr() { + __ align64(); + StubCodeMark mark(this, "StubRoutines", "join_1_2_base64"); + address start = __ pc(); + assert(((unsigned long long)start & 0x3f) == 0, + "Alignment problem (0x%08llx)", (unsigned long long)start); + __ emit_data64(0x1c1d1e18191a1415, relocInfo::none); + __ emit_data64(0x292a242526202122, relocInfo::none); + __ emit_data64(0x363031322c2d2e28, relocInfo::none); + __ emit_data64(0x3c3d3e38393a3435, relocInfo::none); + __ emit_data64(0x494a444546404142, relocInfo::none); + __ emit_data64(0x565051524c4d4e48, relocInfo::none); + __ emit_data64(0x5c5d5e58595a5455, 
relocInfo::none); + __ emit_data64(0x696a646566606162, relocInfo::none); + return start; + } + + address base64_vbmi_join_2_3_addr() { + __ align64(); + StubCodeMark mark(this, "StubRoutines", "join_2_3_base64"); + address start = __ pc(); + assert(((unsigned long long)start & 0x3f) == 0, + "Alignment problem (0x%08llx)", (unsigned long long)start); + __ emit_data64(0x363031322c2d2e28, relocInfo::none); + __ emit_data64(0x3c3d3e38393a3435, relocInfo::none); + __ emit_data64(0x494a444546404142, relocInfo::none); + __ emit_data64(0x565051524c4d4e48, relocInfo::none); + __ emit_data64(0x5c5d5e58595a5455, relocInfo::none); + __ emit_data64(0x696a646566606162, relocInfo::none); + __ emit_data64(0x767071726c6d6e68, relocInfo::none); + __ emit_data64(0x7c7d7e78797a7475, relocInfo::none); + return start; + } + + address base64_decoding_table_addr() { + StubCodeMark mark(this, "StubRoutines", "decoding_table_base64"); address start = __ pc(); __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0x3fffffff3effffff, relocInfo::none); + __ emit_data64(0x3b3a393837363534, relocInfo::none); + __ emit_data64(0xffffffffffff3d3c, relocInfo::none); + __ emit_data64(0x06050403020100ff, relocInfo::none); + __ emit_data64(0x0e0d0c0b0a090807, relocInfo::none); + __ emit_data64(0x161514131211100f, relocInfo::none); + __ emit_data64(0xffffffffff191817, relocInfo::none); + __ emit_data64(0x201f1e1d1c1b1aff, relocInfo::none); + __ emit_data64(0x2827262524232221, relocInfo::none); + __ emit_data64(0x302f2e2d2c2b2a29, relocInfo::none); + __ emit_data64(0xffffffffff333231, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + + // URL table + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffffffffffffffff, relocInfo::none); + __ emit_data64(0xffff3effffffffff, relocInfo::none); + __ emit_data64(0x3b3a393837363534, relocInfo::none); + __ emit_data64(0xffffffffffff3d3c, relocInfo::none); + __ emit_data64(0x06050403020100ff, relocInfo::none); + __ emit_data64(0x0e0d0c0b0a090807, relocInfo::none); + __ emit_data64(0x161514131211100f, relocInfo::none); + __ emit_data64(0x3fffffffff191817, relocInfo::none); + __ emit_data64(0x201f1e1d1c1b1aff, relocInfo::none); + __ emit_data64(0x2827262524232221, relocInfo::none); + __ emit_data64(0x302f2e2d2c2b2a29, relocInfo::none); + __ 
emit_data64(0xffffffffff333231, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ __ emit_data64(0xffffffffffffffff, relocInfo::none);
+ return start;
+ }
-// Code for generating Base64 encoding.
+
+// Code for generating Base64 decoding.
+//
+// Based on the article (and associated code) from https://arxiv.org/abs/1910.05109.
+//
// Intrinsic function prototype in Base64.java:
-// private void encodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL) {
- address generate_base64_encodeBlock() {
+// private void decodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL, boolean isMIME) {
+ address generate_base64_decodeBlock() {
__ align(CodeEntryAlignment);
- StubCodeMark mark(this, "StubRoutines", "implEncode");
+ StubCodeMark mark(this, "StubRoutines", "implDecode");
address start = __ pc();
__ enter();
@@ -5452,16 +6074,19 @@ address generate_avx_ghash_processBlocks() {
__ push(r13);
__ push(r14);
__ push(r15);
+ __ push(rbx);
// arguments
const Register source = c_rarg0; // Source Array
const Register start_offset = c_rarg1; // start offset
const Register end_offset = c_rarg2; // end offset
const Register dest = c_rarg3; // destination array
+ const Register isMIME = rbx;
#ifndef _WIN64
const Register dp = c_rarg4; // Position for writing to dest array
const Register isURL = c_rarg5; // Base64 or URL character set
+ __ movl(isMIME, Address(rbp, 2 * wordSize));
#else
const Address dp_mem(rbp, 6 * wordSize); // dp is passed on the stack on Win64
const Address isURL_mem(rbp, 7 * wordSize);
@@ -5469,219 +6094,404 @@ address generate_avx_ghash_processBlocks() {
const Register dp = r12;
__ movl(dp, dp_mem);
__ movl(isURL, isURL_mem);
+ __ movl(isMIME, Address(rbp, 8 * wordSize));
#endif
+ const XMMRegister lookup_lo = xmm5;
+ const XMMRegister lookup_hi = xmm6;
+ const XMMRegister errorvec = xmm7;
+ const XMMRegister pack16_op = xmm9;
+ const XMMRegister pack32_op = xmm8;
+ const XMMRegister input0 = xmm3;
+ const XMMRegister input1 = xmm20;
+ const XMMRegister input2 = xmm21;
+ const XMMRegister input3 = xmm19;
+ const XMMRegister join01 = xmm12;
+ const XMMRegister join12 = xmm11;
+ const XMMRegister join23 = xmm10;
+ const XMMRegister translated0 = xmm2;
+ const XMMRegister translated1 = xmm1;
+ const XMMRegister translated2 = xmm0;
+ const XMMRegister translated3 = xmm4;
+
+ const XMMRegister merged0 = xmm2;
+ const XMMRegister merged1 = xmm1;
+ const XMMRegister merged2 = xmm0;
+ const XMMRegister merged3 = xmm4;
+ const XMMRegister merge_ab_bc0 = xmm2;
+ const XMMRegister merge_ab_bc1 = xmm1;
+ const XMMRegister merge_ab_bc2 = xmm0;
+ const XMMRegister merge_ab_bc3 = xmm4;
+
+ const XMMRegister pack24bits = xmm4;
+
const Register length = r14;
- Label L_process80, L_process32, L_process3,
L_exit, L_processdata;
+ const Register output_size = r13;
+ const Register output_mask = r15;
+ const KRegister input_mask = k1;
+
+ const XMMRegister input_initial_valid_b64 = xmm0;
+ const XMMRegister tmp = xmm10;
+ const XMMRegister mask = xmm0;
+ const XMMRegister invalid_b64 = xmm1;
+
+ Label L_process256, L_process64, L_process64Loop, L_exit, L_processdata, L_loadURL;
+ Label L_continue, L_finalBit, L_padding, L_donePadding, L_bruteForce;
+ Label L_forceLoop, L_bottomLoop, L_checkMIME, L_exit_no_vzero;
// calculate length from offsets
__ movl(length, end_offset);
__ subl(length, start_offset);
- __ cmpl(length, 0);
- __ jcc(Assembler::lessEqual, L_exit);
+ __ push(dest); // Save for return value calc
+
+ // If AVX512 VBMI is not supported, only the non-AVX code below is generated
+ if (VM_Version::supports_avx512_vbmi() &&
+ VM_Version::supports_avx512bw()) {
+ __ cmpl(length, 128); // 128 bytes is the break-even point for AVX-512
+ __ jcc(Assembler::lessEqual, L_bruteForce);
+
+ __ cmpl(isMIME, 0);
+ __ jcc(Assembler::notEqual, L_bruteForce);
+
+ // Load lookup tables based on isURL
+ __ cmpl(isURL, 0);
+ __ jcc(Assembler::notZero, L_loadURL);
+
+ __ evmovdquq(lookup_lo, ExternalAddress(StubRoutines::x86::base64_vbmi_lookup_lo_addr()), Assembler::AVX_512bit, r13);
+ __ evmovdquq(lookup_hi, ExternalAddress(StubRoutines::x86::base64_vbmi_lookup_hi_addr()), Assembler::AVX_512bit, r13);
+
+ __ BIND(L_continue);
+
+ __ movl(r15, 0x01400140);
+ __ evpbroadcastd(pack16_op, r15, Assembler::AVX_512bit);
+
+ __ movl(r15, 0x00011000);
+ __ evpbroadcastd(pack32_op, r15, Assembler::AVX_512bit);
+
+ __ cmpl(length, 0xff);
+ __ jcc(Assembler::lessEqual, L_process64);
+
+ // load masks required for decoding data
+ __ BIND(L_processdata);
+ __ evmovdquq(join01, ExternalAddress(StubRoutines::x86::base64_vbmi_join_0_1_addr()), Assembler::AVX_512bit, r13);
+ __ evmovdquq(join12, ExternalAddress(StubRoutines::x86::base64_vbmi_join_1_2_addr()), Assembler::AVX_512bit, r13);
+ __ evmovdquq(join23, ExternalAddress(StubRoutines::x86::base64_vbmi_join_2_3_addr()), Assembler::AVX_512bit, r13);
+
+ __ align32();
+ __ BIND(L_process256);
+ // Grab input data
+ __ evmovdquq(input0, Address(source, start_offset, Address::times_1, 0x00), Assembler::AVX_512bit);
+ __ evmovdquq(input1, Address(source, start_offset, Address::times_1, 0x40), Assembler::AVX_512bit);
+ __ evmovdquq(input2, Address(source, start_offset, Address::times_1, 0x80), Assembler::AVX_512bit);
+ __ evmovdquq(input3, Address(source, start_offset, Address::times_1, 0xc0), Assembler::AVX_512bit);
+
+ // Copy the low part of the lookup table into the destination of the permutation
+ __ evmovdquq(translated0, lookup_lo, Assembler::AVX_512bit);
+ __ evmovdquq(translated1, lookup_lo, Assembler::AVX_512bit);
+ __ evmovdquq(translated2, lookup_lo, Assembler::AVX_512bit);
+ __ evmovdquq(translated3, lookup_lo, Assembler::AVX_512bit);
+
+ // Translate the base64 input into "decoded" bytes
+ __ evpermt2b(translated0, input0, lookup_hi, Assembler::AVX_512bit);
+ __ evpermt2b(translated1, input1, lookup_hi, Assembler::AVX_512bit);
+ __ evpermt2b(translated2, input2, lookup_hi, Assembler::AVX_512bit);
+ __ evpermt2b(translated3, input3, lookup_hi, Assembler::AVX_512bit);
+
+ // OR all of the translations together to check for errors (high-order bit of byte set)
+ __ vpternlogd(input0, 0xfe, input1, input2, Assembler::AVX_512bit);
+
+ __ vpternlogd(input3, 0xfe, translated0, translated1, Assembler::AVX_512bit);
+ __ vpternlogd(input0, 0xfe, translated2, translated3, Assembler::AVX_512bit);
+
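+ // (Editor's note: 0xfe is the ternary-logic truth table for A | B | C:
+ // for input bits a, b, c the result bit is (0xfe >> (a<<2 | b<<1 | c)) & 1,
+ // which is 0 only when a = b = c = 0. Each vpternlogd above therefore ORs
+ // three vectors in one instruction; any byte with bit 7 set flags an
+ // invalid input character, since the tables decode those to 0x80.)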
__ vpor(errorvec, input3, input0, Assembler::AVX_512bit);
+
+ // Check if there was an error - if so, try 64-byte chunks
+ __ evpmovb2m(k3, errorvec, Assembler::AVX_512bit);
+ __ kortestql(k3, k3);
+ __ jcc(Assembler::notZero, L_process64);
+
+ // The merging and shuffling happen here
+ // Each dword now holds four 6-bit values: [00dddddd | 00cccccc | 00bbbbbb | 00aaaaaa]
+ // We multiply [00cccccc] by 2^6 and add [00dddddd] to get [0000cccc | ccdddddd]
+ // The pack16_op is a vector of 0x01400140, so multiply D by 1 and C by 0x40
+ __ vpmaddubsw(merge_ab_bc0, translated0, pack16_op, Assembler::AVX_512bit);
+ __ vpmaddubsw(merge_ab_bc1, translated1, pack16_op, Assembler::AVX_512bit);
+ __ vpmaddubsw(merge_ab_bc2, translated2, pack16_op, Assembler::AVX_512bit);
+ __ vpmaddubsw(merge_ab_bc3, translated3, pack16_op, Assembler::AVX_512bit);
+
+ // Now do the same with packed 16-bit values.
+ // We start with [0000cccc | ccdddddd | 0000aaaa | aabbbbbb]
+ // pack32_op is 0x00011000 (2^12, 1), so this multiplies [0000aaaa | aabbbbbb] by 2^12
+ // and adds [0000cccc | ccdddddd] to yield [00000000 | aaaaaabb | bbbbcccc | ccdddddd]
+ __ vpmaddwd(merged0, merge_ab_bc0, pack32_op, Assembler::AVX_512bit);
+ __ vpmaddwd(merged1, merge_ab_bc1, pack32_op, Assembler::AVX_512bit);
+ __ vpmaddwd(merged2, merge_ab_bc2, pack32_op, Assembler::AVX_512bit);
+ __ vpmaddwd(merged3, merge_ab_bc3, pack32_op, Assembler::AVX_512bit);
+
+ // The join vectors specify which byte from which vector goes into the outputs
+ // One of every 4 bytes in the extended vector is zero, so we pack them into their
+ // final positions in the register for storing (256 bytes in, 192 bytes out)
+ __ evpermt2b(merged0, join01, merged1, Assembler::AVX_512bit);
+ __ evpermt2b(merged1, join12, merged2, Assembler::AVX_512bit);
+ __ evpermt2b(merged2, join23, merged3, Assembler::AVX_512bit);
+
+ // Store result
+ __ evmovdquq(Address(dest, dp, Address::times_1, 0x00), merged0, Assembler::AVX_512bit);
+ __ evmovdquq(Address(dest, dp, Address::times_1, 0x40), merged1, Assembler::AVX_512bit);
+ __ evmovdquq(Address(dest, dp, Address::times_1, 0x80), merged2, Assembler::AVX_512bit);
+
+ __ addptr(source, 0x100);
+ __ addptr(dest, 0xc0);
+ __ subl(length, 0x100);
+ __ cmpl(length, 64 * 4);
+ __ jcc(Assembler::greaterEqual, L_process256);
+
+ // At this point, we've decoded 64 * 4 * n bytes.
+ // The remaining length will be <= 64 * 4 - 1.
+ // UNLESS there was an error decoding the first 256-byte chunk. In this
+ // case, the length will be arbitrarily long.
+ //
+ // Note that this will be the path for MIME-encoded strings.
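+ // (Editor's aside, not part of the original patch: a scalar model of the
+ // two multiplies above; the helper name is illustrative only.
+ //
+ //   uint32_t pack_quantum(uint8_t a, uint8_t b, uint8_t c, uint8_t d) {
+ //     uint16_t ab = uint16_t(a * 0x40 + b); // vpmaddubsw with 0x01400140
+ //     uint16_t cd = uint16_t(c * 0x40 + d);
+ //     return uint32_t(ab) * 0x1000 + cd;    // vpmaddwd with 0x00011000
+ //   }
+ //
+ // For example, "TWFu" decodes to the sextets {19, 22, 5, 46}, and
+ // pack_quantum(19, 22, 5, 46) == 0x4d616e, the bytes of "Man".)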
+
+ __ BIND(L_process64);
+
+ __ evmovdquq(pack24bits, ExternalAddress(StubRoutines::x86::base64_vbmi_pack_vec_addr()), Assembler::AVX_512bit, r13);
+
+ __ cmpl(length, 63);
+ __ jcc(Assembler::lessEqual, L_finalBit);
+
+ __ align32();
+ __ BIND(L_process64Loop);
+
+ // Handle first 64-byte block
+
+ __ evmovdquq(input0, Address(source, start_offset), Assembler::AVX_512bit);
+ __ evmovdquq(translated0, lookup_lo, Assembler::AVX_512bit);
+ __ evpermt2b(translated0, input0, lookup_hi, Assembler::AVX_512bit);
+
+ __ vpor(errorvec, translated0, input0, Assembler::AVX_512bit);
+
+ // Check for error and bail out before updating dest
+ __ evpmovb2m(k3, errorvec, Assembler::AVX_512bit);
+ __ kortestql(k3, k3);
+ __ jcc(Assembler::notZero, L_exit);
+
+ // Pack output register, selecting correct byte ordering
+ __ vpmaddubsw(merge_ab_bc0, translated0, pack16_op, Assembler::AVX_512bit);
+ __ vpmaddwd(merged0, merge_ab_bc0, pack32_op, Assembler::AVX_512bit);
+ __ vpermb(merged0, pack24bits, merged0, Assembler::AVX_512bit);
+
+ __ evmovdquq(Address(dest, dp), merged0, Assembler::AVX_512bit);
+
+ __ subl(length, 64);
+ __ addptr(source, 64);
+ __ addptr(dest, 48);
+
+ __ cmpl(length, 64);
+ __ jcc(Assembler::greaterEqual, L_process64Loop);
+
+ __ cmpl(length, 0);
+ __ jcc(Assembler::lessEqual, L_exit);
- __ lea(r11, ExternalAddress(StubRoutines::x86::base64_charset_addr()));
- // check if base64 charset(isURL=0) or base64 url charset(isURL=1) needs to be loaded
- __ cmpl(isURL, 0);
- __ jcc(Assembler::equal, L_processdata);
- __ lea(r11, ExternalAddress(StubRoutines::x86::base64url_charset_addr()));
+ __ BIND(L_finalBit);
+ // Now have 1 to 63 bytes left to decode
+
+ // We could leave the final fragment to Java, but Java would then call back
+ // into this routine for every remaining 4-byte chunk of input, so handle
+ // the rest here.
+ __ movq(rax, -1);
+ __ bzhiq(rax, rax, length); // Input mask in rax
+
+ __ movl(output_size, length);
+ __ shrl(output_size, 2); // Find (len / 4) * 3 (output length)
+ __ lea(output_size, Address(output_size, output_size, Address::times_2, 0));
+ // output_size in r13
+
+ // Strip pad characters, if any, and adjust length and mask
+ __ cmpb(Address(source, length, Address::times_1, -1), '=');
+ __ jcc(Assembler::equal, L_padding);
+
+ __ BIND(L_donePadding);
+
+ // Build the output mask: the low output_size bits are set, i.e.
+ // all 1s shifted right by (64 - output_size).
+ __ kmovql(input_mask, rax);
+ __ movq(output_mask, -1);
+ __ bzhiq(output_mask, output_mask, output_size);
+
+ // Load initial input with all valid base64 characters. Will be used
+ // in merging source bytes to avoid masking when determining if an error occurred.
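+ // (Editor's note: 0x61 is 'a', which the lookup tables decode to 26, a
+ // value without the 0x80 error bit. Lanes that the masked load below
+ // skips therefore always look like valid input, so the error check needs
+ // no extra masking for the partial final block.)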
+ __ movl(rax, 0x61616161);
+ __ evpbroadcastd(input_initial_valid_b64, rax, Assembler::AVX_512bit);
+
+ // A register containing all invalid base64 decoded values
+ __ movl(rax, 0x80808080);
+ __ evpbroadcastd(invalid_b64, rax, Assembler::AVX_512bit);
+
+ // input_mask is in k1
+ // output_size is in r13
+ // output_mask is in r15
+ // zmm0 - input bytes merged over 0x61616161 (input_initial_valid_b64)
+ // zmm1 - 0x80808080 (invalid_b64)
+ // zmm4 - pack vector (pack24bits)
+ // zmm5 - lookup_lo
+ // zmm6 - lookup_hi
+ // zmm7 - errorvec
+ // zmm8 - 0x00011000 (pack32_op)
+ // zmm9 - 0x01400140 (pack16_op)
+ // zmm10 - scratch (tmp)
+
+ // Load only the bytes from source, merging into our "fully-valid" register
+ __ evmovdqub(input_initial_valid_b64, input_mask, Address(source, start_offset, Address::times_1, 0x0), true, Assembler::AVX_512bit);
+
+ // Decode all bytes within our merged input
+ __ evmovdquq(tmp, lookup_lo, Assembler::AVX_512bit);
+ __ evpermt2b(tmp, input_initial_valid_b64, lookup_hi, Assembler::AVX_512bit);
+ __ vporq(mask, tmp, input_initial_valid_b64, Assembler::AVX_512bit);
+
+ // Check for error. Compare (decoded | initial) to all invalid.
+ // If any bytes have their high-order bit set, then we have an error.
+ __ evptestmb(k2, mask, invalid_b64, Assembler::AVX_512bit);
+ __ kortestql(k2, k2);
+
+ // If we have an error, use the brute force loop to decode what we can (4-byte chunks).
+ __ jcc(Assembler::notZero, L_bruteForce);
+
+ // Shuffle output bytes
+ __ vpmaddubsw(tmp, tmp, pack16_op, Assembler::AVX_512bit);
+ __ vpmaddwd(tmp, tmp, pack32_op, Assembler::AVX_512bit);
+
+ __ vpermb(tmp, pack24bits, tmp, Assembler::AVX_512bit);
+ __ kmovql(k1, output_mask);
+ __ evmovdqub(Address(dest, dp), k1, tmp, true, Assembler::AVX_512bit);
+
+ __ addptr(dest, output_size);
-
- // load masks required for encoding data
- __ BIND(L_processdata);
- __ movdqu(xmm16, ExternalAddress(StubRoutines::x86::base64_gather_mask_addr()));
- // Set 64 bits of K register.
- __ evpcmpeqb(k3, xmm16, xmm16, Assembler::AVX_512bit);
- __ evmovdquq(xmm12, ExternalAddress(StubRoutines::x86::base64_bswap_mask_addr()), Assembler::AVX_256bit, r13);
- __ evmovdquq(xmm13, ExternalAddress(StubRoutines::x86::base64_right_shift_mask_addr()), Assembler::AVX_512bit, r13);
- __ evmovdquq(xmm14, ExternalAddress(StubRoutines::x86::base64_left_shift_mask_addr()), Assembler::AVX_512bit, r13);
- __ evmovdquq(xmm15, ExternalAddress(StubRoutines::x86::base64_and_mask_addr()), Assembler::AVX_512bit, r13);
-
- // Vector Base64 implementation, producing 96 bytes of encoded data
- __ BIND(L_process80);
- __ cmpl(length, 80);
- __ jcc(Assembler::below, L_process32);
- __ evmovdquq(xmm0, Address(source, start_offset, Address::times_1, 0), Assembler::AVX_256bit);
- __ evmovdquq(xmm1, Address(source, start_offset, Address::times_1, 24), Assembler::AVX_256bit);
- __ evmovdquq(xmm2, Address(source, start_offset, Address::times_1, 48), Assembler::AVX_256bit);
-
- //permute the input data in such a manner that we have continuity of the source
- __ vpermq(xmm3, xmm0, 148, Assembler::AVX_256bit);
- __ vpermq(xmm4, xmm1, 148, Assembler::AVX_256bit);
- __ vpermq(xmm5, xmm2, 148, Assembler::AVX_256bit);
-
- //shuffle input and group 3 bytes of data and to it add 0 as the 4th byte.
- //we can deal with 12 bytes at a time in a 128 bit register
- __ vpshufb(xmm3, xmm3, xmm12, Assembler::AVX_256bit);
- __ vpshufb(xmm4, xmm4, xmm12, Assembler::AVX_256bit);
- __ vpshufb(xmm5, xmm5, xmm12, Assembler::AVX_256bit);
-
- //convert byte to word.
Each 128 bit register will have 6 bytes for processing - __ vpmovzxbw(xmm3, xmm3, Assembler::AVX_512bit); - __ vpmovzxbw(xmm4, xmm4, Assembler::AVX_512bit); - __ vpmovzxbw(xmm5, xmm5, Assembler::AVX_512bit); - - // Extract bits in the following pattern 6, 4+2, 2+4, 6 to convert 3, 8 bit numbers to 4, 6 bit numbers - __ evpsrlvw(xmm0, xmm3, xmm13, Assembler::AVX_512bit); - __ evpsrlvw(xmm1, xmm4, xmm13, Assembler::AVX_512bit); - __ evpsrlvw(xmm2, xmm5, xmm13, Assembler::AVX_512bit); - - __ evpsllvw(xmm3, xmm3, xmm14, Assembler::AVX_512bit); - __ evpsllvw(xmm4, xmm4, xmm14, Assembler::AVX_512bit); - __ evpsllvw(xmm5, xmm5, xmm14, Assembler::AVX_512bit); - - __ vpsrlq(xmm0, xmm0, 8, Assembler::AVX_512bit); - __ vpsrlq(xmm1, xmm1, 8, Assembler::AVX_512bit); - __ vpsrlq(xmm2, xmm2, 8, Assembler::AVX_512bit); - - __ vpsllq(xmm3, xmm3, 8, Assembler::AVX_512bit); - __ vpsllq(xmm4, xmm4, 8, Assembler::AVX_512bit); - __ vpsllq(xmm5, xmm5, 8, Assembler::AVX_512bit); - - __ vpandq(xmm3, xmm3, xmm15, Assembler::AVX_512bit); - __ vpandq(xmm4, xmm4, xmm15, Assembler::AVX_512bit); - __ vpandq(xmm5, xmm5, xmm15, Assembler::AVX_512bit); - - // Get the final 4*6 bits base64 encoding - __ vporq(xmm3, xmm3, xmm0, Assembler::AVX_512bit); - __ vporq(xmm4, xmm4, xmm1, Assembler::AVX_512bit); - __ vporq(xmm5, xmm5, xmm2, Assembler::AVX_512bit); - - // Shift - __ vpsrlq(xmm3, xmm3, 8, Assembler::AVX_512bit); - __ vpsrlq(xmm4, xmm4, 8, Assembler::AVX_512bit); - __ vpsrlq(xmm5, xmm5, 8, Assembler::AVX_512bit); - - // look up 6 bits in the base64 character set to fetch the encoding - // we are converting word to dword as gather instructions need dword indices for looking up encoding - __ vextracti64x4(xmm6, xmm3, 0); - __ vpmovzxwd(xmm0, xmm6, Assembler::AVX_512bit); - __ vextracti64x4(xmm6, xmm3, 1); - __ vpmovzxwd(xmm1, xmm6, Assembler::AVX_512bit); - - __ vextracti64x4(xmm6, xmm4, 0); - __ vpmovzxwd(xmm2, xmm6, Assembler::AVX_512bit); - __ vextracti64x4(xmm6, xmm4, 1); - __ vpmovzxwd(xmm3, xmm6, Assembler::AVX_512bit); - - __ vextracti64x4(xmm4, xmm5, 0); - __ vpmovzxwd(xmm6, xmm4, Assembler::AVX_512bit); - - __ vextracti64x4(xmm4, xmm5, 1); - __ vpmovzxwd(xmm7, xmm4, Assembler::AVX_512bit); - - __ kmovql(k2, k3); - __ evpgatherdd(xmm4, k2, Address(r11, xmm0, Address::times_4, 0), Assembler::AVX_512bit); - __ kmovql(k2, k3); - __ evpgatherdd(xmm5, k2, Address(r11, xmm1, Address::times_4, 0), Assembler::AVX_512bit); - __ kmovql(k2, k3); - __ evpgatherdd(xmm8, k2, Address(r11, xmm2, Address::times_4, 0), Assembler::AVX_512bit); - __ kmovql(k2, k3); - __ evpgatherdd(xmm9, k2, Address(r11, xmm3, Address::times_4, 0), Assembler::AVX_512bit); - __ kmovql(k2, k3); - __ evpgatherdd(xmm10, k2, Address(r11, xmm6, Address::times_4, 0), Assembler::AVX_512bit); - __ kmovql(k2, k3); - __ evpgatherdd(xmm11, k2, Address(r11, xmm7, Address::times_4, 0), Assembler::AVX_512bit); - - //Down convert dword to byte. 
Final output is 16*6 = 96 bytes long
- __ evpmovdb(Address(dest, dp, Address::times_1, 0), xmm4, Assembler::AVX_512bit);
- __ evpmovdb(Address(dest, dp, Address::times_1, 16), xmm5, Assembler::AVX_512bit);
- __ evpmovdb(Address(dest, dp, Address::times_1, 32), xmm8, Assembler::AVX_512bit);
- __ evpmovdb(Address(dest, dp, Address::times_1, 48), xmm9, Assembler::AVX_512bit);
- __ evpmovdb(Address(dest, dp, Address::times_1, 64), xmm10, Assembler::AVX_512bit);
- __ evpmovdb(Address(dest, dp, Address::times_1, 80), xmm11, Assembler::AVX_512bit);
-
- __ addq(dest, 96);
- __ addq(source, 72);
- __ subq(length, 72);
- __ jmp(L_process80);
-
- // Vector Base64 implementation generating 32 bytes of encoded data
- __ BIND(L_process32);
- __ cmpl(length, 32);
- __ jcc(Assembler::below, L_process3);
- __ evmovdquq(xmm0, Address(source, start_offset), Assembler::AVX_256bit);
- __ vpermq(xmm0, xmm0, 148, Assembler::AVX_256bit);
- __ vpshufb(xmm6, xmm0, xmm12, Assembler::AVX_256bit);
- __ vpmovzxbw(xmm6, xmm6, Assembler::AVX_512bit);
- __ evpsrlvw(xmm2, xmm6, xmm13, Assembler::AVX_512bit);
- __ evpsllvw(xmm3, xmm6, xmm14, Assembler::AVX_512bit);
-
- __ vpsrlq(xmm2, xmm2, 8, Assembler::AVX_512bit);
- __ vpsllq(xmm3, xmm3, 8, Assembler::AVX_512bit);
- __ vpandq(xmm3, xmm3, xmm15, Assembler::AVX_512bit);
- __ vporq(xmm1, xmm2, xmm3, Assembler::AVX_512bit);
- __ vpsrlq(xmm1, xmm1, 8, Assembler::AVX_512bit);
- __ vextracti64x4(xmm9, xmm1, 0);
- __ vpmovzxwd(xmm6, xmm9, Assembler::AVX_512bit);
- __ vextracti64x4(xmm9, xmm1, 1);
- __ vpmovzxwd(xmm5, xmm9, Assembler::AVX_512bit);
- __ kmovql(k2, k3);
- __ evpgatherdd(xmm8, k2, Address(r11, xmm6, Address::times_4, 0), Assembler::AVX_512bit);
- __ kmovql(k2, k3);
- __ evpgatherdd(xmm10, k2, Address(r11, xmm5, Address::times_4, 0), Assembler::AVX_512bit);
- __ evpmovdb(Address(dest, dp, Address::times_1, 0), xmm8, Assembler::AVX_512bit);
- __ evpmovdb(Address(dest, dp, Address::times_1, 16), xmm10, Assembler::AVX_512bit);
- __ subq(length, 24);
- __ addq(dest, 32);
- __ addq(source, 24);
- __ jmp(L_process32);
-
- // Scalar data processing takes 3 bytes at a time and produces 4 bytes of encoded data
- /* This code corresponds to the scalar version of the following snippet in Base64.java
- ** int bits = (src[sp0++] & 0xff) << 16 |(src[sp0++] & 0xff) << 8 |(src[sp0++] & 0xff);
- ** dst[dp0++] = (byte)base64[(bits >>> 18) & 0x3f];
- ** dst[dp0++] = (byte)base64[(bits >>> 12) & 0x3f];
- ** dst[dp0++] = (byte)base64[(bits >>> 6) & 0x3f];
- ** dst[dp0++] = (byte)base64[bits & 0x3f];*/
- __ BIND(L_process3);
- __ cmpl(length, 3);
- __ jcc(Assembler::below, L_exit);
- // Read 1 byte at a time
- __ movzbl(rax, Address(source, start_offset));
- __ shll(rax, 0x10);
- __ movl(r15, rax);
- __ movzbl(rax, Address(source, start_offset, Address::times_1, 1));
- __ shll(rax, 0x8);
- __ movzwl(rax, rax);
- __ orl(r15, rax);
- __ movzbl(rax, Address(source, start_offset, Address::times_1, 2));
- __ orl(rax, r15);
- // Save 3 bytes read in r15
- __ movl(r15, rax);
- __ shrl(rax, 0x12);
- __ andl(rax, 0x3f);
- // rax contains the index, r11 contains base64 lookup table
- __ movb(rax, Address(r11, rax, Address::times_4));
- // Write the encoded byte to destination
- __ movb(Address(dest, dp, Address::times_1, 0), rax);
- __ movl(rax, r15);
- __ shrl(rax, 0xc);
- __ andl(rax, 0x3f);
- __ movb(rax, Address(r11, rax, Address::times_4));
- __ movb(Address(dest, dp, Address::times_1, 1), rax);
- __ movl(rax, r15);
- __ shrl(rax, 0x6);
- __ andl(rax, 0x3f);
- __ movb(rax, Address(r11, rax,
Address::times_4)); - __ movb(Address(dest, dp, Address::times_1, 2), rax); - __ movl(rax, r15); - __ andl(rax, 0x3f); - __ movb(rax, Address(r11, rax, Address::times_4)); - __ movb(Address(dest, dp, Address::times_1, 3), rax); - __ subl(length, 3); - __ addq(dest, 4); - __ addq(source, 3); - __ jmp(L_process3); - __ BIND(L_exit); + __ BIND(L_exit); + __ vzeroupper(); + __ pop(rax); // Get original dest value + __ subptr(dest, rax); // Number of bytes converted + __ movptr(rax, dest); + __ pop(rbx); + __ pop(r15); + __ pop(r14); + __ pop(r13); + __ pop(r12); + __ leave(); + __ ret(0); + + __ BIND(L_loadURL); + __ evmovdquq(lookup_lo, ExternalAddress(StubRoutines::x86::base64_vbmi_lookup_lo_url_addr()), Assembler::AVX_512bit, r13); + __ evmovdquq(lookup_hi, ExternalAddress(StubRoutines::x86::base64_vbmi_lookup_hi_url_addr()), Assembler::AVX_512bit, r13); + __ jmp(L_continue); + + __ BIND(L_padding); + __ decrementq(output_size, 1); + __ shrq(rax, 1); + + __ cmpb(Address(source, length, Address::times_1, -2), '='); + __ jcc(Assembler::notEqual, L_donePadding); + + __ decrementq(output_size, 1); + __ shrq(rax, 1); + __ jmp(L_donePadding); + + __ align32(); + __ BIND(L_bruteForce); + } // End of if(avx512_vbmi) + + // Use non-AVX code to decode 4-byte chunks into 3 bytes of output + + // Register state (Linux): + // r12-15 - saved on stack + // rdi - src + // rsi - sp + // rdx - sl + // rcx - dst + // r8 - dp + // r9 - isURL + + // Register state (Windows): + // r12-15 - saved on stack + // rcx - src + // rdx - sp + // r8 - sl + // r9 - dst + // r12 - dp + // r10 - isURL + + // Registers (common): + // length (r14) - bytes in src + + const Register decode_table = r11; + const Register out_byte_count = rbx; + const Register byte1 = r13; + const Register byte2 = r15; + const Register byte3 = WINDOWS_ONLY(r8) NOT_WINDOWS(rdx); + const Register byte4 = WINDOWS_ONLY(r10) NOT_WINDOWS(r9); + + __ shrl(length, 2); // Multiple of 4 bytes only - length is # 4-byte chunks + __ cmpl(length, 0); + __ jcc(Assembler::lessEqual, L_exit_no_vzero); + + __ shll(isURL, 8); // index into decode table based on isURL + __ lea(decode_table, ExternalAddress(StubRoutines::x86::base64_decoding_table_addr())); + __ addptr(decode_table, isURL); + + __ jmp(L_bottomLoop); + + __ align32(); + __ BIND(L_forceLoop); + __ shll(byte1, 18); + __ shll(byte2, 12); + __ shll(byte3, 6); + __ orl(byte1, byte2); + __ orl(byte1, byte3); + __ orl(byte1, byte4); + + __ addptr(source, 4); + + __ movb(Address(dest, dp, Address::times_1, 2), byte1); + __ shrl(byte1, 8); + __ movb(Address(dest, dp, Address::times_1, 1), byte1); + __ shrl(byte1, 8); + __ movb(Address(dest, dp, Address::times_1, 0), byte1); + + __ addptr(dest, 3); + __ decrementl(length, 1); + __ jcc(Assembler::zero, L_exit_no_vzero); + + __ BIND(L_bottomLoop); + __ load_unsigned_byte(byte1, Address(source, start_offset, Address::times_1, 0x00)); + __ load_unsigned_byte(byte2, Address(source, start_offset, Address::times_1, 0x01)); + __ load_signed_byte(byte1, Address(decode_table, byte1)); + __ load_signed_byte(byte2, Address(decode_table, byte2)); + __ load_unsigned_byte(byte3, Address(source, start_offset, Address::times_1, 0x02)); + __ load_unsigned_byte(byte4, Address(source, start_offset, Address::times_1, 0x03)); + __ load_signed_byte(byte3, Address(decode_table, byte3)); + __ load_signed_byte(byte4, Address(decode_table, byte4)); + + __ mov(rax, byte1); + __ orl(rax, byte2); + __ orl(rax, byte3); + __ orl(rax, byte4); + __ jcc(Assembler::positive, L_forceLoop); + + __ 
BIND(L_exit_no_vzero); + __ pop(rax); // Get original dest value + __ subptr(dest, rax); // Number of bytes converted + __ movptr(rax, dest); + __ pop(rbx); __ pop(r15); __ pop(r14); __ pop(r13); __ pop(r12); __ leave(); __ ret(0); + return start; } + /** * Arguments: * @@ -6902,13 +7712,20 @@ address generate_avx_ghash_processBlocks() { StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptVectorAESCrypt(); StubRoutines::_electronicCodeBook_encryptAESCrypt = generate_electronicCodeBook_encryptAESCrypt(); StubRoutines::_electronicCodeBook_decryptAESCrypt = generate_electronicCodeBook_decryptAESCrypt(); + StubRoutines::x86::_counter_mask_addr = counter_mask_addr(); + StubRoutines::x86::_ghash_poly512_addr = ghash_polynomial512_addr(); + StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask(); + StubRoutines::_galoisCounterMode_AESCrypt = generate_galoisCounterMode_AESCrypt(); } else { StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); } } + if (UseAESCTRIntrinsics) { if (VM_Version::supports_avx512_vaes() && VM_Version::supports_avx512bw() && VM_Version::supports_avx512vl()) { - StubRoutines::x86::_counter_mask_addr = counter_mask_addr(); + if (StubRoutines::x86::_counter_mask_addr == NULL) { + StubRoutines::x86::_counter_mask_addr = counter_mask_addr(); + } StubRoutines::_counterMode_AESCrypt = generate_counterMode_VectorAESCrypt(); } else { StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask(); @@ -6948,7 +7765,9 @@ address generate_avx_ghash_processBlocks() { // Generate GHASH intrinsics code if (UseGHASHIntrinsics) { - StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask(); + if (StubRoutines::x86::_ghash_long_swap_mask_addr == NULL) { + StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask(); + } StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask(); if (VM_Version::supports_avx()) { StubRoutines::x86::_ghash_shuffmask_addr = ghash_shufflemask_addr(); @@ -6959,15 +7778,30 @@ address generate_avx_ghash_processBlocks() { } } + if (UseBASE64Intrinsics) { - StubRoutines::x86::_and_mask = base64_and_mask_addr(); - StubRoutines::x86::_bswap_mask = base64_bswap_mask_addr(); - StubRoutines::x86::_base64_charset = base64_charset_addr(); - StubRoutines::x86::_url_charset = base64url_charset_addr(); - StubRoutines::x86::_gather_mask = base64_gather_mask_addr(); - StubRoutines::x86::_left_shift_mask = base64_left_shift_mask_addr(); - StubRoutines::x86::_right_shift_mask = base64_right_shift_mask_addr(); + if(VM_Version::supports_avx2() && + VM_Version::supports_avx512bw() && + VM_Version::supports_avx512vl()) { + StubRoutines::x86::_avx2_shuffle_base64 = base64_avx2_shuffle_addr(); + StubRoutines::x86::_avx2_input_mask_base64 = base64_avx2_input_mask_addr(); + StubRoutines::x86::_avx2_lut_base64 = base64_avx2_lut_addr(); + } + StubRoutines::x86::_encoding_table_base64 = base64_encoding_table_addr(); + if (VM_Version::supports_avx512_vbmi()) { + StubRoutines::x86::_shuffle_base64 = base64_shuffle_addr(); + StubRoutines::x86::_lookup_lo_base64 = base64_vbmi_lookup_lo_addr(); + StubRoutines::x86::_lookup_hi_base64 = base64_vbmi_lookup_hi_addr(); + StubRoutines::x86::_lookup_lo_base64url = base64_vbmi_lookup_lo_url_addr(); + StubRoutines::x86::_lookup_hi_base64url = base64_vbmi_lookup_hi_url_addr(); + StubRoutines::x86::_pack_vec_base64 = base64_vbmi_pack_vec_addr(); + 
StubRoutines::x86::_join_0_1_base64 = base64_vbmi_join_0_1_addr(); + StubRoutines::x86::_join_1_2_base64 = base64_vbmi_join_1_2_addr(); + StubRoutines::x86::_join_2_3_base64 = base64_vbmi_join_2_3_addr(); + } + StubRoutines::x86::_decoding_table_base64 = base64_decoding_table_addr(); StubRoutines::_base64_encodeBlock = generate_base64_encodeBlock(); + StubRoutines::_base64_decodeBlock = generate_base64_decodeBlock(); } BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); @@ -7000,7 +7834,10 @@ address generate_avx_ghash_processBlocks() { // Get svml stub routine addresses void *libsvml = NULL; char ebuf[1024]; - libsvml = os::dll_load(JNI_LIB_PREFIX "svml" JNI_LIB_SUFFIX, ebuf, sizeof ebuf); + char dll_name[JVM_MAXPATHLEN]; + if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "svml")) { + libsvml = os::dll_load(dll_name, ebuf, sizeof ebuf); + } if (libsvml != NULL) { // SVML method naming convention // All the methods are named as __svml_op_ha_ diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.cpp b/src/hotspot/cpu/x86/stubRoutines_x86.cpp index 062c5032c33ecf16d09a194f18d0a5244637edda..9a4523cd06f124fd40f104b355daf56e3c94ec0e 100644 --- a/src/hotspot/cpu/x86/stubRoutines_x86.cpp +++ b/src/hotspot/cpu/x86/stubRoutines_x86.cpp @@ -65,14 +65,22 @@ address StubRoutines::x86::_k256_W_adr = NULL; address StubRoutines::x86::_k512_W_addr = NULL; address StubRoutines::x86::_pshuffle_byte_flip_mask_addr_sha512 = NULL; // Base64 masks -address StubRoutines::x86::_bswap_mask = NULL; -address StubRoutines::x86::_base64_charset = NULL; -address StubRoutines::x86::_gather_mask = NULL; -address StubRoutines::x86::_right_shift_mask = NULL; -address StubRoutines::x86::_left_shift_mask = NULL; -address StubRoutines::x86::_and_mask = NULL; -address StubRoutines::x86::_url_charset = NULL; +address StubRoutines::x86::_encoding_table_base64 = NULL; +address StubRoutines::x86::_shuffle_base64 = NULL; +address StubRoutines::x86::_avx2_shuffle_base64 = NULL; +address StubRoutines::x86::_avx2_input_mask_base64 = NULL; +address StubRoutines::x86::_avx2_lut_base64 = NULL; address StubRoutines::x86::_counter_mask_addr = NULL; +address StubRoutines::x86::_lookup_lo_base64 = NULL; +address StubRoutines::x86::_lookup_hi_base64 = NULL; +address StubRoutines::x86::_lookup_lo_base64url = NULL; +address StubRoutines::x86::_lookup_hi_base64url = NULL; +address StubRoutines::x86::_pack_vec_base64 = NULL; +address StubRoutines::x86::_join_0_1_base64 = NULL; +address StubRoutines::x86::_join_1_2_base64 = NULL; +address StubRoutines::x86::_join_2_3_base64 = NULL; +address StubRoutines::x86::_decoding_table_base64 = NULL; +address StubRoutines::x86::_ghash_poly512_addr = NULL; #endif address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL; diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.hpp b/src/hotspot/cpu/x86/stubRoutines_x86.hpp index 339a148b223aaf081681d6f543f53f6fc6839930..b93e50b6d51468f4d78b0da7b330777b9713fb2b 100644 --- a/src/hotspot/cpu/x86/stubRoutines_x86.hpp +++ b/src/hotspot/cpu/x86/stubRoutines_x86.hpp @@ -33,7 +33,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_ enum platform_dependent_constants { code_size1 = 20000 LP64_ONLY(+10000), // simply increase if too small (assembler will crash if too small) - code_size2 = 35300 LP64_ONLY(+25000) // simply increase if too small (assembler will crash if too small) + code_size2 = 35300 LP64_ONLY(+32000) // simply increase if too small (assembler will crash if too small) }; 
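// Two things are worth noting around the registration changes above. First,
// counter_mask_addr() and generate_ghash_long_swap_mask() are now reachable
// from both the new GCM path and the older CTR/GHASH paths, so the second
// caller is guarded. A sketch of the pattern, on the assumption that emitting
// the constant twice would only waste stub-buffer space rather than break
// correctness:
//
//   if (StubRoutines::x86::_counter_mask_addr == NULL) {
//     StubRoutines::x86::_counter_mask_addr = counter_mask_addr();  // emit once
//   }
//   // later users read the cached address:
//   ExternalAddress counter_mask(StubRoutines::x86::counter_mask_addr());
//
// Second, this is why code_size2 grows from +25000 to +32000 in the enum just
// above: the decode, GCM, and new lookup-table stubs all land in the same
// fixed-size buffer, and per the existing comment the assembler simply crashes
// rather than growing it.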
class x86 { @@ -184,13 +184,21 @@ class x86 { static address _pshuffle_byte_flip_mask_addr_sha512; static address _counter_mask_addr; // Masks for base64 - static address _base64_charset; - static address _bswap_mask; - static address _gather_mask; - static address _right_shift_mask; - static address _left_shift_mask; - static address _and_mask; - static address _url_charset; + static address _encoding_table_base64; + static address _shuffle_base64; + static address _avx2_shuffle_base64; + static address _avx2_input_mask_base64; + static address _avx2_lut_base64; + static address _lookup_lo_base64; + static address _lookup_hi_base64; + static address _lookup_lo_base64url; + static address _lookup_hi_base64url; + static address _pack_vec_base64; + static address _join_0_1_base64; + static address _join_1_2_base64; + static address _join_2_3_base64; + static address _decoding_table_base64; + static address _ghash_poly512_addr; #endif // byte flip mask for sha256 static address _pshuffle_byte_flip_mask_addr; @@ -247,6 +255,7 @@ class x86 { static address crc_by128_masks_avx512_addr() { return (address)_crc_by128_masks_avx512; } static address shuf_table_crc32_avx512_addr() { return (address)_shuf_table_crc32_avx512; } static address crc_table_avx512_addr() { return (address)_crc_table_avx512; } + static address ghash_polynomial512_addr() { return _ghash_poly512_addr; } #endif // _LP64 static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; } static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; } @@ -328,14 +337,21 @@ class x86 { static address k256_W_addr() { return _k256_W_adr; } static address k512_W_addr() { return _k512_W_addr; } static address pshuffle_byte_flip_mask_addr_sha512() { return _pshuffle_byte_flip_mask_addr_sha512; } - static address base64_charset_addr() { return _base64_charset; } - static address base64url_charset_addr() { return _url_charset; } - static address base64_bswap_mask_addr() { return _bswap_mask; } - static address base64_gather_mask_addr() { return _gather_mask; } - static address base64_right_shift_mask_addr() { return _right_shift_mask; } - static address base64_left_shift_mask_addr() { return _left_shift_mask; } - static address base64_and_mask_addr() { return _and_mask; } + static address base64_encoding_table_addr() { return _encoding_table_base64; } + static address base64_shuffle_addr() { return _shuffle_base64; } + static address base64_avx2_shuffle_addr() { return _avx2_shuffle_base64; } + static address base64_avx2_input_mask_addr() { return _avx2_input_mask_base64; } + static address base64_avx2_lut_addr() { return _avx2_lut_base64; } static address counter_mask_addr() { return _counter_mask_addr; } + static address base64_vbmi_lookup_lo_addr() { return _lookup_lo_base64; } + static address base64_vbmi_lookup_hi_addr() { return _lookup_hi_base64; } + static address base64_vbmi_lookup_lo_url_addr() { return _lookup_lo_base64url; } + static address base64_vbmi_lookup_hi_url_addr() { return _lookup_hi_base64url; } + static address base64_vbmi_pack_vec_addr() { return _pack_vec_base64; } + static address base64_vbmi_join_0_1_addr() { return _join_0_1_base64; } + static address base64_vbmi_join_1_2_addr() { return _join_1_2_base64; } + static address base64_vbmi_join_2_3_addr() { return _join_2_3_base64; } + static address base64_decoding_table_addr() { return _decoding_table_base64; } #endif static address pshuffle_byte_flip_mask_addr() { return _pshuffle_byte_flip_mask_addr; } static void 
generate_CRC32C_table(bool is_pclmulqdq_supported); diff --git a/src/hotspot/cpu/x86/templateTable_x86.cpp b/src/hotspot/cpu/x86/templateTable_x86.cpp index a82954580f05935695c9e9c45d6b3e7fdba4b3e0..d2eaa2427692d2043b73d85b1ff1e1b74715001b 100644 --- a/src/hotspot/cpu/x86/templateTable_x86.cpp +++ b/src/hotspot/cpu/x86/templateTable_x86.cpp @@ -4023,15 +4023,9 @@ void TemplateTable::_new() { // initialize object header only. __ bind(initialize_header); - if (UseBiasedLocking) { - __ pop(rcx); // get saved klass back in the register. - __ movptr(rbx, Address(rcx, Klass::prototype_header_offset())); - __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx); - } else { - __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), - (intptr_t)markWord::prototype().value()); // header - __ pop(rcx); // get saved klass back in the register. - } + __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()), + (intptr_t)markWord::prototype().value()); // header + __ pop(rcx); // get saved klass back in the register. #ifdef _LP64 __ xorl(rsi, rsi); // use zero reg to clear memory (shorter code) __ store_klass_gap(rax, rsi); // zero klass gap for compressed oops diff --git a/src/hotspot/cpu/x86/universalUpcallHandler_x86_64.cpp b/src/hotspot/cpu/x86/universalUpcallHandler_x86_64.cpp index bbfaa87bcb85bb922b7c151a5d1051beb60a93c3..c54b907f9b5c5f0bb3713532c66b4f27039bca9f 100644 --- a/src/hotspot/cpu/x86/universalUpcallHandler_x86_64.cpp +++ b/src/hotspot/cpu/x86/universalUpcallHandler_x86_64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -316,47 +316,6 @@ static void print_arg_moves(const GrowableArray& arg_moves, Method* ent } #endif -void save_java_frame_anchor(MacroAssembler* _masm, ByteSize store_offset, Register thread) { - __ block_comment("{ save_java_frame_anchor "); - // upcall->jfa._last_Java_fp = _thread->_anchor._last_Java_fp; - __ movptr(rscratch1, Address(thread, JavaThread::last_Java_fp_offset())); - __ movptr(Address(rsp, store_offset + JavaFrameAnchor::last_Java_fp_offset()), rscratch1); - - // upcall->jfa._last_Java_pc = _thread->_anchor._last_Java_pc; - __ movptr(rscratch1, Address(thread, JavaThread::last_Java_pc_offset())); - __ movptr(Address(rsp, store_offset + JavaFrameAnchor::last_Java_pc_offset()), rscratch1); - - // upcall->jfa._last_Java_sp = _thread->_anchor._last_Java_sp; - __ movptr(rscratch1, Address(thread, JavaThread::last_Java_sp_offset())); - __ movptr(Address(rsp, store_offset + JavaFrameAnchor::last_Java_sp_offset()), rscratch1); - __ block_comment("} save_java_frame_anchor "); -} - -void restore_java_frame_anchor(MacroAssembler* _masm, ByteSize load_offset, Register thread) { - __ block_comment("{ restore_java_frame_anchor "); - // thread->_last_Java_sp = NULL - __ movptr(Address(thread, JavaThread::last_Java_sp_offset()), NULL_WORD); - - // ThreadStateTransition::transition_from_java(_thread, _thread_in_vm); - // __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans); - __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native); - - //_thread->frame_anchor()->copy(&_anchor); -// _thread->_last_Java_fp = upcall->_last_Java_fp; -// _thread->_last_Java_pc = upcall->_last_Java_pc; -// _thread->_last_Java_sp = upcall->_last_Java_sp; - - __ 
movptr(rscratch1, Address(rsp, load_offset + JavaFrameAnchor::last_Java_fp_offset())); - __ movptr(Address(thread, JavaThread::last_Java_fp_offset()), rscratch1); - - __ movptr(rscratch1, Address(rsp, load_offset + JavaFrameAnchor::last_Java_pc_offset())); - __ movptr(Address(thread, JavaThread::last_Java_pc_offset()), rscratch1); - - __ movptr(rscratch1, Address(rsp, load_offset + JavaFrameAnchor::last_Java_sp_offset())); - __ movptr(Address(thread, JavaThread::last_Java_sp_offset()), rscratch1); - __ block_comment("} restore_java_frame_anchor "); -} - static void save_native_arguments(MacroAssembler* _masm, const CallRegs& conv, int arg_save_area_offset) { __ block_comment("{ save_native_args "); int store_offset = arg_save_area_offset; @@ -442,6 +401,60 @@ static int compute_arg_save_area_size(const CallRegs& conv) { return result_size; } +static int compute_res_save_area_size(const CallRegs& conv) { + int result_size = 0; + for (int i = 0; i < conv._rets_length; i++) { + VMReg reg = conv._ret_regs[i]; + if (reg->is_Register()) { + result_size += 8; + } else if (reg->is_XMMRegister()) { + // Java API doesn't support vector args + result_size += 16; + } else { + ShouldNotReachHere(); // unhandled type + } + } + return result_size; +} + +static void save_java_result(MacroAssembler* _masm, const CallRegs& conv, int res_save_area_offset) { + int offset = res_save_area_offset; + __ block_comment("{ save java result "); + for (int i = 0; i < conv._rets_length; i++) { + VMReg reg = conv._ret_regs[i]; + if (reg->is_Register()) { + __ movptr(Address(rsp, offset), reg->as_Register()); + offset += 8; + } else if (reg->is_XMMRegister()) { + // Java API doesn't support vector args + __ movdqu(Address(rsp, offset), reg->as_XMMRegister()); + offset += 16; + } else { + ShouldNotReachHere(); // unhandled type + } + } + __ block_comment("} save java result "); +} + +static void restore_java_result(MacroAssembler* _masm, const CallRegs& conv, int res_save_area_offset) { + int offset = res_save_area_offset; + __ block_comment("{ restore java result "); + for (int i = 0; i < conv._rets_length; i++) { + VMReg reg = conv._ret_regs[i]; + if (reg->is_Register()) { + __ movptr(reg->as_Register(), Address(rsp, offset)); + offset += 8; + } else if (reg->is_XMMRegister()) { + // Java API doesn't support vector args + __ movdqu(reg->as_XMMRegister(), Address(rsp, offset)); + offset += 16; + } else { + ShouldNotReachHere(); // unhandled type + } + } + __ block_comment("} restore java result "); +} + constexpr int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions static void preserve_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) { @@ -574,18 +587,16 @@ static void shuffle_arguments(MacroAssembler* _masm, const GrowableArray | | // |---------------------| = frame_bottom_offset = frame_size // | | - // | AuxiliarySaves | - // |---------------------| = auxiliary_saves_offset + // | FrameData | + // |---------------------| = frame_data_offset // | | // | reg_save_area | // |---------------------| = reg_save_are_offset @@ -636,6 +645,9 @@ address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiv // | arg_save_area | // |---------------------| = arg_save_are_offset // | | + // | res_save_area | + // |---------------------| = res_save_are_offset + // | | // | deopt_spill | // |---------------------| = deopt_spill_offset // | | @@ -646,7 +658,6 @@ address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiv 
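// How the three helpers added above are intended to compose inside
// generate_optimized_upcall_stub() (the hunk that follows); a sketch only,
// with res_save_area_offset standing in for the frame-layout constant that is
// computed alongside compute_res_save_area_size(conv):
//
//   __ call(Address(rbx, Method::from_compiled_offset()));   // enter Java
//   save_java_result(_masm, conv, res_save_area_offset);     // spill rax/xmm0
//   // on_exit runs VM code and may clobber any register:
//   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_exit)));
//   restore_callee_saved_registers(_masm, abi, reg_save_area_offset);
//   restore_java_result(_masm, conv, res_save_area_offset);  // reload before ret
//
// The sizing is conservative: 8 bytes per integer register and 16 per XMM
// register, since Java upcalls return at most scalar or 128-bit values here.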
////////////////////////////////////////////////////////////////////////////// MacroAssembler* _masm = new MacroAssembler(&buffer); - Label call_return; address start = __ pc(); __ enter(); // set up frame if ((abi._stack_alignment_bytes % 16) != 0) { @@ -662,53 +673,14 @@ address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiv preserve_callee_saved_registers(_masm, abi, reg_save_area_offset); - __ block_comment("{ get_thread"); + __ block_comment("{ on_entry"); __ vzeroupper(); - __ lea(c_rarg0, Address(rsp, should_detach_offset)); + __ lea(c_rarg0, Address(rsp, frame_data_offset)); // stack already aligned - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::maybe_attach_and_get_thread))); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_entry))); __ movptr(r15_thread, rax); __ reinit_heapbase(); - __ movptr(Address(rsp, thread_offset), r15_thread); - __ block_comment("} get_thread"); - - // TODO: - // We expect not to be coming from JNI code, but we might be. - // We should figure out what our stance is on supporting that and then maybe add - // some more handling here for: - // - handle blocks - // - check for active exceptions (and emit an error) - - __ block_comment("{ safepoint poll"); - __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans); - - if (os::is_MP()) { - __ membar(Assembler::Membar_mask_bits( - Assembler::LoadLoad | Assembler::StoreLoad | - Assembler::LoadStore | Assembler::StoreStore)); - } - - // check for safepoint operation in progress and/or pending suspend requests - Label L_after_safepoint_poll; - Label L_safepoint_poll_slow_path; - - __ safepoint_poll(L_safepoint_poll_slow_path, r15_thread, false /* at_return */, false /* in_nmethod */); - - __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0); - __ jcc(Assembler::notEqual, L_safepoint_poll_slow_path); - - __ bind(L_after_safepoint_poll); - __ block_comment("} safepoint poll"); - // change thread state - __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java); - - __ block_comment("{ reguard stack check"); - Label L_reguard; - Label L_after_reguard; - __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled); - __ jcc(Assembler::equal, L_reguard); - __ bind(L_after_reguard); - __ block_comment("} reguard stack check"); + __ block_comment("} on_entry"); __ block_comment("{ argument shuffle"); // TODO merge these somehow @@ -724,13 +696,24 @@ address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiv __ mov_metadata(rbx, entry); __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // just in case callee is deoptimized + + __ call(Address(rbx, Method::from_compiled_offset())); + + save_java_result(_masm, conv, res_save_area_offset); + + __ block_comment("{ on_exit"); + __ vzeroupper(); + __ lea(c_rarg0, Address(rsp, frame_data_offset)); + // stack already aligned + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_exit))); __ reinit_heapbase(); + __ block_comment("} on_exit"); - save_java_frame_anchor(_masm, jfa_offset, r15_thread); - __ reset_last_Java_frame(r15_thread, true); + restore_callee_saved_registers(_masm, abi, reg_save_area_offset); - __ call(Address(rbx, Method::from_compiled_offset())); + restore_java_result(_masm, conv, res_save_area_offset); + // return value shuffle #ifdef ASSERT if (conv._rets_length == 1) { // 
0 or 1 VMReg j_expected_result_reg; @@ -757,55 +740,11 @@ address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiv } #endif - __ bind(call_return); - - // also sets last Java frame - __ movptr(r15_thread, Address(rsp, thread_offset)); - // TODO corrupted thread pointer causes havoc. Can we verify it here? - restore_java_frame_anchor(_masm, jfa_offset, r15_thread); // also transitions to native state - - __ block_comment("{ maybe_detach_thread"); - Label L_after_detach; - __ cmpb(Address(rsp, should_detach_offset), 0); - __ jcc(Assembler::equal, L_after_detach); - __ vzeroupper(); - __ mov(c_rarg0, r15_thread); - // stack already aligned - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::detach_thread))); - __ reinit_heapbase(); - __ bind(L_after_detach); - __ block_comment("} maybe_detach_thread"); - - restore_callee_saved_registers(_masm, abi, reg_save_area_offset); - __ leave(); __ ret(0); ////////////////////////////////////////////////////////////////////////////// - __ block_comment("{ L_safepoint_poll_slow_path"); - __ bind(L_safepoint_poll_slow_path); - __ vzeroupper(); - __ mov(c_rarg0, r15_thread); - // stack already aligned - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); - __ reinit_heapbase(); - __ jmp(L_after_safepoint_poll); - __ block_comment("} L_safepoint_poll_slow_path"); - - ////////////////////////////////////////////////////////////////////////////// - - __ block_comment("{ L_reguard"); - __ bind(L_reguard); - __ vzeroupper(); - // stack already aligned - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); - __ reinit_heapbase(); - __ jmp(L_after_reguard); - __ block_comment("} L_reguard"); - - ////////////////////////////////////////////////////////////////////////////// - __ block_comment("{ exception handler"); intptr_t exception_handler_offset = __ pc() - start; @@ -835,7 +774,7 @@ address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiv const char* name = "optimized_upcall_stub"; #endif // PRODUCT - OptimizedEntryBlob* blob = OptimizedEntryBlob::create(name, &buffer, exception_handler_offset, receiver, jfa_offset); + OptimizedEntryBlob* blob = OptimizedEntryBlob::create(name, &buffer, exception_handler_offset, receiver, in_ByteSize(frame_data_offset)); if (TraceOptimizedUpcallStubs) { blob->print_on(tty); @@ -844,6 +783,7 @@ address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiv return blob->code_begin(); } +PRAGMA_DIAG_POP bool ProgrammableUpcallHandler::supports_optimized_upcalls() { return true; diff --git a/src/hotspot/cpu/x86/vm_version_ext_x86.cpp b/src/hotspot/cpu/x86/vm_version_ext_x86.cpp index a84b37f7977cbe2108e6643b922f7b4291c34995..84e8c9fa819a87c9ca8ad779c671961b9a2e9b9c 100644 --- a/src/hotspot/cpu/x86/vm_version_ext_x86.cpp +++ b/src/hotspot/cpu/x86/vm_version_ext_x86.cpp @@ -948,7 +948,7 @@ const char* const VM_Version_Ext::_feature_ecx_id[] = { const char* const VM_Version_Ext::_feature_extended_ecx_id[] = { "LAHF/SAHF instruction support", - "Core multi-processor leagacy mode", + "Core multi-processor legacy mode", "", "", "", diff --git a/src/hotspot/cpu/x86/vm_version_ext_x86.hpp b/src/hotspot/cpu/x86/vm_version_ext_x86.hpp index 2d318dd390e7b0bed962762ab66cf057de651f2f..c81ebead23ca52d0a4691529cccc898292fa4358 100644 --- a/src/hotspot/cpu/x86/vm_version_ext_x86.hpp +++ b/src/hotspot/cpu/x86/vm_version_ext_x86.hpp @@ -27,6 +27,7 @@ #include 
"runtime/vm_version.hpp" #include "utilities/macros.hpp" +#include "utilities/sizes.hpp" class VM_Version_Ext : public VM_Version { diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp index 543fee25c6115cf8599ca1b15b063820428244fc..eb60216d59598e8845b68b9950e13a1150a0a97c 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.cpp +++ b/src/hotspot/cpu/x86/vm_version_x86.cpp @@ -30,6 +30,7 @@ #include "logging/log.hpp" #include "logging/logStream.hpp" #include "memory/resourceArea.hpp" +#include "memory/universe.hpp" #include "runtime/globals_extension.hpp" #include "runtime/java.hpp" #include "runtime/os.hpp" @@ -65,6 +66,22 @@ extern "C" { static get_cpu_info_stub_t get_cpu_info_stub = NULL; static detect_virt_stub_t detect_virt_stub = NULL; +#ifdef _LP64 + +bool VM_Version::supports_clflush() { + // clflush should always be available on x86_64 + // if not we are in real trouble because we rely on it + // to flush the code cache. + // Unfortunately, Assembler::clflush is currently called as part + // of generation of the code cache flush routine. This happens + // under Universe::init before the processor features are set + // up. Assembler::flush calls this routine to check that clflush + // is allowed. So, we give the caller a free pass if Universe init + // is still in progress. + assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available"); + return true; +} +#endif class VM_Version_StubGenerator: public StubCodeGenerator { public: @@ -769,6 +786,15 @@ void VM_Version::get_processor_features() { _features &= ~CPU_VZEROUPPER; _features &= ~CPU_AVX512BW; _features &= ~CPU_AVX512VL; + _features &= ~CPU_AVX512DQ; + _features &= ~CPU_AVX512_VNNI; + _features &= ~CPU_AVX512_VAES; + _features &= ~CPU_AVX512_VPOPCNTDQ; + _features &= ~CPU_AVX512_VPCLMULQDQ; + _features &= ~CPU_AVX512_VBMI; + _features &= ~CPU_AVX512_VBMI2; + _features &= ~CPU_CLWB; + _features &= ~CPU_FLUSHOPT; } } @@ -1012,10 +1038,6 @@ void VM_Version::get_processor_features() { } if (!supports_rtm() && UseRTMLocking) { - // Can't continue because UseRTMLocking affects UseBiasedLocking flag - // setting during arguments processing. See use_biased_locking(). - // VM_Version_init() is executed after UseBiasedLocking is used - // in Thread::allocate(). vm_exit_during_initialization("RTM instructions are not available on this CPU"); } @@ -1023,8 +1045,6 @@ void VM_Version::get_processor_features() { if (UseRTMLocking) { if (!CompilerConfig::is_c2_enabled()) { // Only C2 does RTM locking optimization. - // Can't continue because UseRTMLocking affects UseBiasedLocking flag - // setting during arguments processing. See use_biased_locking(). vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); } if (is_intel_family_core()) { @@ -1062,8 +1082,6 @@ void VM_Version::get_processor_features() { #else if (UseRTMLocking) { // Only C2 does RTM locking optimization. - // Can't continue because UseRTMLocking affects UseBiasedLocking flag - // setting during arguments processing. See use_biased_locking(). 
vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); } #endif @@ -1718,6 +1736,9 @@ void VM_Version::get_processor_features() { if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) { FLAG_SET_DEFAULT(UseSignumIntrinsic, true); } + if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { + FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); + } } void VM_Version::print_platform_virtualization_info(outputStream* st) { @@ -1736,27 +1757,6 @@ void VM_Version::print_platform_virtualization_info(outputStream* st) { } } -bool VM_Version::use_biased_locking() { -#if INCLUDE_RTM_OPT - // RTM locking is most useful when there is high lock contention and - // low data contention. With high lock contention the lock is usually - // inflated and biased locking is not suitable for that case. - // RTM locking code requires that biased locking is off. - // Note: we can't switch off UseBiasedLocking in get_processor_features() - // because it is used by Thread::allocate() which is called before - // VM_Version::initialize(). - if (UseRTMLocking && UseBiasedLocking) { - if (FLAG_IS_DEFAULT(UseBiasedLocking)) { - FLAG_SET_DEFAULT(UseBiasedLocking, false); - } else { - warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." ); - UseBiasedLocking = false; - } - } -#endif - return UseBiasedLocking; -} - bool VM_Version::compute_has_intel_jcc_erratum() { if (!is_intel_family_core()) { // Only Intel CPUs are affected. @@ -1766,54 +1766,58 @@ bool VM_Version::compute_has_intel_jcc_erratum() { // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf switch (_model) { case 0x8E: - // 06_8EH | 9 | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Amber Lake Y - // 06_8EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake U - // 06_8EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake U 23e - // 06_8EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake Y - // 06_8EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake U43e - // 06_8EH | B | 8th Generation Intel® Core™ Processors based on microarchitecture code name Whiskey Lake U - // 06_8EH | C | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Amber Lake Y - // 06_8EH | C | 10th Generation Intel® Core™ Processor Family based on microarchitecture code name Comet Lake U42 - // 06_8EH | C | 8th Generation Intel® Core™ Processors based on microarchitecture code name Whiskey Lake U + // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y + // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U + // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e + // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y + // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e + // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U + // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y + // 06_8EH | C | 10th 
Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42 + // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC; case 0x4E: - // 06_4E | 3 | 6th Generation Intel® Core™ Processors based on microarchitecture code name Skylake U - // 06_4E | 3 | 6th Generation Intel® Core™ Processor Family based on microarchitecture code name Skylake U23e - // 06_4E | 3 | 6th Generation Intel® Core™ Processors based on microarchitecture code name Skylake Y + // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U + // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e + // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y return _stepping == 0x3; case 0x55: - // 06_55H | 4 | Intel® Xeon® Processor D Family based on microarchitecture code name Skylake D, Bakerville - // 06_55H | 4 | Intel® Xeon® Scalable Processors based on microarchitecture code name Skylake Server - // 06_55H | 4 | Intel® Xeon® Processor W Family based on microarchitecture code name Skylake W - // 06_55H | 4 | Intel® Core™ X-series Processors based on microarchitecture code name Skylake X - // 06_55H | 4 | Intel® Xeon® Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3 - // 06_55 | 7 | 2nd Generation Intel® Xeon® Scalable Processors based on microarchitecture code name Cascade Lake (server) + // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville + // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server + // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W + // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X + // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3 + // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server) return _stepping == 0x4 || _stepping == 0x7; case 0x5E: - // 06_5E | 3 | 6th Generation Intel® Core™ Processor Family based on microarchitecture code name Skylake H - // 06_5E | 3 | 6th Generation Intel® Core™ Processor Family based on microarchitecture code name Skylake S + // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H + // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S return _stepping == 0x3; case 0x9E: - // 06_9EH | 9 | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake G - // 06_9EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake H - // 06_9EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake S - // 06_9EH | 9 | Intel® Core™ X-series Processors based on microarchitecture code name Kaby Lake X - // 06_9EH | 9 | Intel® Xeon® Processor E3 v6 Family Kaby Lake Xeon E3 - // 06_9EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake H - // 06_9EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code 
name Coffee Lake S - // 06_9EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP - // 06_9EH | A | Intel® Xeon® Processor E Family based on microarchitecture code name Coffee Lake S (6+2) - // 06_9EH | A | Intel® Xeon® Processor E Family based on microarchitecture code name Coffee Lake S (4+2) - // 06_9EH | B | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S (4+2) - // 06_9EH | B | Intel® Celeron® Processor G Series based on microarchitecture code name Coffee Lake S (4+2) - // 06_9EH | D | 9th Generation Intel® Core™ Processor Family based on microarchitecturecode name Coffee Lake H (8+2) - // 06_9EH | D | 9th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S (8+2) + // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G + // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H + // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S + // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X + // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3 + // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H + // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S + // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP + // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2) + // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2) + // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2) + // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2) + // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2) + // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2) return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD; + case 0xA5: + // Not in Intel documentation. + // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H + return true; case 0xA6: - // 06_A6H | 0 | 10th Generation Intel® Core™ Processor Family based on microarchitecture code name Comet Lake U62 + // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62 return _stepping == 0x0; case 0xAE: - // 06_AEH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2) + // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2) return _stepping == 0xA; default: // If we are running on another intel machine not recognized in the table, we are okay. 
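// The table above only decides whether the mitigation is needed; the
// mitigation itself pads branches so they neither cross nor end on a 32-byte
// fetch-line boundary, per the Intel white paper cited in the hunk. A
// hypothetical predicate for that condition, not code from this patch:
//
//   static bool jcc_erratum_affected(uintptr_t begin, uintptr_t end) {
//     // [begin, end) is the branch instruction. Affected if its bytes span
//     // two 32-byte lines, or if its last byte sits at the end of a line.
//     return (begin & ~(uintptr_t)0x1F) != ((end - 1) & ~(uintptr_t)0x1F) ||
//            (end & 0x1F) == 0;
//   }
//
// The new 0xA5 entry (Comet Lake S/H) returns true for every stepping because
// Intel's published list does not enumerate steppings for that model.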
diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp index 88334ec8c6f9aeb8468a6461cc5c068c48059a83..ec82ecd81c706475dbf31afdfdbd0aa1f922a8cb 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.hpp +++ b/src/hotspot/cpu/x86/vm_version_x86.hpp @@ -25,9 +25,9 @@ #ifndef CPU_X86_VM_VERSION_X86_HPP #define CPU_X86_VM_VERSION_X86_HPP -#include "memory/universe.hpp" #include "runtime/abstract_vm_version.hpp" #include "utilities/macros.hpp" +#include "utilities/sizes.hpp" class VM_Version : public Abstract_VM_Version { friend class VMStructs; @@ -261,7 +261,9 @@ class VM_Version : public Abstract_VM_Version { uint32_t : 2, avx512_4vnniw : 1, avx512_4fmaps : 1, - : 28; + : 10, + serialize : 1, + : 17; } bits; }; @@ -359,7 +361,8 @@ protected: \ decl(AVX512_VBMI2, "avx512_vbmi2", 44) /* VBMI2 shift left double instructions */ \ decl(AVX512_VBMI, "avx512_vbmi", 45) /* Vector BMI instructions */ \ - decl(HV, "hv", 46) /* Hypervisor instructions */ + decl(HV, "hv", 46) /* Hypervisor instructions */ \ + decl(SERIALIZE, "serialize", 47) /* CPU SERIALIZE */ #define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit), CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) @@ -646,6 +649,8 @@ enum Extended_Family { if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) { result |= CPU_CLWB; } + if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0) + result |= CPU_SERIALIZE; } // ZX features. @@ -747,9 +752,6 @@ public: // Override Abstract_VM_Version implementation static void print_platform_virtualization_info(outputStream*); - // Override Abstract_VM_Version implementation - static bool use_biased_locking(); - // Asserts static void assert_is_initialized() { assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized"); @@ -778,7 +780,7 @@ public: static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' static bool is_zx() { assert_is_initialized(); return (_cpuid_info.std_vendor_name_0 == 0x746e6543) || (_cpuid_info.std_vendor_name_0 == 0x68532020); } // 'tneC'||'hS ' static bool is_atom_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x36) || (extended_cpu_model() == 0x37) || (extended_cpu_model() == 0x4D))); } //Silvermont and Centerton - static bool is_knights_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x57) || (extended_cpu_model() == 0x85))); } // Xeon Phi 3200/5200/7200 and Future Xeon Phi + static bool is_knights_family() { return UseKNLSetting || ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x57) || (extended_cpu_model() == 0x85))); } // Xeon Phi 3200/5200/7200 and Future Xeon Phi static bool supports_processor_topology() { return (_cpuid_info.std_max_function >= 0xB) && @@ -899,6 +901,7 @@ public: static bool supports_avx512_vbmi() { return (_features & CPU_AVX512_VBMI) != 0; } static bool supports_avx512_vbmi2() { return (_features & CPU_AVX512_VBMI2) != 0; } static bool supports_hv() { return (_features & CPU_HV) != 0; } + static bool supports_serialize() { return (_features & CPU_SERIALIZE) != 0; } // Intel features static bool is_intel_family_core() { return is_intel() && @@ -1030,19 +1033,8 @@ public: // and trailing StoreStore fences. #ifdef _LP64 - static bool supports_clflush() { - // clflush should always be available on x86_64 - // if not we are in real trouble because we rely on it - // to flush the code cache. 
- // Unfortunately, Assembler::clflush is currently called as part - // of generation of the code cache flush routine. This happens - // under Universe::init before the processor features are set - // up. Assembler::flush calls this routine to check that clflush - // is allowed. So, we give the caller a free pass if Universe init - // is still in progress. - assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available"); - return true; - } + + static bool supports_clflush(); // Can't inline due to header file conflict #else static bool supports_clflush() { return ((_features & CPU_FLUSH) != 0); } #endif // _LP64 diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 5be19d73cef943c7113977bfa0ce2290c9052997..1ea22eee800a986d21bea5ba80cce2529277e1b8 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -1216,39 +1216,6 @@ class HandlerImpl { #endif }; - -inline uint vector_length(const Node* n) { - const TypeVect* vt = n->bottom_type()->is_vect(); - return vt->length(); -} - -inline uint vector_length(const MachNode* use, MachOper* opnd) { - uint def_idx = use->operand_index(opnd); - Node* def = use->in(def_idx); - return def->bottom_type()->is_vect()->length(); -} - -inline uint vector_length_in_bytes(const Node* n) { - const TypeVect* vt = n->bottom_type()->is_vect(); - return vt->length_in_bytes(); -} - -inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) { - uint def_idx = use->operand_index(opnd); - Node* def = use->in(def_idx); - return def->bottom_type()->is_vect()->length_in_bytes(); -} - -inline BasicType vector_element_basic_type(const Node *n) { - return n->bottom_type()->is_vect()->element_basic_type(); -} - -inline BasicType vector_element_basic_type(const MachNode *use, MachOper* opnd) { - uint def_idx = use->operand_index(opnd); - Node* def = use->in(def_idx); - return def->bottom_type()->is_vect()->element_basic_type(); -} - inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { switch(bytes) { case 4: // fall-through @@ -1265,7 +1232,7 @@ inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { } static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) { - return vector_length_encoding(vector_length_in_bytes(n)); + return vector_length_encoding(Matcher::vector_length_in_bytes(n)); } static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) { @@ -1593,6 +1560,15 @@ const bool Matcher::match_rule_supported(int opcode) { return false; } break; + case Op_CopySignD: + case Op_CopySignF: + if (UseAVX < 3 || !is_LP64) { + return false; + } + if (!VM_Version::supports_avx512vl()) { + return false; + } + break; #ifndef _LP64 case Op_AddReductionVF: case Op_AddReductionVD: @@ -1671,6 +1647,9 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType break; case Op_RotateRightV: case Op_RotateLeftV: + if (bt != T_INT && bt != T_LONG) { + return false; + } // fallthrough case Op_MacroLogicV: if (!VM_Version::supports_evex() || ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { @@ -1835,6 +1814,11 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType return false; } break; + case Op_VectorMaskCmp: + if (vlen < 2 || size_in_bits < 32) { + return false; + } + break; } return true; // Per default match rules are supported. 
} @@ -1868,10 +1852,18 @@ MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, return NULL; } -bool Matcher::is_generic_reg2reg_move(MachNode* m) { +bool Matcher::is_reg2reg_move(MachNode* m) { switch (m->rule()) { case MoveVec2Leg_rule: case MoveLeg2Vec_rule: + case MoveF2VL_rule: + case MoveF2LEG_rule: + case MoveVL2F_rule: + case MoveLEG2F_rule: + case MoveD2VL_rule: + case MoveD2LEG_rule: + case MoveVL2D_rule: + case MoveLEG2D_rule: return true; default: return false; @@ -1898,18 +1890,6 @@ const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) { return new TypeVectMask(TypeInt::BOOL, length); } -const int Matcher::float_pressure(int default_pressure_threshold) { - int float_pressure_threshold = default_pressure_threshold; -#ifdef _LP64 - if (UseAVX > 2) { - // Increase pressure threshold on machines with AVX3 which have - // 2x more XMM registers. - float_pressure_threshold = default_pressure_threshold * 2; - } -#endif - return float_pressure_threshold; -} - // Max vector size in bytes. 0 if not supported. const int Matcher::vector_width_in_bytes(BasicType bt) { assert(is_java_primitive(bt), "only primitive type vectors"); @@ -3333,7 +3313,7 @@ instruct sqrtD_reg(regD dst) %{ // ---------------------------------------- VectorReinterpret ------------------------------------ instruct reinterpret(vec dst) %{ - predicate(vector_length_in_bytes(n) == vector_length_in_bytes(n->in(1))); // dst == src + predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src match(Set dst (VectorReinterpret dst)); ins_cost(125); format %{ "vector_reinterpret $dst\t!" %} @@ -3345,16 +3325,16 @@ instruct reinterpret(vec dst) %{ instruct reinterpret_expand(vec dst, vec src, rRegP scratch) %{ predicate(UseAVX == 0 && - (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst + (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst match(Set dst (VectorReinterpret src)); ins_cost(125); effect(TEMP dst, TEMP scratch); format %{ "vector_reinterpret_expand $dst,$src\t! 
using $scratch as TEMP" %} ins_encode %{ - assert(vector_length_in_bytes(this) <= 16, "required"); - assert(vector_length_in_bytes(this, $src) <= 8, "required"); + assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); + assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); - int src_vlen_in_bytes = vector_length_in_bytes(this, $src); + int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); if (src_vlen_in_bytes == 4) { __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register); } else { @@ -3368,8 +3348,8 @@ instruct reinterpret_expand(vec dst, vec src, rRegP scratch) %{ instruct vreinterpret_expand4(legVec dst, vec src, rRegP scratch) %{ predicate(UseAVX > 0 && - (vector_length_in_bytes(n->in(1)) == 4) && // src - (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst + (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src + (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst match(Set dst (VectorReinterpret src)); ins_cost(125); effect(TEMP scratch); @@ -3383,13 +3363,13 @@ instruct vreinterpret_expand4(legVec dst, vec src, rRegP scratch) %{ instruct vreinterpret_expand(legVec dst, vec src) %{ predicate(UseAVX > 0 && - (vector_length_in_bytes(n->in(1)) > 4) && // src - (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst + (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src + (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst match(Set dst (VectorReinterpret src)); ins_cost(125); format %{ "vector_reinterpret_expand $dst,$src\t!" %} ins_encode %{ - switch (vector_length_in_bytes(this, $src)) { + switch (Matcher::vector_length_in_bytes(this, $src)) { case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; @@ -3400,12 +3380,12 @@ instruct vreinterpret_expand(legVec dst, vec src) %{ %} instruct reinterpret_shrink(vec dst, legVec src) %{ - predicate(vector_length_in_bytes(n->in(1)) > vector_length_in_bytes(n)); // src > dst + predicate(Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst match(Set dst (VectorReinterpret src)); ins_cost(125); format %{ "vector_reinterpret_shrink $dst,$src\t!" %} ins_encode %{ - switch (vector_length_in_bytes(this)) { + switch (Matcher::vector_length_in_bytes(this)) { case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; @@ -3454,7 +3434,7 @@ instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{ %} instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ - predicate(vector_length(n) < 8); + predicate(Matcher::vector_length(n) < 8); match(Set dst (RoundDoubleModeV src rmode)); format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} ins_encode %{ @@ -3466,7 +3446,7 @@ instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ %} instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ - predicate(vector_length(n) == 8); + predicate(Matcher::vector_length(n) == 8); match(Set dst (RoundDoubleModeV src rmode)); format %{ "vrndscalepd $dst,$src,$rmode\t! 
round packed8D" %} ins_encode %{ @@ -3477,7 +3457,7 @@ instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ %} instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ - predicate(vector_length(n) < 8); + predicate(Matcher::vector_length(n) < 8); match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} ins_encode %{ @@ -3489,7 +3469,7 @@ instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ %} instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ - predicate(vector_length(n) == 8); + predicate(Matcher::vector_length(n) == 8); match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} ins_encode %{ @@ -3567,7 +3547,7 @@ instruct loadV(vec dst, memory mem) %{ ins_cost(125); format %{ "load_vector $dst,$mem" %} ins_encode %{ - switch (vector_length_in_bytes(this)) { + switch (Matcher::vector_length_in_bytes(this)) { case 4: __ movdl ($dst$$XMMRegister, $mem$$Address); break; case 8: __ movq ($dst$$XMMRegister, $mem$$Address); break; case 16: __ movdqu ($dst$$XMMRegister, $mem$$Address); break; @@ -3585,7 +3565,7 @@ instruct storeV(memory mem, vec src) %{ ins_cost(145); format %{ "store_vector $mem,$src\n\t" %} ins_encode %{ - switch (vector_length_in_bytes(this, $src)) { + switch (Matcher::vector_length_in_bytes(this, $src)) { case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; @@ -3602,7 +3582,7 @@ instruct storeV(memory mem, vec src) %{ // Gather INT, LONG, FLOAT, DOUBLE instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ - predicate(vector_length_in_bytes(n) <= 32); + predicate(Matcher::vector_length_in_bytes(n) <= 32); match(Set dst (LoadVectorGather mem idx)); effect(TEMP dst, TEMP tmp, TEMP mask); format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %} @@ -3610,9 +3590,9 @@ instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ assert(UseAVX >= 2, "sanity"); int vlen_enc = vector_length_encoding(this); - BasicType elem_bt = vector_element_basic_type(this); + BasicType elem_bt = Matcher::vector_element_basic_type(this); - assert(vector_length_in_bytes(this) >= 16, "sanity"); + assert(Matcher::vector_length_in_bytes(this) >= 16, "sanity"); assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE if (vlen_enc == Assembler::AVX_128bit) { @@ -3627,7 +3607,7 @@ instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ %} instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ - predicate(vector_length_in_bytes(n) == 64); + predicate(Matcher::vector_length_in_bytes(n) == 64); match(Set dst (LoadVectorGather mem idx)); effect(TEMP dst, TEMP tmp, TEMP ktmp); format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and k2 as TEMP" %} @@ -3635,7 +3615,7 @@ instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ assert(UseAVX > 2, "sanity"); int vlen_enc = vector_length_encoding(this); - BasicType elem_bt = vector_element_basic_type(this); + BasicType elem_bt = Matcher::vector_element_basic_type(this); assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE @@ -3657,9 +3637,9 @@ instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{ format %{ "store_vector_scatter $mem, $idx, $src\t! 
using k2 and $tmp as TEMP" %} ins_encode %{ int vlen_enc = vector_length_encoding(this, $src); - BasicType elem_bt = vector_element_basic_type(this, $src); + BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); - assert(vector_length_in_bytes(this, $src) >= 16, "sanity"); + assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register); @@ -3676,7 +3656,7 @@ instruct ReplB_reg(vec dst, rRegI src) %{ match(Set dst (ReplicateB src)); format %{ "replicateB $dst,$src" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW int vlen_enc = vector_length_encoding(this); @@ -3716,7 +3696,7 @@ instruct ReplB_imm(vec dst, immI con) %{ match(Set dst (ReplicateB con)); format %{ "replicateB $dst,$con" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 1)); if (vlen == 4) { __ movdl($dst$$XMMRegister, const_addr); @@ -3741,7 +3721,7 @@ instruct ReplB_zero(vec dst, immI_0 zero) %{ match(Set dst (ReplicateB zero)); format %{ "replicateB $dst,$zero" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen <= 16) { __ pxor($dst$$XMMRegister, $dst$$XMMRegister); } else { @@ -3759,7 +3739,7 @@ instruct ReplS_reg(vec dst, rRegI src) %{ match(Set dst (ReplicateS src)); format %{ "replicateS $dst,$src" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW int vlen_enc = vector_length_encoding(this); @@ -3798,7 +3778,7 @@ instruct ReplS_imm(vec dst, immI con) %{ match(Set dst (ReplicateS con)); format %{ "replicateS $dst,$con" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 2)); if (vlen == 2) { __ movdl($dst$$XMMRegister, const_addr); @@ -3822,7 +3802,7 @@ instruct ReplS_zero(vec dst, immI_0 zero) %{ match(Set dst (ReplicateS zero)); format %{ "replicateS $dst,$zero" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen <= 8) { __ pxor($dst$$XMMRegister, $dst$$XMMRegister); } else { @@ -3839,7 +3819,7 @@ instruct ReplI_reg(vec dst, rRegI src) %{ match(Set dst (ReplicateI src)); format %{ "replicateI $dst,$src" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands int vlen_enc = vector_length_encoding(this); __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); @@ -3863,7 +3843,7 @@ instruct ReplI_mem(vec dst, memory mem) %{ match(Set dst (ReplicateI (LoadI mem))); format %{ "replicateI $dst,$mem" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen <= 4) { __ movdl($dst$$XMMRegister, $mem$$Address); __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); @@ -3880,7 
+3860,7 @@ instruct ReplI_imm(vec dst, immI con) %{ match(Set dst (ReplicateI con)); format %{ "replicateI $dst,$con" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 4)); if (vlen <= 4) { __ movq($dst$$XMMRegister, const_addr); @@ -3902,7 +3882,7 @@ instruct ReplI_zero(vec dst, immI_0 zero) %{ match(Set dst (ReplicateI zero)); format %{ "replicateI $dst,$zero" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen <= 4) { __ pxor($dst$$XMMRegister, $dst$$XMMRegister); } else { @@ -3935,7 +3915,7 @@ instruct ReplL_reg(vec dst, rRegL src) %{ match(Set dst (ReplicateL src)); format %{ "replicateL $dst,$src" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen == 2) { __ movdq($dst$$XMMRegister, $src$$Register); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); @@ -3959,12 +3939,12 @@ instruct ReplL_reg(vec dst, rRegL src) %{ #else // _LP64 // Replicate long (8 byte) scalar to be vector instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ - predicate(vector_length(n) <= 4); + predicate(Matcher::vector_length(n) <= 4); match(Set dst (ReplicateL src)); effect(TEMP dst, USE src, TEMP tmp); format %{ "replicateL $dst,$src" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen == 2) { __ movdl($dst$$XMMRegister, $src$$Register); __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); @@ -3988,7 +3968,7 @@ instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ %} instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ - predicate(vector_length(n) == 8); + predicate(Matcher::vector_length(n) == 8); match(Set dst (ReplicateL src)); effect(TEMP dst, USE src, TEMP tmp); format %{ "replicateL $dst,$src" %} @@ -4016,7 +3996,7 @@ instruct ReplL_mem(vec dst, memory mem) %{ match(Set dst (ReplicateL (LoadL mem))); format %{ "replicateL $dst,$mem" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen == 2) { __ movq($dst$$XMMRegister, $mem$$Address); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); @@ -4034,7 +4014,7 @@ instruct ReplL_imm(vec dst, immL con) %{ match(Set dst (ReplicateL con)); format %{ "replicateL $dst,$con" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); InternalAddress const_addr = $constantaddress($con); if (vlen == 2) { __ movq($dst$$XMMRegister, const_addr); @@ -4053,7 +4033,7 @@ instruct ReplL_zero(vec dst, immL0 zero) %{ match(Set dst (ReplicateL zero)); format %{ "replicateL $dst,$zero" %} ins_encode %{ - int vlen = vector_length(this); + int vlen = Matcher::vector_length(this); if (vlen == 2) { __ pxor($dst$$XMMRegister, $dst$$XMMRegister); } else { @@ -4082,7 +4062,7 @@ instruct ReplF_reg(vec dst, vlRegF src) %{ match(Set dst (ReplicateF src)); format %{ "replicateF $dst,$src" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen <= 4) { __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); } else if (VM_Version::supports_avx2()) { @@ -4101,7 +4081,7 @@ instruct ReplF_mem(vec dst, memory mem) %{ match(Set dst (ReplicateF (LoadF mem))); format %{ "replicateF $dst,$mem" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen <= 4) { __ movdl($dst$$XMMRegister, $mem$$Address); __ pshufd($dst$$XMMRegister, 
$dst$$XMMRegister, 0x00); @@ -4118,7 +4098,7 @@ instruct ReplF_zero(vec dst, immF0 zero) %{ match(Set dst (ReplicateF zero)); format %{ "replicateF $dst,$zero" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen <= 4) { __ xorps($dst$$XMMRegister, $dst$$XMMRegister); } else { @@ -4136,7 +4116,7 @@ instruct ReplD_reg(vec dst, vlRegD src) %{ match(Set dst (ReplicateD src)); format %{ "replicateD $dst,$src" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen == 2) { __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); } else if (VM_Version::supports_avx2()) { @@ -4155,7 +4135,7 @@ instruct ReplD_mem(vec dst, memory mem) %{ match(Set dst (ReplicateD (LoadD mem))); format %{ "replicateD $dst,$mem" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen == 2) { __ movq($dst$$XMMRegister, $mem$$Address); __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44); @@ -4172,7 +4152,7 @@ instruct ReplD_zero(vec dst, immD0 zero) %{ match(Set dst (ReplicateD zero)); format %{ "replicateD $dst,$zero" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen == 2) { __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); } else { @@ -4186,17 +4166,17 @@ instruct ReplD_zero(vec dst, immD0 zero) %{ // ====================VECTOR INSERT======================================= instruct insert(vec dst, rRegI val, immU8 idx) %{ - predicate(vector_length_in_bytes(n) < 32); + predicate(Matcher::vector_length_in_bytes(n) < 32); match(Set dst (VectorInsert (Binary dst val) idx)); format %{ "vector_insert $dst,$val,$idx" %} ins_encode %{ assert(UseSSE >= 4, "required"); - assert(vector_length_in_bytes(this) >= 8, "required"); + assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); - BasicType elem_bt = vector_element_basic_type(this); + BasicType elem_bt = Matcher::vector_element_basic_type(this); assert(is_integral_type(elem_bt), ""); - assert($idx$$constant < (int)vector_length(this), "out of bounds"); + assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); %} @@ -4204,18 +4184,18 @@ instruct insert(vec dst, rRegI val, immU8 idx) %{ %} instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ - predicate(vector_length_in_bytes(n) == 32); + predicate(Matcher::vector_length_in_bytes(n) == 32); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP vtmp); format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} ins_encode %{ int vlen_enc = Assembler::AVX_256bit; - BasicType elem_bt = vector_element_basic_type(this); + BasicType elem_bt = Matcher::vector_element_basic_type(this); int elem_per_lane = 16/type2aelembytes(elem_bt); int log2epr = log2(elem_per_lane); assert(is_integral_type(elem_bt), "sanity"); - assert($idx$$constant < (int)vector_length(this), "out of bounds"); + assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); uint x_idx = $idx$$constant & right_n_bits(log2epr); uint y_idx = ($idx$$constant >> log2epr) & 1; @@ -4227,19 +4207,19 @@ instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ %} instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ - predicate(vector_length_in_bytes(n) == 64); + predicate(Matcher::vector_length_in_bytes(n) == 64); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP vtmp); format 
%{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} ins_encode %{ assert(UseAVX > 2, "sanity"); - BasicType elem_bt = vector_element_basic_type(this); + BasicType elem_bt = Matcher::vector_element_basic_type(this); int elem_per_lane = 16/type2aelembytes(elem_bt); int log2epr = log2(elem_per_lane); assert(is_integral_type(elem_bt), ""); - assert($idx$$constant < (int)vector_length(this), "out of bounds"); + assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); uint x_idx = $idx$$constant & right_n_bits(log2epr); uint y_idx = ($idx$$constant >> log2epr) & 3; @@ -4252,13 +4232,13 @@ instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ #ifdef _LP64 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ - predicate(vector_length(n) == 2); + predicate(Matcher::vector_length(n) == 2); match(Set dst (VectorInsert (Binary dst val) idx)); format %{ "vector_insert $dst,$val,$idx" %} ins_encode %{ assert(UseSSE >= 4, "required"); - assert(vector_element_basic_type(this) == T_LONG, ""); - assert($idx$$constant < (int)vector_length(this), "out of bounds"); + assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); + assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); %} @@ -4266,13 +4246,13 @@ instruct insert2L(vec dst, rRegL val, immU8 idx) %{ %} instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ - predicate(vector_length(n) == 4); + predicate(Matcher::vector_length(n) == 4); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP vtmp); format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} ins_encode %{ - assert(vector_element_basic_type(this) == T_LONG, ""); - assert($idx$$constant < (int)vector_length(this), "out of bounds"); + assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); + assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); uint x_idx = $idx$$constant & right_n_bits(1); uint y_idx = ($idx$$constant >> 1) & 1; @@ -4285,13 +4265,13 @@ instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ %} instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ - predicate(vector_length(n) == 8); + predicate(Matcher::vector_length(n) == 8); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP vtmp); format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} ins_encode %{ - assert(vector_element_basic_type(this) == T_LONG, "sanity"); - assert($idx$$constant < (int)vector_length(this), "out of bounds"); + assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); + assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); uint x_idx = $idx$$constant & right_n_bits(1); uint y_idx = ($idx$$constant >> 1) & 3; @@ -4304,14 +4284,14 @@ instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ #endif instruct insertF(vec dst, regF val, immU8 idx) %{ - predicate(vector_length(n) < 8); + predicate(Matcher::vector_length(n) < 8); match(Set dst (VectorInsert (Binary dst val) idx)); format %{ "vector_insert $dst,$val,$idx" %} ins_encode %{ assert(UseSSE >= 4, "sanity"); - assert(vector_element_basic_type(this) == T_FLOAT, "sanity"); - assert($idx$$constant < (int)vector_length(this), "out of bounds"); + assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); + assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); __ insertps($dst$$XMMRegister, 
$val$$XMMRegister, $idx$$constant); %} @@ -4319,15 +4299,15 @@ instruct insertF(vec dst, regF val, immU8 idx) %{ %} instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ - predicate(vector_length(n) >= 8); + predicate(Matcher::vector_length(n) >= 8); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP vtmp); format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} ins_encode %{ - assert(vector_element_basic_type(this) == T_FLOAT, "sanity"); - assert($idx$$constant < (int)vector_length(this), "out of bounds"); + assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); + assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); - int vlen = vector_length(this); + int vlen = Matcher::vector_length(this); uint x_idx = $idx$$constant & right_n_bits(2); if (vlen == 8) { uint y_idx = ($idx$$constant >> 2) & 1; @@ -4348,14 +4328,14 @@ instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ #ifdef _LP64 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ - predicate(vector_length(n) == 2); + predicate(Matcher::vector_length(n) == 2); match(Set dst (VectorInsert (Binary dst val) idx)); effect(TEMP tmp); format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} ins_encode %{ assert(UseSSE >= 4, "sanity"); - assert(vector_element_basic_type(this) == T_DOUBLE, "sanity"); - assert($idx$$constant < (int)vector_length(this), "out of bounds"); + assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); + assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); __ movq($tmp$$Register, $val$$XMMRegister); __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); @@ -4364,13 +4344,13 @@ instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ %} instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ - predicate(vector_length(n) == 4); + predicate(Matcher::vector_length(n) == 4); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP vtmp, TEMP tmp); format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} ins_encode %{ - assert(vector_element_basic_type(this) == T_DOUBLE, "sanity"); - assert($idx$$constant < (int)vector_length(this), "out of bounds"); + assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); + assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); uint x_idx = $idx$$constant & right_n_bits(1); uint y_idx = ($idx$$constant >> 1) & 1; @@ -4384,13 +4364,13 @@ instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ %} instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ - predicate(vector_length(n) == 8); + predicate(Matcher::vector_length(n) == 8); match(Set dst (VectorInsert (Binary src val) idx)); effect(TEMP tmp, TEMP vtmp); format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} ins_encode %{ - assert(vector_element_basic_type(this) == T_DOUBLE, "sanity"); - assert($idx$$constant < (int)vector_length(this), "out of bounds"); + assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); + assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); uint x_idx = $idx$$constant & right_n_bits(1); uint y_idx = ($idx$$constant >> 1) & 3; @@ -4408,7 +4388,7 @@ instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) // =======================Int Reduction========================================== instruct reductionI(rRegI dst, rRegI src1, 
legVec src2, legVec vtmp1, legVec vtmp2) %{ - predicate(vector_element_basic_type(n->in(2)) == T_INT); // src2 + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2 match(Set dst (AddReductionVI src1 src2)); match(Set dst (MulReductionVI src1 src2)); match(Set dst (AndReductionV src1 src2)); @@ -4420,7 +4400,7 @@ instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtm format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src2); + int vlen = Matcher::vector_length(this, $src2); __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); %} ins_pipe( pipe_slow ); @@ -4430,7 +4410,7 @@ instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtm #ifdef _LP64 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ - predicate(vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); match(Set dst (AddReductionVL src1 src2)); match(Set dst (MulReductionVL src1 src2)); match(Set dst (AndReductionV src1 src2)); @@ -4442,14 +4422,14 @@ instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtm format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src2); + int vlen = Matcher::vector_length(this, $src2); __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ - predicate(vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); match(Set dst (AddReductionVL src1 src2)); match(Set dst (MulReductionVL src1 src2)); match(Set dst (AndReductionV src1 src2)); @@ -4461,7 +4441,7 @@ instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtm format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src2); + int vlen = Matcher::vector_length(this, $src2); __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); %} ins_pipe( pipe_slow ); @@ -4471,42 +4451,42 @@ instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtm // =======================Float Reduction========================================== instruct reductionF128(regF dst, vec src, vec vtmp) %{ - predicate(vector_length(n->in(2)) <= 4); // src + predicate(Matcher::vector_length(n->in(2)) <= 4); // src match(Set dst (AddReductionVF dst src)); match(Set dst (MulReductionVF dst src)); effect(TEMP dst, TEMP vtmp); format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src); + int vlen = Matcher::vector_length(this, $src); __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ - 
predicate(vector_length(n->in(2)) == 8); // src + predicate(Matcher::vector_length(n->in(2)) == 8); // src match(Set dst (AddReductionVF dst src)); match(Set dst (MulReductionVF dst src)); effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src); + int vlen = Matcher::vector_length(this, $src); __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ - predicate(vector_length(n->in(2)) == 16); // src + predicate(Matcher::vector_length(n->in(2)) == 16); // src match(Set dst (AddReductionVF dst src)); match(Set dst (MulReductionVF dst src)); effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src); + int vlen = Matcher::vector_length(this, $src); __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); %} ins_pipe( pipe_slow ); @@ -4515,42 +4495,42 @@ instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ // =======================Double Reduction========================================== instruct reduction2D(regD dst, vec src, vec vtmp) %{ - predicate(vector_length(n->in(2)) == 2); // src + predicate(Matcher::vector_length(n->in(2)) == 2); // src match(Set dst (AddReductionVD dst src)); match(Set dst (MulReductionVD dst src)); effect(TEMP dst, TEMP vtmp); format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src); + int vlen = Matcher::vector_length(this, $src); __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ - predicate(vector_length(n->in(2)) == 4); // src + predicate(Matcher::vector_length(n->in(2)) == 4); // src match(Set dst (AddReductionVD dst src)); match(Set dst (MulReductionVD dst src)); effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src); + int vlen = Matcher::vector_length(this, $src); __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ - predicate(vector_length(n->in(2)) == 8); // src + predicate(Matcher::vector_length(n->in(2)) == 8); // src match(Set dst (AddReductionVD dst src)); match(Set dst (MulReductionVD dst src)); effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src); + int vlen = Matcher::vector_length(this, $src); __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); %} ins_pipe( pipe_slow ); @@ -4560,7 +4540,7 @@ instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ #ifdef _LP64 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ - 
predicate(vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); match(Set dst (AddReductionVI src1 src2)); match(Set dst (AndReductionV src1 src2)); match(Set dst ( OrReductionV src1 src2)); @@ -4571,14 +4551,14 @@ instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtm format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src2); + int vlen = Matcher::vector_length(this, $src2); __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ - predicate(vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); match(Set dst (AddReductionVI src1 src2)); match(Set dst (AndReductionV src1 src2)); match(Set dst ( OrReductionV src1 src2)); @@ -4589,7 +4569,7 @@ instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtm format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src2); + int vlen = Matcher::vector_length(this, $src2); __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); %} ins_pipe( pipe_slow ); @@ -4599,7 +4579,7 @@ instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtm // =======================Short Reduction========================================== instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ - predicate(vector_element_basic_type(n->in(2)) == T_SHORT); // src2 + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 match(Set dst (AddReductionVI src1 src2)); match(Set dst (MulReductionVI src1 src2)); match(Set dst (AndReductionV src1 src2)); @@ -4611,7 +4591,7 @@ instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtm format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src2); + int vlen = Matcher::vector_length(this, $src2); __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); %} ins_pipe( pipe_slow ); @@ -4620,28 +4600,28 @@ instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtm // =======================Mul Reduction========================================== instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ - predicate(vector_element_basic_type(n->in(2)) == T_BYTE && - vector_length(n->in(2)) <= 32); // src2 + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && + Matcher::vector_length(n->in(2)) <= 32); // src2 match(Set dst (MulReductionVI src1 src2)); effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src2); + int vlen = Matcher::vector_length(this, $src2); __ mulreduceB(opcode, vlen, 
$dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ - predicate(vector_element_basic_type(n->in(2)) == T_BYTE && - vector_length(n->in(2)) == 64); // src2 + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && + Matcher::vector_length(n->in(2)) == 64); // src2 match(Set dst (MulReductionVI src1 src2)); effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src2); + int vlen = Matcher::vector_length(this, $src2); __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); %} ins_pipe( pipe_slow ); @@ -4651,10 +4631,10 @@ instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legV // Float Min Reduction instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ - predicate(vector_element_basic_type(n->in(2)) == T_FLOAT && + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && - vector_length(n->in(2)) == 2); + Matcher::vector_length(n->in(2)) == 2); match(Set dst (MinReductionV src1 src2)); match(Set dst (MaxReductionV src1 src2)); effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); @@ -4663,7 +4643,7 @@ instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, assert(UseAVX > 0, "sanity"); int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src2); + int vlen = Matcher::vector_length(this, $src2); __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); %} @@ -4672,10 +4652,10 @@ instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ - predicate(vector_element_basic_type(n->in(2)) == T_FLOAT && + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && - vector_length(n->in(2)) >= 4); + Matcher::vector_length(n->in(2)) >= 4); match(Set dst (MinReductionV src1 src2)); match(Set dst (MaxReductionV src1 src2)); effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); @@ -4684,7 +4664,7 @@ instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legV assert(UseAVX > 0, "sanity"); int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src2); + int vlen = Matcher::vector_length(this, $src2); __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); %} @@ -4693,8 +4673,8 @@ instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legV instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp, legVec 
xmm_1, rFlagsReg cr) %{ - predicate(vector_element_basic_type(n->in(2)) == T_FLOAT && - vector_length(n->in(2)) == 2); + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && + Matcher::vector_length(n->in(2)) == 2); match(Set dst (MinReductionV dst src)); match(Set dst (MaxReductionV dst src)); effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); @@ -4703,7 +4683,7 @@ instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, assert(UseAVX > 0, "sanity"); int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src); + int vlen = Matcher::vector_length(this, $src); __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); %} @@ -4713,8 +4693,8 @@ instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ - predicate(vector_element_basic_type(n->in(2)) == T_FLOAT && - vector_length(n->in(2)) >= 4); + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && + Matcher::vector_length(n->in(2)) >= 4); match(Set dst (MinReductionV dst src)); match(Set dst (MaxReductionV dst src)); effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); @@ -4723,7 +4703,7 @@ instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, assert(UseAVX > 0, "sanity"); int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src); + int vlen = Matcher::vector_length(this, $src); __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); %} @@ -4735,10 +4715,10 @@ instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs rFlagsReg cr) %{ - predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE && + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && - vector_length(n->in(2)) == 2); + Matcher::vector_length(n->in(2)) == 2); match(Set dst (MinReductionV src1 src2)); match(Set dst (MaxReductionV src1 src2)); effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); @@ -4747,7 +4727,7 @@ instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, assert(UseAVX > 0, "sanity"); int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src2); + int vlen = Matcher::vector_length(this, $src2); __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); %} @@ -4757,10 +4737,10 @@ instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs rFlagsReg cr) %{ - predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE && + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && - 
vector_length(n->in(2)) >= 4); + Matcher::vector_length(n->in(2)) >= 4); match(Set dst (MinReductionV src1 src2)); match(Set dst (MaxReductionV src1 src2)); effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); @@ -4769,7 +4749,7 @@ instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, assert(UseAVX > 0, "sanity"); int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src2); + int vlen = Matcher::vector_length(this, $src2); __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); %} @@ -4780,8 +4760,8 @@ instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs rFlagsReg cr) %{ - predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE && - vector_length(n->in(2)) == 2); + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && + Matcher::vector_length(n->in(2)) == 2); match(Set dst (MinReductionV dst src)); match(Set dst (MaxReductionV dst src)); effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); @@ -4790,7 +4770,7 @@ instruct minmax_reduction2D_av(legRegD dst, legVec src, assert(UseAVX > 0, "sanity"); int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src); + int vlen = Matcher::vector_length(this, $src); __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); %} @@ -4800,8 +4780,8 @@ instruct minmax_reduction2D_av(legRegD dst, legVec src, instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs rFlagsReg cr) %{ - predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE && - vector_length(n->in(2)) >= 4); + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && + Matcher::vector_length(n->in(2)) >= 4); match(Set dst (MinReductionV dst src)); match(Set dst (MaxReductionV dst src)); effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); @@ -4810,7 +4790,7 @@ instruct minmax_reductionD_av(legRegD dst, legVec src, assert(UseAVX > 0, "sanity"); int opcode = this->ideal_Opcode(); - int vlen = vector_length(this, $src); + int vlen = Matcher::vector_length(this, $src); __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); %} @@ -4844,7 +4824,8 @@ instruct vaddB_reg(vec dst, vec src1, vec src2) %{ %} instruct vaddB_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (AddVB src (LoadVector mem))); format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} ins_encode %{ @@ -4877,7 +4858,8 @@ instruct vaddS_reg(vec dst, vec src1, vec src2) %{ %} instruct vaddS_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (AddVS src (LoadVector mem))); format %{ "vpaddw $dst,$src,$mem\t! 
add packedS" %} ins_encode %{ @@ -4911,7 +4893,8 @@ instruct vaddI_reg(vec dst, vec src1, vec src2) %{ instruct vaddI_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (AddVI src (LoadVector mem))); format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} ins_encode %{ @@ -4944,7 +4927,8 @@ instruct vaddL_reg(vec dst, vec src1, vec src2) %{ %} instruct vaddL_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (AddVL src (LoadVector mem))); format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} ins_encode %{ @@ -4977,7 +4961,8 @@ instruct vaddF_reg(vec dst, vec src1, vec src2) %{ %} instruct vaddF_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (AddVF src (LoadVector mem))); format %{ "vaddps $dst,$src,$mem\t! add packedF" %} ins_encode %{ @@ -5010,7 +4995,8 @@ instruct vaddD_reg(vec dst, vec src1, vec src2) %{ %} instruct vaddD_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (AddVD src (LoadVector mem))); format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} ins_encode %{ @@ -5045,7 +5031,8 @@ instruct vsubB_reg(vec dst, vec src1, vec src2) %{ %} instruct vsubB_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (SubVB src (LoadVector mem))); format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} ins_encode %{ @@ -5079,7 +5066,8 @@ instruct vsubS_reg(vec dst, vec src1, vec src2) %{ %} instruct vsubS_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (SubVS src (LoadVector mem))); format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} ins_encode %{ @@ -5112,7 +5100,8 @@ instruct vsubI_reg(vec dst, vec src1, vec src2) %{ %} instruct vsubI_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (SubVI src (LoadVector mem))); format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} ins_encode %{ @@ -5146,7 +5135,8 @@ instruct vsubL_reg(vec dst, vec src1, vec src2) %{ instruct vsubL_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (SubVL src (LoadVector mem))); format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} ins_encode %{ @@ -5179,7 +5169,8 @@ instruct vsubF_reg(vec dst, vec src1, vec src2) %{ %} instruct vsubF_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (SubVF src (LoadVector mem))); format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} ins_encode %{ @@ -5212,7 +5203,8 @@ instruct vsubD_reg(vec dst, vec src1, vec src2) %{ %} instruct vsubD_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (SubVD src (LoadVector mem))); format %{ "vsubpd $dst,$src,$mem\t! 
sub packedD" %} ins_encode %{ @@ -5226,8 +5218,8 @@ instruct vsubD_mem(vec dst, vec src, memory mem) %{ // Byte vector mul instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ - predicate(vector_length(n) == 4 || - vector_length(n) == 8); + predicate(Matcher::vector_length(n) == 4 || + Matcher::vector_length(n) == 8); match(Set dst (MulVB src1 src2)); effect(TEMP dst, TEMP tmp, TEMP scratch); format %{"vector_mulB $dst,$src1,$src2" %} @@ -5244,7 +5236,7 @@ instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ %} instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ - predicate(vector_length(n) == 16 && UseAVX <= 1); + predicate(Matcher::vector_length(n) == 16 && UseAVX <= 1); match(Set dst (MulVB src1 src2)); effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); format %{"vector_mulB $dst,$src1,$src2" %} @@ -5267,7 +5259,7 @@ instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scrat %} instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ - predicate(vector_length(n) == 16 && UseAVX > 1); + predicate(Matcher::vector_length(n) == 16 && UseAVX > 1); match(Set dst (MulVB src1 src2)); effect(TEMP dst, TEMP tmp, TEMP scratch); format %{"vector_mulB $dst,$src1,$src2" %} @@ -5285,7 +5277,7 @@ instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ %} instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ - predicate(vector_length(n) == 32); + predicate(Matcher::vector_length(n) == 32); match(Set dst (MulVB src1 src2)); effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); format %{"vector_mulB $dst,$src1,$src2" %} @@ -5311,7 +5303,7 @@ instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI %} instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ - predicate(vector_length(n) == 64); + predicate(Matcher::vector_length(n) == 64); match(Set dst (MulVB src1 src2)); effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); format %{"vector_mulB $dst,$src1,$src2\n\t" %} @@ -5360,7 +5352,8 @@ instruct vmulS_reg(vec dst, vec src1, vec src2) %{ %} instruct vmulS_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (MulVS src (LoadVector mem))); format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %} ins_encode %{ @@ -5394,7 +5387,8 @@ instruct vmulI_reg(vec dst, vec src1, vec src2) %{ %} instruct vmulI_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (MulVI src (LoadVector mem))); format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} ins_encode %{ @@ -5418,7 +5412,8 @@ instruct vmulL_reg(vec dst, vec src1, vec src2) %{ %} instruct vmulL_mem(vec dst, vec src, memory mem) %{ - predicate(VM_Version::supports_avx512dq()); + predicate(VM_Version::supports_avx512dq() && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (MulVL src (LoadVector mem))); format %{ "vpmullq $dst,$src,$mem\t! 
mul packedL" %} ins_encode %{ @@ -5430,7 +5425,7 @@ instruct vmulL_mem(vec dst, vec src, memory mem) %{ %} instruct mul2L_reg(vec dst, vec src2, legVec tmp) %{ - predicate(vector_length(n) == 2 && !VM_Version::supports_avx512dq()); + predicate(Matcher::vector_length(n) == 2 && !VM_Version::supports_avx512dq()); match(Set dst (MulVL dst src2)); effect(TEMP dst, TEMP tmp); format %{ "pshufd $tmp,$src2, 177\n\t" @@ -5456,7 +5451,7 @@ instruct mul2L_reg(vec dst, vec src2, legVec tmp) %{ %} instruct vmul4L_reg_avx(vec dst, vec src1, vec src2, legVec tmp, legVec tmp1) %{ - predicate(vector_length(n) == 4 && !VM_Version::supports_avx512dq()); + predicate(Matcher::vector_length(n) == 4 && !VM_Version::supports_avx512dq()); match(Set dst (MulVL src1 src2)); effect(TEMP tmp1, TEMP tmp); format %{ "vpshufd $tmp,$src2\n\t" @@ -5503,7 +5498,8 @@ instruct vmulF_reg(vec dst, vec src1, vec src2) %{ %} instruct vmulF_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (MulVF src (LoadVector mem))); format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} ins_encode %{ @@ -5536,7 +5532,8 @@ instruct vmulD_reg(vec dst, vec src1, vec src2) %{ %} instruct vmulD_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (MulVD src (LoadVector mem))); format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} ins_encode %{ @@ -5547,7 +5544,7 @@ instruct vmulD_mem(vec dst, vec src, memory mem) %{ %} instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ - predicate(vector_length(n) == 8); + predicate(Matcher::vector_length(n) == 8); match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); effect(TEMP dst, USE src1, USE src2); format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" @@ -5565,7 +5562,7 @@ instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmp %} instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ - predicate(vector_length(n) == 4); + predicate(Matcher::vector_length(n) == 4); match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); effect(TEMP dst, USE src1, USE src2); format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" @@ -5607,7 +5604,8 @@ instruct vdivF_reg(vec dst, vec src1, vec src2) %{ %} instruct vdivF_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (DivVF src (LoadVector mem))); format %{ "vdivps $dst,$src,$mem\t! div packedF" %} ins_encode %{ @@ -5640,7 +5638,8 @@ instruct vdivD_reg(vec dst, vec src1, vec src2) %{ %} instruct vdivD_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (DivVD src (LoadVector mem))); format %{ "vdivpd $dst,$src,$mem\t! 
div packedD" %} ins_encode %{ @@ -5654,7 +5653,7 @@ instruct vdivD_mem(vec dst, vec src, memory mem) %{ // Byte, Short, Int vector Min/Max instruct minmax_reg_sse(vec dst, vec src) %{ - predicate(is_integral_type(vector_element_basic_type(n)) && vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT + predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT UseAVX == 0); match(Set dst (MinV dst src)); match(Set dst (MaxV dst src)); @@ -5663,14 +5662,14 @@ instruct minmax_reg_sse(vec dst, vec src) %{ assert(UseSSE >= 4, "required"); int opcode = this->ideal_Opcode(); - BasicType elem_bt = vector_element_basic_type(this); + BasicType elem_bt = Matcher::vector_element_basic_type(this); __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct vminmax_reg(vec dst, vec src1, vec src2) %{ - predicate(is_integral_type(vector_element_basic_type(n)) && vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT + predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT UseAVX > 0); match(Set dst (MinV src1 src2)); match(Set dst (MaxV src1 src2)); @@ -5678,7 +5677,7 @@ instruct vminmax_reg(vec dst, vec src1, vec src2) %{ ins_encode %{ int opcode = this->ideal_Opcode(); int vlen_enc = vector_length_encoding(this); - BasicType elem_bt = vector_element_basic_type(this); + BasicType elem_bt = Matcher::vector_element_basic_type(this); __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); %} @@ -5687,7 +5686,7 @@ instruct vminmax_reg(vec dst, vec src1, vec src2) %{ // Long vector Min/Max instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ - predicate(vector_length_in_bytes(n) == 16 && vector_element_basic_type(n) == T_LONG && + predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && UseAVX == 0); match(Set dst (MinV dst src)); match(Set dst (MaxV src dst)); @@ -5697,7 +5696,7 @@ instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ assert(UseSSE >= 4, "required"); int opcode = this->ideal_Opcode(); - BasicType elem_bt = vector_element_basic_type(this); + BasicType elem_bt = Matcher::vector_element_basic_type(this); assert(elem_bt == T_LONG, "sanity"); __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); @@ -5706,7 +5705,7 @@ instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ %} instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ - predicate(vector_length_in_bytes(n) <= 32 && vector_element_basic_type(n) == T_LONG && + predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && UseAVX > 0 && !VM_Version::supports_avx512vl()); match(Set dst (MinV src1 src2)); match(Set dst (MaxV src1 src2)); @@ -5715,7 +5714,7 @@ instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ ins_encode %{ int vlen_enc = vector_length_encoding(this); int opcode = this->ideal_Opcode(); - BasicType elem_bt = vector_element_basic_type(this); + BasicType elem_bt = Matcher::vector_element_basic_type(this); assert(elem_bt == T_LONG, "sanity"); __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); @@ -5724,8 +5723,8 @@ instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ %} instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 
- predicate((vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && - vector_element_basic_type(n) == T_LONG); + predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && + Matcher::vector_element_basic_type(n) == T_LONG); match(Set dst (MinV src1 src2)); match(Set dst (MaxV src1 src2)); format %{ "vector_minmaxL $dst,$src1,src2\t! " %} @@ -5734,7 +5733,7 @@ instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ int vlen_enc = vector_length_encoding(this); int opcode = this->ideal_Opcode(); - BasicType elem_bt = vector_element_basic_type(this); + BasicType elem_bt = Matcher::vector_element_basic_type(this); assert(elem_bt == T_LONG, "sanity"); __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); @@ -5744,8 +5743,8 @@ instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ // Float/Double vector Min/Max instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ - predicate(vector_length_in_bytes(n) <= 32 && - is_floating_point_type(vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE + predicate(Matcher::vector_length_in_bytes(n) <= 32 && + is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE UseAVX > 0); match(Set dst (MinV a b)); match(Set dst (MaxV a b)); @@ -5756,7 +5755,7 @@ instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, l int opcode = this->ideal_Opcode(); int vlen_enc = vector_length_encoding(this); - BasicType elem_bt = vector_element_basic_type(this); + BasicType elem_bt = Matcher::vector_element_basic_type(this); __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, @@ -5766,8 +5765,8 @@ instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, l %} instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ - predicate(vector_length_in_bytes(n) == 64 && - is_floating_point_type(vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE + predicate(Matcher::vector_length_in_bytes(n) == 64 && + is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE match(Set dst (MinV a b)); match(Set dst (MaxV a b)); effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); @@ -5777,7 +5776,7 @@ instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktm int opcode = this->ideal_Opcode(); int vlen_enc = vector_length_encoding(this); - BasicType elem_bt = vector_element_basic_type(this); + BasicType elem_bt = Matcher::vector_element_basic_type(this); __ evminmax_fp(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, @@ -5786,7 +5785,7 @@ instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktm ins_pipe( pipe_slow ); %} -// --------------------------------- Signum --------------------------- +// --------------------------------- Signum/CopySign --------------------------- instruct signumF_reg(regF dst, regF zero, regF one, rRegP scratch, rFlagsReg cr) %{ match(Set dst (SignumF dst (Binary zero one))); @@ -5810,6 +5809,53 @@ instruct signumD_reg(regD dst, regD zero, regD one, rRegP scratch, rFlagsReg cr) ins_pipe( pipe_slow ); %} +// --------------------------------------- +// For copySign use 0xE4 as writemask for vpternlog +// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit +// C (xmm2) is set to 0x7FFFFFFF +// Wherever xmm2 is 0, we want to pick from B (sign) +// Wherever xmm2 is 1, we want to pick from A (src) 
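+// That is, each result bit selects C ? A : B, i.e. (A & C) | (B & ~C);
+// the table below enumerates this for all eight A/B/C combinations.
+// A scalar sketch of the same selection, as a hypothetical helper for
+// illustration only (assumes 32-bit IEEE-754 layout, <cstdint>, <cstring>):
+//
+//   float copy_sign_f(float mag, float sgn) {
+//     uint32_t a, b, c = 0x7FFFFFFF;       // C = 1 -> take bit from A (mag)
+//     memcpy(&a, &mag, sizeof a);          // A: magnitude source bits
+//     memcpy(&b, &sgn, sizeof b);          // B: sign source bits
+//     uint32_t r = (a & c) | (b & ~c);     // per-bit C ? A : B (imm 0xE4)
+//     memcpy(&mag, &r, sizeof r);
+//     return mag;
+//   }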
+// + // A B C Result + // 0 0 0 0 + // 0 0 1 0 + // 0 1 0 1 + // 0 1 1 0 + // 1 0 0 0 + // 1 0 1 1 + // 1 1 0 1 + // 1 1 1 1 + // + // Result going from high bit to low bit is 0b11100100 = 0xE4 + // --------------------------------------- + +#ifdef _LP64 +instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{ + match(Set dst (CopySignF dst src)); + effect(TEMP tmp1, TEMP tmp2); + format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} + ins_encode %{ + __ movl($tmp2$$Register, 0x7FFFFFFF); + __ movdl($tmp1$$XMMRegister, $tmp2$$Register); + __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); + %} + ins_pipe( pipe_slow ); +%} + +instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ + match(Set dst (CopySignD dst (Binary src zero))); + ins_cost(100); + effect(TEMP tmp1, TEMP tmp2); + format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} + ins_encode %{ + __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); + __ movq($tmp1$$XMMRegister, $tmp2$$Register); + __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); + %} + ins_pipe( pipe_slow ); +%} +#endif // _LP64 + // --------------------------------- Sqrt -------------------------------------- instruct vsqrtF_reg(vec dst, vec src) %{ @@ -5824,6 +5870,7 @@ instruct vsqrtF_reg(vec dst, vec src) %{ %} instruct vsqrtF_mem(vec dst, memory mem) %{ + predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); match(Set dst (SqrtVF (LoadVector mem))); format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} ins_encode %{ @@ -5847,6 +5894,7 @@ instruct vsqrtD_reg(vec dst, vec src) %{ %} instruct vsqrtD_mem(vec dst, memory mem) %{ + predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); match(Set dst (SqrtVD (LoadVector mem))); format %{ "vsqrtpd $dst,$mem\t!
sqrt packedD" %} ins_encode %{ @@ -5873,7 +5921,7 @@ instruct vshiftcnt(vec dst, rRegI cnt) %{ // Byte vector shift instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ - predicate(vector_length(n) <= 8 && VectorNode::is_vshift_cnt(n->in(2))); + predicate(Matcher::vector_length(n) <= 8 && VectorNode::is_vshift_cnt(n->in(2))); match(Set dst ( LShiftVB src shift)); match(Set dst ( RShiftVB src shift)); match(Set dst (URShiftVB src shift)); @@ -5893,7 +5941,7 @@ instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ %} instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ - predicate(vector_length(n) == 16 && VectorNode::is_vshift_cnt(n->in(2)) && + predicate(Matcher::vector_length(n) == 16 && VectorNode::is_vshift_cnt(n->in(2)) && UseAVX <= 1); match(Set dst ( LShiftVB src shift)); match(Set dst ( RShiftVB src shift)); @@ -5918,7 +5966,7 @@ instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratc %} instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ - predicate(vector_length(n) == 16 && VectorNode::is_vshift_cnt(n->in(2)) && + predicate(Matcher::vector_length(n) == 16 && VectorNode::is_vshift_cnt(n->in(2)) && UseAVX > 1); match(Set dst ( LShiftVB src shift)); match(Set dst ( RShiftVB src shift)); @@ -5939,7 +5987,7 @@ instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ %} instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ - predicate(vector_length(n) == 32 && VectorNode::is_vshift_cnt(n->in(2))); + predicate(Matcher::vector_length(n) == 32 && VectorNode::is_vshift_cnt(n->in(2))); match(Set dst ( LShiftVB src shift)); match(Set dst ( RShiftVB src shift)); match(Set dst (URShiftVB src shift)); @@ -5964,7 +6012,7 @@ instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ %} instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ - predicate(vector_length(n) == 64 && VectorNode::is_vshift_cnt(n->in(2))); + predicate(Matcher::vector_length(n) == 64 && VectorNode::is_vshift_cnt(n->in(2))); match(Set dst ( LShiftVB src shift)); match(Set dst (RShiftVB src shift)); match(Set dst (URShiftVB src shift)); @@ -6009,7 +6057,7 @@ instruct vshiftS(vec dst, vec src, vec shift) %{ int vlen_enc = vector_length_encoding(this); __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); } else { - int vlen = vector_length(this); + int vlen = Matcher::vector_length(this); if (vlen == 2) { __ movflt($dst$$XMMRegister, $src$$XMMRegister); __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); @@ -6040,7 +6088,7 @@ instruct vshiftI(vec dst, vec src, vec shift) %{ int vlen_enc = vector_length_encoding(this); __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); } else { - int vlen = vector_length(this); + int vlen = Matcher::vector_length(this); if (vlen == 2) { __ movdbl($dst$$XMMRegister, $src$$XMMRegister); __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); @@ -6066,7 +6114,7 @@ instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ int vector_len = vector_length_encoding(this); __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); } else { - int vlen = vector_length(this); + int vlen = Matcher::vector_length(this); if (vlen == 2) { __ movdbl($dst$$XMMRegister, $src$$XMMRegister); __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); @@ -6093,7 +6141,7 @@ 
instruct vshiftL(vec dst, vec src, vec shift) %{ int vlen_enc = vector_length_encoding(this); __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); } else { - assert(vector_length(this) == 2, ""); + assert(Matcher::vector_length(this) == 2, ""); __ movdqu($dst$$XMMRegister, $src$$XMMRegister); __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); } @@ -6112,7 +6160,7 @@ instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ int vector_len = vector_length_encoding(this); __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); } else { - assert(vector_length(this) == 2, ""); + assert(Matcher::vector_length(this) == 2, ""); __ movdqu($dst$$XMMRegister, $src$$XMMRegister); __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); } @@ -6128,7 +6176,7 @@ instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) effect(TEMP dst, TEMP tmp, TEMP scratch); format %{ "vshiftq $dst,$src,$shift" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen == 2) { assert(UseSSE >= 2, "required"); __ movdqu($dst$$XMMRegister, $src$$XMMRegister); @@ -6165,7 +6213,7 @@ instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ // ------------------- Variable Shift ----------------------------- // Byte variable shift instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ - predicate(vector_length(n) <= 8 && + predicate(Matcher::vector_length(n) <= 8 && !VectorNode::is_vshift_cnt(n->in(2)) && !VM_Version::supports_avx512bw()); match(Set dst ( LShiftVB src shift)); @@ -6185,7 +6233,7 @@ instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %} instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ - predicate(vector_length(n) == 16 && + predicate(Matcher::vector_length(n) == 16 && !VectorNode::is_vshift_cnt(n->in(2)) && !VM_Version::supports_avx512bw()); match(Set dst ( LShiftVB src shift)); @@ -6213,7 +6261,7 @@ instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, r %} instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4, rRegP scratch) %{ - predicate(vector_length(n) == 32 && + predicate(Matcher::vector_length(n) == 32 && !VectorNode::is_vshift_cnt(n->in(2)) && !VM_Version::supports_avx512bw()); match(Set dst ( LShiftVB src shift)); @@ -6249,7 +6297,7 @@ instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, v %} instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ - predicate(vector_length(n) <= 32 && + predicate(Matcher::vector_length(n) <= 32 && !VectorNode::is_vshift_cnt(n->in(2)) && VM_Version::supports_avx512bw()); match(Set dst ( LShiftVB src shift)); @@ -6268,7 +6316,7 @@ instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp, rRegP scratc %} instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ - predicate(vector_length(n) == 64 && + predicate(Matcher::vector_length(n) == 64 && !VectorNode::is_vshift_cnt(n->in(2)) && VM_Version::supports_avx512bw()); match(Set dst ( LShiftVB src shift)); @@ -6292,7 +6340,7 @@ instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2 // Short variable shift instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ - predicate(vector_length(n) <= 8 && + predicate(Matcher::vector_length(n) 
<= 8 && !VectorNode::is_vshift_cnt(n->in(2)) && !VM_Version::supports_avx512bw()); match(Set dst ( LShiftVS src shift)); @@ -6317,7 +6365,7 @@ instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %} instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ - predicate(vector_length(n) == 16 && + predicate(Matcher::vector_length(n) == 16 && !VectorNode::is_vshift_cnt(n->in(2)) && !VM_Version::supports_avx512bw()); match(Set dst ( LShiftVS src shift)); @@ -6407,7 +6455,7 @@ instruct vshiftL_var(vec dst, vec src, vec shift) %{ //Long variable right shift arithmetic instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ - predicate(vector_length(n) <= 4 && + predicate(Matcher::vector_length(n) <= 4 && !VectorNode::is_vshift_cnt(n->in(2)) && UseAVX == 2); match(Set dst (RShiftVL src shift)); @@ -6459,7 +6507,8 @@ instruct vand_reg(vec dst, vec src1, vec src2) %{ %} instruct vand_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (AndV src (LoadVector mem))); format %{ "vpand $dst,$src,$mem\t! and vectors" %} ins_encode %{ @@ -6493,7 +6542,8 @@ instruct vor_reg(vec dst, vec src1, vec src2) %{ %} instruct vor_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (OrV src (LoadVector mem))); format %{ "vpor $dst,$src,$mem\t! or vectors" %} ins_encode %{ @@ -6527,7 +6577,8 @@ instruct vxor_reg(vec dst, vec src1, vec src2) %{ %} instruct vxor_mem(vec dst, vec src, memory mem) %{ - predicate(UseAVX > 0); + predicate((UseAVX > 0) && + (Matcher::vector_length_in_bytes(n->in(1)) > 8)); match(Set dst (XorV src (LoadVector mem))); format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} ins_encode %{ @@ -6545,7 +6596,7 @@ instruct vcastBtoX(vec dst, vec src) %{ ins_encode %{ assert(UseAVX > 0, "required"); - BasicType to_elem_bt = vector_element_basic_type(this); + BasicType to_elem_bt = Matcher::vector_element_basic_type(this); int vlen_enc = vector_length_encoding(this); switch (to_elem_bt) { case T_SHORT: @@ -6573,9 +6624,9 @@ instruct vcastBtoX(vec dst, vec src) %{ %} instruct castStoX(vec dst, vec src, rRegP scratch) %{ - predicate(UseAVX <= 2 && - vector_length(n->in(1)) <= 8 && // src - vector_element_basic_type(n) == T_BYTE); + predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && + Matcher::vector_length(n->in(1)) <= 8 && // src + Matcher::vector_element_basic_type(n) == T_BYTE); effect(TEMP scratch); match(Set dst (VectorCastS2X src)); format %{ "vector_cast_s2x $dst,$src\t! using $scratch as TEMP" %} @@ -6589,16 +6640,16 @@ instruct castStoX(vec dst, vec src, rRegP scratch) %{ %} instruct vcastStoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ - predicate(UseAVX <= 2 && - vector_length(n->in(1)) == 16 && // src - vector_element_basic_type(n) == T_BYTE); + predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && + Matcher::vector_length(n->in(1)) == 16 && // src + Matcher::vector_element_basic_type(n) == T_BYTE); effect(TEMP dst, TEMP vtmp, TEMP scratch); match(Set dst (VectorCastS2X src)); format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp, $scratch as TEMP" %} ins_encode %{ assert(UseAVX > 0, "required"); - int vlen_enc = vector_length_encoding(vector_length_in_bytes(this, $src)); + int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); @@ -6607,12 +6658,12 @@ instruct vcastStoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ %} instruct vcastStoX_evex(vec dst, vec src) %{ - predicate(UseAVX > 2 || - (vector_length_in_bytes(n) >= vector_length_in_bytes(n->in(1)))); // dst >= src + predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || + (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src match(Set dst (VectorCastS2X src)); format %{ "vector_cast_s2x $dst,$src\t!" %} ins_encode %{ - BasicType to_elem_bt = vector_element_basic_type(this); + BasicType to_elem_bt = Matcher::vector_element_basic_type(this); int src_vlen_enc = vector_length_encoding(this, $src); int vlen_enc = vector_length_encoding(this); switch (to_elem_bt) { @@ -6645,15 +6696,15 @@ instruct vcastStoX_evex(vec dst, vec src) %{ instruct castItoX(vec dst, vec src, rRegP scratch) %{ predicate(UseAVX <= 2 && - (vector_length_in_bytes(n->in(1)) <= 16) && - (vector_length_in_bytes(n) < vector_length_in_bytes(n->in(1)))); // dst < src + (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && + (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src match(Set dst (VectorCastI2X src)); format %{ "vector_cast_i2x $dst,$src\t! using $scratch as TEMP" %} effect(TEMP scratch); ins_encode %{ assert(UseAVX > 0, "required"); - BasicType to_elem_bt = vector_element_basic_type(this); + BasicType to_elem_bt = Matcher::vector_element_basic_type(this); int vlen_enc = vector_length_encoding(this, $src); if (to_elem_bt == T_BYTE) { @@ -6671,15 +6722,15 @@ instruct castItoX(vec dst, vec src, rRegP scratch) %{ instruct vcastItoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ predicate(UseAVX <= 2 && - (vector_length_in_bytes(n->in(1)) == 32) && - (vector_length_in_bytes(n) < vector_length_in_bytes(n->in(1)))); // dst < src + (Matcher::vector_length_in_bytes(n->in(1)) == 32) && + (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src match(Set dst (VectorCastI2X src)); format %{ "vector_cast_i2x $dst,$src\t! using $vtmp and $scratch as TEMP" %} effect(TEMP dst, TEMP vtmp, TEMP scratch); ins_encode %{ assert(UseAVX > 0, "required"); - BasicType to_elem_bt = vector_element_basic_type(this); + BasicType to_elem_bt = Matcher::vector_element_basic_type(this); int vlen_enc = vector_length_encoding(this, $src); if (to_elem_bt == T_BYTE) { @@ -6699,13 +6750,13 @@ instruct vcastItoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ instruct vcastItoX_evex(vec dst, vec src) %{ predicate(UseAVX > 2 || - (vector_length_in_bytes(n) >= vector_length_in_bytes(n->in(1)))); // dst >= src + (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src match(Set dst (VectorCastI2X src)); format %{ "vector_cast_i2x $dst,$src\t!" 
%} ins_encode %{ assert(UseAVX > 0, "required"); - BasicType dst_elem_bt = vector_element_basic_type(this); + BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); int src_vlen_enc = vector_length_encoding(this, $src); int dst_vlen_enc = vector_length_encoding(this); switch (dst_elem_bt) { @@ -6722,13 +6773,13 @@ instruct vcastItoX_evex(vec dst, vec src) %{ __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); break; case T_FLOAT: - __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); + __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); break; case T_LONG: __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); break; case T_DOUBLE: - __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); + __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); break; default: ShouldNotReachHere(); @@ -6738,7 +6789,7 @@ instruct vcastItoX_evex(vec dst, vec src) %{ %} instruct vcastLtoBS(vec dst, vec src, rRegP scratch) %{ - predicate((vector_element_basic_type(n) == T_BYTE || vector_element_basic_type(n) == T_SHORT) && + predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && UseAVX <= 2); match(Set dst (VectorCastL2X src)); effect(TEMP scratch); @@ -6746,8 +6797,8 @@ instruct vcastLtoBS(vec dst, vec src, rRegP scratch) %{ ins_encode %{ assert(UseAVX > 0, "required"); - int vlen = vector_length_in_bytes(this, $src); - BasicType to_elem_bt = vector_element_basic_type(this); + int vlen = Matcher::vector_length_in_bytes(this, $src); + BasicType to_elem_bt = Matcher::vector_element_basic_type(this); AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask()) : ExternalAddress(vector_int_to_short_mask()); if (vlen <= 16) { @@ -6770,14 +6821,14 @@ instruct vcastLtoBS(vec dst, vec src, rRegP scratch) %{ instruct vcastLtoX_evex(vec dst, vec src) %{ predicate(UseAVX > 2 || - (vector_element_basic_type(n) == T_INT || - vector_element_basic_type(n) == T_FLOAT || - vector_element_basic_type(n) == T_DOUBLE)); + (Matcher::vector_element_basic_type(n) == T_INT || + Matcher::vector_element_basic_type(n) == T_FLOAT || + Matcher::vector_element_basic_type(n) == T_DOUBLE)); match(Set dst (VectorCastL2X src)); format %{ "vector_cast_l2x $dst,$src\t!" %} ins_encode %{ - BasicType to_elem_bt = vector_element_basic_type(this); - int vlen = vector_length_in_bytes(this, $src); + BasicType to_elem_bt = Matcher::vector_element_basic_type(this); + int vlen = Matcher::vector_length_in_bytes(this, $src); int vlen_enc = vector_length_encoding(this, $src); switch (to_elem_bt) { case T_BYTE: @@ -6829,7 +6880,7 @@ instruct vcastLtoX_evex(vec dst, vec src) %{ %} instruct vcastFtoD_reg(vec dst, vec src) %{ - predicate(vector_element_basic_type(n) == T_DOUBLE); + predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); match(Set dst (VectorCastF2X src)); format %{ "vector_cast_f2x $dst,$src\t!" %} ins_encode %{ @@ -6840,7 +6891,7 @@ instruct vcastFtoD_reg(vec dst, vec src) %{ %} instruct vcastDtoF_reg(vec dst, vec src) %{ - predicate(vector_element_basic_type(n) == T_FLOAT); + predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); match(Set dst (VectorCastD2X src)); format %{ "vector_cast_d2x $dst,$src\t!" 
%} ins_encode %{ @@ -6853,15 +6904,15 @@ instruct vcastDtoF_reg(vec dst, vec src) %{ // --------------------------------- VectorMaskCmp -------------------------------------- instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ - predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 - vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 - is_floating_point_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE + predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 + Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 + is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %} ins_encode %{ int vlen_enc = vector_length_encoding(this, $src1); Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); - if (vector_element_basic_type(this, $src1) == T_FLOAT) { + if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); } else { __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); @@ -6871,8 +6922,8 @@ instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ %} instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{ - predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 - is_floating_point_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE + predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 + is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); effect(TEMP scratch, TEMP ktmp); format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} @@ -6880,7 +6931,7 @@ instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg kt int vlen_enc = Assembler::AVX_512bit; Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); KRegister mask = k0; // The comparison itself is not being masked. - if (vector_element_basic_type(this, $src1) == T_FLOAT) { + if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); } else { @@ -6894,16 +6945,16 @@ instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg kt instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) %{ predicate((UseAVX <= 2 || !VM_Version::supports_avx512vl()) && !is_unsigned_booltest_pred(n->in(2)->get_int()) && - vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 - vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 - is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 + Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 + Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 + is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); effect(TEMP scratch); format %{ "vector_compare $dst,$src1,$src2,$cond\t! 
using $scratch as TEMP" %} ins_encode %{ int vlen_enc = vector_length_encoding(this, $src1); Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); - Assembler::Width ww = widthForType(vector_element_basic_type(this, $src1)); + Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, ww, vlen_enc, $scratch$$Register); %} ins_pipe( pipe_slow ); @@ -6912,16 +6963,16 @@ instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) % instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec vtmp1, legVec vtmp2, rRegP scratch) %{ predicate((UseAVX == 2 || !VM_Version::supports_avx512vl()) && is_unsigned_booltest_pred(n->in(2)->get_int()) && - vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 - vector_length_in_bytes(n->in(1)->in(1)) <= 16 && // src1 - is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 + Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 + Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 16 && // src1 + is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); effect(TEMP vtmp1, TEMP vtmp2, TEMP scratch); format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} ins_encode %{ - int vlen = vector_length_in_bytes(this, $src1); + int vlen = Matcher::vector_length_in_bytes(this, $src1); Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); - BasicType bt = vector_element_basic_type(this, $src1); + BasicType bt = Matcher::vector_element_basic_type(this, $src1); __ vpcmpu(bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, $scratch$$Register); %} @@ -6931,15 +6982,15 @@ instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec vtmp1, l instruct vcmpu32(legVec dst, legVec src1, legVec src2, immI8 cond, legVec vtmp1, legVec vtmp2, legVec vtmp3, rRegP scratch) %{ predicate((UseAVX == 2 || !VM_Version::supports_avx512vl()) && is_unsigned_booltest_pred(n->in(2)->get_int()) && - vector_length_in_bytes(n->in(1)->in(1)) == 32 && // src1 - is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 + Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 32 && // src1 + is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP scratch); format %{ "vector_compareu $dst,$src1,$src2,$cond\t! 
using $scratch as TEMP" %} ins_encode %{ - int vlen = vector_length_in_bytes(this, $src1); + int vlen = Matcher::vector_length_in_bytes(this, $src1); Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); - BasicType bt = vector_element_basic_type(this, $src1); + BasicType bt = Matcher::vector_element_basic_type(this, $src1); __ vpcmpu32(bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, $vtmp3$$XMMRegister, $scratch$$Register); %} @@ -6949,8 +7000,8 @@ instruct vcmpu32(legVec dst, legVec src1, legVec src2, immI8 cond, legVec vtmp1, instruct evcmp(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{ predicate(UseAVX > 2 && (VM_Version::supports_avx512vl() || - vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 - is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 + Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 + is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); effect(TEMP scratch, TEMP ktmp); format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} @@ -6962,7 +7013,7 @@ instruct evcmp(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); KRegister mask = k0; // The comparison itself is not being masked. bool merge = false; - BasicType src1_elem_bt = vector_element_basic_type(this, $src1); + BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); switch (src1_elem_bt) { case T_BYTE: { @@ -6994,7 +7045,7 @@ instruct evcmp(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp // Extract instruct extractI(rRegI dst, legVec src, immU8 idx) %{ - predicate(vector_length_in_bytes(n->in(1)) <= 16); // src + predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src match(Set dst (ExtractI src idx)); match(Set dst (ExtractS src idx)); #ifdef _LP64 @@ -7002,17 +7053,17 @@ instruct extractI(rRegI dst, legVec src, immU8 idx) %{ #endif format %{ "extractI $dst,$src,$idx\t!" %} ins_encode %{ - assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); + assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); - BasicType elem_bt = vector_element_basic_type(this, $src); + BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant); %} ins_pipe( pipe_slow ); %} instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ - predicate(vector_length_in_bytes(n->in(1)) == 32 || // src - vector_length_in_bytes(n->in(1)) == 64); // src + predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src + Matcher::vector_length_in_bytes(n->in(1)) == 64); // src match(Set dst (ExtractI src idx)); match(Set dst (ExtractS src idx)); #ifdef _LP64 @@ -7021,9 +7072,9 @@ instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ effect(TEMP vtmp); format %{ "vextractI $dst,$src,$idx\t! 
using $vtmp as TEMP" %} ins_encode %{ - assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); + assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); - BasicType elem_bt = vector_element_basic_type(this, $src); + BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); %} @@ -7032,12 +7083,12 @@ instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ #ifdef _LP64 instruct extractL(rRegL dst, legVec src, immU8 idx) %{ - predicate(vector_length(n->in(1)) <= 2); // src + predicate(Matcher::vector_length(n->in(1)) <= 2); // src match(Set dst (ExtractL src idx)); format %{ "extractL $dst,$src,$idx\t!" %} ins_encode %{ assert(UseSSE >= 4, "required"); - assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); + assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); %} @@ -7045,13 +7096,13 @@ instruct extractL(rRegL dst, legVec src, immU8 idx) %{ %} instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ - predicate(vector_length(n->in(1)) == 4 || // src - vector_length(n->in(1)) == 8); // src + predicate(Matcher::vector_length(n->in(1)) == 4 || // src + Matcher::vector_length(n->in(1)) == 8); // src match(Set dst (ExtractL src idx)); effect(TEMP vtmp); format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %} ins_encode %{ - assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); + assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); @@ -7061,12 +7112,12 @@ instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ #endif instruct extractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ - predicate(vector_length(n->in(1)) <= 4); + predicate(Matcher::vector_length(n->in(1)) <= 4); match(Set dst (ExtractF src idx)); effect(TEMP dst, TEMP tmp, TEMP vtmp); format %{ "extractF $dst,$src,$idx\t! using $tmp, $vtmp as TEMP" %} ins_encode %{ - assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); + assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $tmp$$Register, $vtmp$$XMMRegister); %} @@ -7074,13 +7125,13 @@ instruct extractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ %} instruct vextractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ - predicate(vector_length(n->in(1)/*src*/) == 8 || - vector_length(n->in(1)/*src*/) == 16); + predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || + Matcher::vector_length(n->in(1)/*src*/) == 16); match(Set dst (ExtractF src idx)); effect(TEMP tmp, TEMP vtmp); format %{ "vextractF $dst,$src,$idx\t! 
using $tmp, $vtmp as TEMP" %} ins_encode %{ - assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); + assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant, $tmp$$Register); @@ -7089,11 +7140,11 @@ instruct vextractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) % %} instruct extractD(legRegD dst, legVec src, immU8 idx) %{ - predicate(vector_length(n->in(1)) == 2); // src + predicate(Matcher::vector_length(n->in(1)) == 2); // src match(Set dst (ExtractD src idx)); format %{ "extractD $dst,$src,$idx\t!" %} ins_encode %{ - assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); + assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); %} @@ -7101,13 +7152,13 @@ instruct extractD(legRegD dst, legVec src, immU8 idx) %{ %} instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ - predicate(vector_length(n->in(1)) == 4 || // src - vector_length(n->in(1)) == 8); // src + predicate(Matcher::vector_length(n->in(1)) == 4 || // src + Matcher::vector_length(n->in(1)) == 8); // src match(Set dst (ExtractD src idx)); effect(TEMP vtmp); format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} ins_encode %{ - assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); + assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); @@ -7135,8 +7186,8 @@ instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ predicate(UseAVX > 0 && - vector_length_in_bytes(n) <= 32 && - is_integral_type(vector_element_basic_type(n))); + Matcher::vector_length_in_bytes(n) <= 32 && + is_integral_type(Matcher::vector_element_basic_type(n))); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} ins_encode %{ @@ -7148,8 +7199,8 @@ instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ predicate(UseAVX > 0 && - vector_length_in_bytes(n) <= 32 && - !is_integral_type(vector_element_basic_type(n))); + Matcher::vector_length_in_bytes(n) <= 32 && + !is_integral_type(Matcher::vector_element_basic_type(n))); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} ins_encode %{ @@ -7160,13 +7211,13 @@ instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ %} instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch, kReg ktmp) %{ - predicate(vector_length_in_bytes(n) == 64); + predicate(Matcher::vector_length_in_bytes(n) == 64); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using $scratch and k2 as TEMP" %} effect(TEMP scratch, TEMP ktmp); ins_encode %{ int vlen_enc = Assembler::AVX_512bit; - BasicType elem_bt = vector_element_basic_type(this); + BasicType elem_bt = Matcher::vector_element_basic_type(this); __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register); __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); %} @@ -7179,7 +7230,7 @@ instruct vabsB_reg(vec dst, vec src) %{ match(Set dst (AbsVB src)); format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen <= 16) { __ pabsb($dst$$XMMRegister, $src$$XMMRegister); } else { @@ -7194,7 +7245,7 @@ instruct vabsS_reg(vec dst, vec src) %{ match(Set dst (AbsVS src)); format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen <= 8) { __ pabsw($dst$$XMMRegister, $src$$XMMRegister); } else { @@ -7209,7 +7260,7 @@ instruct vabsI_reg(vec dst, vec src) %{ match(Set dst (AbsVI src)); format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} ins_encode %{ - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen <= 4) { __ pabsd($dst$$XMMRegister, $src$$XMMRegister); } else { @@ -7237,7 +7288,7 @@ instruct vabsL_reg(vec dst, vec src) %{ // --------------------------------- ABSNEG -------------------------------------- instruct vabsnegF(vec dst, vec src, rRegI scratch) %{ - predicate(vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F + predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F match(Set dst (AbsVF src)); match(Set dst (NegVF src)); effect(TEMP scratch); @@ -7245,7 +7296,7 @@ instruct vabsnegF(vec dst, vec src, rRegI scratch) %{ ins_cost(150); ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen = vector_length(this); + int vlen = Matcher::vector_length(this); if (vlen == 2) { __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); } else { @@ -7258,7 +7309,7 @@ instruct vabsnegF(vec dst, vec src, rRegI scratch) %{ %} instruct vabsneg4F(vec dst, rRegI scratch) %{ - predicate(vector_length(n) == 4); + predicate(Matcher::vector_length(n) == 4); match(Set dst (AbsVF dst)); match(Set dst (NegVF dst)); effect(TEMP scratch); @@ -7278,7 +7329,7 @@ instruct vabsnegD(vec dst, vec src, rRegI scratch) %{ format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} ins_encode %{ int opcode = this->ideal_Opcode(); - uint vlen = vector_length(this); + uint vlen = Matcher::vector_length(this); if (vlen == 2) { assert(UseSSE >= 2, "required"); __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); @@ -7294,14 +7345,14 @@ instruct vabsnegD(vec dst, vec src, rRegI scratch) %{ #ifdef _LP64 instruct vptest_alltrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp1, legVec vtmp2, rFlagsReg cr) %{ - predicate(vector_length_in_bytes(n->in(1)) >= 4 && - vector_length_in_bytes(n->in(1)) < 16 && + predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 4 && + Matcher::vector_length_in_bytes(n->in(1)) < 16 && static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); match(Set dst (VectorTest src1 src2 )); effect(TEMP vtmp1, TEMP vtmp2, KILL cr); format %{ "vector_test $dst,$src1, $src2\t!
using $vtmp1, $vtmp2 and $cr as TEMP" %} ins_encode %{ - int vlen = vector_length_in_bytes(this, $src1); + int vlen = Matcher::vector_length_in_bytes(this, $src1); __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); __ setb(Assembler::carrySet, $dst$$Register); __ movzbl($dst$$Register, $dst$$Register); @@ -7310,14 +7361,14 @@ %} instruct vptest_alltrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ - predicate(vector_length_in_bytes(n->in(1)) >= 16 && - vector_length_in_bytes(n->in(1)) < 64 && + predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16 && + Matcher::vector_length_in_bytes(n->in(1)) < 64 && static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); match(Set dst (VectorTest src1 src2 )); effect(KILL cr); format %{ "vector_test $dst,$src1, $src2\t! using $cr as TEMP" %} ins_encode %{ - int vlen = vector_length_in_bytes(this, $src1); + int vlen = Matcher::vector_length_in_bytes(this, $src1); __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); __ setb(Assembler::carrySet, $dst$$Register); __ movzbl($dst$$Register, $dst$$Register); @@ -7326,13 +7377,13 @@ instruct vptest_alltrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ instruct vptest_alltrue_evex(rRegI dst, legVec src1, legVec src2, kReg ktmp, rFlagsReg cr) %{ - predicate(vector_length_in_bytes(n->in(1)) == 64 && + predicate(Matcher::vector_length_in_bytes(n->in(1)) == 64 && static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); match(Set dst (VectorTest src1 src2 )); effect(KILL cr, TEMP ktmp); format %{ "vector_test $dst,$src1, $src2\t! using $cr as TEMP" %} ins_encode %{ - int vlen = vector_length_in_bytes(this, $src1); + int vlen = Matcher::vector_length_in_bytes(this, $src1); __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, $ktmp$$KRegister); __ setb(Assembler::carrySet, $dst$$Register); __ movzbl($dst$$Register, $dst$$Register); @@ -7341,14 +7392,14 @@ %} instruct vptest_anytrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp, rFlagsReg cr) %{ - predicate(vector_length_in_bytes(n->in(1)) >= 4 && - vector_length_in_bytes(n->in(1)) < 16 && + predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 4 && + Matcher::vector_length_in_bytes(n->in(1)) < 16 && static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); match(Set dst (VectorTest src1 src2 )); effect(TEMP vtmp, KILL cr); format %{ "vector_test_any_true $dst,$src1,$src2\t!
using $vtmp, $cr as TEMP" %} ins_encode %{ - int vlen = vector_length_in_bytes(this, $src1); + int vlen = Matcher::vector_length_in_bytes(this, $src1); __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); __ setb(Assembler::notZero, $dst$$Register); __ movzbl($dst$$Register, $dst$$Register); @@ -7357,14 +7408,14 @@ %} instruct vptest_anytrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ - predicate(vector_length_in_bytes(n->in(1)) >= 16 && - vector_length_in_bytes(n->in(1)) < 64 && + predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16 && + Matcher::vector_length_in_bytes(n->in(1)) < 64 && static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); match(Set dst (VectorTest src1 src2 )); effect(KILL cr); format %{ "vector_test_any_true $dst,$src1,$src2\t! using $cr as TEMP" %} ins_encode %{ - int vlen = vector_length_in_bytes(this, $src1); + int vlen = Matcher::vector_length_in_bytes(this, $src1); __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); __ setb(Assembler::notZero, $dst$$Register); __ movzbl($dst$$Register, $dst$$Register); @@ -7373,13 +7424,13 @@ instruct vptest_anytrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ instruct vptest_anytrue_evex(rRegI dst, legVec src1, legVec src2, kReg ktmp, rFlagsReg cr) %{ - predicate(vector_length_in_bytes(n->in(1)) == 64 && + predicate(Matcher::vector_length_in_bytes(n->in(1)) == 64 && static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); match(Set dst (VectorTest src1 src2 )); effect(KILL cr, TEMP ktmp); format %{ "vector_test_any_true $dst,$src1,$src2\t! using $cr as TEMP" %} ins_encode %{ - int vlen = vector_length_in_bytes(this, $src1); + int vlen = Matcher::vector_length_in_bytes(this, $src1); __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, $ktmp$$KRegister); __ setb(Assembler::notZero, $dst$$Register); __ movzbl($dst$$Register, $dst$$Register); @@ -7388,40 +7439,40 @@ %} instruct cmpvptest_anytrue_lt16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, legVec vtmp) %{ - predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 4 && - vector_length_in_bytes(n->in(1)->in(1)) < 16 && + predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && + Matcher::vector_length_in_bytes(n->in(1)->in(1)) < 16 && static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); match(Set cr (CmpI (VectorTest src1 src2) zero)); effect(TEMP vtmp); format %{ "cmp_vector_test_any_true $src1,$src2\t! using $vtmp as TEMP" %} ins_encode %{ - int vlen = vector_length_in_bytes(this, $src1); + int vlen = Matcher::vector_length_in_bytes(this, $src1); __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} instruct cmpvptest_anytrue(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero) %{ - predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 16 && - vector_length_in_bytes(n->in(1)->in(1)) < 64 && + predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 16 && + Matcher::vector_length_in_bytes(n->in(1)->in(1)) < 64 && static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); match(Set cr (CmpI (VectorTest src1 src2) zero)); format %{ "cmp_vector_test_any_true $src1,$src2\t!"
%} ins_encode %{ - int vlen = vector_length_in_bytes(this, $src1); + int vlen = Matcher::vector_length_in_bytes(this, $src1); __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); %} ins_pipe( pipe_slow ); %} instruct cmpvptest_anytrue_evex(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, kReg ktmp) %{ - predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && + predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); match(Set cr (CmpI (VectorTest src1 src2) zero)); effect(TEMP ktmp); format %{ "cmp_vector_test_any_true $src1,$src2\t!" %} ins_encode %{ - int vlen = vector_length_in_bytes(this, $src1); + int vlen = Matcher::vector_length_in_bytes(this, $src1); __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, $ktmp$$KRegister); %} ins_pipe( pipe_slow ); @@ -7430,15 +7481,30 @@ instruct cmpvptest_anytrue_evex(rFlagsReg cr, legVec src1, legVec src2, immI_0 z //------------------------------------- LoadMask -------------------------------------------- -instruct loadMask(vec dst, vec src) %{ +instruct loadMask(legVec dst, legVec src) %{ + predicate(!VM_Version::supports_avx512vlbw()); + match(Set dst (VectorLoadMask src)); + effect(TEMP dst); + format %{ "vector_loadmask_byte $dst,$src\n\t" %} + ins_encode %{ + int vlen_in_bytes = Matcher::vector_length_in_bytes(this); + BasicType elem_bt = Matcher::vector_element_basic_type(this); + + __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadMask_evex(vec dst, vec src) %{ + predicate(VM_Version::supports_avx512vlbw()); match(Set dst (VectorLoadMask src)); effect(TEMP dst); format %{ "vector_loadmask_byte $dst,$src\n\t" %} ins_encode %{ - int vlen_in_bytes = vector_length_in_bytes(this); - BasicType elem_bt = vector_element_basic_type(this); + int vlen_in_bytes = Matcher::vector_length_in_bytes(this); + BasicType elem_bt = Matcher::vector_element_basic_type(this); - __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt); + __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, false); %} ins_pipe( pipe_slow ); %} @@ -7446,12 +7512,12 @@ instruct loadMask(vec dst, vec src) %{ //------------------------------------- StoreMask -------------------------------------------- instruct storeMask1B(vec dst, vec src, immI_1 size) %{ - predicate(vector_length(n) < 64 || VM_Version::supports_avx512vlbw()); + predicate(Matcher::vector_length(n) < 64 || VM_Version::supports_avx512vlbw()); match(Set dst (VectorStoreMask src size)); format %{ "vector_store_mask $dst,$src\t!"
%} ins_encode %{ assert(UseSSE >= 3, "required"); - if (vector_length_in_bytes(this) <= 16) { + if (Matcher::vector_length_in_bytes(this) <= 16) { __ pabsb($dst$$XMMRegister, $src$$XMMRegister); } else { assert(UseAVX >= 2, "required"); @@ -7463,7 +7529,7 @@ instruct storeMask1B(vec dst, vec src, immI_1 size) %{ %} instruct storeMask2B(vec dst, vec src, immI_2 size) %{ - predicate(vector_length(n) <= 8); + predicate(Matcher::vector_length(n) <= 8); match(Set dst (VectorStoreMask src size)); format %{ "vector_store_mask $dst,$src\n\t" %} ins_encode %{ @@ -7475,7 +7541,7 @@ instruct storeMask2B(vec dst, vec src, immI_2 size) %{ %} instruct vstoreMask2B(vec dst, vec src, immI_2 size) %{ - predicate(vector_length(n) == 16 && !VM_Version::supports_avx512bw()); + predicate(Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); match(Set dst (VectorStoreMask src size)); effect(TEMP dst); format %{ "vector_store_mask $dst,$src\t!" %} @@ -7502,7 +7568,7 @@ instruct vstoreMask2B_evex(vec dst, vec src, immI_2 size) %{ %} instruct storeMask4B(vec dst, vec src, immI_4 size) %{ - predicate (vector_length(n) <= 4 && UseAVX <= 2); + predicate(Matcher::vector_length(n) <= 4 && UseAVX <= 2); match(Set dst (VectorStoreMask src size)); format %{ "vector_store_mask $dst,$src\t!" %} ins_encode %{ @@ -7515,7 +7581,7 @@ instruct storeMask4B(vec dst, vec src, immI_4 size) %{ %} instruct vstoreMask4B(vec dst, vec src, immI_4 size) %{ - predicate(vector_length(n) == 8 && UseAVX <= 2); + predicate(Matcher::vector_length(n) == 8 && UseAVX <= 2); match(Set dst (VectorStoreMask src size)); format %{ "vector_store_mask $dst,$src\t!" %} effect(TEMP dst); @@ -7546,7 +7612,7 @@ instruct vstoreMask4B_evex(vec dst, vec src, immI_4 size) %{ %} instruct storeMask8B(vec dst, vec src, immI_8 size) %{ - predicate(vector_length(n) == 2 && UseAVX <= 2); + predicate(Matcher::vector_length(n) == 2 && UseAVX <= 2); match(Set dst (VectorStoreMask src size)); format %{ "vector_store_mask $dst,$src\t!" %} ins_encode %{ @@ -7560,7 +7626,7 @@ instruct storeMask8B(vec dst, vec src, immI_8 size) %{ %} instruct storeMask8B_avx(vec dst, vec src, immI_8 size, legVec vtmp) %{ - predicate(vector_length(n) == 4 && UseAVX <= 2); + predicate(Matcher::vector_length(n) == 4 && UseAVX <= 2); match(Set dst (VectorStoreMask src size)); format %{ "vector_store_mask $dst,$src\t! using $vtmp as TEMP" %} effect(TEMP dst, TEMP vtmp); @@ -7593,8 +7659,8 @@ instruct vstoreMask8B_evex(vec dst, vec src, immI_8 size) %{ %} instruct vmaskcast(vec dst) %{ - predicate((vector_length(n) == vector_length(n->in(1))) && - (vector_length_in_bytes(n) == vector_length_in_bytes(n->in(1)))); + predicate((Matcher::vector_length(n) == Matcher::vector_length(n->in(1))) && + (Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)))); match(Set dst (VectorMaskCast dst)); ins_cost(0); format %{ "vector_mask_cast $dst" %} @@ -7607,12 +7673,12 @@ instruct vmaskcast(vec dst) %{ //-------------------------------- Load Iota Indices ---------------------------------- instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{ - predicate(vector_element_basic_type(n) == T_BYTE); + predicate(Matcher::vector_element_basic_type(n) == T_BYTE); match(Set dst (VectorLoadConst src)); effect(TEMP scratch); format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! 
load iota indices" %} ins_encode %{ - int vlen_in_bytes = vector_length_in_bytes(this); + int vlen_in_bytes = Matcher::vector_length_in_bytes(this); __ load_iota_indices($dst$$XMMRegister, $scratch$$Register, vlen_in_bytes); %} ins_pipe( pipe_slow ); @@ -7623,7 +7689,7 @@ instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{ // LoadShuffle/Rearrange for Byte instruct loadShuffleB(vec dst) %{ - predicate(vector_element_basic_type(n) == T_BYTE); + predicate(Matcher::vector_element_basic_type(n) == T_BYTE); match(Set dst (VectorLoadShuffle dst)); format %{ "vector_load_shuffle $dst, $dst" %} ins_encode %{ @@ -7633,8 +7699,8 @@ instruct loadShuffleB(vec dst) %{ %} instruct rearrangeB(vec dst, vec shuffle) %{ - predicate(vector_element_basic_type(n) == T_BYTE && - vector_length(n) < 32); + predicate(Matcher::vector_element_basic_type(n) == T_BYTE && + Matcher::vector_length(n) < 32); match(Set dst (VectorRearrange dst shuffle)); format %{ "vector_rearrange $dst, $shuffle, $dst" %} ins_encode %{ @@ -7645,8 +7711,8 @@ instruct rearrangeB(vec dst, vec shuffle) %{ %} instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2, rRegP scratch) %{ - predicate(vector_element_basic_type(n) == T_BYTE && - vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); + predicate(Matcher::vector_element_basic_type(n) == T_BYTE && + Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); match(Set dst (VectorRearrange src shuffle)); effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2, $scratch as TEMP" %} @@ -7667,8 +7733,8 @@ instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVe %} instruct rearrangeB_evex(vec dst, vec src, vec shuffle) %{ - predicate(vector_element_basic_type(n) == T_BYTE && - vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); + predicate(Matcher::vector_element_basic_type(n) == T_BYTE && + Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); match(Set dst (VectorRearrange src shuffle)); format %{ "vector_rearrange $dst, $shuffle, $src" %} ins_encode %{ @@ -7681,15 +7747,15 @@ instruct rearrangeB_evex(vec dst, vec src, vec shuffle) %{ // LoadShuffle/Rearrange for Short instruct loadShuffleS(vec dst, vec src, vec vtmp, rRegP scratch) %{ - predicate(vector_element_basic_type(n) == T_SHORT && - vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS + predicate(Matcher::vector_element_basic_type(n) == T_SHORT && + Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS match(Set dst (VectorLoadShuffle src)); effect(TEMP dst, TEMP vtmp, TEMP scratch); format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp and $scratch as TEMP" %} ins_encode %{ // Create a byte shuffle mask from short shuffle mask // only byte shuffle instruction available on these platforms - int vlen_in_bytes = vector_length_in_bytes(this); + int vlen_in_bytes = Matcher::vector_length_in_bytes(this); if (UseAVX == 0) { assert(vlen_in_bytes <= 16, "required"); // Multiply each shuffle by two to get byte index @@ -7723,8 +7789,8 @@ instruct loadShuffleS(vec dst, vec src, vec vtmp, rRegP scratch) %{ %} instruct rearrangeS(vec dst, vec shuffle) %{ - predicate(vector_element_basic_type(n) == T_SHORT && - vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); + predicate(Matcher::vector_element_basic_type(n) == T_SHORT && + Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); match(Set dst (VectorRearrange dst shuffle)); format %{ "vector_rearrange $dst, $shuffle, $dst" %} ins_encode %{ @@ -7735,8 +7801,8 @@ instruct rearrangeS(vec dst, vec shuffle) %{ %} instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2, rRegP scratch) %{ - predicate(vector_element_basic_type(n) == T_SHORT && - vector_length(n) == 16 && !VM_Version::supports_avx512bw()); + predicate(Matcher::vector_element_basic_type(n) == T_SHORT && + Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); match(Set dst (VectorRearrange src shuffle)); effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2, $scratch as TEMP" %} @@ -7757,7 +7823,7 @@ instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVe %} instruct loadShuffleS_evex(vec dst, vec src) %{ - predicate(vector_element_basic_type(n) == T_SHORT && + predicate(Matcher::vector_element_basic_type(n) == T_SHORT && VM_Version::supports_avx512bw()); match(Set dst (VectorLoadShuffle src)); format %{ "vector_load_shuffle $dst, $src" %} @@ -7772,7 +7838,7 @@ instruct loadShuffleS_evex(vec dst, vec src) %{ %} instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ - predicate(vector_element_basic_type(n) == T_SHORT && + predicate(Matcher::vector_element_basic_type(n) == T_SHORT && VM_Version::supports_avx512bw()); match(Set dst (VectorRearrange src shuffle)); format %{ "vector_rearrange $dst, $shuffle, $src" %} @@ -7789,8 +7855,8 @@ instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ // LoadShuffle/Rearrange for Integer and Float instruct loadShuffleI(vec dst, vec src, vec vtmp, rRegP scratch) %{ - predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) && - vector_length(n) == 4 && UseAVX < 2); + predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && + Matcher::vector_length(n) == 4 && UseAVX < 2); match(Set dst (VectorLoadShuffle src)); effect(TEMP dst, TEMP vtmp, TEMP scratch); format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp and $scratch as TEMP" %} @@ -7819,8 +7885,8 @@ instruct loadShuffleI(vec dst, vec src, vec vtmp, rRegP scratch) %{ %} instruct rearrangeI(vec dst, vec shuffle) %{ - predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) && - vector_length(n) == 4 && UseAVX < 2); + predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && + Matcher::vector_length(n) == 4 && UseAVX < 2); match(Set dst (VectorRearrange dst shuffle)); format %{ "vector_rearrange $dst, $shuffle, $dst" %} ins_encode %{ @@ -7831,7 +7897,7 @@ instruct rearrangeI(vec dst, vec shuffle) %{ %} instruct loadShuffleI_avx(vec dst, vec src) %{ - predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) && + predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && UseAVX >= 2); match(Set dst (VectorLoadShuffle src)); format %{ "vector_load_shuffle $dst, $src" %} @@ -7843,7 +7909,7 @@ instruct loadShuffleI_avx(vec dst, vec src) %{ %} instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ - predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) && + predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && UseAVX >= 2); match(Set dst (VectorRearrange src shuffle)); format %{ "vector_rearrange $dst, $shuffle, $src" %} @@ -7860,8 +7926,8 @@ instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ // LoadShuffle/Rearrange for Long and Double instruct loadShuffleL(vec dst, vec src, vec vtmp, rRegP scratch) %{ - predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE - vector_length(n) < 8 && !VM_Version::supports_avx512vl()); + predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE + Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); match(Set dst (VectorLoadShuffle src)); effect(TEMP dst, TEMP vtmp, TEMP scratch); format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp and $scratch as TEMP" %} @@ -7887,8 +7953,8 @@ instruct loadShuffleL(vec dst, vec src, vec vtmp, rRegP scratch) %{ %} instruct rearrangeL(vec dst, vec src, vec shuffle) %{ - predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE - vector_length(n) < 8 && !VM_Version::supports_avx512vl()); + predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE + Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); match(Set dst (VectorRearrange src shuffle)); format %{ "vector_rearrange $dst, $shuffle, $src" %} ins_encode %{ @@ -7901,8 +7967,8 @@ instruct rearrangeL(vec dst, vec src, vec shuffle) %{ %} instruct loadShuffleL_evex(vec dst, vec src) %{ - predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE - (vector_length(n) == 8 || VM_Version::supports_avx512vl())); + predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE + (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); match(Set dst (VectorLoadShuffle src)); format %{ "vector_load_shuffle $dst, $src" %} ins_encode %{ @@ -7915,8 +7981,8 @@ instruct loadShuffleL_evex(vec dst, vec src) %{ %} instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ - predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE - (vector_length(n) == 8 || VM_Version::supports_avx512vl())); + predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE + (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); match(Set dst (VectorRearrange src shuffle)); format %{ "vector_rearrange $dst, $shuffle, $src" %} ins_encode %{ @@ -7947,6 +8013,7 @@ instruct vfmaF_reg(vec a, vec b, vec c) %{ %} instruct vfmaF_mem(vec a, memory b, vec c) %{ + predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); match(Set c (FmaVF c (Binary a (LoadVector b)))); format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} ins_cost(150); @@ -7971,6 +8038,7 @@ instruct vfmaD_reg(vec a, vec b, vec c) %{ %} instruct vfmaD_mem(vec a, memory b, vec c) %{ + predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); match(Set c (FmaVD c (Binary a (LoadVector b)))); format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} ins_cost(150); @@ -8048,6 +8116,7 @@ instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ %} instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ + predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); effect(TEMP dst); format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} @@ -8094,11 +8163,11 @@ instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kR format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
vector mask comparison" %} ins_encode %{ assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); - assert(vector_element_basic_type(this, $src1) == vector_element_basic_type(this, $src2), "mismatch"); + assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); Label DONE; int vlen_enc = vector_length_encoding(this, $src1); - BasicType elem_bt = vector_element_basic_type(this, $src1); + BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); __ knotql($ktmp2$$KRegister, $mask$$KRegister); __ mov64($dst$$Register, -1L); @@ -8166,7 +8235,7 @@ instruct vmask_truecount_evex(rRegI dst, vec mask, rRegL tmp, kReg ktmp, vec xtm ins_encode %{ int opcode = this->ideal_Opcode(); int vlen_enc = vector_length_encoding(this, $mask); - int mask_len = vector_length(this, $mask); + int mask_len = Matcher::vector_length(this, $mask); __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, $tmp$$Register, $ktmp$$KRegister, mask_len, vlen_enc); %} @@ -8182,7 +8251,7 @@ instruct vmask_first_or_last_true_evex(rRegI dst, vec mask, rRegL tmp, kReg ktmp ins_encode %{ int opcode = this->ideal_Opcode(); int vlen_enc = vector_length_encoding(this, $mask); - int mask_len = vector_length(this, $mask); + int mask_len = Matcher::vector_length(this, $mask); __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, $tmp$$Register, $ktmp$$KRegister, mask_len, vlen_enc); %} @@ -8197,7 +8266,7 @@ instruct vmask_truecount_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1 ins_encode %{ int opcode = this->ideal_Opcode(); int vlen_enc = vector_length_encoding(this, $mask); - int mask_len = vector_length(this, $mask); + int mask_len = Matcher::vector_length(this, $mask); __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, $xtmp1$$XMMRegister, $tmp$$Register, mask_len, vlen_enc); %} @@ -8213,7 +8282,7 @@ instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, ins_encode %{ int opcode = this->ideal_Opcode(); int vlen_enc = vector_length_encoding(this, $mask); - int mask_len = vector_length(this, $mask); + int mask_len = Matcher::vector_length(this, $mask); __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, $xtmp1$$XMMRegister, $tmp$$Register, mask_len, vlen_enc); %} diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad index a2cf6192415954589fd9a761e8f7b9d302b631ce..18d213cc31a049dd3c52a078c5377f9a3ca9b196 100644 --- a/src/hotspot/cpu/x86/x86_32.ad +++ b/src/hotspot/cpu/x86/x86_32.ad @@ -1440,6 +1440,16 @@ bool Matcher::is_spillable_arg( int reg ) { return can_be_java_arg(reg); } +uint Matcher::int_pressure_limit() +{ + return (INTPRESSURE == -1) ? 6 : INTPRESSURE; +} + +uint Matcher::float_pressure_limit() +{ + return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE; +} + bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { // Use hardware integer DIV instruction when // it is faster than a code which use multiply. @@ -5011,6 +5021,89 @@ define %{ // name must have been defined in an 'enc_class' specification // in the encode section of the architecture description. +// Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 
+// Load Float +instruct MoveF2LEG(legRegF dst, regF src) %{ + match(Set dst src); + format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} + ins_encode %{ + ShouldNotReachHere(); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Load Float +instruct MoveLEG2F(regF dst, legRegF src) %{ + match(Set dst src); + format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} + ins_encode %{ + ShouldNotReachHere(); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Load Float +instruct MoveF2VL(vlRegF dst, regF src) %{ + match(Set dst src); + format %{ "movss $dst,$src\t! load float (4 bytes)" %} + ins_encode %{ + ShouldNotReachHere(); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Load Float +instruct MoveVL2F(regF dst, vlRegF src) %{ + match(Set dst src); + format %{ "movss $dst,$src\t! load float (4 bytes)" %} + ins_encode %{ + ShouldNotReachHere(); + %} + ins_pipe( fpu_reg_reg ); +%} + + + +// Load Double +instruct MoveD2LEG(legRegD dst, regD src) %{ + match(Set dst src); + format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} + ins_encode %{ + ShouldNotReachHere(); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Load Double +instruct MoveLEG2D(regD dst, legRegD src) %{ + match(Set dst src); + format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} + ins_encode %{ + ShouldNotReachHere(); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Load Double +instruct MoveD2VL(vlRegD dst, regD src) %{ + match(Set dst src); + format %{ "movsd $dst,$src\t! load double (8 bytes)" %} + ins_encode %{ + ShouldNotReachHere(); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Load Double +instruct MoveVL2D(regD dst, vlRegD src) %{ + match(Set dst src); + format %{ "movsd $dst,$src\t! load double (8 bytes)" %} + ins_encode %{ + ShouldNotReachHere(); + %} + ins_pipe( fpu_reg_reg ); +%} + //----------BSWAP-Instruction-------------------------------------------------- instruct bytes_reverse_int(rRegI dst) %{ match(Set dst (ReverseBytesI dst)); @@ -5756,46 +5849,6 @@ instruct loadKlass(eRegP dst, memory mem) %{ ins_pipe( ialu_reg_mem ); %} -// Load Float -instruct MoveF2LEG(legRegF dst, regF src) %{ - match(Set dst src); - format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} - ins_encode %{ - __ movflt($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Float -instruct MoveLEG2F(regF dst, legRegF src) %{ - match(Set dst src); - format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} - ins_encode %{ - __ movflt($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Double -instruct MoveD2LEG(legRegD dst, regD src) %{ - match(Set dst src); - format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} - ins_encode %{ - __ movdbl($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Double -instruct MoveLEG2D(regD dst, legRegD src) %{ - match(Set dst src); - format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} - ins_encode %{ - __ movdbl($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - // Load Double instruct loadDPR(regDPR dst, memory mem) %{ predicate(UseSSE<=1); @@ -6425,26 +6478,6 @@ instruct storeD(memory mem, regD src) %{ ins_pipe( pipe_slow ); %} -// Load Double -instruct MoveD2VL(vlRegD dst, regD src) %{ - match(Set dst src); - format %{ "movsd $dst,$src\t! 
load double (8 bytes)" %} - ins_encode %{ - __ movdbl($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Double -instruct MoveVL2D(regD dst, vlRegD src) %{ - match(Set dst src); - format %{ "movsd $dst,$src\t! load double (8 bytes)" %} - ins_encode %{ - __ movdbl($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - // Store XMM register to memory (single-precision floating point) // MOVSS instruction instruct storeF(memory mem, regF src) %{ @@ -6458,25 +6491,6 @@ instruct storeF(memory mem, regF src) %{ ins_pipe( pipe_slow ); %} -// Load Float -instruct MoveF2VL(vlRegF dst, regF src) %{ - match(Set dst src); - format %{ "movss $dst,$src\t! load float (4 bytes)" %} - ins_encode %{ - __ movflt($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Float -instruct MoveVL2F(regF dst, vlRegF src) %{ - match(Set dst src); - format %{ "movss $dst,$src\t! load float (4 bytes)" %} - ins_encode %{ - __ movflt($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} // Store Float instruct storeFPR( memory mem, regFPR1 src) %{ @@ -11470,8 +11484,7 @@ instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ // fast clearing of an array // Small ClearArray non-AVX512. instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ - predicate(!((ClearArrayNode*)n)->is_large() && - (UseAVX <= 2 || !VM_Version::supports_avx512vlbw())); + predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2)); match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); @@ -11530,11 +11543,10 @@ instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe du %} // Small ClearArray AVX512 non-constant length. -instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, regD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ - predicate(!((ClearArrayNode*)n)->is_large() && - UseAVX > 2 && VM_Version::supports_avx512vlbw() && - !n->in(2)->bottom_type()->is_int()->is_con()); +instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ + predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2)); match(Set dummy (ClearArray cnt base)); + ins_cost(125); effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); format %{ $$template @@ -11593,7 +11605,7 @@ instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, regD tmp, kReg ktmp, eAXRegI z // Large ClearArray non-AVX512. instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ - predicate(UseAVX <= 2 && ((ClearArrayNode*)n)->is_large()); + predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large()); match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); format %{ $$template @@ -11642,8 +11654,8 @@ instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Unive %} // Large ClearArray AVX512. 
-instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, regD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ - predicate(UseAVX > 2 && ((ClearArrayNode*)n)->is_large()); +instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ + predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large()); match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); format %{ $$template @@ -11695,9 +11707,9 @@ instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, regD tmp, kReg ktmp, eAX instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr) %{ predicate(!((ClearArrayNode*)n)->is_large() && - (UseAVX > 2 && VM_Version::supports_avx512vlbw() && - n->in(2)->bottom_type()->is_int()->is_con())); + ((UseAVX > 2) && VM_Version::supports_avx512vlbw())); match(Set dummy (ClearArray cnt base)); + ins_cost(100); effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr); format %{ "clear_mem_imm $base , $cnt \n\t" %} ins_encode %{ @@ -11708,7 +11720,7 @@ instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Univ instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, eAXRegI result, regD tmp1, eFlagsReg cr) %{ - predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); + predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -11723,7 +11735,7 @@ instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ - predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); + predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -11738,7 +11750,7 @@ instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, eAXRegI result, regD tmp1, eFlagsReg cr) %{ - predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); + predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -11753,7 +11765,7 @@ instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ - predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); + predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -11768,7 +11780,7 @@ instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI 
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, eAXRegI result, regD tmp1, eFlagsReg cr) %{ - predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); + predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -11783,7 +11795,7 @@ instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ - predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); + predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -11798,7 +11810,7 @@ instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, eAXRegI result, regD tmp1, eFlagsReg cr) %{ - predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); + predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -11813,7 +11825,7 @@ instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ - predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); + predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -11829,7 +11841,7 @@ instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI // fast string equals instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ - predicate(UseAVX <= 2); + predicate(!VM_Version::supports_avx512vlbw()); match(Set result (StrEquals (Binary str1 str2) cnt)); effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); @@ -11845,7 +11857,7 @@ instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{ - predicate(UseAVX > 2); + predicate(VM_Version::supports_avx512vlbw()); match(Set result (StrEquals (Binary str1 str2) cnt)); effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); @@ -12023,7 +12035,7 @@ instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) %{ - predicate(UseAVX <= 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); + 
predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (AryEq ary1 ary2)); effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); //ins_cost(300); @@ -12040,7 +12052,7 @@ instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) %{ - predicate(UseAVX > 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); + predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (AryEq ary1 ary2)); effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); //ins_cost(300); @@ -12057,7 +12069,7 @@ instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) %{ - predicate(UseAVX <= 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (AryEq ary1 ary2)); effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); //ins_cost(300); @@ -12074,7 +12086,7 @@ instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) %{ - predicate(UseAVX > 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (AryEq ary1 ary2)); effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); //ins_cost(300); @@ -12091,7 +12103,7 @@ instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result, regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ - predicate(UseAVX <= 2); + predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); match(Set result (HasNegatives ary1 len)); effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); @@ -12107,7 +12119,7 @@ instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result, instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result, regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr) %{ - predicate(UseAVX > 2); + predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); match(Set result (HasNegatives ary1 len)); effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); @@ -12124,7 +12136,7 @@ instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result, // fast char[] to byte[] compression instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ - predicate(UseAVX <= 2); + predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); match(Set result (StrCompressedCopy src (Binary dst len))); effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); @@ -12140,7 +12152,7 @@ instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, 
regD tmp1, regD instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ - predicate(UseAVX > 2); + predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); match(Set result (StrCompressedCopy src (Binary dst len))); effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); @@ -12157,7 +12169,7 @@ instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, // fast byte[] to char[] inflation instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ - predicate(UseAVX <= 2); + predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); match(Set dummy (StrInflatedCopy src (Binary dst len))); effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); @@ -12171,7 +12183,7 @@ instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{ - predicate(UseAVX > 2); + predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); match(Set dummy (StrInflatedCopy src (Binary dst len))); effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); @@ -12187,18 +12199,35 @@ instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI l instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ + predicate(!((EncodeISOArrayNode*)n)->is_ascii()); match(Set result (EncodeISOArray src (Binary dst len))); effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); - format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} + format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} ins_encode %{ __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, - $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); + $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false); %} ins_pipe( pipe_slow ); %} +// encode char[] to byte[] in ASCII +instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len, + regD tmp1, regD tmp2, regD tmp3, regD tmp4, + eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ + predicate(((EncodeISOArrayNode*)n)->is_ascii()); + match(Set result (EncodeISOArray src (Binary dst len))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); + + format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} + ins_encode %{ + __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, + $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, + $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true); + %} + ins_pipe( pipe_slow ); +%} //----------Control Flow Instructions------------------------------------------ // Signed compare Instructions @@ -13667,7 +13696,7 @@ instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eD ins_encode %{ __ 
fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, $cx1$$Register, $cx2$$Register, - _counters, _rtm_counters, _stack_rtm_counters, + _rtm_counters, _stack_rtm_counters, ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), true, ra_->C->profile_rtm()); %} @@ -13682,7 +13711,7 @@ instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %} ins_encode %{ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, - $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false); + $scr$$Register, noreg, noreg, NULL, NULL, NULL, false, false); %} ins_pipe(pipe_slow); %} diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 606e76d6553b315f560608598305961cd3ba99e7..14671c39640c14075e0909c36476daac593814a2 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -322,6 +322,7 @@ extern RegMask _LONG_NO_RCX_REG_mask; extern RegMask _INT_REG_mask; extern RegMask _INT_NO_RAX_RDX_REG_mask; extern RegMask _INT_NO_RCX_REG_mask; +extern RegMask _FLOAT_REG_mask; extern RegMask _STACK_OR_PTR_REG_mask; extern RegMask _STACK_OR_LONG_REG_mask; @@ -350,6 +351,7 @@ RegMask _LONG_NO_RCX_REG_mask; RegMask _INT_REG_mask; RegMask _INT_NO_RAX_RDX_REG_mask; RegMask _INT_NO_RCX_REG_mask; +RegMask _FLOAT_REG_mask; RegMask _STACK_OR_PTR_REG_mask; RegMask _STACK_OR_LONG_REG_mask; RegMask _STACK_OR_INT_REG_mask; @@ -425,6 +427,10 @@ void reg_mask_init() { _INT_NO_RCX_REG_mask = _INT_REG_mask; _INT_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg())); + // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc + // from the float_reg_legacy/float_reg_evex register class. + _FLOAT_REG_mask = VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask; + if (Matcher::has_predicated_vectors()) { // Post-loop multi-versioning expects mask to be present in K1 register, till the time // its fixed, RA should not be allocting K1 register, this shall prevent any accidental @@ -1758,6 +1764,20 @@ bool Matcher::is_spillable_arg(int reg) return can_be_java_arg(reg); } +uint Matcher::int_pressure_limit() +{ + return (INTPRESSURE == -1) ? _INT_REG_mask.Size() : INTPRESSURE; +} + +uint Matcher::float_pressure_limit() +{ + // After experiment around with different values, the following default threshold + // works best for LCM's register pressure scheduling on x64. + uint dec_count = VM_Version::supports_evex() ? 4 : 2; + uint default_float_pressure_threshold = _FLOAT_REG_mask.Size() - dec_count; + return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE; +} + bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { // In 64 bit mode a code which use multiply when // devisor is constant is faster than hardware @@ -4800,6 +4820,87 @@ define // name must have been defined in an 'enc_class' specification // in the encode section of the architecture description. +// Dummy reg-to-reg vector moves. Removed during post-selection cleanup. +// Load Float +instruct MoveF2VL(vlRegF dst, regF src) %{ + match(Set dst src); + format %{ "movss $dst,$src\t! 
load float (4 bytes)" %} + ins_encode %{ + ShouldNotReachHere(); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Load Float +instruct MoveF2LEG(legRegF dst, regF src) %{ + match(Set dst src); + format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} + ins_encode %{ + ShouldNotReachHere(); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Load Float +instruct MoveVL2F(regF dst, vlRegF src) %{ + match(Set dst src); + format %{ "movss $dst,$src\t! load float (4 bytes)" %} + ins_encode %{ + ShouldNotReachHere(); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Load Float +instruct MoveLEG2F(regF dst, legRegF src) %{ + match(Set dst src); + format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} + ins_encode %{ + ShouldNotReachHere(); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Load Double +instruct MoveD2VL(vlRegD dst, regD src) %{ + match(Set dst src); + format %{ "movsd $dst,$src\t! load double (8 bytes)" %} + ins_encode %{ + ShouldNotReachHere(); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Load Double +instruct MoveD2LEG(legRegD dst, regD src) %{ + match(Set dst src); + format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} + ins_encode %{ + ShouldNotReachHere(); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Load Double +instruct MoveVL2D(regD dst, vlRegD src) %{ + match(Set dst src); + format %{ "movsd $dst,$src\t! load double (8 bytes)" %} + ins_encode %{ + ShouldNotReachHere(); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Load Double +instruct MoveLEG2D(regD dst, legRegD src) %{ + match(Set dst src); + format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} + ins_encode %{ + ShouldNotReachHere(); + %} + ins_pipe( fpu_reg_reg ); +%} + //----------Load/Store/Move Instructions--------------------------------------- //----------Load Instructions-------------------------------------------------- @@ -5213,46 +5314,6 @@ instruct loadF(regF dst, memory mem) ins_pipe(pipe_slow); // XXX %} -// Load Float -instruct MoveF2VL(vlRegF dst, regF src) %{ - match(Set dst src); - format %{ "movss $dst,$src\t! load float (4 bytes)" %} - ins_encode %{ - __ movflt($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Float -instruct MoveF2LEG(legRegF dst, regF src) %{ - match(Set dst src); - format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} - ins_encode %{ - __ movflt($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Float -instruct MoveVL2F(regF dst, vlRegF src) %{ - match(Set dst src); - format %{ "movss $dst,$src\t! load float (4 bytes)" %} - ins_encode %{ - __ movflt($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Float -instruct MoveLEG2F(regF dst, legRegF src) %{ - match(Set dst src); - format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} - ins_encode %{ - __ movflt($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - // Load Double instruct loadD_partial(regD dst, memory mem) %{ @@ -5280,45 +5341,6 @@ instruct loadD(regD dst, memory mem) ins_pipe(pipe_slow); // XXX %} -// Load Double -instruct MoveD2VL(vlRegD dst, regD src) %{ - match(Set dst src); - format %{ "movsd $dst,$src\t! 
load double (8 bytes)" %} - ins_encode %{ - __ movdbl($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Double -instruct MoveD2LEG(legRegD dst, regD src) %{ - match(Set dst src); - format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} - ins_encode %{ - __ movdbl($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Double -instruct MoveVL2D(regD dst, vlRegD src) %{ - match(Set dst src); - format %{ "movsd $dst,$src\t! load double (8 bytes)" %} - ins_encode %{ - __ movdbl($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Double -instruct MoveLEG2D(regD dst, legRegD src) %{ - match(Set dst src); - format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} - ins_encode %{ - __ movdbl($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} // Following pseudo code describes the algorithm for max[FD]: // Min algorithm is on similar lines @@ -11023,8 +11045,7 @@ instruct MoveL2D_reg_reg(regD dst, rRegL src) %{ instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero, Universe dummy, rFlagsReg cr) %{ - predicate(!((ClearArrayNode*)n)->is_large() && - (UseAVX <= 2 || !VM_Version::supports_avx512vlbw())); + predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2)); match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); @@ -11081,13 +11102,12 @@ instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero, %} // Small ClearArray AVX512 non-constant length. -instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, regD tmp, kReg ktmp, rax_RegI zero, +instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero, Universe dummy, rFlagsReg cr) %{ - predicate(!((ClearArrayNode*)n)->is_large() && - UseAVX > 2 && VM_Version::supports_avx512vlbw() && - !n->in(2)->bottom_type()->is_long()->is_con()); + predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2)); match(Set dummy (ClearArray cnt base)); + ins_cost(125); effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); format %{ $$template @@ -11146,7 +11166,7 @@ instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, regD tmp, kReg ktmp, rax_Reg instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero, Universe dummy, rFlagsReg cr) %{ - predicate(UseAVX <=2 && ((ClearArrayNode*)n)->is_large()); + predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large()); match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); @@ -11194,10 +11214,10 @@ instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero, %} // Large ClearArray AVX512. 
-instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, regD tmp, kReg ktmp, rax_RegI zero, +instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero, Universe dummy, rFlagsReg cr) %{ - predicate(UseAVX > 2 && ((ClearArrayNode*)n)->is_large()); + predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large()); match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); @@ -11248,9 +11268,9 @@ instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, regD tmp, kReg ktmp, r instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr) %{ predicate(!((ClearArrayNode*)n)->is_large() && - (UseAVX > 2 && VM_Version::supports_avx512vlbw() && - n->in(2)->bottom_type()->is_long()->is_con())); + ((UseAVX > 2) && VM_Version::supports_avx512vlbw())); match(Set dummy (ClearArray cnt base)); + ins_cost(100); effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr); format %{ "clear_mem_imm $base , $cnt \n\t" %} ins_encode %{ @@ -11262,7 +11282,7 @@ instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Univ instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2, rax_RegI result, legRegD tmp1, rFlagsReg cr) %{ - predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); + predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -11278,7 +11298,7 @@ instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI c instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2, rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{ - predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); + predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -11294,7 +11314,7 @@ instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_R instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2, rax_RegI result, legRegD tmp1, rFlagsReg cr) %{ - predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); + predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -11310,7 +11330,7 @@ instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI c instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2, rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{ - predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); + predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -11326,7 +11346,7 @@ instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_R instruct 
string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2, rax_RegI result, legRegD tmp1, rFlagsReg cr) %{ - predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); + predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -11342,7 +11362,7 @@ instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2, rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{ - predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); + predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -11358,7 +11378,7 @@ instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_ instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2, rax_RegI result, legRegD tmp1, rFlagsReg cr) %{ - predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); + predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -11374,7 +11394,7 @@ instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2, rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{ - predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); + predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -11557,7 +11577,7 @@ instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result, legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr) %{ - predicate(UseAVX <= 2); + predicate(!VM_Version::supports_avx512vlbw()); match(Set result (StrEquals (Binary str1 str2) cnt)); effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); @@ -11573,7 +11593,7 @@ instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI resu instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result, legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr) %{ - predicate(UseAVX > 2); + predicate(VM_Version::supports_avx512vlbw()); match(Set result (StrEquals (Binary str1 str2) cnt)); effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); @@ -11590,7 +11610,7 @@ instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result, legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{ - predicate(UseAVX <= 2 && ((AryEqNode*)n)->encoding() == 
StrIntrinsicNode::LL); + predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (AryEq ary1 ary2)); effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); @@ -11606,7 +11626,7 @@ instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result, instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result, legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{ - predicate(UseAVX > 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); + predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (AryEq ary1 ary2)); effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); @@ -11622,7 +11642,7 @@ instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result, instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result, legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{ - predicate(UseAVX <= 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (AryEq ary1 ary2)); effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); @@ -11638,7 +11658,7 @@ instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result, instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result, legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{ - predicate(UseAVX > 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (AryEq ary1 ary2)); effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); @@ -11654,7 +11674,7 @@ instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result, instruct has_negatives(rsi_RegP ary1, rcx_RegI len, rax_RegI result, legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr,) %{ - predicate(UseAVX <= 2); + predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); match(Set result (HasNegatives ary1 len)); effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); @@ -11670,7 +11690,7 @@ instruct has_negatives(rsi_RegP ary1, rcx_RegI len, rax_RegI result, instruct has_negatives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result, legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr,) %{ - predicate(UseAVX > 2); + predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); match(Set result (HasNegatives ary1 len)); effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); @@ -11686,7 +11706,7 @@ instruct has_negatives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result, // fast char[] to byte[] compression instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{ - predicate(UseAVX <= 2); + predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); match(Set result (StrCompressedCopy src (Binary dst len))); effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); @@ -11703,7 +11723,7 @@ instruct 
string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{ - predicate(UseAVX > 2); + predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); match(Set result (StrCompressedCopy src (Binary dst len))); effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); @@ -11720,7 +11740,7 @@ instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD // fast byte[] to char[] inflation instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{ - predicate(UseAVX <= 2); + predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); match(Set dummy (StrInflatedCopy src (Binary dst len))); effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); @@ -11734,7 +11754,7 @@ instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{ - predicate(UseAVX > 2); + predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); match(Set dummy (StrInflatedCopy src (Binary dst len))); effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); @@ -11750,14 +11770,32 @@ instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_Reg instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{ + predicate(!((EncodeISOArrayNode*)n)->is_ascii()); + match(Set result (EncodeISOArray src (Binary dst len))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); + + format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %} + ins_encode %{ + __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, + $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, + $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false); + %} + ins_pipe( pipe_slow ); +%} + +// encode char[] to byte[] in ASCII +instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len, + legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4, + rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{ + predicate(((EncodeISOArrayNode*)n)->is_ascii()); match(Set result (EncodeISOArray src (Binary dst len))); effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); - format %{ "Encode array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %} + format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %} ins_encode %{ __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, - $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); + $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true); %} ins_pipe( pipe_slow ); %} @@ -12907,7 +12945,7 @@ instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, ins_encode %{ __ 
fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, $cx1$$Register, $cx2$$Register, - _counters, _rtm_counters, _stack_rtm_counters, + _rtm_counters, _stack_rtm_counters, ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), true, ra_->C->profile_rtm()); %} @@ -12922,7 +12960,7 @@ instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRe format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %} ins_encode %{ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, - $scr$$Register, $cx1$$Register, noreg, _counters, NULL, NULL, NULL, false, false); + $scr$$Register, $cx1$$Register, noreg, NULL, NULL, NULL, false, false); %} ins_pipe(pipe_slow); %} diff --git a/src/hotspot/cpu/zero/assembler_zero.cpp b/src/hotspot/cpu/zero/assembler_zero.cpp index 693ff06ca54257fe0cfc57deb13489d931e45dbe..fe0f16888563076f9f978bffe80423b76bfd5446 100644 --- a/src/hotspot/cpu/zero/assembler_zero.cpp +++ b/src/hotspot/cpu/zero/assembler_zero.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. * Copyright 2007, 2008, 2009 Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -30,7 +30,6 @@ #include "interpreter/interpreter.hpp" #include "memory/resourceArea.hpp" #include "prims/methodHandles.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/objectMonitor.hpp" #include "runtime/os.hpp" diff --git a/src/hotspot/cpu/zero/frame_zero.cpp b/src/hotspot/cpu/zero/frame_zero.cpp index 70d6a5e855c1da8708319e016413bb9a2b526bca..19970cfb82bc20cc2e4b6729e080aaaa2f7d29e3 100644 --- a/src/hotspot/cpu/zero/frame_zero.cpp +++ b/src/hotspot/cpu/zero/frame_zero.cpp @@ -34,6 +34,7 @@ #include "runtime/frame.inline.hpp" #include "runtime/handles.inline.hpp" #include "runtime/signature.hpp" +#include "runtime/stackWatermarkSet.hpp" #include "vmreg_zero.inline.hpp" #ifdef ASSERT @@ -61,6 +62,16 @@ frame frame::sender_for_entry_frame(RegisterMap *map) const { return frame(zeroframe()->next(), sender_sp()); } +OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { + ShouldNotCallThis(); + return nullptr; +} + +bool frame::optimized_entry_frame_is_first() const { + ShouldNotCallThis(); + return false; +} + frame frame::sender_for_nonentry_frame(RegisterMap *map) const { assert(zeroframe()->is_interpreter_frame() || zeroframe()->is_fake_stub_frame(), "wrong type of frame"); @@ -72,10 +83,15 @@ frame frame::sender(RegisterMap* map) const { // sender_for_xxx methods update this accordingly. map->set_include_argument_oops(false); - if (is_entry_frame()) - return sender_for_entry_frame(map); - else - return sender_for_nonentry_frame(map); + frame result = zeroframe()->is_entry_frame() ? + sender_for_entry_frame(map) : + sender_for_nonentry_frame(map); + + if (map->process_frames()) { + StackWatermarkSet::on_iteration(map->thread(), result); + } + + return result; } BasicObjectLock* frame::interpreter_frame_monitor_begin() const { @@ -95,7 +111,7 @@ void frame::patch_pc(Thread* thread, address pc) { // We borrow this call to set the thread pointer in the interpreter // state; the hook to set up deoptimized frames isn't supplied it. 
assert(pc == NULL, "should be"); - get_interpreterState()->set_thread(thread->as_Java_thread()); + get_interpreterState()->set_thread(JavaThread::cast(thread)); } } @@ -380,5 +396,4 @@ frame::frame(void* sp, void* fp, void* pc) { Unimplemented(); } -void frame::pd_ps() {} #endif diff --git a/src/hotspot/cpu/zero/globals_zero.hpp b/src/hotspot/cpu/zero/globals_zero.hpp index 33f208b28f27af1d9f61c26206054e2c9c449ab9..aa330925c5a184f211751154afd16481aa6fca00 100644 --- a/src/hotspot/cpu/zero/globals_zero.hpp +++ b/src/hotspot/cpu/zero/globals_zero.hpp @@ -39,7 +39,6 @@ define_pd_global(bool, UncommonNullCast, true); define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. define_pd_global(intx, CodeEntryAlignment, 32); define_pd_global(intx, OptoLoopAlignment, 16); -define_pd_global(intx, InlineFrequencyCount, 100); define_pd_global(intx, InlineSmallCode, 1000); // not used, but must satisfy following constraints: diff --git a/src/hotspot/cpu/zero/vm_version_zero.cpp b/src/hotspot/cpu/zero/vm_version_zero.cpp index 14368bed5a04d0cff588da1b05e4ccf7f6ec8127..8b49a084be29b7b3390892baa7cc7a2b52fac39f 100644 --- a/src/hotspot/cpu/zero/vm_version_zero.cpp +++ b/src/hotspot/cpu/zero/vm_version_zero.cpp @@ -45,6 +45,76 @@ void VM_Version::initialize() { } FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0); + // If lock diagnostics is needed, always call to runtime + if (DiagnoseSyncOnValueBasedClasses != 0) { + FLAG_SET_DEFAULT(UseHeavyMonitors, true); + } + + if (UseAESIntrinsics) { + warning("AES intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + + if (UseAES) { + warning("AES instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseAES, false); + } + + if (UseAESCTRIntrinsics) { + warning("AES/CTR intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); + } + + if (UseFMA) { + warning("FMA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseFMA, false); + } + + if (UseMD5Intrinsics) { + warning("MD5 intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseMD5Intrinsics, false); + } + + if (UseSHA) { + warning("SHA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA, false); + } + + if (UseSHA1Intrinsics) { + warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + } + + if (UseSHA256Intrinsics) { + warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + } + + if (UseSHA512Intrinsics) { + warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + + if (UseSHA3Intrinsics) { + warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); + } + + if (UseCRC32Intrinsics) { + warning("CRC32 intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); + } + + if (UseAdler32Intrinsics) { + warning("Adler32 intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); + } + + if (UseVectorizedMismatchIntrinsic) { + warning("vectorizedMismatch intrinsic is not available on this CPU."); + FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); + } + // Not implemented 
UNSUPPORTED_OPTION(CriticalJNINatives); } diff --git a/src/hotspot/cpu/zero/vm_version_zero.hpp b/src/hotspot/cpu/zero/vm_version_zero.hpp index 84e1abb5894f6abe6f33d04943826cf122226de7..c63a47719e50d592d7db088fa0f9c04a77e901d4 100644 --- a/src/hotspot/cpu/zero/vm_version_zero.hpp +++ b/src/hotspot/cpu/zero/vm_version_zero.hpp @@ -32,6 +32,8 @@ class VM_Version : public Abstract_VM_Version { public: static void initialize(); + + constexpr static bool supports_stack_watermark_barrier() { return true; } }; #endif // CPU_ZERO_VM_VERSION_ZERO_HPP diff --git a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp index 14fc8c0b00c03608083e81ca659d4e2135f2019a..1193102b5e4665826bc4a7a6fdfd02f7b7f81299 100644 --- a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp +++ b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp @@ -200,6 +200,18 @@ void ZeroInterpreter::main_loop(int recurse, TRAPS) { } fixup_after_potential_safepoint(); + // If we are unwinding, notify the stack watermarks machinery. + // Should do this before resetting the frame anchor. + if (istate->msg() == BytecodeInterpreter::return_from_method || + istate->msg() == BytecodeInterpreter::do_osr) { + stack_watermark_unwind_check(thread); + } else { + assert(istate->msg() == BytecodeInterpreter::call_method || + istate->msg() == BytecodeInterpreter::more_monitors || + istate->msg() == BytecodeInterpreter::throwing_exception, + "Should be one of these otherwise"); + } + // Clear the frame anchor thread->reset_last_Java_frame(); @@ -320,13 +332,13 @@ int ZeroInterpreter::native_entry(Method* method, intptr_t UNUSED, TRAPS) { monitor = (BasicObjectLock*) istate->stack_base(); oop lockee = monitor->obj(); markWord disp = lockee->mark().set_unlocked(); - monitor->lock()->set_displaced_header(disp); - if (lockee->cas_set_mark(markWord::from_pointer(monitor), disp) != disp) { - if (thread->is_lock_owned((address) disp.clear_lock_bits().to_pointer())) { + bool call_vm = UseHeavyMonitors; + if (call_vm || lockee->cas_set_mark(markWord::from_pointer(monitor), disp) != disp) { + // Is it simple recursive case? + if (!call_vm && thread->is_lock_owned((address) disp.clear_lock_bits().to_pointer())) { monitor->lock()->set_displaced_header(markWord::from_pointer(NULL)); - } - else { + } else { CALL_VM_NOCHECK(InterpreterRuntime::monitorenter(thread, monitor)); if (HAS_PENDING_EXCEPTION) goto unwind_and_return; @@ -436,6 +448,10 @@ int ZeroInterpreter::native_entry(Method* method, intptr_t UNUSED, TRAPS) { thread->set_thread_state(_thread_in_Java); fixup_after_potential_safepoint(); + // Notify the stack watermarks machinery that we are unwinding. + // Should do this before resetting the frame anchor. + stack_watermark_unwind_check(thread); + // Clear the frame anchor thread->reset_last_Java_frame(); @@ -546,6 +562,12 @@ int ZeroInterpreter::native_entry(Method* method, intptr_t UNUSED, TRAPS) { } } + // Already did every pending exception check here. + // If HAS_PENDING_EXCEPTION is true, the interpreter would handle the rest. 
+ if (CheckJNICalls) { + THREAD->clear_pending_jni_exception_check(); + } + // No deoptimized frames on the stack return 0; } @@ -869,3 +891,13 @@ address ZeroInterpreter::remove_activation_early_entry(TosState state) { bool ZeroInterpreter::contains(address pc) { return false; // make frame::print_value_on work } + +void ZeroInterpreter::stack_watermark_unwind_check(JavaThread* thread) { + // If frame pointer is in the danger zone, notify the runtime that + // it needs to act before continuing the unwinding. + uintptr_t fp = (uintptr_t)thread->last_Java_fp(); + uintptr_t watermark = thread->poll_data()->get_polling_word(); + if (fp > watermark) { + InterpreterRuntime::at_unwind(thread); + } +} diff --git a/src/hotspot/cpu/zero/zeroInterpreter_zero.hpp b/src/hotspot/cpu/zero/zeroInterpreter_zero.hpp index 2d1d28f4da5a814b0352541a7ae48dac7da3a56a..3761dfc58145a86037e0a7492ccaf4a09e0aaa5c 100644 --- a/src/hotspot/cpu/zero/zeroInterpreter_zero.hpp +++ b/src/hotspot/cpu/zero/zeroInterpreter_zero.hpp @@ -39,6 +39,9 @@ static int empty_entry(Method* method, intptr_t UNUSED, TRAPS); static int Reference_get_entry(Method* method, intptr_t UNUSED, TRAPS); + // Stack watermark machinery + static void stack_watermark_unwind_check(JavaThread* thread); + public: // Main loop of normal_entry static void main_loop(int recurse, TRAPS); diff --git a/src/hotspot/os/aix/osThread_aix.cpp b/src/hotspot/os/aix/osThread_aix.cpp index 6303dc27eb8bb6a74c11c545e5091711ad1df124..b6aa44de0aee33966d5a3eab7620405776a4543e 100644 --- a/src/hotspot/os/aix/osThread_aix.cpp +++ b/src/hotspot/os/aix/osThread_aix.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2015 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -46,8 +46,7 @@ void OSThread::pd_initialize() { sigemptyset(&_caller_sigmask); - _startThread_lock = new Monitor(Mutex::event, "startThread_lock", true, - Monitor::_safepoint_check_never); + _startThread_lock = new Monitor(Mutex::event, "startThread_lock"); assert(_startThread_lock != NULL, "check"); } diff --git a/src/hotspot/os/aix/os_aix.cpp b/src/hotspot/os/aix/os_aix.cpp index 14356b76f4c8c013fbb5eb88d9df6ac305a55b8d..df8563067e43e7d103d52a64e581b23e9e8bbec0 100644 --- a/src/hotspot/os/aix/os_aix.cpp +++ b/src/hotspot/os/aix/os_aix.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2020 SAP SE. All rights reserved. + * Copyright (c) 2012, 2021 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -182,8 +182,6 @@ int os::Aix::_extshm = -1; // local variables static volatile jlong max_real_time = 0; -static jlong initial_time_count = 0; -static int clock_tics_per_sec = 100; // Process break recorded at startup. static address g_brk_at_startup = NULL; @@ -267,18 +265,6 @@ julong os::physical_memory() { return Aix::physical_memory(); } -// Return true if user is running as root. 
- -bool os::have_special_privileges() { - static bool init = false; - static bool privileges = false; - if (!init) { - privileges = (getuid() != geteuid()) || (getgid() != getegid()); - init = true; - } - return privileges; -} - // Helper function, emulates disclaim64 using multiple 32bit disclaims // because we cannot use disclaim64() on AS/400 and old AIX releases. static bool my_disclaim64(char* addr, size_t size) { @@ -788,7 +774,7 @@ bool os::create_thread(Thread* thread, ThreadType thr_type, // JDK-8187028: It was observed that on some configurations (4K backed thread stacks) // the real thread stack size may be smaller than the requested stack size, by as much as 64K. // This very much looks like a pthread lib error. As a workaround, increase the stack size - // by 64K for small thread stacks (arbitrarily choosen to be < 4MB) + // by 64K for small thread stacks (arbitrarily chosen to be < 4MB) if (stack_size < 4096 * K) { stack_size += 64 * K; } @@ -814,19 +800,24 @@ bool os::create_thread(Thread* thread, ThreadType thr_type, ret = pthread_attr_setguardsize(&attr, 0); } + ResourceMark rm; pthread_t tid = 0; + if (ret == 0) { - ret = pthread_create(&tid, &attr, (void* (*)(void*)) thread_native_entry, thread); + int limit = 3; + do { + ret = pthread_create(&tid, &attr, (void* (*)(void*)) thread_native_entry, thread); + } while (ret == EAGAIN && limit-- > 0); } if (ret == 0) { char buf[64]; - log_info(os, thread)("Thread started (pthread id: " UINTX_FORMAT ", attributes: %s). ", - (uintx) tid, os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); + log_info(os, thread)("Thread \"%s\" started (pthread id: " UINTX_FORMAT ", attributes: %s). ", + thread->name(), (uintx) tid, os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); } else { char buf[64]; - log_warning(os, thread)("Failed to start thread - pthread_create failed (%d=%s) for attributes: %s.", - ret, os::errno_name(ret), os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); + log_warning(os, thread)("Failed to start thread \"%s\" - pthread_create failed (%d=%s) for attributes: %s.", + thread->name(), ret, os::errno_name(ret), os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); // Log some OS information which might explain why creating the thread failed. log_info(os, thread)("Number of threads approx. running in the VM: %d", Threads::number_of_threads()); LogStream st(Log(os, thread)::info()); @@ -928,21 +919,6 @@ void os::free_thread(OSThread* osthread) { //////////////////////////////////////////////////////////////////////////////// // time support -// Time since start-up in seconds to a fine granularity. -double os::elapsedTime() { - return ((double)os::elapsed_counter()) / os::elapsed_frequency(); // nanosecond resolution -} - -jlong os::elapsed_counter() { - return javaTimeNanos() - initial_time_count; -} - -jlong os::elapsed_frequency() { - return NANOSECS_PER_SEC; // nanosecond resolution -} - -bool os::supports_vtime() { return true; } - double os::elapsedVTime() { struct rusage usage; int retval = getrusage(RUSAGE_THREAD, &usage); @@ -1009,41 +985,6 @@ void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) { info_ptr->kind = JVMTI_TIMER_ELAPSED; // elapsed not CPU time } -// Return the real, user, and system times in seconds from an -// arbitrary fixed point in the past. 
-bool os::getTimesSecs(double* process_real_time, - double* process_user_time, - double* process_system_time) { - struct tms ticks; - clock_t real_ticks = times(&ticks); - - if (real_ticks == (clock_t) (-1)) { - return false; - } else { - double ticks_per_second = (double) clock_tics_per_sec; - *process_user_time = ((double) ticks.tms_utime) / ticks_per_second; - *process_system_time = ((double) ticks.tms_stime) / ticks_per_second; - *process_real_time = ((double) real_ticks) / ticks_per_second; - - return true; - } -} - -char * os::local_time_string(char *buf, size_t buflen) { - struct tm t; - time_t long_time; - time(&long_time); - localtime_r(&long_time, &t); - jio_snprintf(buf, buflen, "%d-%02d-%02d %02d:%02d:%02d", - t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, - t.tm_hour, t.tm_min, t.tm_sec); - return buf; -} - -struct tm* os::localtime_pd(const time_t* clock, struct tm* res) { - return localtime_r(clock, res); -} - intx os::current_thread_id() { return (intx)pthread_self(); } @@ -1177,15 +1118,6 @@ void *os::dll_load(const char *filename, char *ebuf, int ebuflen) { return NULL; } -void* os::dll_lookup(void* handle, const char* name) { - void* res = dlsym(handle, name); - return res; -} - -void* os::get_default_process_handle() { - return (void*)::dlopen(NULL, RTLD_LAZY); -} - void os::print_dll_info(outputStream *st) { st->print_cr("Dynamic libraries:"); LoadedLibraries::print(st); @@ -2409,13 +2341,9 @@ void os::init(void) { // need libperfstat etc. os::Aix::initialize_system_info(); - clock_tics_per_sec = sysconf(_SC_CLK_TCK); - // _main_thread points to the thread that created/loaded the JVM. Aix::_main_thread = pthread_self(); - initial_time_count = javaTimeNanos(); - os::Posix::init(); } @@ -2512,11 +2440,6 @@ void os::set_native_thread_name(const char *name) { return; } -bool os::bind_to_processor(uint processor_id) { - // Not yet implemented. - return false; -} - //////////////////////////////////////////////////////////////////////////////// // debug support @@ -2665,9 +2588,7 @@ int os::open(const char *path, int oflag, int mode) { // create binary file, rewriting existing file if required int os::create_binary_file(const char* path, bool rewrite_existing) { int oflags = O_WRONLY | O_CREAT; - if (!rewrite_existing) { - oflags |= O_EXCL; - } + oflags |= rewrite_existing ? O_TRUNC : O_EXCL; return ::open64(path, oflags, S_IREAD | S_IWRITE); } diff --git a/src/hotspot/os/bsd/osThread_bsd.cpp b/src/hotspot/os/bsd/osThread_bsd.cpp index 9eba7288fbe36fbe2149fb54c5709b5fd3f098ba..100a5ce5447d4c260665f5ddb62b3f199c17d3e6 100644 --- a/src/hotspot/os/bsd/osThread_bsd.cpp +++ b/src/hotspot/os/bsd/osThread_bsd.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
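The `create_binary_file` change above is behavioral, not cosmetic: previously `rewrite_existing` only dropped `O_EXCL`, so reopening an existing, longer file left stale bytes past the new end of data. Passing `O_TRUNC` cuts the file to zero length first. A sketch with plain `open` (the AIX code uses the `open64` large-file variant):

```cpp
#include <fcntl.h>
#include <sys/stat.h>

// rewrite_existing == true  -> truncate an existing file to zero length.
// rewrite_existing == false -> fail with EEXIST if the file already exists.
static int create_binary_file_sketch(const char* path, bool rewrite_existing) {
  int oflags = O_WRONLY | O_CREAT;
  oflags |= rewrite_existing ? O_TRUNC : O_EXCL;
  return ::open(path, oflags, S_IRUSR | S_IWUSR);
}
```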
* * This code is free software; you can redistribute it and/or modify it @@ -45,8 +45,7 @@ void OSThread::pd_initialize() { sigemptyset(&_caller_sigmask); - _startThread_lock = new Monitor(Mutex::event, "startThread_lock", true, - Monitor::_safepoint_check_never); + _startThread_lock = new Monitor(Mutex::event, "startThread_lock"); assert(_startThread_lock !=NULL, "check"); } diff --git a/src/hotspot/os/bsd/os_bsd.cpp b/src/hotspot/os/bsd/os_bsd.cpp index 4e10c1cb908d032aa4f720009f008cf24cddfc39..d1adfeb1ddd1cf4f469449072a69c98608f3edc6 100644 --- a/src/hotspot/os/bsd/os_bsd.cpp +++ b/src/hotspot/os/bsd/os_bsd.cpp @@ -123,10 +123,6 @@ volatile uint64_t os::Bsd::_max_abstime = 0; pthread_t os::Bsd::_main_thread; int os::Bsd::_page_size = -1; -static jlong initial_time_count=0; - -static int clock_tics_per_sec = 100; - #if defined(__APPLE__) && defined(__x86_64__) static const int processor_id_unassigned = -1; static const int processor_id_assigning = -2; @@ -179,20 +175,6 @@ julong os::physical_memory() { return Bsd::physical_memory(); } -// Return true if user is running as root. - -bool os::have_special_privileges() { - static bool init = false; - static bool privileges = false; - if (!init) { - privileges = (getuid() != geteuid()) || (getgid() != getegid()); - init = true; - } - return privileges; -} - - - // Cpu architecture string #if defined(ZERO) static char cpu_arch[] = ZERO_LIBARCH; @@ -452,7 +434,9 @@ void os::init_system_properties_values() { } } Arguments::set_java_home(buf); - set_boot_path('/', ':'); + if (!set_boot_path('/', ':')) { + vm_exit_during_initialization("Failed setting boot class path.", NULL); + } } // Where to look for native libraries. @@ -631,16 +615,22 @@ bool os::create_thread(Thread* thread, ThreadType thr_type, ThreadState state; { + + ResourceMark rm; pthread_t tid; - int ret = pthread_create(&tid, &attr, (void* (*)(void*)) thread_native_entry, thread); + int ret = 0; + int limit = 3; + do { + ret = pthread_create(&tid, &attr, (void* (*)(void*)) thread_native_entry, thread); + } while (ret == EAGAIN && limit-- > 0); char buf[64]; if (ret == 0) { - log_info(os, thread)("Thread started (pthread id: " UINTX_FORMAT ", attributes: %s). ", - (uintx) tid, os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); + log_info(os, thread)("Thread \"%s\" started (pthread id: " UINTX_FORMAT ", attributes: %s). ", + thread->name(), (uintx) tid, os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); } else { - log_warning(os, thread)("Failed to start thread - pthread_create failed (%s) for attributes: %s.", - os::errno_name(ret), os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); + log_warning(os, thread)("Failed to start thread \"%s\" - pthread_create failed (%s) for attributes: %s.", + thread->name(), os::errno_name(ret), os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); // Log some OS information which might explain why creating the thread failed. log_info(os, thread)("Number of threads approx. running in the VM: %d", Threads::number_of_threads()); LogStream st(Log(os, thread)::info()); @@ -752,22 +742,6 @@ void os::free_thread(OSThread* osthread) { //////////////////////////////////////////////////////////////////////////////// // time support - -// Time since start-up in seconds to a fine granularity. 
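The `ResourceMark rm;` introduced before the thread-start logging matters because `thread->name()` can hand back a string allocated in the current thread's resource area; the mark releases that storage when the logging scope ends. A toy arena illustrating the mark/rewind discipline (purely illustrative, not HotSpot's `ResourceArea`):

```cpp
#include <cstddef>
#include <cstring>

// Toy bump allocator standing in for a thread's resource area.
// No bounds checking; illustration only.
class ToyArena {
  char   _buf[4096];
  size_t _top = 0;
public:
  void*  alloc(size_t n)  { void* p = _buf + _top; _top += n; return p; }
  size_t top() const      { return _top; }
  void   rewind(size_t t) { _top = t; }
};

// RAII mark: everything allocated after construction is released on scope
// exit, which is what makes temporary strings like thread->name() safe to
// log without leaking.
class ToyResourceMark {
  ToyArena& _arena;
  size_t    _saved;
public:
  explicit ToyResourceMark(ToyArena& a) : _arena(a), _saved(a.top()) {}
  ~ToyResourceMark() { _arena.rewind(_saved); }
};
```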
-double os::elapsedTime() { - return ((double)os::elapsed_counter()) / os::elapsed_frequency(); -} - -jlong os::elapsed_counter() { - return javaTimeNanos() - initial_time_count; -} - -jlong os::elapsed_frequency() { - return NANOSECS_PER_SEC; // nanosecond resolution -} - -bool os::supports_vtime() { return true; } - double os::elapsedVTime() { // better than nothing, but not much return elapsedTime(); @@ -817,45 +791,8 @@ void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) { info_ptr->may_skip_forward = false; // not subject to resetting or drifting info_ptr->kind = JVMTI_TIMER_ELAPSED; // elapsed not CPU time } - #endif // __APPLE__ -// Return the real, user, and system times in seconds from an -// arbitrary fixed point in the past. -bool os::getTimesSecs(double* process_real_time, - double* process_user_time, - double* process_system_time) { - struct tms ticks; - clock_t real_ticks = times(&ticks); - - if (real_ticks == (clock_t) (-1)) { - return false; - } else { - double ticks_per_second = (double) clock_tics_per_sec; - *process_user_time = ((double) ticks.tms_utime) / ticks_per_second; - *process_system_time = ((double) ticks.tms_stime) / ticks_per_second; - *process_real_time = ((double) real_ticks) / ticks_per_second; - - return true; - } -} - - -char * os::local_time_string(char *buf, size_t buflen) { - struct tm t; - time_t long_time; - time(&long_time); - localtime_r(&long_time, &t); - jio_snprintf(buf, buflen, "%d-%02d-%02d %02d:%02d:%02d", - t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, - t.tm_hour, t.tm_min, t.tm_sec); - return buf; -} - -struct tm* os::localtime_pd(const time_t* clock, struct tm* res) { - return localtime_r(clock, res); -} - // Information of current thread in variety of formats pid_t os::Bsd::gettid() { int retval = -1; @@ -1226,22 +1163,6 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { } #endif // !__APPLE__ -void* os::get_default_process_handle() { -#ifdef __APPLE__ - // MacOS X needs to use RTLD_FIRST instead of RTLD_LAZY - // to avoid finding unexpected symbols on second (or later) - // loads of a library. - return (void*)::dlopen(NULL, RTLD_FIRST); -#else - return (void*)::dlopen(NULL, RTLD_LAZY); -#endif -} - -// XXX: Do we need a lock around this as per Linux? 
-void* os::dll_lookup(void* handle, const char* name) { - return dlsym(handle, name); -} - int _print_dll_info_cb(const char * name, address base_address, address top_address, void * param) { outputStream * out = (outputStream *) param; out->print_cr(INTPTR_FORMAT " \t%s", (intptr_t)base_address, name); @@ -1390,13 +1311,17 @@ void os::get_summary_cpu_info(char* buf, size_t buflen) { strncpy(machine, "", sizeof(machine)); } - const char* emulated = ""; #if defined(__APPLE__) && !defined(ZERO) if (VM_Version::is_cpu_emulated()) { - emulated = " (EMULATED)"; + snprintf(buf, buflen, "\"%s\" %s (EMULATED) %d MHz", model, machine, mhz); + } else { + NOT_AARCH64(snprintf(buf, buflen, "\"%s\" %s %d MHz", model, machine, mhz)); + // aarch64 CPU doesn't report its speed + AARCH64_ONLY(snprintf(buf, buflen, "\"%s\" %s", model, machine)); } +#else + snprintf(buf, buflen, "\"%s\" %s %d MHz", model, machine, mhz); #endif - snprintf(buf, buflen, "\"%s\" %s%s %d MHz", model, machine, emulated, mhz); } void os::print_memory_info(outputStream* st) { @@ -1996,8 +1921,6 @@ extern void report_error(char* file_name, int line_no, char* title, void os::init(void) { char dummy; // used to get a guess on initial stack address - clock_tics_per_sec = CLK_TCK; - Bsd::set_page_size(getpagesize()); if (Bsd::page_size() == -1) { fatal("os_bsd.cpp: os::init: sysconf failed (%s)", os::strerror(errno)); @@ -2010,7 +1933,6 @@ void os::init(void) { Bsd::_main_thread = pthread_self(); Bsd::clock_init(); - initial_time_count = javaTimeNanos(); os::Posix::init(); } @@ -2165,11 +2087,6 @@ void os::set_native_thread_name(const char *name) { #endif } -bool os::bind_to_processor(uint processor_id) { - // Not yet implemented. - return false; -} - //////////////////////////////////////////////////////////////////////////////// // debug support @@ -2355,9 +2272,7 @@ int os::open(const char *path, int oflag, int mode) { // create binary file, rewriting existing file if required int os::create_binary_file(const char* path, bool rewrite_existing) { int oflags = O_WRONLY | O_CREAT; - if (!rewrite_existing) { - oflags |= O_EXCL; - } + oflags |= rewrite_existing ? O_TRUNC : O_EXCL; return ::open(path, oflags, S_IREAD | S_IWRITE); } diff --git a/src/hotspot/os/bsd/semaphore_bsd.cpp b/src/hotspot/os/bsd/semaphore_bsd.cpp index 012b5b8ed914ffc20e57d94ddd7e82f199c9fafe..742790fa8960a6967c8139dffc8492520cb216b9 100644 --- a/src/hotspot/os/bsd/semaphore_bsd.cpp +++ b/src/hotspot/os/bsd/semaphore_bsd.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,7 +30,7 @@ #include #ifdef __APPLE__ -// OS X doesn't support unamed POSIX semaphores, so the implementation in os_posix.cpp can't be used. +// OS X doesn't support unnamed POSIX semaphores, so the implementation in os_posix.cpp can't be used. static const char* sem_init_strerror(kern_return_t value) { switch (value) { diff --git a/src/hotspot/os/bsd/semaphore_bsd.hpp b/src/hotspot/os/bsd/semaphore_bsd.hpp index 3a74cacbbf3df45f4caca56c31951ac6c644bfc2..66549e5dafc111ff1608133c00c120289bf3b36e 100644 --- a/src/hotspot/os/bsd/semaphore_bsd.hpp +++ b/src/hotspot/os/bsd/semaphore_bsd.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -32,7 +32,7 @@
 # include "semaphore_posix.hpp"
 #else
-// OS X doesn't support unamed POSIX semaphores, so the implementation in os_posix.cpp can't be used.
+// OS X doesn't support unnamed POSIX semaphores, so the implementation in os_posix.cpp can't be used.
 # include "memory/allocation.hpp"
 # include <mach/semaphore.h>
diff --git a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp
index fb653c762bce5fc2d28f23b9950f075b92839322..1593a701e67991e41b8a4d010b4a44b86803cd75 100644
--- a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp
+++ b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -34,11 +34,15 @@
 #include "runtime/os.hpp"
 #include "utilities/globalDefinitions.hpp"
 
+// controller names have to match the *_IDX indices
+static const char* cg_controller_name[] = { "cpuset", "cpu", "cpuacct", "memory", "pids" };
+
 CgroupSubsystem* CgroupSubsystemFactory::create() {
   CgroupV1MemoryController* memory = NULL;
   CgroupV1Controller* cpuset = NULL;
   CgroupV1Controller* cpu = NULL;
   CgroupV1Controller* cpuacct = NULL;
+  CgroupV1Controller* pids = NULL;
   CgroupInfo cg_infos[CG_INFO_LENGTH];
   u1 cg_type_flags = INVALID_CGROUPS_GENERIC;
   const char* proc_cgroups = "/proc/cgroups";
@@ -93,22 +97,29 @@ CgroupSubsystem* CgroupSubsystemFactory::create() {
     assert(is_cgroup_v1(&cg_type_flags), "Cgroup v1 expected");
     for (int i = 0; i < CG_INFO_LENGTH; i++) {
       CgroupInfo info = cg_infos[i];
-      if (strcmp(info._name, "memory") == 0) {
-        memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path);
-        memory->set_subsystem_path(info._cgroup_path);
-      } else if (strcmp(info._name, "cpuset") == 0) {
-        cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path);
-        cpuset->set_subsystem_path(info._cgroup_path);
-      } else if (strcmp(info._name, "cpu") == 0) {
-        cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path);
-        cpu->set_subsystem_path(info._cgroup_path);
-      } else if (strcmp(info._name, "cpuacct") == 0) {
-        cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path);
-        cpuacct->set_subsystem_path(info._cgroup_path);
+      if (info._data_complete) { // pids controller might have incomplete data
+        if (strcmp(info._name, "memory") == 0) {
+          memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path);
+          memory->set_subsystem_path(info._cgroup_path);
+        } else if (strcmp(info._name, "cpuset") == 0) {
+          cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path);
+          cpuset->set_subsystem_path(info._cgroup_path);
+        } else if (strcmp(info._name, "cpu") == 0) {
+          cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path);
+          cpu->set_subsystem_path(info._cgroup_path);
+        } else if (strcmp(info._name, "cpuacct") == 0) {
+          cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path);
+          cpuacct->set_subsystem_path(info._cgroup_path);
+        } else if (strcmp(info._name, "pids") == 0) {
+          pids = new CgroupV1Controller(info._root_mount_path, info._mount_path);
+ 
pids->set_subsystem_path(info._cgroup_path); + } + } else { + log_debug(os, container)("CgroupInfo for %s not complete", cg_controller_name[i]); } } cleanup(cg_infos); - return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory); + return new CgroupV1Subsystem(cpuset, cpu, cpuacct, pids, memory); } bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, @@ -122,9 +133,10 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, char buf[MAXPATHLEN+1]; char *p; bool is_cgroupsV2; - // true iff all controllers, memory, cpu, cpuset, cpuacct are enabled + // true iff all required controllers, memory, cpu, cpuset, cpuacct are enabled // at the kernel level. - bool all_controllers_enabled; + // pids might not be enabled on older Linux distros (SLES 12.1, RHEL 7.1) + bool all_required_controllers_enabled; /* * Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1. @@ -136,10 +148,9 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, */ cgroups = fopen(proc_cgroups, "r"); if (cgroups == NULL) { - log_debug(os, container)("Can't open %s, %s", - proc_cgroups, os::strerror(errno)); - *flags = INVALID_CGROUPS_GENERIC; - return false; + log_debug(os, container)("Can't open %s, %s", proc_cgroups, os::strerror(errno)); + *flags = INVALID_CGROUPS_GENERIC; + return false; } while ((p = fgets(buf, MAXPATHLEN, cgroups)) != NULL) { @@ -167,19 +178,30 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, cg_infos[CPUACCT_IDX]._name = os::strdup(name); cg_infos[CPUACCT_IDX]._hierarchy_id = hierarchy_id; cg_infos[CPUACCT_IDX]._enabled = (enabled == 1); + } else if (strcmp(name, "pids") == 0) { + log_debug(os, container)("Detected optional pids controller entry in %s", proc_cgroups); + cg_infos[PIDS_IDX]._name = os::strdup(name); + cg_infos[PIDS_IDX]._hierarchy_id = hierarchy_id; + cg_infos[PIDS_IDX]._enabled = (enabled == 1); } } fclose(cgroups); is_cgroupsV2 = true; - all_controllers_enabled = true; + all_required_controllers_enabled = true; for (int i = 0; i < CG_INFO_LENGTH; i++) { - is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0; - all_controllers_enabled = all_controllers_enabled && cg_infos[i]._enabled; + // pids controller is optional. 
All other controllers are required
+    if (i != PIDS_IDX) {
+      is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
+      all_required_controllers_enabled = all_required_controllers_enabled && cg_infos[i]._enabled;
+    }
+    if (log_is_enabled(Debug, os, container) && !cg_infos[i]._enabled) {
+      log_debug(os, container)("controller %s is not enabled\n", cg_controller_name[i]);
+    }
   }
-  if (!all_controllers_enabled) {
-    // one or more controllers disabled, disable container support
+  if (!all_required_controllers_enabled) {
+    // one or more required controllers disabled, disable container support
     log_debug(os, container)("One or more required controllers disabled at kernel level.");
     cleanup(cg_infos);
     *flags = INVALID_CGROUPS_GENERIC;
@@ -220,17 +242,21 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
     while (!is_cgroupsV2 && (token = strsep(&controllers, ",")) != NULL) {
       if (strcmp(token, "memory") == 0) {
-        assert(hierarchy_id == cg_infos[MEMORY_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
+        assert(hierarchy_id == cg_infos[MEMORY_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for memory");
         cg_infos[MEMORY_IDX]._cgroup_path = os::strdup(cgroup_path);
       } else if (strcmp(token, "cpuset") == 0) {
-        assert(hierarchy_id == cg_infos[CPUSET_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
+        assert(hierarchy_id == cg_infos[CPUSET_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuset");
         cg_infos[CPUSET_IDX]._cgroup_path = os::strdup(cgroup_path);
       } else if (strcmp(token, "cpu") == 0) {
-        assert(hierarchy_id == cg_infos[CPU_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
+        assert(hierarchy_id == cg_infos[CPU_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpu");
         cg_infos[CPU_IDX]._cgroup_path = os::strdup(cgroup_path);
       } else if (strcmp(token, "cpuacct") == 0) {
-        assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
+        assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuacct");
         cg_infos[CPUACCT_IDX]._cgroup_path = os::strdup(cgroup_path);
+      } else if (strcmp(token, "pids") == 0) {
+        assert(hierarchy_id == cg_infos[PIDS_IDX]._hierarchy_id, "/proc/cgroups (%d) and /proc/self/cgroup (%d) hierarchy mismatch for pids",
+               cg_infos[PIDS_IDX]._hierarchy_id, hierarchy_id);
+        cg_infos[PIDS_IDX]._cgroup_path = os::strdup(cgroup_path);
       }
     }
     if (is_cgroupsV2) {
@@ -281,13 +307,15 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
   /* Cgroup v1 relevant info
    *
-   * Find the cgroup mount point for memory, cpuset, cpu, cpuacct
+   * Find the cgroup mount point for memory, cpuset, cpu, cpuacct, pids
    *
    * Example for docker:
    * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
    *
    * Example for host:
    * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
+   *
+   * 44 31 0:39 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:23 - cgroup cgroup rw,pids
    */
   if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s %*s %s", tmproot, tmpmount, tmp_fs_type, tmpcgroups) == 4) {
     if (strcmp("cgroup", tmp_fs_type) != 0) {
@@ -333,6 +361,12 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
cg_infos[CPUACCT_IDX]._mount_path = os::strdup(tmpmount); cg_infos[CPUACCT_IDX]._root_mount_path = os::strdup(tmproot); cg_infos[CPUACCT_IDX]._data_complete = true; + } else if (strcmp(token, "pids") == 0) { + any_cgroup_mounts_found = true; + assert(cg_infos[PIDS_IDX]._mount_path == NULL, "stomping of _mount_path"); + cg_infos[PIDS_IDX]._mount_path = os::strdup(tmpmount); + cg_infos[PIDS_IDX]._root_mount_path = os::strdup(tmproot); + cg_infos[PIDS_IDX]._data_complete = true; } } } @@ -387,10 +421,13 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, *flags = INVALID_CGROUPS_V1; return false; } + if (log_is_enabled(Debug, os, container) && !cg_infos[PIDS_IDX]._data_complete) { + log_debug(os, container)("Optional cgroup v1 pids subsystem not found"); + // keep the other controller info, pids is optional + } // Cgroups v1 case, we have all the info we need. *flags = CGROUPS_V1; return true; - }; void CgroupSubsystemFactory::cleanup(CgroupInfo* cg_infos) { @@ -514,3 +551,22 @@ jlong CgroupSubsystem::memory_limit_in_bytes() { memory_limit->set_value(mem_limit, OSCONTAINER_CACHE_TIMEOUT); return mem_limit; } + +jlong CgroupSubsystem::limit_from_str(char* limit_str) { + if (limit_str == NULL) { + return OSCONTAINER_ERROR; + } + // Unlimited memory in cgroups is the literal string 'max' for + // some controllers, for example the pids controller. + if (strcmp("max", limit_str) == 0) { + os::free(limit_str); + return (jlong)-1; + } + julong limit; + if (sscanf(limit_str, JULONG_FORMAT, &limit) != 1) { + os::free(limit_str); + return OSCONTAINER_ERROR; + } + os::free(limit_str); + return (jlong)limit; +} diff --git a/src/hotspot/os/linux/cgroupSubsystem_linux.hpp b/src/hotspot/os/linux/cgroupSubsystem_linux.hpp index 80c147c75764e1d91f03be91aa13cb52bb2c6dd8..754629714492911a7a20601a49fcad10873b31b1 100644 --- a/src/hotspot/os/linux/cgroupSubsystem_linux.hpp +++ b/src/hotspot/os/linux/cgroupSubsystem_linux.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -61,12 +61,13 @@ #define INVALID_CGROUPS_NO_MOUNT 5 #define INVALID_CGROUPS_GENERIC 6 -// Four controllers: cpu, cpuset, cpuacct, memory -#define CG_INFO_LENGTH 4 +// Five controllers: cpu, cpuset, cpuacct, memory, pids +#define CG_INFO_LENGTH 5 #define CPUSET_IDX 0 #define CPU_IDX 1 #define CPUACCT_IDX 2 #define MEMORY_IDX 3 +#define PIDS_IDX 4 typedef char * cptr; @@ -238,10 +239,13 @@ class CgroupSubsystem: public CHeapObj { public: jlong memory_limit_in_bytes(); int active_processor_count(); + jlong limit_from_str(char* limit_str); virtual int cpu_quota() = 0; virtual int cpu_period() = 0; virtual int cpu_shares() = 0; + virtual jlong pids_max() = 0; + virtual jlong pids_current() = 0; virtual jlong memory_usage_in_bytes() = 0; virtual jlong memory_and_swap_limit_in_bytes() = 0; virtual jlong memory_soft_limit_in_bytes() = 0; diff --git a/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp b/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp index 5638213cd60ed045bd320ea04207d4c843e8110b..e259206b41ec027a25e57bd4e0dd70414bc4a804 100644 --- a/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp +++ b/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. 
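`limit_from_str` is hoisted from the cgroup v2 subsystem into the shared base class here so that v1 can reuse it for `pids.max`, which uses the same `max`-means-unlimited convention. A standalone sketch of reading and interpreting such a limit file, without HotSpot's `GET_CONTAINER_INFO` plumbing (the `-2` return stands in for `OSCONTAINER_ERROR`, and the path in the comment is an assumed v1 mount point):

```cpp
#include <cstdio>
#include <cstring>

// "max" -> -1 (unlimited); a number -> its value; anything else -> -2 (error).
static long long limit_from_str_sketch(const char* limit_str) {
  if (limit_str == nullptr) return -2;
  if (strcmp(limit_str, "max") == 0) return -1;
  unsigned long long limit;
  if (sscanf(limit_str, "%llu", &limit) != 1) return -2;
  return (long long)limit;
}

// Example: interpret a pids.max-style interface file, e.g. the (assumed)
// v1 mount point "/sys/fs/cgroup/pids/pids.max".
static long long read_limit_file(const char* path) {
  FILE* f = fopen(path, "r");
  if (f == nullptr) return -2;
  char buf[64] = {0};
  const char* line = fgets(buf, sizeof(buf), f);
  fclose(f);
  if (line == nullptr) return -2;
  buf[strcspn(buf, "\n")] = '\0';  // strip trailing newline
  return limit_from_str_sketch(buf);
}
```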
+ * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -241,3 +241,43 @@ int CgroupV1Subsystem::cpu_shares() { return shares; } + + +char* CgroupV1Subsystem::pids_max_val() { + GET_CONTAINER_INFO_CPTR(cptr, _pids, "/pids.max", + "Maximum number of tasks is: %s", "%s %*d", pidsmax, 1024); + if (pidsmax == NULL) { + return NULL; + } + return os::strdup(pidsmax); +} + +/* pids_max + * + * Return the maximum number of tasks available to the process + * + * return: + * maximum number of tasks + * -1 for unlimited + * OSCONTAINER_ERROR for not supported + */ +jlong CgroupV1Subsystem::pids_max() { + if (_pids == NULL) return OSCONTAINER_ERROR; + char * pidsmax_str = pids_max_val(); + return limit_from_str(pidsmax_str); +} + +/* pids_current + * + * The number of tasks currently in the cgroup (and its descendants) of the process + * + * return: + * current number of tasks + * OSCONTAINER_ERROR for not supported + */ +jlong CgroupV1Subsystem::pids_current() { + if (_pids == NULL) return OSCONTAINER_ERROR; + GET_CONTAINER_INFO(jlong, _pids, "/pids.current", + "Current number of tasks is: " JLONG_FORMAT, JLONG_FORMAT, pids_current); + return pids_current; +} diff --git a/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp b/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp index 79a247a4562dadf468cecb7d5b414f93f59802f3..3811a56b3297283981a83c55a0896f36d482ab4b 100644 --- a/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp +++ b/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -87,6 +87,9 @@ class CgroupV1Subsystem: public CgroupSubsystem { int cpu_shares(); + jlong pids_max(); + jlong pids_current(); + const char * container_type() { return "cgroupv1"; } @@ -101,15 +104,20 @@ class CgroupV1Subsystem: public CgroupSubsystem { CgroupV1Controller* _cpuset = NULL; CachingCgroupController* _cpu = NULL; CgroupV1Controller* _cpuacct = NULL; + CgroupV1Controller* _pids = NULL; + + char * pids_max_val(); public: CgroupV1Subsystem(CgroupV1Controller* cpuset, CgroupV1Controller* cpu, CgroupV1Controller* cpuacct, + CgroupV1Controller* pids, CgroupV1MemoryController* memory) { _cpuset = cpuset; _cpu = new CachingCgroupController(cpu); _cpuacct = cpuacct; + _pids = pids; _memory = new CachingCgroupController(memory); _unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size(); } diff --git a/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp b/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp index 66192f1d27161df15694abd3e15d31410d0af804..25146373532c6b13b641c973025c97320b503128 100644 --- a/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp +++ b/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Red Hat Inc. + * Copyright (c) 2020, 2021, Red Hat Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -203,24 +203,6 @@ jlong CgroupV2Subsystem::read_memory_limit_in_bytes() { return limit; } -jlong CgroupV2Subsystem::limit_from_str(char* limit_str) { - if (limit_str == NULL) { - return OSCONTAINER_ERROR; - } - // Unlimited memory in Cgroups V2 is the literal string 'max' - if (strcmp("max", limit_str) == 0) { - os::free(limit_str); - return (jlong)-1; - } - julong limit; - if (sscanf(limit_str, JULONG_FORMAT, &limit) != 1) { - os::free(limit_str); - return OSCONTAINER_ERROR; - } - os::free(limit_str); - return (jlong)limit; -} - char* CgroupV2Subsystem::mem_limit_val() { GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.max", "Raw value for memory limit is: %s", "%s", mem_limit_str, 1024); @@ -244,3 +226,39 @@ char* CgroupV2Controller::construct_path(char* mount_path, char *cgroup_path) { return os::strdup(buf); } +char* CgroupV2Subsystem::pids_max_val() { + GET_CONTAINER_INFO_CPTR(cptr, _unified, "/pids.max", + "Maximum number of tasks is: %s", "%s %*d", pidsmax, 1024); + if (pidsmax == NULL) { + return NULL; + } + return os::strdup(pidsmax); +} + +/* pids_max + * + * Return the maximum number of tasks available to the process + * + * return: + * maximum number of tasks + * -1 for unlimited + * OSCONTAINER_ERROR for not supported + */ +jlong CgroupV2Subsystem::pids_max() { + char * pidsmax_str = pids_max_val(); + return limit_from_str(pidsmax_str); +} + +/* pids_current + * + * The number of tasks currently in the cgroup (and its descendants) of the process + * + * return: + * current number of tasks + * OSCONTAINER_ERROR for not supported + */ +jlong CgroupV2Subsystem::pids_current() { + GET_CONTAINER_INFO(jlong, _unified, "/pids.current", + "Current number of tasks is: " JLONG_FORMAT, JLONG_FORMAT, pids_current); + return pids_current; +} diff --git a/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp b/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp index bd3380e22e3b4f14e8b41926b4e0b2c60a335597..beb78c2174393804fec3a2e51ddb423a70f4719e 100644 --- a/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp +++ b/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp @@ -60,7 +60,7 @@ class CgroupV2Subsystem: public CgroupSubsystem { char *mem_swp_limit_val(); char *mem_soft_limit_val(); char *cpu_quota_val(); - jlong limit_from_str(char* limit_str); + char *pids_max_val(); public: CgroupV2Subsystem(CgroupController * unified) { @@ -79,6 +79,9 @@ class CgroupV2Subsystem: public CgroupSubsystem { jlong memory_max_usage_in_bytes(); char * cpu_cpuset_cpus(); char * cpu_cpuset_memory_nodes(); + jlong pids_max(); + jlong pids_current(); + const char * container_type() { return "cgroupv2"; } diff --git a/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp b/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp index 7a113055423aa3837a114580c6ee66005874dae8..951b98d6cce40ba9d751cc3cc9ce3bba7f65c88c 100644 --- a/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp +++ b/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp @@ -58,6 +58,9 @@ #ifndef MFD_HUGETLB #define MFD_HUGETLB 0x0004U #endif +#ifndef MFD_HUGE_2MB +#define MFD_HUGE_2MB 0x54000000U +#endif // open(2) flags #ifndef O_CLOEXEC @@ -175,12 +178,6 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity) : return; } - if (ZLargePages::is_explicit() && os::large_page_size() != ZGranuleSize) { - log_error_p(gc)("Incompatible large page size configured " SIZE_FORMAT " (expected " SIZE_FORMAT ")", - os::large_page_size(), ZGranuleSize); - 
return; - } - // Make sure the filesystem block size is compatible if (ZGranuleSize % _block_size != 0) { log_error_p(gc)("Filesystem backing the heap has incompatible block size (" SIZE_FORMAT ")", @@ -199,17 +196,20 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity) : } int ZPhysicalMemoryBacking::create_mem_fd(const char* name) const { + assert(ZGranuleSize == 2 * M, "Granule size must match MFD_HUGE_2MB"); + // Create file name char filename[PATH_MAX]; snprintf(filename, sizeof(filename), "%s%s", name, ZLargePages::is_explicit() ? ".hugetlb" : ""); // Create file - const int extra_flags = ZLargePages::is_explicit() ? MFD_HUGETLB : 0; + const int extra_flags = ZLargePages::is_explicit() ? (MFD_HUGETLB | MFD_HUGE_2MB) : 0; const int fd = ZSyscall::memfd_create(filename, MFD_CLOEXEC | extra_flags); if (fd == -1) { ZErrno err; log_debug_p(gc, init)("Failed to create memfd file (%s)", - ((ZLargePages::is_explicit() && err == EINVAL) ? "Hugepages not supported" : err.to_string())); + (ZLargePages::is_explicit() && (err == EINVAL || err == ENODEV)) ? + "Hugepages (2M) not available" : err.to_string()); return -1; } @@ -445,7 +445,7 @@ ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap_tmpfs(size_t offset, size_t } // Advise mapping to use transparent huge pages - os::realign_memory((char*)addr, length, os::large_page_size()); + os::realign_memory((char*)addr, length, ZGranuleSize); // Touch the mapping (safely) to make sure it's backed by memory const bool backed = safe_touch_mapping(addr, length, _block_size); diff --git a/src/hotspot/os/linux/osContainer_linux.cpp b/src/hotspot/os/linux/osContainer_linux.cpp index b89cfd676ebe6e4689b2460ae20a0e39cdcb7e66..eb6f4a77fccbcf84c4e9f430cba93d5fe044c47c 100644 --- a/src/hotspot/os/linux/osContainer_linux.cpp +++ b/src/hotspot/os/linux/osContainer_linux.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -129,3 +129,13 @@ int OSContainer::cpu_shares() { assert(cgroup_subsystem != NULL, "cgroup subsystem not available"); return cgroup_subsystem->cpu_shares(); } + +jlong OSContainer::pids_max() { + assert(cgroup_subsystem != NULL, "cgroup subsystem not available"); + return cgroup_subsystem->pids_max(); +} + +jlong OSContainer::pids_current() { + assert(cgroup_subsystem != NULL, "cgroup subsystem not available"); + return cgroup_subsystem->pids_current(); +} diff --git a/src/hotspot/os/linux/osContainer_linux.hpp b/src/hotspot/os/linux/osContainer_linux.hpp index 21801b7dc4b38790db34ac30079101488076c51f..940bc0e3874bf00048722622e99e3defee97a211 100644 --- a/src/hotspot/os/linux/osContainer_linux.hpp +++ b/src/hotspot/os/linux/osContainer_linux.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
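Instead of refusing to run when the configured large page size differs from the 2M granule, ZGC now asks for 2M pages explicitly: `MFD_HUGE_2MB` encodes the page size in the upper `memfd_create` flag bits. A sketch of the raw syscall usage, with the flag values repeated the way the patch does for older toolchains:

```cpp
#include <sys/syscall.h>
#include <unistd.h>

#ifndef MFD_CLOEXEC
#define MFD_CLOEXEC  0x0001U
#endif
#ifndef MFD_HUGETLB
#define MFD_HUGETLB  0x0004U
#endif
#ifndef MFD_HUGE_2MB
#define MFD_HUGE_2MB 0x54000000U  // log2(2M) == 21, shifted into the size-encoding bits
#endif

// Anonymous file explicitly backed by 2M huge pages. Fails with EINVAL or
// ENODEV when 2M hugetlb pages are unavailable, matching the error handling
// in ZPhysicalMemoryBacking::create_mem_fd().
static int create_2mb_hugetlb_memfd(const char* name) {
  return (int)syscall(SYS_memfd_create, name,
                      MFD_CLOEXEC | MFD_HUGETLB | MFD_HUGE_2MB);
}
```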
* * This code is free software; you can redistribute it and/or modify it @@ -62,6 +62,8 @@ class OSContainer: AllStatic { static int cpu_shares(); + static jlong pids_max(); + static jlong pids_current(); }; inline bool OSContainer::is_containerized() { diff --git a/src/hotspot/os/linux/osThread_linux.cpp b/src/hotspot/os/linux/osThread_linux.cpp index 6f7e074a522d6110c9276c9154e220538486584b..b6365558da18b01767892375184543112c9d2629 100644 --- a/src/hotspot/os/linux/osThread_linux.cpp +++ b/src/hotspot/os/linux/osThread_linux.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -40,8 +40,7 @@ void OSThread::pd_initialize() { sigemptyset(&_caller_sigmask); - _startThread_lock = new Monitor(Mutex::event, "startThread_lock", true, - Monitor::_safepoint_check_never); + _startThread_lock = new Monitor(Mutex::event, "startThread_lock"); assert(_startThread_lock !=NULL, "check"); } diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp index a3424322be86cf964b135bdb3f609aeb44a8a295..e75e2e51b0e467b99b16e4904e551185b933a22b 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp @@ -171,8 +171,6 @@ os::Linux::mallinfo_func_t os::Linux::_mallinfo = NULL; os::Linux::mallinfo2_func_t os::Linux::_mallinfo2 = NULL; #endif // __GLIBC__ -static jlong initial_time_count=0; - static int clock_tics_per_sec = 100; // If the VM might have been created on the primordial thread, we need to resolve the @@ -308,19 +306,6 @@ bool os::Linux::get_tick_information(CPUPerfTicks* pticks, int which_logical_cpu return true; } -// Return true if user is running as root. - -bool os::have_special_privileges() { - static bool init = false; - static bool privileges = false; - if (!init) { - privileges = (getuid() != geteuid()) || (getgid() != getegid()); - init = true; - } - return privileges; -} - - #ifndef SYS_gettid // i386: 224, ia64: 1105, amd64: 186, sparc: 143 #ifdef __ia64__ @@ -412,7 +397,7 @@ void os::init_system_properties_values() { // ... // 7: The default directories, normally /lib and /usr/lib. #ifndef OVERRIDE_LIBPATH - #if defined(AMD64) || (defined(_LP64) && defined(SPARC)) || defined(PPC64) || defined(S390) + #if defined(_LP64) #define DEFAULT_LIBPATH "/usr/lib64:/lib64:/lib:/usr/lib" #else #define DEFAULT_LIBPATH "/lib:/usr/lib" @@ -863,16 +848,21 @@ bool os::create_thread(Thread* thread, ThreadType thr_type, ThreadState state; { + ResourceMark rm; pthread_t tid; - int ret = pthread_create(&tid, &attr, (void* (*)(void*)) thread_native_entry, thread); + int ret = 0; + int limit = 3; + do { + ret = pthread_create(&tid, &attr, (void* (*)(void*)) thread_native_entry, thread); + } while (ret == EAGAIN && limit-- > 0); char buf[64]; if (ret == 0) { - log_info(os, thread)("Thread started (pthread id: " UINTX_FORMAT ", attributes: %s). ", - (uintx) tid, os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); + log_info(os, thread)("Thread \"%s\" started (pthread id: " UINTX_FORMAT ", attributes: %s). 
", + thread->name(), (uintx) tid, os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); } else { - log_warning(os, thread)("Failed to start thread - pthread_create failed (%s) for attributes: %s.", - os::errno_name(ret), os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); + log_warning(os, thread)("Failed to start thread \"%s\" - pthread_create failed (%s) for attributes: %s.", + thread->name(), os::errno_name(ret), os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); // Log some OS information which might explain why creating the thread failed. log_info(os, thread)("Number of threads approx. running in the VM: %d", Threads::number_of_threads()); LogStream st(Log(os, thread)::info()); @@ -1274,22 +1264,6 @@ void os::Linux::capture_initial_stack(size_t max_size) { //////////////////////////////////////////////////////////////////////////////// // time support - -// Time since start-up in seconds to a fine granularity. -double os::elapsedTime() { - return ((double)os::elapsed_counter()) / os::elapsed_frequency(); // nanosecond resolution -} - -jlong os::elapsed_counter() { - return javaTimeNanos() - initial_time_count; -} - -jlong os::elapsed_frequency() { - return NANOSECS_PER_SEC; // nanosecond resolution -} - -bool os::supports_vtime() { return true; } - double os::elapsedVTime() { struct rusage usage; int retval = getrusage(RUSAGE_THREAD, &usage); @@ -1327,42 +1301,6 @@ void os::Linux::fast_thread_clock_init() { } } -// Return the real, user, and system times in seconds from an -// arbitrary fixed point in the past. -bool os::getTimesSecs(double* process_real_time, - double* process_user_time, - double* process_system_time) { - struct tms ticks; - clock_t real_ticks = times(&ticks); - - if (real_ticks == (clock_t) (-1)) { - return false; - } else { - double ticks_per_second = (double) clock_tics_per_sec; - *process_user_time = ((double) ticks.tms_utime) / ticks_per_second; - *process_system_time = ((double) ticks.tms_stime) / ticks_per_second; - *process_real_time = ((double) real_ticks) / ticks_per_second; - - return true; - } -} - - -char * os::local_time_string(char *buf, size_t buflen) { - struct tm t; - time_t long_time; - time(&long_time); - localtime_r(&long_time, &t); - jio_snprintf(buf, buflen, "%d-%02d-%02d %02d:%02d:%02d", - t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, - t.tm_hour, t.tm_min, t.tm_sec); - return buf; -} - -struct tm* os::localtime_pd(const time_t* clock, struct tm* res) { - return localtime_r(clock, res); -} - // thread_id is kernel thread id (similar to Solaris LWP id) intx os::current_thread_id() { return os::Linux::gettid(); } int os::current_process_id() { @@ -1377,14 +1315,6 @@ const char* os::dll_file_extension() { return ".so"; } // directory not the java application's temp directory, ala java.io.tmpdir. 
const char* os::get_temp_directory() { return "/tmp"; } -static bool file_exists(const char* filename) { - struct stat statbuf; - if (filename == NULL || strlen(filename) == 0) { - return false; - } - return os::stat(filename, &statbuf) == 0; -} - // check if addr is inside libjvm.so bool os::address_is_in_vm(address addr) { static address libjvm_base_addr; @@ -1668,6 +1598,9 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { #ifndef EM_RISCV #define EM_RISCV 243 /* RISC-V */ #endif +#ifndef EM_LOONGARCH + #define EM_LOONGARCH 258 /* LoongArch */ +#endif static const arch_t arch_array[]={ {EM_386, EM_386, ELFCLASS32, ELFDATA2LSB, (char*)"IA 32"}, @@ -1695,6 +1628,7 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { {EM_68K, EM_68K, ELFCLASS32, ELFDATA2MSB, (char*)"M68k"}, {EM_AARCH64, EM_AARCH64, ELFCLASS64, ELFDATA2LSB, (char*)"AARCH64"}, {EM_RISCV, EM_RISCV, ELFCLASS64, ELFDATA2LSB, (char*)"RISC-V"}, + {EM_LOONGARCH, EM_LOONGARCH, ELFCLASS64, ELFDATA2LSB, (char*)"LoongArch"}, }; #if (defined IA32) @@ -1731,9 +1665,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { static Elf32_Half running_arch_code=EM_SH; #elif (defined RISCV) static Elf32_Half running_arch_code=EM_RISCV; +#elif (defined LOONGARCH) + static Elf32_Half running_arch_code=EM_LOONGARCH; #else #error Method os::dll_load requires that one of following is defined:\ - AARCH64, ALPHA, ARM, AMD64, IA32, IA64, M68K, MIPS, MIPSEL, PARISC, __powerpc__, __powerpc64__, RISCV, S390, SH, __sparc + AARCH64, ALPHA, ARM, AMD64, IA32, IA64, LOONGARCH, M68K, MIPS, MIPSEL, PARISC, __powerpc__, __powerpc64__, RISCV, S390, SH, __sparc #endif // Identify compatibility class for VM's architecture and library's architecture @@ -1845,15 +1781,6 @@ void * os::Linux::dll_load_in_vmthread(const char *filename, char *ebuf, return result; } -void* os::dll_lookup(void* handle, const char* name) { - void* res = dlsym(handle, name); - return res; -} - -void* os::get_default_process_handle() { - return (void*)::dlopen(NULL, RTLD_LAZY); -} - static bool _print_ascii_file(const char* filename, outputStream* st, const char* hdr = NULL) { int fd = ::open(filename, O_RDONLY); if (fd == -1) { @@ -2137,44 +2064,51 @@ void os::Linux::print_system_memory_info(outputStream* st) { "/sys/kernel/mm/transparent_hugepage/defrag", st); } -void os::Linux::print_process_memory_info(outputStream* st) { - - st->print_cr("Process Memory:"); - - // Print virtual and resident set size; peak values; swap; and for - // rss its components if the kernel is recent enough. 
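For the new `EM_LOONGARCH` entry: `os::dll_load` improves its error message after a failed `dlopen` by reading the library's ELF header and comparing `e_machine` with the VM's own architecture code. A minimal sketch of that read; the 32-bit header layout is identical to the 64-bit one up through `e_machine`, so one struct suffices:

```cpp
#include <cstdio>
#include <elf.h>

#ifndef EM_LOONGARCH
#define EM_LOONGARCH 258  /* LoongArch */
#endif

// Returns the ELF e_machine tag of a shared object, or -1 on error.
static int elf_machine(const char* path) {
  FILE* f = fopen(path, "rb");
  if (f == nullptr) return -1;
  Elf32_Ehdr ehdr;
  const size_t n = fread(&ehdr, sizeof(ehdr), 1, f);
  fclose(f);
  if (n != 1) return -1;
  return ehdr.e_machine;  // e.g. EM_LOONGARCH == 258 for a LoongArch library
}
```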
- ssize_t vmsize = -1, vmpeak = -1, vmswap = -1, - vmrss = -1, vmhwm = -1, rssanon = -1, rssfile = -1, rssshmem = -1; - const int num_values = 8; - int num_found = 0; +bool os::Linux::query_process_memory_info(os::Linux::meminfo_t* info) { FILE* f = ::fopen("/proc/self/status", "r"); + const int num_values = sizeof(os::Linux::meminfo_t) / sizeof(size_t); + int num_found = 0; char buf[256]; + info->vmsize = info->vmpeak = info->vmrss = info->vmhwm = info->vmswap = + info->rssanon = info->rssfile = info->rssshmem = -1; if (f != NULL) { while (::fgets(buf, sizeof(buf), f) != NULL && num_found < num_values) { - if ( (vmsize == -1 && sscanf(buf, "VmSize: " SSIZE_FORMAT " kB", &vmsize) == 1) || - (vmpeak == -1 && sscanf(buf, "VmPeak: " SSIZE_FORMAT " kB", &vmpeak) == 1) || - (vmswap == -1 && sscanf(buf, "VmSwap: " SSIZE_FORMAT " kB", &vmswap) == 1) || - (vmhwm == -1 && sscanf(buf, "VmHWM: " SSIZE_FORMAT " kB", &vmhwm) == 1) || - (vmrss == -1 && sscanf(buf, "VmRSS: " SSIZE_FORMAT " kB", &vmrss) == 1) || - (rssanon == -1 && sscanf(buf, "RssAnon: " SSIZE_FORMAT " kB", &rssanon) == 1) || - (rssfile == -1 && sscanf(buf, "RssFile: " SSIZE_FORMAT " kB", &rssfile) == 1) || - (rssshmem == -1 && sscanf(buf, "RssShmem: " SSIZE_FORMAT " kB", &rssshmem) == 1) + if ( (info->vmsize == -1 && sscanf(buf, "VmSize: " SSIZE_FORMAT " kB", &info->vmsize) == 1) || + (info->vmpeak == -1 && sscanf(buf, "VmPeak: " SSIZE_FORMAT " kB", &info->vmpeak) == 1) || + (info->vmswap == -1 && sscanf(buf, "VmSwap: " SSIZE_FORMAT " kB", &info->vmswap) == 1) || + (info->vmhwm == -1 && sscanf(buf, "VmHWM: " SSIZE_FORMAT " kB", &info->vmhwm) == 1) || + (info->vmrss == -1 && sscanf(buf, "VmRSS: " SSIZE_FORMAT " kB", &info->vmrss) == 1) || + (info->rssanon == -1 && sscanf(buf, "RssAnon: " SSIZE_FORMAT " kB", &info->rssanon) == 1) || // Needs Linux 4.5 + (info->rssfile == -1 && sscanf(buf, "RssFile: " SSIZE_FORMAT " kB", &info->rssfile) == 1) || // Needs Linux 4.5 + (info->rssshmem == -1 && sscanf(buf, "RssShmem: " SSIZE_FORMAT " kB", &info->rssshmem) == 1) // Needs Linux 4.5 ) { num_found ++; } } fclose(f); + return true; + } + return false; +} - st->print_cr("Virtual Size: " SSIZE_FORMAT "K (peak: " SSIZE_FORMAT "K)", vmsize, vmpeak); - st->print("Resident Set Size: " SSIZE_FORMAT "K (peak: " SSIZE_FORMAT "K)", vmrss, vmhwm); - if (rssanon != -1) { // requires kernel >= 4.5 +void os::Linux::print_process_memory_info(outputStream* st) { + + st->print_cr("Process Memory:"); + + // Print virtual and resident set size; peak values; swap; and for + // rss its components if the kernel is recent enough. 
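The refactored `query_process_memory_info` is a single scan over `/proc/self/status`, attempting each known key per line until all fields have been seen. A stripped-down standalone version with just two fields, assuming Linux's `kB` line format (`-1` means the kernel did not emit the line, e.g. `RssAnon` before 4.5):

```cpp
#include <cstdio>

// Minimal cousin of os::Linux::query_process_memory_info(): extract VmRSS
// and VmSwap (in kB) from /proc/self/status.
static bool query_rss_and_swap(long* vmrss_kb, long* vmswap_kb) {
  *vmrss_kb = *vmswap_kb = -1;
  FILE* f = fopen("/proc/self/status", "r");
  if (f == nullptr) return false;
  char line[256];
  while (fgets(line, sizeof(line), f) != nullptr) {
    if (*vmrss_kb == -1 && sscanf(line, "VmRSS: %ld kB", vmrss_kb) == 1) continue;
    if (*vmswap_kb == -1 && sscanf(line, "VmSwap: %ld kB", vmswap_kb) == 1) continue;
  }
  fclose(f);
  return true;
}
```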
+ meminfo_t info; + if (query_process_memory_info(&info)) { + st->print_cr("Virtual Size: " SSIZE_FORMAT "K (peak: " SSIZE_FORMAT "K)", info.vmsize, info.vmpeak); + st->print("Resident Set Size: " SSIZE_FORMAT "K (peak: " SSIZE_FORMAT "K)", info.vmrss, info.vmhwm); + if (info.rssanon != -1) { // requires kernel >= 4.5 st->print(" (anon: " SSIZE_FORMAT "K, file: " SSIZE_FORMAT "K, shmem: " SSIZE_FORMAT "K)", - rssanon, rssfile, rssshmem); + info.rssanon, info.rssfile, info.rssshmem); } st->cr(); - if (vmswap != -1) { // requires kernel >= 2.6.34 - st->print_cr("Swapped out: " SSIZE_FORMAT "K", vmswap); + if (info.vmswap != -1) { // requires kernel >= 2.6.34 + st->print_cr("Swapped out: " SSIZE_FORMAT "K", info.vmswap); } } else { st->print_cr("Could not open /proc/self/status to get process memory related information"); @@ -2195,7 +2129,7 @@ void os::Linux::print_process_memory_info(outputStream* st) { struct glibc_mallinfo mi = _mallinfo(); total_allocated = (size_t)(unsigned)mi.uordblks; // Since mallinfo members are int, glibc values may have wrapped. Warn about this. - might_have_wrapped = (vmrss * K) > UINT_MAX && (vmrss * K) > (total_allocated + UINT_MAX); + might_have_wrapped = (info.vmrss * K) > UINT_MAX && (info.vmrss * K) > (total_allocated + UINT_MAX); } if (_mallinfo2 != NULL || _mallinfo != NULL) { st->print_cr("C-Heap outstanding allocations: " SIZE_FORMAT "K%s", @@ -2220,6 +2154,7 @@ void os::Linux::print_uptime_info(outputStream* st) { bool os::Linux::print_container_info(outputStream* st) { if (!OSContainer::is_containerized()) { + st->print_cr("container information not found."); return false; } @@ -2308,6 +2243,24 @@ bool os::Linux::print_container_info(outputStream* st) { st->print_cr("%s", j == OSCONTAINER_ERROR ? "not supported" : "unlimited"); } + j = OSContainer::OSContainer::pids_max(); + st->print("maximum number of tasks: "); + if (j > 0) { + st->print_cr(JLONG_FORMAT, j); + } else { + st->print_cr("%s", j == OSCONTAINER_ERROR ? 
"not supported" : "unlimited"); + } + + j = OSContainer::OSContainer::pids_current(); + st->print("current number of tasks: "); + if (j > 0) { + st->print_cr(JLONG_FORMAT, j); + } else { + if (j == OSCONTAINER_ERROR) { + st->print_cr("not supported"); + } + } + return true; } @@ -2406,7 +2359,7 @@ static void print_sys_devices_cpu_info(outputStream* st, char* buf, size_t bufle snprintf(hbuf_type, 60, "/sys/devices/system/cpu/cpu0/cache/index%u/type", i); snprintf(hbuf_size, 60, "/sys/devices/system/cpu/cpu0/cache/index%u/size", i); snprintf(hbuf_coherency_line_size, 80, "/sys/devices/system/cpu/cpu0/cache/index%u/coherency_line_size", i); - if (file_exists(hbuf_level)) { + if (os::file_exists(hbuf_level)) { _print_ascii_file_h("cache level", hbuf_level, st); _print_ascii_file_h("cache type", hbuf_type, st); _print_ascii_file_h("cache size", hbuf_size, st); @@ -3347,6 +3300,9 @@ bool os::pd_create_stack_guard_pages(char* addr, size_t size) { if (mincore((address)stack_extent, os::vm_page_size(), vec) == -1) { // Fallback to slow path on all errors, including EAGAIN + assert((uintptr_t)addr >= stack_extent, + "Sanity: addr should be larger than extent, " PTR_FORMAT " >= " PTR_FORMAT, + p2i(addr), stack_extent); stack_extent = (uintptr_t) get_stack_commited_bottom( os::Linux::initial_thread_stack_bottom(), (size_t)addr - stack_extent); @@ -4412,8 +4368,6 @@ void os::init(void) { check_pax(); os::Posix::init(); - - initial_time_count = javaTimeNanos(); } // To install functions for atexit system call @@ -4785,11 +4739,6 @@ void os::set_native_thread_name(const char *name) { } } -bool os::bind_to_processor(uint processor_id) { - // Not yet implemented. - return false; -} - //////////////////////////////////////////////////////////////////////////////// // debug support @@ -4968,9 +4917,7 @@ int os::open(const char *path, int oflag, int mode) { // create binary file, rewriting existing file if required int os::create_binary_file(const char* path, bool rewrite_existing) { int oflags = O_WRONLY | O_CREAT; - if (!rewrite_existing) { - oflags |= O_EXCL; - } + oflags |= rewrite_existing ? O_TRUNC : O_EXCL; return ::open64(path, oflags, S_IREAD | S_IWRITE); } diff --git a/src/hotspot/os/linux/os_linux.hpp b/src/hotspot/os/linux/os_linux.hpp index ada8db6977ea027f83ef71e013390040a286ca61..692dae042abda59a742c1c5e0baea80df241a747 100644 --- a/src/hotspot/os/linux/os_linux.hpp +++ b/src/hotspot/os/linux/os_linux.hpp @@ -174,6 +174,23 @@ class Linux { // Return the namespace pid if so, otherwise -1. static int get_namespace_pid(int vmid); + // Output structure for query_process_memory_info() + struct meminfo_t { + ssize_t vmsize; // current virtual size + ssize_t vmpeak; // peak virtual size + ssize_t vmrss; // current resident set size + ssize_t vmhwm; // peak resident set size + ssize_t vmswap; // swapped out + ssize_t rssanon; // resident set size (anonymous mappings, needs 4.5) + ssize_t rssfile; // resident set size (file mappings, needs 4.5) + ssize_t rssshmem; // resident set size (shared mappings, needs 4.5) + }; + + // Attempts to query memory information about the current process and return it in the output structure. + // May fail (returns false) or succeed (returns true) but not all output fields are available; unavailable + // fields will contain -1. 
+ static bool query_process_memory_info(meminfo_t* info); + // Stack repair handling // none present diff --git a/src/hotspot/os/linux/trimCHeapDCmd.cpp b/src/hotspot/os/linux/trimCHeapDCmd.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ee93ac5e8c8d7141472b9a59bdc726f8092203a8 --- /dev/null +++ b/src/hotspot/os/linux/trimCHeapDCmd.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2021 SAP SE. All rights reserved. + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "logging/log.hpp" +#include "runtime/os.hpp" +#include "utilities/debug.hpp" +#include "utilities/ostream.hpp" +#include "trimCHeapDCmd.hpp" + +#include + +void TrimCLibcHeapDCmd::execute(DCmdSource source, TRAPS) { +#ifdef __GLIBC__ + stringStream ss_report(1024); // Note: before calling trim + + os::Linux::meminfo_t info1; + os::Linux::meminfo_t info2; + // Query memory before... + bool have_info1 = os::Linux::query_process_memory_info(&info1); + + _output->print_cr("Attempting trim..."); + ::malloc_trim(0); + _output->print_cr("Done."); + + // ...and after trim. 
+ bool have_info2 = os::Linux::query_process_memory_info(&info2); + + // Print report both to output stream as well to UL + bool wrote_something = false; + if (have_info1 && have_info2) { + if (info1.vmsize != -1 && info2.vmsize != -1) { + ss_report.print_cr("Virtual size before: " SSIZE_FORMAT "k, after: " SSIZE_FORMAT "k, (" SSIZE_FORMAT "k)", + info1.vmsize, info2.vmsize, (info2.vmsize - info1.vmsize)); + wrote_something = true; + } + if (info1.vmrss != -1 && info2.vmrss != -1) { + ss_report.print_cr("RSS before: " SSIZE_FORMAT "k, after: " SSIZE_FORMAT "k, (" SSIZE_FORMAT "k)", + info1.vmrss, info2.vmrss, (info2.vmrss - info1.vmrss)); + wrote_something = true; + } + if (info1.vmswap != -1 && info2.vmswap != -1) { + ss_report.print_cr("Swap before: " SSIZE_FORMAT "k, after: " SSIZE_FORMAT "k, (" SSIZE_FORMAT "k)", + info1.vmswap, info2.vmswap, (info2.vmswap - info1.vmswap)); + wrote_something = true; + } + } + if (!wrote_something) { + ss_report.print_raw("No details available."); + } + + _output->print_raw(ss_report.base()); + log_info(os)("malloc_trim:\n%s", ss_report.base()); +#else + _output->print_cr("Not available."); +#endif +} diff --git a/src/hotspot/os/linux/trimCHeapDCmd.hpp b/src/hotspot/os/linux/trimCHeapDCmd.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4c5b5cc2219ca7873457b145dbc9c75c4c609fca --- /dev/null +++ b/src/hotspot/os/linux/trimCHeapDCmd.hpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2021 SAP SE. All rights reserved. + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_LINUX_TRIMCHEAPDCMD_HPP +#define OS_LINUX_TRIMCHEAPDCMD_HPP + +#include "services/diagnosticCommand.hpp" + +class outputStream; + +class TrimCLibcHeapDCmd : public DCmd { +public: + TrimCLibcHeapDCmd(outputStream* output, bool heap) : DCmd(output, heap) {} + static const char* name() { + return "System.trim_native_heap"; + } + static const char* description() { + return "Attempts to free up memory by trimming the C-heap."; + } + static const char* impact() { + return "Low"; + } + static const JavaPermission permission() { + JavaPermission p = { "java.lang.management.ManagementPermission", "control", NULL }; + return p; + } + virtual void execute(DCmdSource source, TRAPS); +}; + +#endif // OS_LINUX_TRIMCHEAPDCMD_HPP diff --git a/src/hotspot/os/posix/os_posix.cpp b/src/hotspot/os/posix/os_posix.cpp index ae058dd345b85cacc37e338d8afeca5d9b8e2c84..60820b31f925cd9ac838c1bed906564a3752e526 100644 --- a/src/hotspot/os/posix/os_posix.cpp +++ b/src/hotspot/os/posix/os_posix.cpp @@ -60,6 +60,8 @@ #include #include #include +#include +#include #include #include #include @@ -87,6 +89,10 @@ #define assert_with_errno(cond, msg) check_with_errno(assert, cond, msg) #define guarantee_with_errno(cond, msg) check_with_errno(guarantee, cond, msg) +static jlong initial_time_count = 0; + +static int clock_tics_per_sec = 100; + // Check core dump limit and report possible place where core can be found void os::check_dump_limit(char* buffer, size_t bufferSize) { if (!FLAG_IS_DEFAULT(CreateCoredumpOnCrash) && !CreateCoredumpOnCrash) { @@ -160,12 +166,6 @@ int os::get_native_stack(address* stack, int frames, int toSkip) { return num_of_frames; } - -bool os::unsetenv(const char* name) { - assert(name != NULL, "Null pointer"); - return (::unsetenv(name) == 0); -} - int os::get_last_error() { return errno; } @@ -183,6 +183,12 @@ size_t os::lasterror(char *buf, size_t len) { return n; } +// Return true if user is running as root. +bool os::have_special_privileges() { + static bool privileges = (getuid() != geteuid()) || (getgid() != getegid()); + return privileges; +} + void os::wait_for_keypress_at_exit(void) { // don't do anything on posix platforms return; @@ -633,6 +639,21 @@ bool os::has_allocatable_memory_limit(size_t* limit) { #endif } +void* os::get_default_process_handle() { +#ifdef __APPLE__ + // MacOS X needs to use RTLD_FIRST instead of RTLD_LAZY + // to avoid finding unexpected symbols on second (or later) + // loads of a library. + return (void*)::dlopen(NULL, RTLD_FIRST); +#else + return (void*)::dlopen(NULL, RTLD_LAZY); +#endif +} + +void* os::dll_lookup(void* handle, const char* name) { + return dlsym(handle, name); +} + void os::dll_unload(void *lib) { ::dlclose(lib); } @@ -726,6 +747,10 @@ void os::exit(int num) { ::exit(num); } +void os::_exit(int num) { + ::_exit(num); +} + // Builds a platform dependent Agent_OnLoad_ function name // which is used to find statically linked in agents. // Parameters: @@ -979,8 +1004,7 @@ size_t os::Posix::get_initial_stack_size(ThreadType thr_type, size_t req_stack_s _compiler_thread_min_stack_allowed); break; case os::vm_thread: - case os::pgc_thread: - case os::cgc_thread: + case os::gc_thread: case os::watcher_thread: default: // presume the unknown thr_type is a VM internal if (req_stack_size == 0 && VMThreadStackSize > 0) { @@ -1225,7 +1249,11 @@ static bool _use_clock_monotonic_condattr = false; // Determine what POSIX API's are present and do appropriate // configuration. 
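On the consolidated `get_default_process_handle` above: the macOS branch uses `RTLD_FIRST` so that later `dlsym` calls search only the main image instead of its whole dependency graph, avoiding surprise matches. A usage sketch; the symbol name is made up for illustration:

```cpp
#include <dlfcn.h>

// Resolve a symbol against the running process image, roughly how the VM
// locates statically linked agent entry points.
static void* lookup_in_process(const char* symbol /* e.g. "Agent_OnLoad_demo" */) {
#ifdef __APPLE__
  void* handle = dlopen(nullptr, RTLD_FIRST);  // macOS: don't search dependents
#else
  void* handle = dlopen(nullptr, RTLD_LAZY);
#endif
  return handle != nullptr ? dlsym(handle, symbol) : nullptr;
}
```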
void os::Posix::init(void) { - +#if defined(_ALLBSD_SOURCE) + clock_tics_per_sec = CLK_TCK; +#else + clock_tics_per_sec = sysconf(_SC_CLK_TCK); +#endif // NOTE: no logging available when this is called. Put logging // statements in init_2(). @@ -1257,6 +1285,8 @@ void os::Posix::init(void) { _use_clock_monotonic_condattr = true; } } + + initial_time_count = javaTimeNanos(); } void os::Posix::init_2(void) { @@ -1424,9 +1454,59 @@ void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) { info_ptr->may_skip_forward = false; // not subject to resetting or drifting info_ptr->kind = JVMTI_TIMER_ELAPSED; // elapsed not CPU time } - #endif // ! APPLE && !AIX +// Time since start-up in seconds to a fine granularity. +double os::elapsedTime() { + return ((double)os::elapsed_counter()) / os::elapsed_frequency(); // nanosecond resolution +} + +jlong os::elapsed_counter() { + return os::javaTimeNanos() - initial_time_count; +} + +jlong os::elapsed_frequency() { + return NANOSECS_PER_SEC; // nanosecond resolution +} + +bool os::supports_vtime() { return true; } + +// Return the real, user, and system times in seconds from an +// arbitrary fixed point in the past. +bool os::getTimesSecs(double* process_real_time, + double* process_user_time, + double* process_system_time) { + struct tms ticks; + clock_t real_ticks = times(&ticks); + + if (real_ticks == (clock_t) (-1)) { + return false; + } else { + double ticks_per_second = (double) clock_tics_per_sec; + *process_user_time = ((double) ticks.tms_utime) / ticks_per_second; + *process_system_time = ((double) ticks.tms_stime) / ticks_per_second; + *process_real_time = ((double) real_ticks) / ticks_per_second; + + return true; + } +} + +char * os::local_time_string(char *buf, size_t buflen) { + struct tm t; + time_t long_time; + time(&long_time); + localtime_r(&long_time, &t); + jio_snprintf(buf, buflen, "%d-%02d-%02d %02d:%02d:%02d", + t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, + t.tm_hour, t.tm_min, t.tm_sec); + return buf; +} + +struct tm* os::localtime_pd(const time_t* clock, struct tm* res) { + return localtime_r(clock, res); +} + + // Shared pthread_mutex/cond based PlatformEvent implementation. // Not currently usable by Solaris. @@ -1885,7 +1965,11 @@ int os::fork_and_exec(const char* cmd, bool prefer_vfork) { // Always use vfork on AIX, since it's safe and helps with analyzing OOM situations. // Otherwise leave it up to the caller. AIX_ONLY(prefer_vfork = true;) + #ifdef __APPLE__ + pid = ::fork(); + #else pid = prefer_vfork ? ::vfork() : ::fork(); + #endif if (pid < 0) { // fork failed diff --git a/src/hotspot/os/posix/os_posix.hpp b/src/hotspot/os/posix/os_posix.hpp index af2c158511b2c23a628355d4d347f4c4e712f168..a745a8f48dbe079dcbb9641d4d2b4d05d53347e9 100644 --- a/src/hotspot/os/posix/os_posix.hpp +++ b/src/hotspot/os/posix/os_posix.hpp @@ -25,6 +25,15 @@ #ifndef OS_POSIX_OS_POSIX_HPP #define OS_POSIX_OS_POSIX_HPP +// Note: the Posix API aims to capture functionality available on all Posix +// compliant platforms, but in practice the implementations may depend on +// non-Posix functionality. For example, the use of lseek64 and ftruncate64. +// This use of non-Posix APIs is made possible by compiling/linking in a mode +// that is not restricted to being fully Posix compliant, such as by declaring +// -D_GNU_SOURCE. But be aware that in doing so we may enable non-Posix +// behaviour in APIs that are defined by Posix. For example, that SIGSTKSZ +// is not defined as a constant as of Glibc 2.34.
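+//
+// (Editorial illustration, not part of the original patch: a minimal sketch of
+// the SIGSTKSZ change mentioned above. With glibc >= 2.34, SIGSTKSZ expands to
+// sysconf(_SC_SIGSTKSZ) and is no longer an integer constant expression, so
+// code such as
+//
+//   #include <signal.h>
+//   static char chained_stack[SIGSTKSZ];   // compiles with glibc < 2.34 only
+//
+// stops compiling, while run-time sizing keeps working:
+//
+//   char* chained_stack = (char*)::malloc(SIGSTKSZ);
+//)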
+ // File conventions static const char* file_separator() { return "/"; } static const char* line_separator() { return "\n"; } diff --git a/src/hotspot/os/posix/perfMemory_posix.cpp b/src/hotspot/os/posix/perfMemory_posix.cpp index 8919afffb2c0208b602f01ee60cd173bcbaa27f6..a89947f32edc1da75a7d9622b8163ae7b5151757 100644 --- a/src/hotspot/os/posix/perfMemory_posix.cpp +++ b/src/hotspot/os/posix/perfMemory_posix.cpp @@ -31,6 +31,7 @@ #include "memory/resourceArea.hpp" #include "oops/oop.inline.hpp" #include "os_posix.inline.hpp" +#include "runtime/globals_extension.hpp" #include "runtime/handles.inline.hpp" #include "runtime/os.hpp" #include "runtime/perfMemory.hpp" @@ -1023,18 +1024,23 @@ static char* mmap_create_shared(size_t size) { return mapAddress; } -// release a named shared memory region +// release a named shared memory region that was mmap-ed. // static void unmap_shared(char* addr, size_t bytes) { -#if defined(_AIX) - // Do not rely on os::reserve_memory/os::release_memory to use mmap. - // Use os::reserve_memory/os::release_memory for PerfDisableSharedMem=1, mmap/munmap for PerfDisableSharedMem=0 - if (::munmap(addr, bytes) == -1) { - warning("perfmemory: munmap failed (%d)\n", errno); + int res; + if (MemTracker::tracking_level() > NMT_minimal) { + // Note: Tracker contains a ThreadCritical. + Tracker tkr(Tracker::release); + res = ::munmap(addr, bytes); + if (res == 0) { + tkr.record((address)addr, bytes); + } + } else { + res = ::munmap(addr, bytes); + } + if (res != 0) { + log_info(os)("os::release_memory failed (" PTR_FORMAT ", " SIZE_FORMAT ")", p2i(addr), bytes); } -#else - os::release_memory(addr, bytes); -#endif } // create the PerfData memory region in shared memory. @@ -1234,7 +1240,7 @@ void PerfMemory::create_memory_region(size_t size) { if (PrintMiscellaneous && Verbose) { warning("Reverting to non-shared PerfMemory region.\n"); } - PerfDisableSharedMem = true; + FLAG_SET_ERGO(PerfDisableSharedMem, true); _start = create_standard_memory(size); } } diff --git a/src/hotspot/os/posix/semaphore_posix.cpp b/src/hotspot/os/posix/semaphore_posix.cpp index b5f7855b64e4f0c685e00280bb8012204fd0e680..7f61e6d469e6b38edbebfd1f5741441910e2069c 100644 --- a/src/hotspot/os/posix/semaphore_posix.cpp +++ b/src/hotspot/os/posix/semaphore_posix.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ #include "precompiled.hpp" #ifndef __APPLE__ #include "runtime/os.hpp" -// POSIX unamed semaphores are not supported on OS X. +// POSIX unnamed semaphores are not supported on OS X. #include "semaphore_posix.hpp" #include diff --git a/src/hotspot/os/posix/signals_posix.cpp b/src/hotspot/os/posix/signals_posix.cpp index c2e5d50bb95a86511245c13cd2ccec3857461863..2c020a79408049797d5c2f1fcc1e5de8d968323e 100644 --- a/src/hotspot/os/posix/signals_posix.cpp +++ b/src/hotspot/os/posix/signals_posix.cpp @@ -1145,7 +1145,11 @@ void os::print_siginfo(outputStream* os, const void* si0) { os->print(", si_addr: " PTR_FORMAT, p2i(si->si_addr)); #ifdef SIGPOLL } else if (sig == SIGPOLL) { - os->print(", si_band: %ld", si->si_band); + // siginfo_t.si_band is defined as "long", and it is so in most + // implementations. But SPARC64 glibc has a bug: si_band is "int". 
+ // Cast si_band to "long" to prevent format specifier mismatch. + // See: https://sourceware.org/bugzilla/show_bug.cgi?id=23821 + os->print(", si_band: %ld", (long) si->si_band); #endif } } diff --git a/src/hotspot/os/windows/osThread_windows.hpp b/src/hotspot/os/windows/osThread_windows.hpp index 88dedaffe3ed2725be3b1368b4f049654a0e047a..74b50100663396c3a9b3b981914241617a48ef0b 100644 --- a/src/hotspot/os/windows/osThread_windows.hpp +++ b/src/hotspot/os/windows/osThread_windows.hpp @@ -34,7 +34,6 @@ HANDLE _thread_handle; // Win32 thread handle HANDLE _interrupt_event; // Event signalled on thread interrupt for use by // Process.waitFor(). - ThreadState _last_state; public: // The following will only apply in the Win32 implementation, and should only @@ -58,12 +57,6 @@ } #endif // ASSERT - // This is a temporary fix for the thread states during - // suspend/resume until we throw away OSThread completely. - // NEEDS_CLEANUP - void set_last_state(ThreadState state) { _last_state = state; } - ThreadState get_last_state() { return _last_state; } - private: void pd_initialize(); void pd_destroy(); diff --git a/src/hotspot/os/windows/os_windows.cpp b/src/hotspot/os/windows/os_windows.cpp index 6e996b119934875e4a81f0fce21dedb34aefb4bc..6d0610ccd7591983ef79815596b9855c46a7b012 100644 --- a/src/hotspot/os/windows/os_windows.cpp +++ b/src/hotspot/os/windows/os_windows.cpp @@ -258,11 +258,6 @@ static BOOL unmapViewOfFile(LPCVOID lpBaseAddress) { return result; } -bool os::unsetenv(const char* name) { - assert(name != NULL, "Null pointer"); - return (SetEnvironmentVariable(name, NULL) == TRUE); -} - char** os::get_environ() { return _environ; } // No setuid programs under Windows. @@ -712,8 +707,7 @@ bool os::create_thread(Thread* thread, ThreadType thr_type, } // else fall through: // use VMThreadStackSize if CompilerThreadStackSize is not defined case os::vm_thread: - case os::pgc_thread: - case os::cgc_thread: + case os::gc_thread: case os::asynclog_thread: case os::watcher_thread: if (VMThreadStackSize > 0) stack_size = (size_t)(VMThreadStackSize * K); @@ -743,21 +737,27 @@ bool os::create_thread(Thread* thread, ThreadType thr_type, // flag appears to work with _beginthreadex() as well.
const unsigned initflag = CREATE_SUSPENDED | STACK_SIZE_PARAM_IS_A_RESERVATION; - HANDLE thread_handle = - (HANDLE)_beginthreadex(NULL, - (unsigned)stack_size, - (unsigned (__stdcall *)(void*)) thread_native_entry, - thread, - initflag, - &thread_id); - + HANDLE thread_handle; + int limit = 3; + do { + thread_handle = + (HANDLE)_beginthreadex(NULL, + (unsigned)stack_size, + (unsigned (__stdcall *)(void*)) thread_native_entry, + thread, + initflag, + &thread_id); + } while (thread_handle == NULL && errno == EAGAIN && limit-- > 0); + + ResourceMark rm; char buf[64]; if (thread_handle != NULL) { - log_info(os, thread)("Thread started (tid: %u, attributes: %s)", - thread_id, describe_beginthreadex_attributes(buf, sizeof(buf), stack_size, initflag)); + log_info(os, thread)("Thread \"%s\" started (tid: %u, attributes: %s)", + thread->name(), thread_id, + describe_beginthreadex_attributes(buf, sizeof(buf), stack_size, initflag)); } else { - log_warning(os, thread)("Failed to start thread - _beginthreadex failed (%s) for attributes: %s.", - os::errno_name(errno), describe_beginthreadex_attributes(buf, sizeof(buf), stack_size, initflag)); + log_warning(os, thread)("Failed to start thread \"%s\" - _beginthreadex failed (%s) for attributes: %s.", + thread->name(), os::errno_name(errno), describe_beginthreadex_attributes(buf, sizeof(buf), stack_size, initflag)); // Log some OS information which might explain why creating the thread failed. log_info(os, thread)("Number of threads approx. running in the VM: %d", Threads::number_of_threads()); LogStream st(Log(os, thread)::info()); @@ -892,7 +892,7 @@ static SetThreadDescriptionFnPtr _SetThreadDescription = NULL; DEBUG_ONLY(static GetThreadDescriptionFnPtr _GetThreadDescription = NULL;) // forward decl. -errno_t convert_to_unicode(char const* char_path, LPWSTR* unicode_path); +static errno_t convert_to_unicode(char const* char_path, LPWSTR* unicode_path); void os::set_native_thread_name(const char *name) { @@ -971,11 +971,6 @@ void os::set_native_thread_name(const char *name) { } __except(EXCEPTION_EXECUTE_HANDLER) {} } -bool os::bind_to_processor(uint processor_id) { - // Not yet implemented. 
- return false; -} - void os::win32::initialize_performance_counter() { LARGE_INTEGER count; QueryPerformanceFrequency(&count); @@ -1863,11 +1858,19 @@ void os::win32::print_windows_version(outputStream* st) { case 10000: if (is_workstation) { - st->print("10"); + if (build_number >= 22000) { + st->print("11"); + } else { + st->print("10"); + } } else { - // distinguish Windows Server 2016 and 2019 by build number - // Windows server 2019 GA 10/2018 build number is 17763 - if (build_number > 17762) { + // distinguish Windows Server by build number + // - 2016 GA 10/2016 build: 14393 + // - 2019 GA 11/2018 build: 17763 + // - 2022 GA 08/2021 build: 20348 + if (build_number > 20347) { + st->print("Server 2022"); + } else if (build_number > 17762) { st->print("Server 2019"); } else { st->print("Server 2016"); @@ -2288,7 +2291,7 @@ LONG Handle_Exception(struct _EXCEPTION_POINTERS* exceptionInfo, // Save pc in thread if (thread != nullptr && thread->is_Java_thread()) { - thread->as_Java_thread()->set_saved_exception_pc((address)(DWORD_PTR)exceptionInfo->ContextRecord->PC_NAME); + JavaThread::cast(thread)->set_saved_exception_pc((address)(DWORD_PTR)exceptionInfo->ContextRecord->PC_NAME); } // Set pc to handler @@ -2582,7 +2585,7 @@ LONG WINAPI topLevelExceptionFilter(struct _EXCEPTION_POINTERS* exceptionInfo) { #endif if (t != NULL && t->is_Java_thread()) { - JavaThread* thread = t->as_Java_thread(); + JavaThread* thread = JavaThread::cast(t); bool in_java = thread->thread_state() == _thread_in_Java; bool in_native = thread->thread_state() == _thread_in_native; bool in_vm = thread->thread_state() == _thread_in_vm; @@ -4160,8 +4163,8 @@ int os::win32::exit_process_or_thread(Ept what, int exit_code) { _endthreadex((unsigned)exit_code); } else if (what == EPT_PROCESS) { ::exit(exit_code); - } else { - _exit(exit_code); + } else { // EPT_PROCESS_DIE + ::_exit(exit_code); } // Should not reach here @@ -4764,6 +4767,10 @@ void os::exit(int num) { win32::exit_process_or_thread(win32::EPT_PROCESS, num); } +void os::_exit(int num) { + win32::exit_process_or_thread(win32::EPT_PROCESS_DIE, num); +} + // Is a (classpath) directory empty? bool os::dir_is_empty(const char* path) { errno_t err; @@ -4806,9 +4813,7 @@ bool os::dir_is_empty(const char* path) { // create binary file, rewriting existing file if required int os::create_binary_file(const char* path, bool rewrite_existing) { int oflags = _O_CREAT | _O_WRONLY | _O_BINARY; - if (!rewrite_existing) { - oflags |= _O_EXCL; - } + oflags |= rewrite_existing ? 
_O_TRUNC : _O_EXCL; return ::open(path, oflags, _S_IREAD | _S_IWRITE); } diff --git a/src/hotspot/os/windows/perfMemory_windows.cpp b/src/hotspot/os/windows/perfMemory_windows.cpp index d825c540f6ee3c1c2eb99241d8f440058f55d10f..600eb8882f9e4a51be91aa440713f0ff3dd29f36 100644 --- a/src/hotspot/os/windows/perfMemory_windows.cpp +++ b/src/hotspot/os/windows/perfMemory_windows.cpp @@ -29,6 +29,7 @@ #include "memory/resourceArea.hpp" #include "oops/oop.inline.hpp" #include "os_windows.inline.hpp" +#include "runtime/globals_extension.hpp" #include "runtime/handles.inline.hpp" #include "runtime/os.hpp" #include "runtime/perfMemory.hpp" @@ -1745,7 +1746,7 @@ void PerfMemory::create_memory_region(size_t size) { if (PrintMiscellaneous && Verbose) { warning("Reverting to non-shared PerfMemory region.\n"); } - PerfDisableSharedMem = true; + FLAG_SET_ERGO(PerfDisableSharedMem, true); _start = create_standard_memory(size); } } diff --git a/src/hotspot/os_cpu/bsd_aarch64/atomic_bsd_aarch64.hpp b/src/hotspot/os_cpu/bsd_aarch64/atomic_bsd_aarch64.hpp index e0c2961e4842c7380bc18934fec04da8dfe9ef37..fba59870d7c5029118ecccea57e0877de77c3376 100644 --- a/src/hotspot/os_cpu/bsd_aarch64/atomic_bsd_aarch64.hpp +++ b/src/hotspot/os_cpu/bsd_aarch64/atomic_bsd_aarch64.hpp @@ -27,6 +27,8 @@ #ifndef OS_CPU_BSD_AARCH64_ATOMIC_BSD_AARCH64_HPP #define OS_CPU_BSD_AARCH64_ATOMIC_BSD_AARCH64_HPP +#include "utilities/debug.hpp" + // Implementation of class atomic // Note that memory_order_conservative requires a full barrier after atomic stores. // See https://patchwork.kernel.org/patch/3575821/ @@ -64,17 +66,40 @@ inline T Atomic::PlatformCmpxchg::operator()(T volatile* dest, T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(byte_size == sizeof(T)); - if (order == memory_order_relaxed) { + if (order == memory_order_conservative) { T value = compare_value; + FULL_MEM_BARRIER; __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + FULL_MEM_BARRIER; return value; } else { + STATIC_ASSERT ( + // The modes that align with C++11 are intended to + // follow the same semantics. + memory_order_relaxed == __ATOMIC_RELAXED && + memory_order_acquire == __ATOMIC_ACQUIRE && + memory_order_release == __ATOMIC_RELEASE && + memory_order_acq_rel == __ATOMIC_ACQ_REL && + memory_order_seq_cst == __ATOMIC_SEQ_CST); + + // Some sanity checking on the memory order. It makes no + // sense to have a release operation for a store that never + // happens. 
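+ // (Editorial illustration, not part of the original patch: the switch below
+ // derives the failure ordering from the success ordering, following the
+ // C++11 rule that a failed compare-exchange performs only a load and thus
+ // cannot carry release semantics:
+ //   memory_order_release -> fails with memory_order_relaxed
+ //   memory_order_acq_rel -> fails with memory_order_acquire
+ //   all other orders     -> fail with the success order itself)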
+ int failure_memory_order; + switch (order) { + case memory_order_release: + failure_memory_order = memory_order_relaxed; break; + case memory_order_acq_rel: + failure_memory_order = memory_order_acquire; break; + default: + failure_memory_order = order; + } + assert(failure_memory_order <= order, "must be"); + T value = compare_value; - FULL_MEM_BARRIER; __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false, - __ATOMIC_RELAXED, __ATOMIC_RELAXED); - FULL_MEM_BARRIER; + order, failure_memory_order); return value; } } diff --git a/src/hotspot/os_cpu/bsd_aarch64/pauth_bsd_aarch64.inline.hpp b/src/hotspot/os_cpu/bsd_aarch64/pauth_bsd_aarch64.inline.hpp index 21193e181f2b787fed8590866919b7511eb91298..a4d416d384e29f2d5daedd76611ce78cfc456e54 100644 --- a/src/hotspot/os_cpu/bsd_aarch64/pauth_bsd_aarch64.inline.hpp +++ b/src/hotspot/os_cpu/bsd_aarch64/pauth_bsd_aarch64.inline.hpp @@ -22,8 +22,8 @@ * */ -#ifndef OS_CPU_LINUX_AARCH64_PAUTH_BSD_AARCH64_INLINE_HPP -#define OS_CPU_LINUX_AARCH64_PAUTH_BSD_AARCH64_INLINE_HPP +#ifndef OS_CPU_BSD_AARCH64_PAUTH_BSD_AARCH64_INLINE_HPP +#define OS_CPU_BSD_AARCH64_PAUTH_BSD_AARCH64_INLINE_HPP #ifdef __APPLE__ #include @@ -49,5 +49,5 @@ inline address pauth_strip_pointer(address ptr) { #undef XPACLRI -#endif // OS_CPU_LINUX_AARCH64_PAUTH_BSD_AARCH64_INLINE_HPP +#endif // OS_CPU_BSD_AARCH64_PAUTH_BSD_AARCH64_INLINE_HPP diff --git a/src/hotspot/os_cpu/bsd_aarch64/vm_version_bsd_aarch64.cpp b/src/hotspot/os_cpu/bsd_aarch64/vm_version_bsd_aarch64.cpp index 3d49ecf54ff9813d093426fb9256eeddd80a2ff3..13f4b8afbc43cf8503abc40bab4f9b7f1e477ed3 100644 --- a/src/hotspot/os_cpu/bsd_aarch64/vm_version_bsd_aarch64.cpp +++ b/src/hotspot/os_cpu/bsd_aarch64/vm_version_bsd_aarch64.cpp @@ -60,6 +60,9 @@ void VM_Version::get_os_cpu_info() { assert(cpu_has("hw.optional.neon"), "should be"); _features = CPU_FP | CPU_ASIMD; + // All Apple-darwin Arm processors have AES. 
+ _features |= CPU_AES; + // Only a few features are available via sysctl, see line 614 // https://opensource.apple.com/source/xnu/xnu-6153.141.1/bsd/kern/kern_mib.c.auto.html if (cpu_has("hw.optional.armv8_crc32")) _features |= CPU_CRC32; @@ -88,6 +91,7 @@ void VM_Version::get_os_cpu_info() { if (sysctlbyname("hw.cpufamily", &family, &sysctllen, NULL, 0)) { family = 0; } + _model = family; _cpu = CPU_APPLE; } diff --git a/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.hpp b/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.hpp index 691f278bc950c1efa976df7c3723f19a6e43d916..90e2574abf2a2f19ade0c72c29ff556abc20f02b 100644 --- a/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.hpp +++ b/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.hpp @@ -60,8 +60,12 @@ inline void OrderAccess::fence() { } inline void OrderAccess::cross_modify_fence_impl() { - int idx = 0; - __asm__ volatile ("cpuid " : "+a" (idx) : : "ebx", "ecx", "edx", "memory"); + if (VM_Version::supports_serialize()) { + __asm__ volatile (".byte 0x0f, 0x01, 0xe8\n\t" : : :); // serialize + } else { + int idx = 0; + __asm__ volatile ("cpuid " : "+a" (idx) : : "ebx", "ecx", "edx", "memory"); + } } #endif // OS_CPU_BSD_X86_ORDERACCESS_BSD_X86_HPP diff --git a/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.S b/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.S index f5d2c2b69c2226123fc868dfe8cd4c26c7b32d1e..9c91942cb335a163537fc99e3a00fddcd6780529 100644 --- a/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.S +++ b/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.S @@ -47,6 +47,28 @@ aarch64_atomic_fetch_add_4_default_impl: mov w0, w2 ret + .global aarch64_atomic_fetch_add_8_relaxed_default_impl + .align 5 +aarch64_atomic_fetch_add_8_relaxed_default_impl: + prfm pstl1strm, [x0] +0: ldxr x2, [x0] + add x8, x2, x1 + stxr w9, x8, [x0] + cbnz w9, 0b + mov x0, x2 + ret + + .global aarch64_atomic_fetch_add_4_relaxed_default_impl + .align 5 +aarch64_atomic_fetch_add_4_relaxed_default_impl: + prfm pstl1strm, [x0] +0: ldxr w2, [x0] + add w8, w2, w1 + stxr w9, w8, [x0] + cbnz w9, 0b + mov w0, w2 + ret + .globl aarch64_atomic_xchg_4_default_impl .align 5 aarch64_atomic_xchg_4_default_impl: @@ -112,7 +134,55 @@ aarch64_atomic_cmpxchg_8_default_impl: dmb ish ret - .globl aarch64_atomic_cmpxchg_1_relaxed_default_impl + .globl aarch64_atomic_cmpxchg_4_release_default_impl + .align 5 +aarch64_atomic_cmpxchg_4_release_default_impl: + prfm pstl1strm, [x0] +0: ldxr w3, [x0] + cmp w3, w1 + b.ne 1f + stlxr w8, w2, [x0] + cbnz w8, 0b +1: mov w0, w3 + ret + + .globl aarch64_atomic_cmpxchg_8_release_default_impl + .align 5 +aarch64_atomic_cmpxchg_8_release_default_impl: + prfm pstl1strm, [x0] +0: ldxr x3, [x0] + cmp x3, x1 + b.ne 1f + stlxr w8, x2, [x0] + cbnz w8, 0b +1: mov x0, x3 + ret + + .globl aarch64_atomic_cmpxchg_4_seq_cst_default_impl + .align 5 +aarch64_atomic_cmpxchg_4_seq_cst_default_impl: + prfm pstl1strm, [x0] +0: ldaxr w3, [x0] + cmp w3, w1 + b.ne 1f + stlxr w8, w2, [x0] + cbnz w8, 0b +1: mov w0, w3 + ret + + .globl aarch64_atomic_cmpxchg_8_seq_cst_default_impl + .align 5 +aarch64_atomic_cmpxchg_8_seq_cst_default_impl: + prfm pstl1strm, [x0] +0: ldaxr x3, [x0] + cmp x3, x1 + b.ne 1f + stlxr w8, x2, [x0] + cbnz w8, 0b +1: mov x0, x3 + ret + +.globl aarch64_atomic_cmpxchg_1_relaxed_default_impl .align 5 aarch64_atomic_cmpxchg_1_relaxed_default_impl: prfm pstl1strm, [x0] diff --git a/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp b/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp index
77e860ed5ec85202c4460e4faa0220a781bb426b..3208db5b4a6b7b04ad5df0e8bc82e87e50afa8e7 100644 --- a/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp +++ b/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -87,9 +87,14 @@ inline D Atomic::PlatformAdd<4>::fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(I)); STATIC_ASSERT(4 == sizeof(D)); - D old_value - = atomic_fastcall(aarch64_atomic_fetch_add_4_impl, dest, add_value); - return old_value; + aarch64_atomic_stub_t stub; + switch (order) { + case memory_order_relaxed: + stub = aarch64_atomic_fetch_add_4_relaxed_impl; break; + default: + stub = aarch64_atomic_fetch_add_4_impl; break; + } + return atomic_fastcall(stub, dest, add_value); } template<> @@ -98,9 +103,14 @@ inline D Atomic::PlatformAdd<8>::fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(I)); STATIC_ASSERT(8 == sizeof(D)); - D old_value - = atomic_fastcall(aarch64_atomic_fetch_add_8_impl, dest, add_value); - return old_value; + aarch64_atomic_stub_t stub; + switch (order) { + case memory_order_relaxed: + stub = aarch64_atomic_fetch_add_8_relaxed_impl; break; + default: + stub = aarch64_atomic_fetch_add_8_impl; break; + } + return atomic_fastcall(stub, dest, add_value); } template<> @@ -151,6 +161,11 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, switch (order) { case memory_order_relaxed: stub = aarch64_atomic_cmpxchg_4_relaxed_impl; break; + case memory_order_release: + stub = aarch64_atomic_cmpxchg_4_release_impl; break; + case memory_order_acq_rel: + case memory_order_seq_cst: + stub = aarch64_atomic_cmpxchg_4_seq_cst_impl; break; default: stub = aarch64_atomic_cmpxchg_4_impl; break; } @@ -169,6 +184,11 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, switch (order) { case memory_order_relaxed: stub = aarch64_atomic_cmpxchg_8_relaxed_impl; break; + case memory_order_release: + stub = aarch64_atomic_cmpxchg_8_release_impl; break; + case memory_order_acq_rel: + case memory_order_seq_cst: + stub = aarch64_atomic_cmpxchg_8_seq_cst_impl; break; default: stub = aarch64_atomic_cmpxchg_8_impl; break; } diff --git a/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp b/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp index 6fc0c840642f142ff987924ce0a69118461ac061..7df65bed1eef2078caead22b0c21f1536d7f4292 100644 --- a/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp +++ b/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp @@ -468,9 +468,8 @@ void os::print_register_info(outputStream *st, const void *context) { st->print_cr("Register to memory mapping:"); st->cr(); for (int r = 0; r < ARM_REGS_IN_CONTEXT; r++) { - st->print_cr(" %-3s = " INTPTR_FORMAT, as_Register(r)->name(), reg_area[r]); + st->print(" %-3s = ", as_Register(r)->name()); print_location(st, reg_area[r]); - st->cr(); } st->cr(); } diff --git a/src/hotspot/os_cpu/linux_ppc/gc/z/zSyscall_linux_ppc.hpp b/src/hotspot/os_cpu/linux_ppc/gc/z/zSyscall_linux_ppc.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5950b52136db83d711e8d8a43e3af3a353f7e1ce --- /dev/null +++ b/src/hotspot/os_cpu/linux_ppc/gc/z/zSyscall_linux_ppc.hpp @@ -0,0 +1,42 @@ +/* + * 
Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#ifndef OS_CPU_LINUX_PPC_GC_Z_ZSYSCALL_LINUX_PPC_HPP +#define OS_CPU_LINUX_PPC_GC_Z_ZSYSCALL_LINUX_PPC_HPP + +#include <sys/syscall.h> + +// +// Support for building on older Linux systems +// + + +#ifndef SYS_memfd_create +#define SYS_memfd_create 360 +#endif +#ifndef SYS_fallocate +#define SYS_fallocate 309 +#endif + +#endif // OS_CPU_LINUX_PPC_GC_Z_ZSYSCALL_LINUX_PPC_HPP diff --git a/src/hotspot/os_cpu/linux_ppc/thread_linux_ppc.cpp b/src/hotspot/os_cpu/linux_ppc/thread_linux_ppc.cpp index 9f779456640211c636aa03ae1ab09f46278b5217..d09608c6aa763ff7f0b9cefea44f7871c5e95660 100644 --- a/src/hotspot/os_cpu/linux_ppc/thread_linux_ppc.cpp +++ b/src/hotspot/os_cpu/linux_ppc/thread_linux_ppc.cpp @@ -35,6 +35,8 @@ frame JavaThread::pd_last_frame() { address pc = _anchor.last_Java_pc(); // Last_Java_pc is not set if we come here from compiled code. + // Assume spill slot for link register contains a suitable pc. + // Should have been filled by method entry code. if (pc == NULL) { pc = (address) *(sp + 2); } @@ -64,6 +66,17 @@ bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, return false; } + if (ret_frame.fp() == NULL) { + // The found frame does not have a valid frame pointer. + // Bail out because this will create big trouble later on, either + // - when using istate, calculated as (NULL - ijava_state_size) or + // - when using fp() directly in safe_for_sender() + // + // There is no conclusive description (yet) of how this could happen, but it does. + // For more details on what was observed, see thread_linux_s390.cpp + return false; + } + if (ret_frame.is_interpreted_frame()) { frame::ijava_state *istate = ret_frame.get_ijava_state(); const Method *m = (const Method*)(istate->method); diff --git a/src/hotspot/os_cpu/linux_s390/thread_linux_s390.cpp b/src/hotspot/os_cpu/linux_s390/thread_linux_s390.cpp index eeaf2f47fc607faa828afe9238a725de2cf88f6f..d06b851a99f67dad72fc2ef554c33f15f0c8335e 100644 --- a/src/hotspot/os_cpu/linux_s390/thread_linux_s390.cpp +++ b/src/hotspot/os_cpu/linux_s390/thread_linux_s390.cpp @@ -35,6 +35,8 @@ frame JavaThread::pd_last_frame() { address pc = _anchor.last_Java_pc(); // Last_Java_pc is not set if we come here from compiled code. + // Assume spill slot for Z_R14 (return register) contains a suitable pc. + // Should have been filled by method entry code.
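+ // (Editorial note, assuming the standard s390 ABI frame layout: the method
+ // entry code spills Z_R14, the return register, into slot 14 of the frame's
+ // register save area, so reading *(sp + 14) below recovers a usable return
+ // pc. The ppc64 variant above reads the link register spill slot at
+ // *(sp + 2) in the same way.)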
if (pc == NULL) { pc = (address) *(sp + 14); } @@ -51,6 +53,9 @@ bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, return true; } + // At this point, we don't have a last_Java_frame, so + // we try to glean some information out of the ucontext + // if we were running Java code when SIGPROF came in. if (isInJava) { ucontext_t* uc = (ucontext_t*) ucontext; frame ret_frame((intptr_t*)uc->uc_mcontext.gregs[15/*Z_SP*/], @@ -61,6 +66,38 @@ bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, return false; } + if (ret_frame.fp() == NULL) { + // The found frame does not have a valid frame pointer. + // Bail out because this will create big trouble later on, either + // - when using istate, calculated as (NULL - z_ijava_state_size (= 0x70 (dbg) or 0x68 (rel))) or + // - when using fp() directly in safe_for_sender() + // + // There is no conclusive description (yet) of how this could happen, but it does: + // + // We observed a SIGSEGV with the following stack trace (openjdk.jdk11u-dev, 2021-07-07, linuxs390x fastdebug) + // V [libjvm.so+0x12c8f12] JavaThread::pd_get_top_frame_for_profiling(frame*, void*, bool)+0x142 + // V [libjvm.so+0xb1020c] JfrGetCallTrace::get_topframe(void*, frame&)+0x3c + // V [libjvm.so+0xba0b08] OSThreadSampler::protected_task(os::SuspendedThreadTaskContext const&)+0x98 + // V [libjvm.so+0xff33c4] os::SuspendedThreadTask::internal_do_task()+0x14c + // V [libjvm.so+0xfe3c9c] os::SuspendedThreadTask::run()+0x24 + // V [libjvm.so+0xba0c66] JfrThreadSampleClosure::sample_thread_in_java(JavaThread*, JfrStackFrame*, unsigned int)+0x66 + // V [libjvm.so+0xba1718] JfrThreadSampleClosure::do_sample_thread(JavaThread*, JfrStackFrame*, unsigned int, JfrSampleType)+0x278 + // V [libjvm.so+0xba4f54] JfrThreadSampler::task_stacktrace(JfrSampleType, JavaThread**) [clone .constprop.62]+0x284 + // V [libjvm.so+0xba5e54] JfrThreadSampler::run()+0x2ec + // V [libjvm.so+0x12adc9c] Thread::call_run()+0x9c + // V [libjvm.so+0xff5ab0] thread_native_entry(Thread*)+0x128 + // siginfo: si_signo: 11 (SIGSEGV), si_code: 1 (SEGV_MAPERR), si_addr: 0xfffffffffffff000 + // failing instruction: e320 6008 0004 LG r2,8(r0,r6) + // contents of r6: 0xffffffffffffff90 + // + // Here is the sequence of what happens: + // - ret_frame is constructed with _fp == NULL (for whatever reason) + // - ijava_state_unchecked() calculates its result as + // istate = fp() - z_ijava_state_size() = NULL - 0x68 DEBUG_ONLY(-8) + // - istate->method dereferences memory at offset 8 from istate + return false; + } + if (ret_frame.is_interpreted_frame()) { frame::z_ijava_state* istate = ret_frame.ijava_state_unchecked(); if (is_in_full_stack((address)istate)) { diff --git a/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.hpp b/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.hpp index 076cc5e8f3b459d8901e2f9af5dc12766f6d4d38..a22f547c071be6f3aad836f208eb0cd119f326da 100644 --- a/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.hpp +++ b/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.hpp @@ -56,14 +56,18 @@ inline void OrderAccess::fence() { } inline void OrderAccess::cross_modify_fence_impl() { - int idx = 0; + if (VM_Version::supports_serialize()) { + __asm__ volatile (".byte 0x0f, 0x01, 0xe8\n\t" : : :); // serialize + } else { + int idx = 0; #ifdef AMD64 - __asm__ volatile ("cpuid " : "+a" (idx) : : "ebx", "ecx", "edx", "memory"); + __asm__ volatile ("cpuid " : "+a" (idx) : : "ebx", "ecx", "edx", "memory"); #else - // On some x86 systems EBX is a reserved register
that cannot be - // clobbered, so we must protect it around the CPUID. - __asm__ volatile ("xchg %%esi, %%ebx; cpuid; xchg %%esi, %%ebx " : "+a" (idx) : : "esi", "ecx", "edx", "memory"); + // On some x86 systems EBX is a reserved register that cannot be + // clobbered, so we must protect it around the CPUID. + __asm__ volatile ("xchg %%esi, %%ebx; cpuid; xchg %%esi, %%ebx " : "+a" (idx) : : "esi", "ecx", "edx", "memory"); #endif + } } #endif // OS_CPU_LINUX_X86_ORDERACCESS_LINUX_X86_HPP diff --git a/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp b/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp index 0360bcb6943a02bcbd649f01b17b9577ef3d9919..51a7fa8b0fc15e1e7182c258e3c252eb359777b2 100644 --- a/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp +++ b/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp @@ -199,6 +199,20 @@ size_t os::Posix::default_stack_size(os::ThreadType thr_type) { } static void current_stack_region(address *bottom, size_t *size) { + if (os::is_primordial_thread()) { + // primordial thread needs special handling because pthread_getattr_np() + // may return bogus value. + address stack_bottom = os::Linux::initial_thread_stack_bottom(); + size_t stack_bytes = os::Linux::initial_thread_stack_size(); + + assert(os::current_stack_pointer() >= stack_bottom, "should do"); + assert(os::current_stack_pointer() < stack_bottom + stack_bytes, "should do"); + + *bottom = stack_bottom; + *size = stack_bytes; + return; + } + pthread_attr_t attr; int res = pthread_getattr_np(pthread_self(), &attr); if (res != 0) { @@ -247,18 +261,6 @@ static void current_stack_region(address *bottom, size_t *size) { pthread_attr_destroy(&attr); - // The initial thread has a growable stack, and the size reported - // by pthread_attr_getstack is the maximum size it could possibly - // be given what currently mapped. This can be huge, so we cap it. - if (os::is_primordial_thread()) { - stack_bytes = stack_top - stack_bottom; - - if (stack_bytes > JavaThread::stack_size_at_create()) - stack_bytes = JavaThread::stack_size_at_create(); - - stack_bottom = stack_top - stack_bytes; - } - assert(os::current_stack_pointer() >= stack_bottom, "should do"); assert(os::current_stack_pointer() < stack_top, "should do"); diff --git a/src/hotspot/os_cpu/windows_aarch64/pauth_windows_aarch64.inline.hpp b/src/hotspot/os_cpu/windows_aarch64/pauth_windows_aarch64.inline.hpp index bf1d2aa99e1c8e6d93d9d3b8a4b979237baeed02..844291ee1e41231818704e1a9321632e26f98b50 100644 --- a/src/hotspot/os_cpu/windows_aarch64/pauth_windows_aarch64.inline.hpp +++ b/src/hotspot/os_cpu/windows_aarch64/pauth_windows_aarch64.inline.hpp @@ -22,13 +22,13 @@ * */ -#ifndef OS_CPU_LINUX_AARCH64_PAUTH_WINDOWS_AARCH64_INLINE_HPP -#define OS_CPU_LINUX_AARCH64_PAUTH_WINDOWS_AARCH64_INLINE_HPP +#ifndef OS_CPU_WINDOWS_AARCH64_PAUTH_WINDOWS_AARCH64_INLINE_HPP +#define OS_CPU_WINDOWS_AARCH64_PAUTH_WINDOWS_AARCH64_INLINE_HPP inline address pauth_strip_pointer(address ptr) { // No PAC support in windows as of yet. 
return ptr; } -#endif // OS_CPU_LINUX_AARCH64_PAUTH_WINDOWS_AARCH64_INLINE_HPP +#endif // OS_CPU_WINDOWS_AARCH64_PAUTH_WINDOWS_AARCH64_INLINE_HPP diff --git a/src/hotspot/os_cpu/windows_x86/orderAccess_windows_x86.hpp b/src/hotspot/os_cpu/windows_x86/orderAccess_windows_x86.hpp index f7d990a64c47f5010be40c248e6b7d4124140197..43764b14a01d109b53a3be0466638b0d1125c676 100644 --- a/src/hotspot/os_cpu/windows_x86/orderAccess_windows_x86.hpp +++ b/src/hotspot/os_cpu/windows_x86/orderAccess_windows_x86.hpp @@ -58,9 +58,22 @@ inline void OrderAccess::fence() { compiler_barrier(); } -inline void OrderAccess::cross_modify_fence_impl() { +inline void OrderAccess::cross_modify_fence_impl() +#if _MSC_VER >= 1928 +{ +//_serialize() intrinsic is supported starting from VS2019-16.7.2 + if (VM_Version::supports_serialize()) { + _serialize(); + } else { + int regs[4]; + __cpuid(regs, 0); + } +} +#else +{ int regs[4]; __cpuid(regs, 0); } +#endif #endif // OS_CPU_WINDOWS_X86_ORDERACCESS_WINDOWS_X86_HPP diff --git a/src/hotspot/share/adlc/arena.hpp b/src/hotspot/share/adlc/arena.hpp index d0dac9d45ca67003bdb30295840147abb15b0169..1fa99ed0e24bf7b9d840016e2adb16e22155c974 100644 --- a/src/hotspot/share/adlc/arena.hpp +++ b/src/hotspot/share/adlc/arena.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -118,8 +118,7 @@ public: } } // Further assume size is padded out to words - // Warning: in LP64, Amalloc_4 is really Amalloc_8 - void *Amalloc_4(size_t x) { + void *AmallocWords(size_t x) { assert( (x&(sizeof(char*)-1)) == 0, "misaligned size" ); if (_hwm + x > _max) { return grow(x); diff --git a/src/hotspot/share/adlc/dict2.cpp b/src/hotspot/share/adlc/dict2.cpp index 2dc60b250bf84db5d9533d65490562d2bfc2eb2e..4f3e33dd626b467324d69a6f8605c09531612ab3 100644 --- a/src/hotspot/share/adlc/dict2.cpp +++ b/src/hotspot/share/adlc/dict2.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -74,7 +74,7 @@ void Dict::init() { _size = 16; // Size is a power of 2 _cnt = 0; // Dictionary is empty - _bin = (bucket*)_arena->Amalloc_4(sizeof(bucket) * _size); + _bin = (bucket*)_arena->AmallocWords(sizeof(bucket) * _size); memset(_bin, 0, sizeof(bucket) * _size); } @@ -115,7 +115,7 @@ void Dict::doubhash(void) { if( !j ) j = 1; // Handle zero-sized buckets nb->_max = j<<1; // Allocate worst case space for key-value pairs - nb->_keyvals = (const void**)_arena->Amalloc_4( sizeof(void *)*nb->_max*2 ); + nb->_keyvals = (const void**)_arena->AmallocWords( sizeof(void *)*nb->_max*2 ); int nbcnt = 0; for( j=0; j_cnt; j++ ) { // Rehash all keys in this bucket @@ -138,11 +138,11 @@ void Dict::doubhash(void) { //------------------------------Dict----------------------------------------- // Deep copy a dictionary. 
Dict::Dict( const Dict &d ) : _size(d._size), _cnt(d._cnt), _hash(d._hash),_cmp(d._cmp), _arena(d._arena) { - _bin = (bucket*)_arena->Amalloc_4(sizeof(bucket)*_size); + _bin = (bucket*)_arena->AmallocWords(sizeof(bucket)*_size); memcpy( _bin, d._bin, sizeof(bucket)*_size ); for( int i=0; i<_size; i++ ) { if( !_bin[i]._keyvals ) continue; - _bin[i]._keyvals=(const void**)_arena->Amalloc_4( sizeof(void *)*_bin[i]._max*2); + _bin[i]._keyvals=(const void**)_arena->AmallocWords( sizeof(void *)*_bin[i]._max*2); memcpy( _bin[i]._keyvals, d._bin[i]._keyvals,_bin[i]._cnt*2*sizeof(void*)); } } @@ -195,7 +195,7 @@ const void *Dict::Insert(const void *key, const void *val) { if( b->_cnt == b->_max ) { // Must grow bucket? if( !b->_keyvals ) { b->_max = 2; // Initial bucket size - b->_keyvals = (const void**)_arena->Amalloc_4( sizeof(void *)*b->_max*2 ); + b->_keyvals = (const void**)_arena->AmallocWords( sizeof(void *)*b->_max*2 ); } else { b->_keyvals = (const void**)_arena->Arealloc( b->_keyvals, sizeof(void *)*b->_max*2, sizeof(void *)*b->_max*4 ); b->_max <<= 1; // Double bucket diff --git a/src/hotspot/share/adlc/main.cpp b/src/hotspot/share/adlc/main.cpp index 6f6c1bc6e30c98ed0b48385c82fb5e25cad81ce9..63123d0bcc97512dbee115b24aca2e13f14cdd01 100644 --- a/src/hotspot/share/adlc/main.cpp +++ b/src/hotspot/share/adlc/main.cpp @@ -229,7 +229,6 @@ int main(int argc, char *argv[]) AD.addInclude(AD._CPP_file, "opto/regalloc.hpp"); AD.addInclude(AD._CPP_file, "opto/regmask.hpp"); AD.addInclude(AD._CPP_file, "opto/runtime.hpp"); - AD.addInclude(AD._CPP_file, "runtime/biasedLocking.hpp"); AD.addInclude(AD._CPP_file, "runtime/safepointMechanism.hpp"); AD.addInclude(AD._CPP_file, "runtime/sharedRuntime.hpp"); AD.addInclude(AD._CPP_file, "runtime/stubRoutines.hpp"); diff --git a/src/hotspot/share/adlc/output_c.cpp b/src/hotspot/share/adlc/output_c.cpp index a0dbb7b1d28b5f5dd832a024df72593459cbf8fc..847e6cb222277e6672556e35d84e59a1951e0cbd 100644 --- a/src/hotspot/share/adlc/output_c.cpp +++ b/src/hotspot/share/adlc/output_c.cpp @@ -1342,6 +1342,9 @@ static void generate_peepreplace( FILE *fp, FormDict &globals, PeepMatch *pmatch assert( false, "ShouldNotReachHere();"); } + for (int i = 0; i <= max_position; i++) { + fprintf(fp, " inst%d->set_removed();\n", i); + } // Return the new sub-tree fprintf(fp, " deleted = %d;\n", max_position+1 /*zero to one based*/); fprintf(fp, " return root; // return new root;\n"); @@ -1526,7 +1529,6 @@ void ArchDesc::defineExpand(FILE *fp, InstructForm *node) { } if (node->is_ideal_fastlock() && new_inst->is_ideal_fastlock()) { - fprintf(fp, " ((MachFastLockNode*)n%d)->_counters = _counters;\n", cnt); fprintf(fp, " ((MachFastLockNode*)n%d)->_rtm_counters = _rtm_counters;\n", cnt); fprintf(fp, " ((MachFastLockNode*)n%d)->_stack_rtm_counters = _stack_rtm_counters;\n", cnt); } @@ -3941,7 +3943,6 @@ void ArchDesc::buildMachNode(FILE *fp_cpp, InstructForm *inst, const char *inden fprintf(fp_cpp, "%s node->_probs = _leaf->as_Jump()->_probs;\n", indent); } if( inst->is_ideal_fastlock() ) { - fprintf(fp_cpp, "%s node->_counters = _leaf->as_FastLock()->counters();\n", indent); fprintf(fp_cpp, "%s node->_rtm_counters = _leaf->as_FastLock()->rtm_counters();\n", indent); fprintf(fp_cpp, "%s node->_stack_rtm_counters = _leaf->as_FastLock()->stack_rtm_counters();\n", indent); } diff --git a/src/hotspot/share/adlc/output_h.cpp b/src/hotspot/share/adlc/output_h.cpp index 4a31813139049dfaedbadf4dff66a4fa48b8f99a..0ca2048a765eb53d876d95070b6d3039a79c34f6 100644 --- 
a/src/hotspot/share/adlc/output_h.cpp +++ b/src/hotspot/share/adlc/output_h.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1940,31 +1940,20 @@ void ArchDesc::declareClasses(FILE *fp) { // it doesn't understand what that might alias. fprintf(fp," const Type *bottom_type() const { return TypeRawPtr::BOTTOM; } // Box?\n"); } - else if( instr->_matrule && instr->_matrule->_rChild && !strcmp(instr->_matrule->_rChild->_opType,"CMoveP") ) { + else if (instr->_matrule && instr->_matrule->_rChild && + (!strcmp(instr->_matrule->_rChild->_opType,"CMoveP") || !strcmp(instr->_matrule->_rChild->_opType,"CMoveN")) ) { int offset = 1; // Special special hack to see if the Cmp? has been incorporated in the conditional move MatchNode *rl = instr->_matrule->_rChild->_lChild; - if( rl && !strcmp(rl->_opType, "Binary") ) { - MatchNode *rlr = rl->_rChild; - if (rlr && strncmp(rlr->_opType, "Cmp", 3) == 0) - offset = 2; - } - // Special hack for ideal CMoveP; ideal type depends on inputs - fprintf(fp," const Type *bottom_type() const { const Type *t = in(oper_input_base()+%d)->bottom_type(); return (req() <= oper_input_base()+%d) ? t : t->meet(in(oper_input_base()+%d)->bottom_type()); } // CMoveP\n", - offset, offset+1, offset+1); - } - else if( instr->_matrule && instr->_matrule->_rChild && !strcmp(instr->_matrule->_rChild->_opType,"CMoveN") ) { - int offset = 1; - // Special special hack to see if the Cmp? has been incorporated in the conditional move - MatchNode *rl = instr->_matrule->_rChild->_lChild; - if( rl && !strcmp(rl->_opType, "Binary") ) { - MatchNode *rlr = rl->_rChild; - if (rlr && strncmp(rlr->_opType, "Cmp", 3) == 0) - offset = 2; + if (rl && !strcmp(rl->_opType, "Binary") && rl->_rChild && strncmp(rl->_rChild->_opType, "Cmp", 3) == 0) { + offset = 2; + fprintf(fp," const Type *bottom_type() const { if (req() == 3) return in(2)->bottom_type();\n\tconst Type *t = in(oper_input_base()+%d)->bottom_type(); return (req() <= oper_input_base()+%d) ? t : t->meet(in(oper_input_base()+%d)->bottom_type()); } // %s\n", + offset, offset+1, offset+1, instr->_matrule->_rChild->_opType); + } else { + // Special hack for ideal CMove; ideal type depends on inputs + fprintf(fp," const Type *bottom_type() const { const Type *t = in(oper_input_base()+%d)->bottom_type(); return (req() <= oper_input_base()+%d) ? t : t->meet(in(oper_input_base()+%d)->bottom_type()); } // %s\n", + offset, offset+1, offset+1, instr->_matrule->_rChild->_opType); } - // Special hack for ideal CMoveN; ideal type depends on inputs - fprintf(fp," const Type *bottom_type() const { const Type *t = in(oper_input_base()+%d)->bottom_type(); return (req() <= oper_input_base()+%d) ? 
t : t->meet(in(oper_input_base()+%d)->bottom_type()); } // CMoveN\n", - offset, offset+1, offset+1); } else if (instr->is_tls_instruction()) { // Special hack for tlsLoadP diff --git a/src/hotspot/share/asm/assembler.hpp b/src/hotspot/share/asm/assembler.hpp index 202cba10c5d5396d1190604038db05235ad1c212..10fa79eb30a121756564c94ad634d97de79023e1 100644 --- a/src/hotspot/share/asm/assembler.hpp +++ b/src/hotspot/share/asm/assembler.hpp @@ -91,7 +91,7 @@ class Label { int _patch_index; GrowableArray* _patch_overflow; - Label(const Label&) { ShouldNotReachHere(); } + NONCOPYABLE(Label); protected: // The label will be bound to a location near its users. diff --git a/src/hotspot/share/asm/codeBuffer.cpp b/src/hotspot/share/asm/codeBuffer.cpp index 7d76c8ae751dee352be16a6bc7847cdd3ab23829..e8919b6ab382e60debc8c0f47729a91282c91c53 100644 --- a/src/hotspot/share/asm/codeBuffer.cpp +++ b/src/hotspot/share/asm/codeBuffer.cpp @@ -88,7 +88,7 @@ typedef CodeBuffer::csize_t csize_t; // file-local definition // External buffer, in a predefined CodeBlob. // Important: The code_start must be taken exactly, and not realigned. -CodeBuffer::CodeBuffer(CodeBlob* blob) { +CodeBuffer::CodeBuffer(CodeBlob* blob) DEBUG_ONLY(: Scrubber(this, sizeof(*this))) { // Provide code buffer with meaningful name initialize_misc(blob->name()); initialize(blob->content_begin(), blob->content_size()); @@ -126,11 +126,9 @@ void CodeBuffer::initialize(csize_t code_size, csize_t locs_size) { CodeBuffer::~CodeBuffer() { verify_section_allocation(); - // If we allocate our code buffer from the CodeCache - // via a BufferBlob, and it's not permanent, then - // free the BufferBlob. - // The rest of the memory will be freed when the ResourceObj - // is released. + // If we allocated our code buffer from the CodeCache via a BufferBlob, and + // it's not permanent, then free the BufferBlob. The rest of the memory + // will be freed when the ResourceObj is released. for (CodeBuffer* cb = this; cb != NULL; cb = cb->before_expand()) { // Previous incarnations of this buffer are held live, so that internal // addresses constructed before expansions will not be confused. @@ -140,18 +138,9 @@ CodeBuffer::~CodeBuffer() { // free any overflow storage delete _overflow_arena; - // Claim is that stack allocation ensures resources are cleaned up. - // This is resource clean up, let's hope that all were properly copied out. - NOT_PRODUCT(free_strings();) + NOT_PRODUCT(clear_strings()); -#ifdef ASSERT - // Save allocation type to execute assert in ~ResourceObj() - // which is called after this destructor. assert(_default_oop_recorder.allocated_on_stack(), "should be embedded object"); - ResourceObj::allocation_type at = _default_oop_recorder.get_allocation_type(); - Copy::fill_to_bytes(this, sizeof(*this), badResourceValue); - ResourceObj::set_allocation_type((address)(&_default_oop_recorder), at); -#endif } void CodeBuffer::initialize_oop_recorder(OopRecorder* r) { @@ -715,8 +704,9 @@ void CodeBuffer::copy_code_to(CodeBlob* dest_blob) { relocate_code_to(&dest); - // transfer strings and comments from buffer to blob - NOT_PRODUCT(dest_blob->set_strings(_code_strings);) + // Share assembly remarks and debug strings with the blob. + NOT_PRODUCT(dest_blob->use_remarks(_asm_remarks)); + NOT_PRODUCT(dest_blob->use_strings(_dbg_strings)); // Done moving code bytes; were they the right size? 
assert((int)align_up(dest.total_content_size(), oopSize) == dest_blob->content_size(), "sanity"); @@ -990,221 +980,342 @@ void CodeBuffer::log_section_sizes(const char* name) { } #ifndef PRODUCT - -void CodeBuffer::block_comment(intptr_t offset, const char * comment) { +void CodeBuffer::block_comment(ptrdiff_t offset, const char* comment) { if (_collect_comments) { - _code_strings.add_comment(offset, comment); + const char* str = _asm_remarks.insert(offset, comment); + postcond(str != comment); } } const char* CodeBuffer::code_string(const char* str) { - return _code_strings.add_string(str); + const char* tmp = _dbg_strings.insert(str); + postcond(tmp != str); + return tmp; } -class CodeString: public CHeapObj { - private: - friend class CodeStrings; - const char * _string; - CodeString* _next; - CodeString* _prev; - intptr_t _offset; - - static long allocated_code_strings; - - ~CodeString() { - assert(_next == NULL && _prev == NULL, "wrong interface for freeing list"); - allocated_code_strings--; - log_trace(codestrings)("Freeing CodeString [%s] (%p)", _string, (void*)_string); - os::free((void*)_string); +void CodeBuffer::decode() { + ttyLocker ttyl; + Disassembler::decode(decode_begin(), insts_end(), tty NOT_PRODUCT(COMMA &asm_remarks())); + _decode_begin = insts_end(); +} + +void CodeSection::print(const char* name) { + csize_t locs_size = locs_end() - locs_start(); + tty->print_cr(" %7s.code = " PTR_FORMAT " : " PTR_FORMAT " : " PTR_FORMAT " (%d of %d)", + name, p2i(start()), p2i(end()), p2i(limit()), size(), capacity()); + tty->print_cr(" %7s.locs = " PTR_FORMAT " : " PTR_FORMAT " : " PTR_FORMAT " (%d of %d) point=%d", + name, p2i(locs_start()), p2i(locs_end()), p2i(locs_limit()), locs_size, locs_capacity(), locs_point_off()); + if (PrintRelocations) { + RelocIterator iter(this); + iter.print(); } +} - bool is_comment() const { return _offset >= 0; } +void CodeBuffer::print() { + if (this == NULL) { + tty->print_cr("NULL CodeBuffer pointer"); + return; + } + tty->print_cr("CodeBuffer:"); + for (int n = 0; n < (int)SECT_LIMIT; n++) { + // print each section + CodeSection* cs = code_section(n); + cs->print(code_section_name(n)); + } +} + +// ----- CHeapString ----------------------------------------------------------- + +class CHeapString : public CHeapObj { public: - CodeString(const char * string, intptr_t offset = -1) - : _next(NULL), _prev(NULL), _offset(offset) { - allocated_code_strings++; - _string = os::strdup(string, mtCode); - log_trace(codestrings)("Created CodeString [%s] (%p)", _string, (void*)_string); + CHeapString(const char* str) : _string(os::strdup(str)) {} + ~CHeapString() { + os::free((void*)_string); + _string = nullptr; } + const char* string() const { return _string; } + + private: + const char* _string; +}; - const char * string() const { return _string; } - intptr_t offset() const { assert(_offset >= 0, "offset for non comment?"); return _offset; } - CodeString* next() const { return _next; } +// ----- AsmRemarkCollection --------------------------------------------------- - void set_next(CodeString* next) { - _next = next; - if (next != NULL) { - next->_prev = this; - } +class AsmRemarkCollection : public CHeapObj { + public: + AsmRemarkCollection() : _ref_cnt(1), _remarks(nullptr), _next(nullptr) {} + ~AsmRemarkCollection() { + assert(is_empty(), "Must 'clear()' before deleting!"); + assert(_ref_cnt == 0, "No uses must remain when deleting!"); + } + AsmRemarkCollection* reuse() { + precond(_ref_cnt > 0); + return _ref_cnt++, this; } - CodeString* 
first_comment() { - if (is_comment()) { - return this; - } else { - return next_comment(); + const char* insert(uint offset, const char* remark); + const char* lookup(uint offset) const; + const char* next(uint offset) const; + + bool is_empty() const { return _remarks == nullptr; } + uint clear(); + + private: + struct Cell : CHeapString { + Cell(const char* remark, uint offset) : + CHeapString(remark), offset(offset), prev(nullptr), next(nullptr) {} + void push_back(Cell* cell) { + Cell* head = this; + Cell* tail = prev; + tail->next = cell; + cell->next = head; + cell->prev = tail; + prev = cell; } + uint offset; + Cell* prev; + Cell* next; + }; + uint _ref_cnt; + Cell* _remarks; + // Using a 'mutable' iteration pointer to allow 'const' on lookup/next (that + // does not change the state of the list per se), supportig a simplistic + // iteration scheme. + mutable Cell* _next; +}; + +// ----- DbgStringCollection --------------------------------------------------- + +class DbgStringCollection : public CHeapObj { + public: + DbgStringCollection() : _ref_cnt(1), _strings(nullptr) {} + ~DbgStringCollection() { + assert(is_empty(), "Must 'clear()' before deleting!"); + assert(_ref_cnt == 0, "No uses must remain when deleting!"); } - CodeString* next_comment() const { - CodeString* s = _next; - while (s != NULL && !s->is_comment()) { - s = s->_next; - } - return s; + DbgStringCollection* reuse() { + precond(_ref_cnt > 0); + return _ref_cnt++, this; } + + const char* insert(const char* str); + const char* lookup(const char* str) const; + + bool is_empty() const { return _strings == nullptr; } + uint clear(); + + private: + struct Cell : CHeapString { + Cell(const char* dbgstr) : + CHeapString(dbgstr), prev(nullptr), next(nullptr) {} + void push_back(Cell* cell) { + Cell* head = this; + Cell* tail = prev; + tail->next = cell; + cell->next = head; + cell->prev = tail; + prev = cell; + } + Cell* prev; + Cell* next; + }; + uint _ref_cnt; + Cell* _strings; }; -// For tracing statistics. Will use raw increment/decrement, so it might not be -// exact -long CodeString::allocated_code_strings = 0; +// ----- AsmRemarks ------------------------------------------------------------ +// +// Acting as interface to reference counted mapping [offset -> remark], where +// offset is a byte offset into an instruction stream (CodeBuffer, CodeBlob or +// other memory buffer) and remark is a string (comment). +// +AsmRemarks::AsmRemarks() : _remarks(new AsmRemarkCollection()) { + assert(_remarks != nullptr, "Allocation failure!"); +} -CodeString* CodeStrings::find(intptr_t offset) const { - CodeString* a = _strings->first_comment(); - while (a != NULL && a->offset() != offset) { - a = a->next_comment(); - } - return a; +AsmRemarks::~AsmRemarks() { + assert(_remarks == nullptr, "Must 'clear()' before deleting!"); } -// Convenience for add_comment. -CodeString* CodeStrings::find_last(intptr_t offset) const { - CodeString* a = _strings_last; - while (a != NULL && !(a->is_comment() && a->offset() == offset)) { - a = a->_prev; - } - return a; +const char* AsmRemarks::insert(uint offset, const char* remstr) { + precond(remstr != nullptr); + return _remarks->insert(offset, remstr); } -void CodeStrings::add_comment(intptr_t offset, const char * comment) { - check_valid(); - CodeString* c = new CodeString(comment, offset); - CodeString* inspos = (_strings == NULL) ? 
NULL : find_last(offset); +bool AsmRemarks::is_empty() const { + return _remarks->is_empty(); +} - if (inspos != NULL) { - // insert after already existing comments with same offset - c->set_next(inspos->next()); - inspos->set_next(c); - } else { - // no comments with such offset, yet. Insert before anything else. - c->set_next(_strings); - _strings = c; - } - if (c->next() == NULL) { - _strings_last = c; - } +void AsmRemarks::share(const AsmRemarks &src) { + precond(is_empty()); + clear(); + _remarks = src._remarks->reuse(); } -// Deep copy of CodeStrings for consistent memory management. -void CodeStrings::copy(CodeStrings& other) { - log_debug(codestrings)("Copying %d Codestring(s)", other.count()); - - other.check_valid(); - check_valid(); - assert(is_null(), "Cannot copy onto non-empty CodeStrings"); - CodeString* n = other._strings; - CodeString** ps = &_strings; - CodeString* prev = NULL; - while (n != NULL) { - if (n->is_comment()) { - *ps = new CodeString(n->string(), n->offset()); - } else { - *ps = new CodeString(n->string()); - } - (*ps)->_prev = prev; - prev = *ps; - ps = &((*ps)->_next); - n = n->next(); +void AsmRemarks::clear() { + if (_remarks->clear() == 0) { + delete _remarks; } + _remarks = nullptr; } -const char* CodeStrings::_prefix = " ;; "; // default: can be changed via set_prefix - -void CodeStrings::print_block_comment(outputStream* stream, intptr_t offset) const { - check_valid(); - if (_strings != NULL) { - CodeString* c = find(offset); - while (c && c->offset() == offset) { - stream->bol(); - stream->print("%s", _prefix); - // Don't interpret as format strings since it could contain % - stream->print_raw(c->string()); - stream->bol(); // advance to next line only if string didn't contain a cr() at the end. - c = c->next_comment(); - } +uint AsmRemarks::print(uint offset, outputStream* strm) const { + uint count = 0; + const char* prefix = " ;; "; + const char* remstr = _remarks->lookup(offset); + while (remstr != nullptr) { + strm->bol(); + strm->print("%s", prefix); + // Don't interpret as format strings since it could contain '%'. + strm->print_raw(remstr); + // Advance to next line iff string didn't contain a cr() at the end. + strm->bol(); + remstr = _remarks->next(offset); + count++; } + return count; +} + +// ----- DbgStrings ------------------------------------------------------------ +// +// Acting as interface to reference counted collection of (debug) strings used +// in the code generated, and thus requiring a fixed address. +// +DbgStrings::DbgStrings() : _strings(new DbgStringCollection()) { + assert(_strings != nullptr, "Allocation failure!"); +} + +DbgStrings::~DbgStrings() { + assert(_strings == nullptr, "Must 'clear()' before deleting!"); +} + +const char* DbgStrings::insert(const char* dbgstr) { + const char* str = _strings->lookup(dbgstr); + return str != nullptr ? str : _strings->insert(dbgstr); +} + +bool DbgStrings::is_empty() const { + return _strings->is_empty(); +} + +void DbgStrings::share(const DbgStrings &src) { + precond(is_empty()); + _strings = src._strings->reuse(); } -int CodeStrings::count() const { - int i = 0; - CodeString* s = _strings; - while (s != NULL) { - i++; - s = s->_next; +void DbgStrings::clear() { + if (_strings->clear() == 0) { + delete _strings; } - return i; + _strings = nullptr; } -// Also sets is_null() -void CodeStrings::free() { - log_debug(codestrings)("Freeing %d out of approx. 
%ld CodeString(s), ", count(), CodeString::allocated_code_strings); - CodeString* n = _strings; - while (n) { - // unlink the node from the list saving a pointer to the next - CodeString* p = n->next(); - n->set_next(NULL); - if (p != NULL) { - assert(p->_prev == n, "missing prev link"); - p->_prev = NULL; - } - delete n; - n = p; +// ----- AsmRemarkCollection --------------------------------------------------- + +const char* AsmRemarkCollection::insert(uint offset, const char* remstr) { + precond(remstr != nullptr); + Cell* cell = new Cell { remstr, offset }; + if (is_empty()) { + cell->prev = cell; + cell->next = cell; + _remarks = cell; + } else { + _remarks->push_back(cell); } - set_null_and_invalidate(); + return cell->string(); } -const char* CodeStrings::add_string(const char * string) { - check_valid(); - CodeString* s = new CodeString(string); - s->set_next(_strings); - if (_strings == NULL) { - _strings_last = s; +const char* AsmRemarkCollection::lookup(uint offset) const { + _next = _remarks; + return next(offset); +} + +const char* AsmRemarkCollection::next(uint offset) const { + if (_next != nullptr) { + Cell* i = _next; + do { + if (i->offset == offset) { + _next = i->next == _remarks ? nullptr : i->next; + return i->string(); + } + i = i->next; + } while (i != _remarks); + _next = nullptr; } - _strings = s; - assert(s->string() != NULL, "should have a string"); - return s->string(); + return nullptr; } -void CodeBuffer::decode() { - ttyLocker ttyl; - Disassembler::decode(decode_begin(), insts_end(), tty NOT_PRODUCT(COMMA &strings())); - _decode_begin = insts_end(); +uint AsmRemarkCollection::clear() { + precond(_ref_cnt > 0); + if (--_ref_cnt > 0) { + return _ref_cnt; + } + if (!is_empty()) { + uint count = 0; + Cell* i = _remarks; + do { + Cell* next = i->next; + delete i; + i = next; + count++; + } while (i != _remarks); + + log_debug(codestrings)("Clear %u asm-remark%s.", count, count == 1 ? "" : "s"); + _remarks = nullptr; + } + return 0; // i.e. 
_ref_cnt == 0 } -void CodeSection::print(const char* name) { - csize_t locs_size = locs_end() - locs_start(); - tty->print_cr(" %7s.code = " PTR_FORMAT " : " PTR_FORMAT " : " PTR_FORMAT " (%d of %d)", - name, p2i(start()), p2i(end()), p2i(limit()), size(), capacity()); - tty->print_cr(" %7s.locs = " PTR_FORMAT " : " PTR_FORMAT " : " PTR_FORMAT " (%d of %d) point=%d", - name, p2i(locs_start()), p2i(locs_end()), p2i(locs_limit()), locs_size, locs_capacity(), locs_point_off()); - if (PrintRelocations) { - RelocIterator iter(this); - iter.print(); +// ----- DbgStringCollection --------------------------------------------------- + +const char* DbgStringCollection::insert(const char* dbgstr) { + precond(dbgstr != nullptr); + Cell* cell = new Cell { dbgstr }; + + if (is_empty()) { + cell->prev = cell; + cell->next = cell; + _strings = cell; + } else { + _strings->push_back(cell); } + return cell->string(); } -void CodeBuffer::print() { - if (this == NULL) { - tty->print_cr("NULL CodeBuffer pointer"); - return; +const char* DbgStringCollection::lookup(const char* dbgstr) const { + precond(dbgstr != nullptr); + if (_strings != nullptr) { + Cell* i = _strings; + do { + if (strcmp(i->string(), dbgstr) == 0) { + return i->string(); + } + i = i->next; + } while (i != _strings); } + return nullptr; +} - tty->print_cr("CodeBuffer:"); - for (int n = 0; n < (int)SECT_LIMIT; n++) { - // print each section - CodeSection* cs = code_section(n); - cs->print(code_section_name(n)); +uint DbgStringCollection::clear() { + precond(_ref_cnt > 0); + if (--_ref_cnt > 0) { + return _ref_cnt; + } + if (!is_empty()) { + uint count = 0; + Cell* i = _strings; + do { + Cell* next = i->next; + delete i; + i = next; + count++; + } while (i != _strings); + + log_debug(codestrings)("Clear %u dbg-string%s.", count, count == 1 ? "" : "s"); + _strings = nullptr; } + return 0; // i.e. _ref_cnt == 0 } -#endif // PRODUCT +#endif // not PRODUCT diff --git a/src/hotspot/share/asm/codeBuffer.hpp b/src/hotspot/share/asm/codeBuffer.hpp index a569bd710840339a2771650c116989f4d415ea6a..207be18606752115f4ec063415c2eef390383465 100644 --- a/src/hotspot/share/asm/codeBuffer.hpp +++ b/src/hotspot/share/asm/codeBuffer.hpp @@ -31,7 +31,6 @@ #include "utilities/debug.hpp" #include "utilities/macros.hpp" -class CodeStrings; class PhaseCFG; class Compile; class BufferBlob; @@ -222,7 +221,11 @@ class CodeSection { set_end(curr); } - void emit_int32(int32_t x) { *((int32_t*) end()) = x; set_end(end() + sizeof(int32_t)); } + void emit_int32(int32_t x) { + address curr = end(); + *((int32_t*) curr) = x; + set_end(curr + sizeof(int32_t)); + } void emit_int32(int8_t x1, int8_t x2, int8_t x3, int8_t x4) { address curr = end(); *((int8_t*) curr++) = x1; @@ -269,70 +272,75 @@ class CodeSection { #endif //PRODUCT }; -class CodeString; -class CodeStrings { -private: + #ifndef PRODUCT - CodeString* _strings; - CodeString* _strings_last; -#ifdef ASSERT - // Becomes true after copy-out, forbids further use. - bool _defunct; // Zero bit pattern is "valid", see memset call in decode_env::decode_env -#endif - static const char* _prefix; // defaults to " ;; " - CodeString* find(intptr_t offset) const; - CodeString* find_last(intptr_t offset) const; +class AsmRemarkCollection; +class DbgStringCollection; - void set_null_and_invalidate() { - _strings = NULL; - _strings_last = NULL; -#ifdef ASSERT - _defunct = true; -#endif - } -#endif +// The assumption made here is that most code remarks (or comments) added to +// the generated assembly code are unique, i.e. 
there is very little gain in +// trying to share the strings between the different offsets tracked in a +// buffer (or blob). -public: - CodeStrings() { -#ifndef PRODUCT - _strings = NULL; - _strings_last = NULL; -#ifdef ASSERT - _defunct = false; -#endif -#endif - } +class AsmRemarks { + public: + AsmRemarks(); + ~AsmRemarks(); -#ifndef PRODUCT - bool is_null() { -#ifdef ASSERT - return _strings == NULL; -#else - return true; -#endif - } + const char* insert(uint offset, const char* remstr); - const char* add_string(const char * string); + bool is_empty() const; - void add_comment(intptr_t offset, const char * comment); - void print_block_comment(outputStream* stream, intptr_t offset) const; - int count() const; - // COPY strings from other to this; leave other valid. - void copy(CodeStrings& other); - // FREE strings; invalidate this. - void free(); + void share(const AsmRemarks &src); + void clear(); + uint print(uint offset, outputStream* strm = tty) const; - // Guarantee that _strings are used at most once; assign and free invalidate a buffer. - inline void check_valid() const { - assert(!_defunct, "Use of invalid CodeStrings"); - } + // For testing purposes only. + const AsmRemarkCollection* ref() const { return _remarks; } + +private: + AsmRemarkCollection* _remarks; +}; + +// The assumption made here is that the number of debug strings (with a fixed +// address requirement) is a rather small set per compilation unit. + +class DbgStrings { + public: + DbgStrings(); + ~DbgStrings(); + + const char* insert(const char* dbgstr); + + bool is_empty() const; + + void share(const DbgStrings &src); + void clear(); - static void set_prefix(const char *prefix) { - _prefix = prefix; + // For testing purposes only. + const DbgStringCollection* ref() const { return _strings; } + +private: + DbgStringCollection* _strings; +}; +#endif // not PRODUCT + + +#ifdef ASSERT +#include "utilities/copy.hpp" + +class Scrubber { + public: + Scrubber(void* addr, size_t size) : _addr(addr), _size(size) {} + ~Scrubber() { + Copy::fill_to_bytes(_addr, _size, badResourceValue); } -#endif // !PRODUCT + private: + void* _addr; + size_t _size; }; +#endif // ASSERT // A CodeBuffer describes a memory space into which assembly // code is generated. This memory space usually occupies the @@ -358,7 +366,7 @@ public: // Instructions and data in one section can contain relocatable references to // addresses in a sibling section. -class CodeBuffer: public StackObj { +class CodeBuffer: public StackObj DEBUG_ONLY(COMMA private Scrubber) { friend class CodeSection; friend class StubCodeGenerator; @@ -407,7 +415,8 @@ class CodeBuffer: public StackObj { address _last_insn; // used to merge consecutive memory barriers, loads or stores. #ifndef PRODUCT - CodeStrings _code_strings; + AsmRemarks _asm_remarks; + DbgStrings _dbg_strings; bool _collect_comments; // Indicate if we need to collect block comments at all. address _decode_begin; // start address for decode address decode_begin(); @@ -425,7 +434,6 @@ class CodeBuffer: public StackObj { #ifndef PRODUCT _decode_begin = NULL; - _code_strings = CodeStrings(); // Collect block comments, but restrict collection to cases where a disassembly is output. 
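    // An illustrative sketch (the caller code and names here are assumed, not
    // spelled out by this change): once collection is enabled, a remark
    // recorded through the AsmRemarks interface above is keyed by code offset
    // and replayed by the disassembler, e.g.
    //
    //   cb.asm_remarks().insert(cb.insts_size(), "spill slot");
    //   ...
    //   cb.asm_remarks().print(offset);   // emits " ;; spill slot" on 'tty'
    //
    // with 'cb' some CodeBuffer and 'offset' an offset tagged earlier.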
_collect_comments = ( PrintAssembly || PrintStubCode @@ -480,7 +488,9 @@ class CodeBuffer: public StackObj { public: // (1) code buffer referring to pre-allocated instruction memory - CodeBuffer(address code_start, csize_t code_size) { + CodeBuffer(address code_start, csize_t code_size) + DEBUG_ONLY(: Scrubber(this, sizeof(*this))) + { assert(code_start != NULL, "sanity"); initialize_misc("static buffer"); initialize(code_start, code_size); @@ -493,14 +503,18 @@ class CodeBuffer: public StackObj { // (3) code buffer allocating codeBlob memory for code & relocation // info but with lazy initialization. The name must be something // informative. - CodeBuffer(const char* name) { + CodeBuffer(const char* name) + DEBUG_ONLY(: Scrubber(this, sizeof(*this))) + { initialize_misc(name); } // (4) code buffer allocating codeBlob memory for code & relocation // info. The name must be something informative and code_size must // include both code and stubs sizes. - CodeBuffer(const char* name, csize_t code_size, csize_t locs_size) { + CodeBuffer(const char* name, csize_t code_size, csize_t locs_size) + DEBUG_ONLY(: Scrubber(this, sizeof(*this))) + { initialize_misc(name); initialize(code_size, locs_size); } @@ -630,12 +644,12 @@ class CodeBuffer: public StackObj { void clear_last_insn() { set_last_insn(NULL); } #ifndef PRODUCT - CodeStrings& strings() { return _code_strings; } + AsmRemarks &asm_remarks() { return _asm_remarks; } + DbgStrings &dbg_strings() { return _dbg_strings; } - void free_strings() { - if (!_code_strings.is_null()) { - _code_strings.free(); // sets _strings Null as a side-effect. - } + void clear_strings() { + _asm_remarks.clear(); + _dbg_strings.clear(); } #endif @@ -662,7 +676,7 @@ class CodeBuffer: public StackObj { } } - void block_comment(intptr_t offset, const char * comment) PRODUCT_RETURN; + void block_comment(ptrdiff_t offset, const char* comment) PRODUCT_RETURN; const char* code_string(const char* str) PRODUCT_RETURN_(return NULL;); // Log a little info about section usage in the CodeBuffer diff --git a/src/hotspot/share/asm/register.hpp b/src/hotspot/share/asm/register.hpp index 66f58d52953828e7c28556a8ef34ddd990823f30..861c43b2c84128e12b7fa465aadaa6fa73358739 100644 --- a/src/hotspot/share/asm/register.hpp +++ b/src/hotspot/share/asm/register.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -45,317 +45,32 @@ class AbstractRegisterImpl { int value() const { return (int)(intx)this; } }; - -// -// Macros for use in defining Register instances. We'd like to be -// able to simply define const instances of the RegisterImpl* for each -// of the registers needed on a system in a header file. However many -// compilers don't handle this very well and end up producing a -// private definition in every file which includes the header file. -// Along with the static constructors necessary for initialization it -// can consume a significant amount of space in the result library. -// -// The following macros allow us to declare the instance in a .hpp and -// produce an enumeration value which has the same number. Then in a -// .cpp the the register instance can be defined using the enumeration -// value. This avoids the use of static constructors and multiple -// definitions per .cpp. 
In addition #defines for the register can be -// produced so that the constant registers can be inlined. These -// macros should not be used inside other macros, because you may get -// multiple evaluations of the macros which can give bad results. -// -// Here are some example uses and expansions. Note that the macro -// invocation is terminated with a ;. -// -// CONSTANT_REGISTER_DECLARATION(Register, G0, 0); -// -// extern const Register G0 ; -// enum { G0_RegisterEnumValue = 0 } ; -// -// REGISTER_DECLARATION(Register, Gmethod, G5); -// -// extern const Register Gmethod ; -// enum { Gmethod_RegisterEnumValue = G5_RegisterEnumValue } ; -// -// REGISTER_DEFINITION(Register, G0); -// -// const Register G0 = ( ( Register ) G0_RegisterEnumValue ) ; -// - #define AS_REGISTER(type,name) ((type)name##_##type##EnumValue) -#define CONSTANT_REGISTER_DECLARATION(type, name, value) \ -extern const type name; \ +#define CONSTANT_REGISTER_DECLARATION(type, name, value) \ +const type name = ((type)value); \ enum { name##_##type##EnumValue = (value) } -#define REGISTER_DECLARATION(type, name, value) \ -extern const type name; \ -enum { name##_##type##EnumValue = value##_##type##EnumValue } +#define REGISTER_DECLARATION(type, name, value) \ +const type name = ((type)value) -#define REGISTER_DEFINITION(type, name) \ -const type name = ((type)name##_##type##EnumValue) +#define REGISTER_DEFINITION(type, name) #include CPU_HEADER(register) // Debugging support -inline void assert_different_registers( - AbstractRegister a, - AbstractRegister b -) { - assert( - a != b, - "registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT "", p2i(a), p2i(b) - ); -} - - -inline void assert_different_registers( - AbstractRegister a, - AbstractRegister b, - AbstractRegister c -) { - assert( - a != b && a != c - && b != c, - "registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT - ", c=" INTPTR_FORMAT "", - p2i(a), p2i(b), p2i(c) - ); -} - - -inline void assert_different_registers( - AbstractRegister a, - AbstractRegister b, - AbstractRegister c, - AbstractRegister d -) { - assert( - a != b && a != c && a != d - && b != c && b != d - && c != d, - "registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT - ", c=" INTPTR_FORMAT ", d=" INTPTR_FORMAT "", - p2i(a), p2i(b), p2i(c), p2i(d) - ); -} - - -inline void assert_different_registers( - AbstractRegister a, - AbstractRegister b, - AbstractRegister c, - AbstractRegister d, - AbstractRegister e -) { - assert( - a != b && a != c && a != d && a != e - && b != c && b != d && b != e - && c != d && c != e - && d != e, - "registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT - ", c=" INTPTR_FORMAT ", d=" INTPTR_FORMAT ", e=" INTPTR_FORMAT "", - p2i(a), p2i(b), p2i(c), p2i(d), p2i(e) - ); -} - - -inline void assert_different_registers( - AbstractRegister a, - AbstractRegister b, - AbstractRegister c, - AbstractRegister d, - AbstractRegister e, - AbstractRegister f -) { - assert( - a != b && a != c && a != d && a != e && a != f - && b != c && b != d && b != e && b != f - && c != d && c != e && c != f - && d != e && d != f - && e != f, - "registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT - ", c=" INTPTR_FORMAT ", d=" INTPTR_FORMAT ", e=" INTPTR_FORMAT - ", f=" INTPTR_FORMAT "", - p2i(a), p2i(b), p2i(c), p2i(d), p2i(e), p2i(f) - ); -} - - -inline void assert_different_registers( - AbstractRegister a, - AbstractRegister b, - AbstractRegister c, - AbstractRegister d, - AbstractRegister e, - AbstractRegister f, - 
AbstractRegister g -) { - assert( - a != b && a != c && a != d && a != e && a != f && a != g - && b != c && b != d && b != e && b != f && b != g - && c != d && c != e && c != f && c != g - && d != e && d != f && d != g - && e != f && e != g - && f != g, - "registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT - ", c=" INTPTR_FORMAT ", d=" INTPTR_FORMAT ", e=" INTPTR_FORMAT - ", f=" INTPTR_FORMAT ", g=" INTPTR_FORMAT "", - p2i(a), p2i(b), p2i(c), p2i(d), p2i(e), p2i(f), p2i(g) - ); -} - - -inline void assert_different_registers( - AbstractRegister a, - AbstractRegister b, - AbstractRegister c, - AbstractRegister d, - AbstractRegister e, - AbstractRegister f, - AbstractRegister g, - AbstractRegister h -) { - assert( - a != b && a != c && a != d && a != e && a != f && a != g && a != h - && b != c && b != d && b != e && b != f && b != g && b != h - && c != d && c != e && c != f && c != g && c != h - && d != e && d != f && d != g && d != h - && e != f && e != g && e != h - && f != g && f != h - && g != h, - "registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT - ", c=" INTPTR_FORMAT ", d=" INTPTR_FORMAT ", e=" INTPTR_FORMAT - ", f=" INTPTR_FORMAT ", g=" INTPTR_FORMAT ", h=" INTPTR_FORMAT "", - p2i(a), p2i(b), p2i(c), p2i(d), p2i(e), p2i(f), p2i(g), p2i(h) - ); -} - - -inline void assert_different_registers( - AbstractRegister a, - AbstractRegister b, - AbstractRegister c, - AbstractRegister d, - AbstractRegister e, - AbstractRegister f, - AbstractRegister g, - AbstractRegister h, - AbstractRegister i -) { - assert( - a != b && a != c && a != d && a != e && a != f && a != g && a != h && a != i - && b != c && b != d && b != e && b != f && b != g && b != h && b != i - && c != d && c != e && c != f && c != g && c != h && c != i - && d != e && d != f && d != g && d != h && d != i - && e != f && e != g && e != h && e != i - && f != g && f != h && f != i - && g != h && g != i - && h != i, - "registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT - ", c=" INTPTR_FORMAT ", d=" INTPTR_FORMAT ", e=" INTPTR_FORMAT - ", f=" INTPTR_FORMAT ", g=" INTPTR_FORMAT ", h=" INTPTR_FORMAT - ", i=" INTPTR_FORMAT "", - p2i(a), p2i(b), p2i(c), p2i(d), p2i(e), p2i(f), p2i(g), p2i(h), p2i(i) - ); -} - -inline void assert_different_registers( - AbstractRegister a, - AbstractRegister b, - AbstractRegister c, - AbstractRegister d, - AbstractRegister e, - AbstractRegister f, - AbstractRegister g, - AbstractRegister h, - AbstractRegister i, - AbstractRegister j -) { - assert( - a != b && a != c && a != d && a != e && a != f && a != g && a != h && a != i && a != j - && b != c && b != d && b != e && b != f && b != g && b != h && b != i && b != j - && c != d && c != e && c != f && c != g && c != h && c != i && c != j - && d != e && d != f && d != g && d != h && d != i && d != j - && e != f && e != g && e != h && e != i && e != j - && f != g && f != h && f != i && f != j - && g != h && g != i && g != j - && h != i && h != j - && i != j, - "registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT - ", c=" INTPTR_FORMAT ", d=" INTPTR_FORMAT ", e=" INTPTR_FORMAT - ", f=" INTPTR_FORMAT ", g=" INTPTR_FORMAT ", h=" INTPTR_FORMAT - ", i=" INTPTR_FORMAT ", j=" INTPTR_FORMAT "", - p2i(a), p2i(b), p2i(c), p2i(d), p2i(e), p2i(f), p2i(g), p2i(h), p2i(i), p2i(j) - ); -} - -inline void assert_different_registers( - AbstractRegister a, - AbstractRegister b, - AbstractRegister c, - AbstractRegister d, - AbstractRegister e, - AbstractRegister f, - AbstractRegister g, - AbstractRegister h, - 
AbstractRegister i, - AbstractRegister j, - AbstractRegister k -) { - assert( - a != b && a != c && a != d && a != e && a != f && a != g && a != h && a != i && a != j && a !=k - && b != c && b != d && b != e && b != f && b != g && b != h && b != i && b != j && b !=k - && c != d && c != e && c != f && c != g && c != h && c != i && c != j && c !=k - && d != e && d != f && d != g && d != h && d != i && d != j && d !=k - && e != f && e != g && e != h && e != i && e != j && e !=k - && f != g && f != h && f != i && f != j && f !=k - && g != h && g != i && g != j && g !=k - && h != i && h != j && h !=k - && i != j && i !=k - && j !=k, - "registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT - ", c=" INTPTR_FORMAT ", d=" INTPTR_FORMAT ", e=" INTPTR_FORMAT - ", f=" INTPTR_FORMAT ", g=" INTPTR_FORMAT ", h=" INTPTR_FORMAT - ", i=" INTPTR_FORMAT ", j=" INTPTR_FORMAT ", k=" INTPTR_FORMAT "", - p2i(a), p2i(b), p2i(c), p2i(d), p2i(e), p2i(f), p2i(g), p2i(h), p2i(i), p2i(j), p2i(k) - ); -} - -inline void assert_different_registers( - AbstractRegister a, - AbstractRegister b, - AbstractRegister c, - AbstractRegister d, - AbstractRegister e, - AbstractRegister f, - AbstractRegister g, - AbstractRegister h, - AbstractRegister i, - AbstractRegister j, - AbstractRegister k, - AbstractRegister l -) { - assert( - a != b && a != c && a != d && a != e && a != f && a != g && a != h && a != i && a != j && a !=k && a !=l - && b != c && b != d && b != e && b != f && b != g && b != h && b != i && b != j && b !=k && b !=l - && c != d && c != e && c != f && c != g && c != h && c != i && c != j && c !=k && c !=l - && d != e && d != f && d != g && d != h && d != i && d != j && d !=k && d !=l - && e != f && e != g && e != h && e != i && e != j && e !=k && e !=l - && f != g && f != h && f != i && f != j && f !=k && f !=l - && g != h && g != i && g != j && g !=k && g !=l - && h != i && h != j && h !=k && h !=l - && i != j && i !=k && i !=l - && j !=k && j !=l - && k !=l, - "registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT - ", c=" INTPTR_FORMAT ", d=" INTPTR_FORMAT ", e=" INTPTR_FORMAT - ", f=" INTPTR_FORMAT ", g=" INTPTR_FORMAT ", h=" INTPTR_FORMAT - ", i=" INTPTR_FORMAT ", j=" INTPTR_FORMAT ", k=" INTPTR_FORMAT - ", l=" INTPTR_FORMAT "", - p2i(a), p2i(b), p2i(c), p2i(d), p2i(e), p2i(f), p2i(g), p2i(h), p2i(i), p2i(j), p2i(k), p2i(l) - ); +template<class R, class... Rx> +inline void assert_different_registers(R first_register, Rx... more_registers) { +#ifdef ASSERT + const R regs[] = { first_register, more_registers... }; + // Verify there are no equal entries. + for (size_t i = 0; i < ARRAY_SIZE(regs) - 1; ++i) { + for (size_t j = i + 1; j < ARRAY_SIZE(regs); ++j) { + assert(regs[i] != regs[j], "Multiple uses of register: %s", regs[i]->name()); + } + } +#endif } #endif // SHARE_ASM_REGISTER_HPP diff --git a/src/hotspot/share/c1/c1_Canonicalizer.cpp b/src/hotspot/share/c1/c1_Canonicalizer.cpp index 07ef5eb038c5aa1cfb3cc2752aa3d35d010c9950..a8a29797404b698d88011488a674e961033d4407 100644 --- a/src/hotspot/share/c1/c1_Canonicalizer.cpp +++ b/src/hotspot/share/c1/c1_Canonicalizer.cpp @@ -794,7 +794,7 @@ void Canonicalizer::do_If(If* x) { if (cmp->x() == cmp->y()) { do_If(canon); } else { - if (compilation()->profile_branches() || compilation()->count_backedges()) { + if (compilation()->profile_branches() || compilation()->is_profiling()) { // TODO: If profiling, leave floating point comparisons unoptimized. // We currently do not support profiling of the unordered case.
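      // (Recall the unordered case: if either operand of a float or double
      // compare is NaN, then both 'a < b' and 'a >= b' are false, so such a
      // condition cannot simply be negated without changing behavior on the
      // NaN path.)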
switch(cmp->op()) { @@ -857,171 +857,16 @@ void Canonicalizer::do_Throw (Throw* x) {} void Canonicalizer::do_Base (Base* x) {} void Canonicalizer::do_OsrEntry (OsrEntry* x) {} void Canonicalizer::do_ExceptionObject(ExceptionObject* x) {} - -static bool match_index_and_scale(Instruction* instr, - Instruction** index, - int* log2_scale) { - // Skip conversion ops. This works only on 32bit because of the implicit l2i that the - // unsafe performs. -#ifndef _LP64 - Convert* convert = instr->as_Convert(); - if (convert != NULL && convert->op() == Bytecodes::_i2l) { - assert(convert->value()->type() == intType, "invalid input type"); - instr = convert->value(); - } -#endif - - ShiftOp* shift = instr->as_ShiftOp(); - if (shift != NULL) { - if (shift->op() == Bytecodes::_lshl) { - assert(shift->x()->type() == longType, "invalid input type"); - } else { -#ifndef _LP64 - if (shift->op() == Bytecodes::_ishl) { - assert(shift->x()->type() == intType, "invalid input type"); - } else { - return false; - } -#else - return false; -#endif - } - - - // Constant shift value? - Constant* con = shift->y()->as_Constant(); - if (con == NULL) return false; - // Well-known type and value? - IntConstant* val = con->type()->as_IntConstant(); - assert(val != NULL, "Should be an int constant"); - - *index = shift->x(); - int tmp_scale = val->value(); - if (tmp_scale >= 0 && tmp_scale < 4) { - *log2_scale = tmp_scale; - return true; - } else { - return false; - } - } - - ArithmeticOp* arith = instr->as_ArithmeticOp(); - if (arith != NULL) { - // See if either arg is a known constant - Constant* con = arith->x()->as_Constant(); - if (con != NULL) { - *index = arith->y(); - } else { - con = arith->y()->as_Constant(); - if (con == NULL) return false; - *index = arith->x(); - } - long const_value; - // Check for integer multiply - if (arith->op() == Bytecodes::_lmul) { - assert((*index)->type() == longType, "invalid input type"); - LongConstant* val = con->type()->as_LongConstant(); - assert(val != NULL, "expecting a long constant"); - const_value = val->value(); - } else { -#ifndef _LP64 - if (arith->op() == Bytecodes::_imul) { - assert((*index)->type() == intType, "invalid input type"); - IntConstant* val = con->type()->as_IntConstant(); - assert(val != NULL, "expecting an int constant"); - const_value = val->value(); - } else { - return false; - } -#else - return false; -#endif - } - switch (const_value) { - case 1: *log2_scale = 0; return true; - case 2: *log2_scale = 1; return true; - case 4: *log2_scale = 2; return true; - case 8: *log2_scale = 3; return true; - default: return false; - } - } - - // Unknown instruction sequence; don't touch it - return false; -} - - -static bool match(UnsafeRawOp* x, - Instruction** base, - Instruction** index, - int* log2_scale) { - ArithmeticOp* root = x->base()->as_ArithmeticOp(); - if (root == NULL) return false; - // Limit ourselves to addition for now - if (root->op() != Bytecodes::_ladd) return false; - - bool match_found = false; - // Try to find shift or scale op - if (match_index_and_scale(root->y(), index, log2_scale)) { - *base = root->x(); - match_found = true; - } else if (match_index_and_scale(root->x(), index, log2_scale)) { - *base = root->y(); - match_found = true; - } else if (NOT_LP64(root->y()->as_Convert() != NULL) LP64_ONLY(false)) { - // Skipping i2l works only on 32bit because of the implicit l2i that the unsafe performs. - // 64bit needs a real sign-extending conversion. 
- Convert* convert = root->y()->as_Convert(); - if (convert->op() == Bytecodes::_i2l) { - assert(convert->value()->type() == intType, "should be an int"); - // pick base and index, setting scale at 1 - *base = root->x(); - *index = convert->value(); - *log2_scale = 0; - match_found = true; - } - } - // The default solution - if (!match_found) { - *base = root->x(); - *index = root->y(); - *log2_scale = 0; - } - - // If the value is pinned then it will be always be computed so - // there's no profit to reshaping the expression. - return !root->is_pinned(); -} - - -void Canonicalizer::do_UnsafeRawOp(UnsafeRawOp* x) { - Instruction* base = NULL; - Instruction* index = NULL; - int log2_scale; - - if (match(x, &base, &index, &log2_scale)) { - x->set_base(base); - x->set_index(index); - x->set_log2_scale(log2_scale); - if (PrintUnsafeOptimization) { - tty->print_cr("Canonicalizer: UnsafeRawOp id %d: base = id %d, index = id %d, log2_scale = %d", - x->id(), x->base()->id(), x->index()->id(), x->log2_scale()); - } - } -} - -void Canonicalizer::do_RoundFP(RoundFP* x) {} -void Canonicalizer::do_UnsafeGetRaw(UnsafeGetRaw* x) { if (OptimizeUnsafes) do_UnsafeRawOp(x); } -void Canonicalizer::do_UnsafePutRaw(UnsafePutRaw* x) { if (OptimizeUnsafes) do_UnsafeRawOp(x); } -void Canonicalizer::do_UnsafeGetObject(UnsafeGetObject* x) {} -void Canonicalizer::do_UnsafePutObject(UnsafePutObject* x) {} -void Canonicalizer::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) {} -void Canonicalizer::do_ProfileCall(ProfileCall* x) {} +void Canonicalizer::do_RoundFP (RoundFP* x) {} +void Canonicalizer::do_UnsafeGet (UnsafeGet* x) {} +void Canonicalizer::do_UnsafePut (UnsafePut* x) {} +void Canonicalizer::do_UnsafeGetAndSet(UnsafeGetAndSet* x) {} +void Canonicalizer::do_ProfileCall (ProfileCall* x) {} void Canonicalizer::do_ProfileReturnType(ProfileReturnType* x) {} -void Canonicalizer::do_ProfileInvoke(ProfileInvoke* x) {} -void Canonicalizer::do_RuntimeCall(RuntimeCall* x) {} +void Canonicalizer::do_ProfileInvoke (ProfileInvoke* x) {} +void Canonicalizer::do_RuntimeCall (RuntimeCall* x) {} void Canonicalizer::do_RangeCheckPredicate(RangeCheckPredicate* x) {} #ifdef ASSERT -void Canonicalizer::do_Assert(Assert* x) {} +void Canonicalizer::do_Assert (Assert* x) {} #endif -void Canonicalizer::do_MemBar(MemBar* x) {} +void Canonicalizer::do_MemBar (MemBar* x) {} diff --git a/src/hotspot/share/c1/c1_Canonicalizer.hpp b/src/hotspot/share/c1/c1_Canonicalizer.hpp index fe6095d5bdc29a93e83c94c753f4c40b556560b4..bc340587cba6eff16005cf46ee4622bdb4752b76 100644 --- a/src/hotspot/share/c1/c1_Canonicalizer.hpp +++ b/src/hotspot/share/c1/c1_Canonicalizer.hpp @@ -46,12 +46,6 @@ class Canonicalizer: InstructionVisitor { #endif void move_const_to_right(Op2* x); void do_Op2(Op2* x); - void do_UnsafeRawOp(UnsafeRawOp* x); - - void unsafe_raw_match(UnsafeRawOp* x, - Instruction** base, - Instruction** index, - int* scale); public: Canonicalizer(Compilation* c, Value x, int bci) : _compilation(c), _canonical(x), _bci(bci) { @@ -99,11 +93,9 @@ class Canonicalizer: InstructionVisitor { virtual void do_OsrEntry (OsrEntry* x); virtual void do_ExceptionObject(ExceptionObject* x); virtual void do_RoundFP (RoundFP* x); - virtual void do_UnsafeGetRaw (UnsafeGetRaw* x); - virtual void do_UnsafePutRaw (UnsafePutRaw* x); - virtual void do_UnsafeGetObject(UnsafeGetObject* x); - virtual void do_UnsafePutObject(UnsafePutObject* x); - virtual void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x); + virtual void do_UnsafeGet (UnsafeGet* x); + virtual void 
do_UnsafePut (UnsafePut* x); + virtual void do_UnsafeGetAndSet(UnsafeGetAndSet* x); virtual void do_ProfileCall (ProfileCall* x); virtual void do_ProfileReturnType (ProfileReturnType* x); virtual void do_ProfileInvoke (ProfileInvoke* x); diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp index 961f33ca69eed7597688becdb33273188c56cfd8..63d44d7a7b69922a0805043a9e7f478ed4ff95dd 100644 --- a/src/hotspot/share/c1/c1_CodeStubs.hpp +++ b/src/hotspot/share/c1/c1_CodeStubs.hpp @@ -56,8 +56,6 @@ class CodeStub: public CompilationResourceObj { virtual void emit_code(LIR_Assembler* e) = 0; virtual CodeEmitInfo* info() const { return NULL; } virtual bool is_exception_throw_stub() const { return false; } - virtual bool is_range_check_stub() const { return false; } - virtual bool is_divbyzero_stub() const { return false; } virtual bool is_simple_exception_stub() const { return false; } #ifndef PRODUCT virtual void print_name(outputStream* out) const = 0; @@ -67,15 +65,7 @@ class CodeStub: public CompilationResourceObj { Label* entry() { return &_entry; } Label* continuation() { return &_continuation; } // for LIR - virtual void visit(LIR_OpVisitState* visit) { -#ifndef PRODUCT - if (LIRTracePeephole && Verbose) { - tty->print("no visitor for "); - print_name(tty); - tty->cr(); - } -#endif - } + virtual void visit(LIR_OpVisitState* visit) = 0; }; class CodeStubList: public GrowableArray<CodeStub*> { @@ -181,7 +171,6 @@ class RangeCheckStub: public CodeStub { virtual void emit_code(LIR_Assembler* e); virtual CodeEmitInfo* info() const { return _info; } virtual bool is_exception_throw_stub() const { return true; } - virtual bool is_range_check_stub() const { return true; } virtual void visit(LIR_OpVisitState* visitor) { visitor->do_slow_case(_info); visitor->do_input(_index); @@ -224,7 +213,6 @@ class DivByZeroStub: public CodeStub { virtual void emit_code(LIR_Assembler* e); virtual CodeEmitInfo* info() const { return _info; } virtual bool is_exception_throw_stub() const { return true; } - virtual bool is_divbyzero_stub() const { return true; } virtual void visit(LIR_OpVisitState* visitor) { visitor->do_slow_case(_info); } diff --git a/src/hotspot/share/c1/c1_Compilation.cpp b/src/hotspot/share/c1/c1_Compilation.cpp index aa3857742f93c45ad3e3c3eabdee64d589fbb923..53ef19b1c1f8b9141ff0b51b21ddce10f70035aa 100644 --- a/src/hotspot/share/c1/c1_Compilation.cpp +++ b/src/hotspot/share/c1/c1_Compilation.cpp @@ -598,6 +598,9 @@ Compilation::Compilation(AbstractCompiler* compiler, ciEnv* env, ciMethod* metho } Compilation::~Compilation() { + // simulate crash during compilation + assert(CICrashAt < 0 || (uintx)_env->compile_id() != (uintx)CICrashAt, "just as planned"); + _env->set_compiler_data(NULL); } diff --git a/src/hotspot/share/c1/c1_Compilation.hpp b/src/hotspot/share/c1/c1_Compilation.hpp index dd0f8ed8c658f1269542d2f185a350812f078f73..3c13a261cc1a050e0a042abdae359778daa06a3f 100644 --- a/src/hotspot/share/c1/c1_Compilation.hpp +++ b/src/hotspot/share/c1/c1_Compilation.hpp @@ -228,8 +228,6 @@ class Compilation: public StackObj { return env()->comp_level() == CompLevel_full_profile || env()->comp_level() == CompLevel_limited_profile; } - bool count_invocations() { return is_profiling(); } - bool count_backedges() { return is_profiling(); } // Helpers for generation of profile information bool profile_branches() { diff --git a/src/hotspot/share/c1/c1_GraphBuilder.cpp b/src/hotspot/share/c1/c1_GraphBuilder.cpp index
a7a47781447e53ca829514c356b0338e8b84c20c..36169af0d6cf0020d3a68ec9b57a6e0966e05c24 100644 --- a/src/hotspot/share/c1/c1_GraphBuilder.cpp +++ b/src/hotspot/share/c1/c1_GraphBuilder.cpp @@ -3051,7 +3051,7 @@ BlockBegin* GraphBuilder::setup_start_block(int osr_bci, BlockBegin* std_entry, // In addition, with range check elimination, we may need a valid block // that dominates all the rest to insert range predicates. BlockBegin* new_header_block; - if (std_entry->number_of_preds() > 0 || count_invocations() || count_backedges() || RangeCheckElimination) { + if (std_entry->number_of_preds() > 0 || is_profiling() || RangeCheckElimination) { new_header_block = header_block(std_entry, BlockBegin::std_entry_flag, state); } else { new_header_block = std_entry; @@ -3138,10 +3138,11 @@ void GraphBuilder::setup_osr_entry_block() { // doesn't so pretend that the interpreter passed in null. get = append(new Constant(objectNull)); } else { - get = append(new UnsafeGetRaw(as_BasicType(local->type()), e, - append(new Constant(new IntConstant(offset))), - 0, - true /*unaligned*/, true /*wide*/)); + Value off_val = append(new Constant(new IntConstant(offset))); + get = append(new UnsafeGet(as_BasicType(local->type()), e, + off_val, + false/*is_volatile*/, + true/*is_raw*/)); } _state->store_local(index, get); } @@ -3468,60 +3469,60 @@ void GraphBuilder::build_graph_for_intrinsic(ciMethod* callee, bool ignore_retur // Some intrinsics need special IR nodes. switch(id) { - case vmIntrinsics::_getReference : append_unsafe_get_obj(callee, T_OBJECT, false); return; - case vmIntrinsics::_getBoolean : append_unsafe_get_obj(callee, T_BOOLEAN, false); return; - case vmIntrinsics::_getByte : append_unsafe_get_obj(callee, T_BYTE, false); return; - case vmIntrinsics::_getShort : append_unsafe_get_obj(callee, T_SHORT, false); return; - case vmIntrinsics::_getChar : append_unsafe_get_obj(callee, T_CHAR, false); return; - case vmIntrinsics::_getInt : append_unsafe_get_obj(callee, T_INT, false); return; - case vmIntrinsics::_getLong : append_unsafe_get_obj(callee, T_LONG, false); return; - case vmIntrinsics::_getFloat : append_unsafe_get_obj(callee, T_FLOAT, false); return; - case vmIntrinsics::_getDouble : append_unsafe_get_obj(callee, T_DOUBLE, false); return; - case vmIntrinsics::_putReference : append_unsafe_put_obj(callee, T_OBJECT, false); return; - case vmIntrinsics::_putBoolean : append_unsafe_put_obj(callee, T_BOOLEAN, false); return; - case vmIntrinsics::_putByte : append_unsafe_put_obj(callee, T_BYTE, false); return; - case vmIntrinsics::_putShort : append_unsafe_put_obj(callee, T_SHORT, false); return; - case vmIntrinsics::_putChar : append_unsafe_put_obj(callee, T_CHAR, false); return; - case vmIntrinsics::_putInt : append_unsafe_put_obj(callee, T_INT, false); return; - case vmIntrinsics::_putLong : append_unsafe_put_obj(callee, T_LONG, false); return; - case vmIntrinsics::_putFloat : append_unsafe_put_obj(callee, T_FLOAT, false); return; - case vmIntrinsics::_putDouble : append_unsafe_put_obj(callee, T_DOUBLE, false); return; - case vmIntrinsics::_getShortUnaligned : append_unsafe_get_obj(callee, T_SHORT, false); return; - case vmIntrinsics::_getCharUnaligned : append_unsafe_get_obj(callee, T_CHAR, false); return; - case vmIntrinsics::_getIntUnaligned : append_unsafe_get_obj(callee, T_INT, false); return; - case vmIntrinsics::_getLongUnaligned : append_unsafe_get_obj(callee, T_LONG, false); return; - case vmIntrinsics::_putShortUnaligned : append_unsafe_put_obj(callee, T_SHORT, false); return; - case 
vmIntrinsics::_putCharUnaligned : append_unsafe_put_obj(callee, T_CHAR, false); return; - case vmIntrinsics::_putIntUnaligned : append_unsafe_put_obj(callee, T_INT, false); return; - case vmIntrinsics::_putLongUnaligned : append_unsafe_put_obj(callee, T_LONG, false); return; - case vmIntrinsics::_getReferenceVolatile : append_unsafe_get_obj(callee, T_OBJECT, true); return; - case vmIntrinsics::_getBooleanVolatile : append_unsafe_get_obj(callee, T_BOOLEAN, true); return; - case vmIntrinsics::_getByteVolatile : append_unsafe_get_obj(callee, T_BYTE, true); return; - case vmIntrinsics::_getShortVolatile : append_unsafe_get_obj(callee, T_SHORT, true); return; - case vmIntrinsics::_getCharVolatile : append_unsafe_get_obj(callee, T_CHAR, true); return; - case vmIntrinsics::_getIntVolatile : append_unsafe_get_obj(callee, T_INT, true); return; - case vmIntrinsics::_getLongVolatile : append_unsafe_get_obj(callee, T_LONG, true); return; - case vmIntrinsics::_getFloatVolatile : append_unsafe_get_obj(callee, T_FLOAT, true); return; - case vmIntrinsics::_getDoubleVolatile : append_unsafe_get_obj(callee, T_DOUBLE, true); return; - case vmIntrinsics::_putReferenceVolatile : append_unsafe_put_obj(callee, T_OBJECT, true); return; - case vmIntrinsics::_putBooleanVolatile : append_unsafe_put_obj(callee, T_BOOLEAN, true); return; - case vmIntrinsics::_putByteVolatile : append_unsafe_put_obj(callee, T_BYTE, true); return; - case vmIntrinsics::_putShortVolatile : append_unsafe_put_obj(callee, T_SHORT, true); return; - case vmIntrinsics::_putCharVolatile : append_unsafe_put_obj(callee, T_CHAR, true); return; - case vmIntrinsics::_putIntVolatile : append_unsafe_put_obj(callee, T_INT, true); return; - case vmIntrinsics::_putLongVolatile : append_unsafe_put_obj(callee, T_LONG, true); return; - case vmIntrinsics::_putFloatVolatile : append_unsafe_put_obj(callee, T_FLOAT, true); return; - case vmIntrinsics::_putDoubleVolatile : append_unsafe_put_obj(callee, T_DOUBLE, true); return; + case vmIntrinsics::_getReference : append_unsafe_get(callee, T_OBJECT, false); return; + case vmIntrinsics::_getBoolean : append_unsafe_get(callee, T_BOOLEAN, false); return; + case vmIntrinsics::_getByte : append_unsafe_get(callee, T_BYTE, false); return; + case vmIntrinsics::_getShort : append_unsafe_get(callee, T_SHORT, false); return; + case vmIntrinsics::_getChar : append_unsafe_get(callee, T_CHAR, false); return; + case vmIntrinsics::_getInt : append_unsafe_get(callee, T_INT, false); return; + case vmIntrinsics::_getLong : append_unsafe_get(callee, T_LONG, false); return; + case vmIntrinsics::_getFloat : append_unsafe_get(callee, T_FLOAT, false); return; + case vmIntrinsics::_getDouble : append_unsafe_get(callee, T_DOUBLE, false); return; + case vmIntrinsics::_putReference : append_unsafe_put(callee, T_OBJECT, false); return; + case vmIntrinsics::_putBoolean : append_unsafe_put(callee, T_BOOLEAN, false); return; + case vmIntrinsics::_putByte : append_unsafe_put(callee, T_BYTE, false); return; + case vmIntrinsics::_putShort : append_unsafe_put(callee, T_SHORT, false); return; + case vmIntrinsics::_putChar : append_unsafe_put(callee, T_CHAR, false); return; + case vmIntrinsics::_putInt : append_unsafe_put(callee, T_INT, false); return; + case vmIntrinsics::_putLong : append_unsafe_put(callee, T_LONG, false); return; + case vmIntrinsics::_putFloat : append_unsafe_put(callee, T_FLOAT, false); return; + case vmIntrinsics::_putDouble : append_unsafe_put(callee, T_DOUBLE, false); return; + case vmIntrinsics::_getShortUnaligned : 
append_unsafe_get(callee, T_SHORT, false); return; + case vmIntrinsics::_getCharUnaligned : append_unsafe_get(callee, T_CHAR, false); return; + case vmIntrinsics::_getIntUnaligned : append_unsafe_get(callee, T_INT, false); return; + case vmIntrinsics::_getLongUnaligned : append_unsafe_get(callee, T_LONG, false); return; + case vmIntrinsics::_putShortUnaligned : append_unsafe_put(callee, T_SHORT, false); return; + case vmIntrinsics::_putCharUnaligned : append_unsafe_put(callee, T_CHAR, false); return; + case vmIntrinsics::_putIntUnaligned : append_unsafe_put(callee, T_INT, false); return; + case vmIntrinsics::_putLongUnaligned : append_unsafe_put(callee, T_LONG, false); return; + case vmIntrinsics::_getReferenceVolatile : append_unsafe_get(callee, T_OBJECT, true); return; + case vmIntrinsics::_getBooleanVolatile : append_unsafe_get(callee, T_BOOLEAN, true); return; + case vmIntrinsics::_getByteVolatile : append_unsafe_get(callee, T_BYTE, true); return; + case vmIntrinsics::_getShortVolatile : append_unsafe_get(callee, T_SHORT, true); return; + case vmIntrinsics::_getCharVolatile : append_unsafe_get(callee, T_CHAR, true); return; + case vmIntrinsics::_getIntVolatile : append_unsafe_get(callee, T_INT, true); return; + case vmIntrinsics::_getLongVolatile : append_unsafe_get(callee, T_LONG, true); return; + case vmIntrinsics::_getFloatVolatile : append_unsafe_get(callee, T_FLOAT, true); return; + case vmIntrinsics::_getDoubleVolatile : append_unsafe_get(callee, T_DOUBLE, true); return; + case vmIntrinsics::_putReferenceVolatile : append_unsafe_put(callee, T_OBJECT, true); return; + case vmIntrinsics::_putBooleanVolatile : append_unsafe_put(callee, T_BOOLEAN, true); return; + case vmIntrinsics::_putByteVolatile : append_unsafe_put(callee, T_BYTE, true); return; + case vmIntrinsics::_putShortVolatile : append_unsafe_put(callee, T_SHORT, true); return; + case vmIntrinsics::_putCharVolatile : append_unsafe_put(callee, T_CHAR, true); return; + case vmIntrinsics::_putIntVolatile : append_unsafe_put(callee, T_INT, true); return; + case vmIntrinsics::_putLongVolatile : append_unsafe_put(callee, T_LONG, true); return; + case vmIntrinsics::_putFloatVolatile : append_unsafe_put(callee, T_FLOAT, true); return; + case vmIntrinsics::_putDoubleVolatile : append_unsafe_put(callee, T_DOUBLE, true); return; case vmIntrinsics::_compareAndSetLong: case vmIntrinsics::_compareAndSetInt: case vmIntrinsics::_compareAndSetReference : append_unsafe_CAS(callee); return; case vmIntrinsics::_getAndAddInt: - case vmIntrinsics::_getAndAddLong : append_unsafe_get_and_set_obj(callee, true); return; - case vmIntrinsics::_getAndSetInt : - case vmIntrinsics::_getAndSetLong : - case vmIntrinsics::_getAndSetReference : append_unsafe_get_and_set_obj(callee, false); return; - case vmIntrinsics::_getCharStringU : append_char_access(callee, false); return; - case vmIntrinsics::_putCharStringU : append_char_access(callee, true); return; + case vmIntrinsics::_getAndAddLong : append_unsafe_get_and_set(callee, true); return; + case vmIntrinsics::_getAndSetInt : + case vmIntrinsics::_getAndSetLong : + case vmIntrinsics::_getAndSetReference : append_unsafe_get_and_set(callee, false); return; + case vmIntrinsics::_getCharStringU : append_char_access(callee, false); return; + case vmIntrinsics::_putCharStringU : append_char_access(callee, true); return; default: break; } @@ -4199,20 +4200,20 @@ void GraphBuilder::pop_scope_for_jsr() { _scope_data = scope_data()->parent(); } -void GraphBuilder::append_unsafe_get_obj(ciMethod* callee, BasicType 
t, bool is_volatile) { +void GraphBuilder::append_unsafe_get(ciMethod* callee, BasicType t, bool is_volatile) { Values* args = state()->pop_arguments(callee->arg_size()); null_check(args->at(0)); Instruction* offset = args->at(2); #ifndef _LP64 offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT))); #endif - Instruction* op = append(new UnsafeGetObject(t, args->at(1), offset, is_volatile)); + Instruction* op = append(new UnsafeGet(t, args->at(1), offset, is_volatile)); push(op->type(), op); compilation()->set_has_unsafe_access(true); } -void GraphBuilder::append_unsafe_put_obj(ciMethod* callee, BasicType t, bool is_volatile) { +void GraphBuilder::append_unsafe_put(ciMethod* callee, BasicType t, bool is_volatile) { Values* args = state()->pop_arguments(callee->arg_size()); null_check(args->at(0)); Instruction* offset = args->at(2); @@ -4224,29 +4225,11 @@ void GraphBuilder::append_unsafe_put_obj(ciMethod* callee, BasicType t, bool is_ Value mask = append(new Constant(new IntConstant(1))); val = append(new LogicOp(Bytecodes::_iand, val, mask)); } - Instruction* op = append(new UnsafePutObject(t, args->at(1), offset, val, is_volatile)); + Instruction* op = append(new UnsafePut(t, args->at(1), offset, val, is_volatile)); compilation()->set_has_unsafe_access(true); kill_all(); } - -void GraphBuilder::append_unsafe_get_raw(ciMethod* callee, BasicType t) { - Values* args = state()->pop_arguments(callee->arg_size()); - null_check(args->at(0)); - Instruction* op = append(new UnsafeGetRaw(t, args->at(1), false)); - push(op->type(), op); - compilation()->set_has_unsafe_access(true); -} - - -void GraphBuilder::append_unsafe_put_raw(ciMethod* callee, BasicType t) { - Values* args = state()->pop_arguments(callee->arg_size()); - null_check(args->at(0)); - Instruction* op = append(new UnsafePutRaw(t, args->at(1), args->at(2))); - compilation()->set_has_unsafe_access(true); -} - - void GraphBuilder::append_unsafe_CAS(ciMethod* callee) { ValueStack* state_before = copy_state_for_exception(); ValueType* result_type = as_ValueType(callee->return_type()); @@ -4334,7 +4317,7 @@ void GraphBuilder::print_inlining(ciMethod* callee, const char* msg, bool succes } } -void GraphBuilder::append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add) { +void GraphBuilder::append_unsafe_get_and_set(ciMethod* callee, bool is_add) { Values* args = state()->pop_arguments(callee->arg_size()); BasicType t = callee->return_type()->basic_type(); null_check(args->at(0)); @@ -4342,7 +4325,7 @@ void GraphBuilder::append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add) #ifndef _LP64 offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT))); #endif - Instruction* op = append(new UnsafeGetAndSetObject(t, args->at(1), offset, args->at(3), is_add)); + Instruction* op = append(new UnsafeGetAndSet(t, args->at(1), offset, args->at(3), is_add)); compilation()->set_has_unsafe_access(true); kill_all(); push(op->type(), op); diff --git a/src/hotspot/share/c1/c1_GraphBuilder.hpp b/src/hotspot/share/c1/c1_GraphBuilder.hpp index 80058a210a8e2f40a2604ce0240cbd26cf6b42df..eddb6c8fd64ffe60982192f61da2ae187a519b1e 100644 --- a/src/hotspot/share/c1/c1_GraphBuilder.hpp +++ b/src/hotspot/share/c1/c1_GraphBuilder.hpp @@ -373,12 +373,10 @@ class GraphBuilder { void pop_scope(); void pop_scope_for_jsr(); - void append_unsafe_get_obj(ciMethod* callee, BasicType t, bool is_volatile); - void append_unsafe_put_obj(ciMethod* callee, BasicType t, bool is_volatile); - void append_unsafe_get_raw(ciMethod* callee, BasicType 
t); - void append_unsafe_put_raw(ciMethod* callee, BasicType t); + void append_unsafe_get(ciMethod* callee, BasicType t, bool is_volatile); + void append_unsafe_put(ciMethod* callee, BasicType t, bool is_volatile); void append_unsafe_CAS(ciMethod* callee); - void append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add); + void append_unsafe_get_and_set(ciMethod* callee, bool is_add); void append_char_access(ciMethod* callee, bool is_store); void print_inlining(ciMethod* callee, const char* msg, bool success = true); @@ -389,8 +387,6 @@ class GraphBuilder { // Shortcuts to profiling control. bool is_profiling() { return _compilation->is_profiling(); } - bool count_invocations() { return _compilation->count_invocations(); } - bool count_backedges() { return _compilation->count_backedges(); } bool profile_branches() { return _compilation->profile_branches(); } bool profile_calls() { return _compilation->profile_calls(); } bool profile_inlined_calls() { return _compilation->profile_inlined_calls(); } diff --git a/src/hotspot/share/c1/c1_Instruction.hpp b/src/hotspot/share/c1/c1_Instruction.hpp index 858d4f606edaa90a3709baa1ecd2340a0254aef2..6eb080841a960a0d124a41cb32693e79f92a54f8 100644 --- a/src/hotspot/share/c1/c1_Instruction.hpp +++ b/src/hotspot/share/c1/c1_Instruction.hpp @@ -95,13 +95,9 @@ class Throw; class Base; class RoundFP; class UnsafeOp; -class UnsafeRawOp; -class UnsafeGetRaw; -class UnsafePutRaw; -class UnsafeObjectOp; -class UnsafeGetObject; -class UnsafePutObject; -class UnsafeGetAndSetObject; +class UnsafeGet; +class UnsafePut; +class UnsafeGetAndSet; class ProfileCall; class ProfileReturnType; class ProfileInvoke; @@ -195,11 +191,9 @@ class InstructionVisitor: public StackObj { virtual void do_OsrEntry (OsrEntry* x) = 0; virtual void do_ExceptionObject(ExceptionObject* x) = 0; virtual void do_RoundFP (RoundFP* x) = 0; - virtual void do_UnsafeGetRaw (UnsafeGetRaw* x) = 0; - virtual void do_UnsafePutRaw (UnsafePutRaw* x) = 0; - virtual void do_UnsafeGetObject(UnsafeGetObject* x) = 0; - virtual void do_UnsafePutObject(UnsafePutObject* x) = 0; - virtual void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) = 0; + virtual void do_UnsafeGet (UnsafeGet* x) = 0; + virtual void do_UnsafePut (UnsafePut* x) = 0; + virtual void do_UnsafeGetAndSet(UnsafeGetAndSet* x) = 0; virtual void do_ProfileCall (ProfileCall* x) = 0; virtual void do_ProfileReturnType (ProfileReturnType* x) = 0; virtual void do_ProfileInvoke (ProfileInvoke* x) = 0; @@ -2193,13 +2187,16 @@ LEAF(RoundFP, Instruction) BASE(UnsafeOp, Instruction) private: - BasicType _basic_type; // ValueType can not express byte-sized integers + Value _object; // Object to be fetched from or mutated + Value _offset; // Offset within object + bool _is_volatile; // true if volatile - dl/JSR166 + BasicType _basic_type; // ValueType can not express byte-sized integers protected: // creation - UnsafeOp(BasicType basic_type, bool is_put) - : Instruction(is_put ? voidType : as_ValueType(basic_type)) - , _basic_type(basic_type) + UnsafeOp(BasicType basic_type, Value object, Value offset, bool is_put, bool is_volatile) + : Instruction(is_put ? voidType : as_ValueType(basic_type)), + _object(object), _offset(offset), _is_volatile(is_volatile), _basic_type(basic_type) { //Note: Unsafe ops are not not guaranteed to throw NPE. 
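    // (That is, no implicit null check, and hence no exception state, ties
    //  these ops into place the way it does for ordinary field accesses.)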
// Convservatively, Unsafe operations must be pinned though we could be @@ -2210,148 +2207,42 @@ BASE(UnsafeOp, Instruction) public: // accessors BasicType basic_type() { return _basic_type; } + Value object() { return _object; } + Value offset() { return _offset; } + bool is_volatile() { return _is_volatile; } // generic - virtual void input_values_do(ValueVisitor* f) { } -}; - - -BASE(UnsafeRawOp, UnsafeOp) - private: - Value _base; // Base address (a Java long) - Value _index; // Index if computed by optimizer; initialized to NULL - int _log2_scale; // Scale factor: 0, 1, 2, or 3. - // Indicates log2 of number of bytes (1, 2, 4, or 8) - // to scale index by. - - protected: - UnsafeRawOp(BasicType basic_type, Value addr, bool is_put) - : UnsafeOp(basic_type, is_put) - , _base(addr) - , _index(NULL) - , _log2_scale(0) - { - // Can not use ASSERT_VALUES because index may be NULL - assert(addr != NULL && addr->type()->is_long(), "just checking"); - } - - UnsafeRawOp(BasicType basic_type, Value base, Value index, int log2_scale, bool is_put) - : UnsafeOp(basic_type, is_put) - , _base(base) - , _index(index) - , _log2_scale(log2_scale) - { - } - - public: - // accessors - Value base() { return _base; } - Value index() { return _index; } - bool has_index() { return (_index != NULL); } - int log2_scale() { return _log2_scale; } - - // setters - void set_base (Value base) { _base = base; } - void set_index(Value index) { _index = index; } - void set_log2_scale(int log2_scale) { _log2_scale = log2_scale; } - - // generic - virtual void input_values_do(ValueVisitor* f) { UnsafeOp::input_values_do(f); - f->visit(&_base); - if (has_index()) f->visit(&_index); } + virtual void input_values_do(ValueVisitor* f) { f->visit(&_object); + f->visit(&_offset); } }; - -LEAF(UnsafeGetRaw, UnsafeRawOp) +LEAF(UnsafeGet, UnsafeOp) private: - bool _may_be_unaligned, _is_wide; // For OSREntry - + bool _is_raw; public: - UnsafeGetRaw(BasicType basic_type, Value addr, bool may_be_unaligned, bool is_wide = false) - : UnsafeRawOp(basic_type, addr, false) { - _may_be_unaligned = may_be_unaligned; - _is_wide = is_wide; - } - - UnsafeGetRaw(BasicType basic_type, Value base, Value index, int log2_scale, bool may_be_unaligned, bool is_wide = false) - : UnsafeRawOp(basic_type, base, index, log2_scale, false) { - _may_be_unaligned = may_be_unaligned; - _is_wide = is_wide; - } - - bool may_be_unaligned() { return _may_be_unaligned; } - bool is_wide() { return _is_wide; } -}; - - -LEAF(UnsafePutRaw, UnsafeRawOp) - private: - Value _value; // Value to be stored - - public: - UnsafePutRaw(BasicType basic_type, Value addr, Value value) - : UnsafeRawOp(basic_type, addr, true) - , _value(value) + UnsafeGet(BasicType basic_type, Value object, Value offset, bool is_volatile) + : UnsafeOp(basic_type, object, offset, false, is_volatile) { - assert(value != NULL, "just checking"); ASSERT_VALUES + _is_raw = false; } - - UnsafePutRaw(BasicType basic_type, Value base, Value index, int log2_scale, Value value) - : UnsafeRawOp(basic_type, base, index, log2_scale, true) - , _value(value) + UnsafeGet(BasicType basic_type, Value object, Value offset, bool is_volatile, bool is_raw) + : UnsafeOp(basic_type, object, offset, false, is_volatile), _is_raw(is_raw) { - assert(value != NULL, "just checking"); ASSERT_VALUES } // accessors - Value value() { return _value; } - - // generic - virtual void input_values_do(ValueVisitor* f) { UnsafeRawOp::input_values_do(f); - f->visit(&_value); } -}; - - -BASE(UnsafeObjectOp, UnsafeOp) - private: - Value 
_object; // Object to be fetched from or mutated - Value _offset; // Offset within object - bool _is_volatile; // true if volatile - dl/JSR166 - public: - UnsafeObjectOp(BasicType basic_type, Value object, Value offset, bool is_put, bool is_volatile) - : UnsafeOp(basic_type, is_put), _object(object), _offset(offset), _is_volatile(is_volatile) - { - } - - // accessors - Value object() { return _object; } - Value offset() { return _offset; } - bool is_volatile() { return _is_volatile; } - // generic - virtual void input_values_do(ValueVisitor* f) { UnsafeOp::input_values_do(f); - f->visit(&_object); - f->visit(&_offset); } + bool is_raw() { return _is_raw; } }; -LEAF(UnsafeGetObject, UnsafeObjectOp) - public: - UnsafeGetObject(BasicType basic_type, Value object, Value offset, bool is_volatile) - : UnsafeObjectOp(basic_type, object, offset, false, is_volatile) - { - ASSERT_VALUES - } -}; - - -LEAF(UnsafePutObject, UnsafeObjectOp) +LEAF(UnsafePut, UnsafeOp) private: Value _value; // Value to be stored public: - UnsafePutObject(BasicType basic_type, Value object, Value offset, Value value, bool is_volatile) - : UnsafeObjectOp(basic_type, object, offset, true, is_volatile) + UnsafePut(BasicType basic_type, Value object, Value offset, Value value, bool is_volatile) + : UnsafeOp(basic_type, object, offset, true, is_volatile) , _value(value) { ASSERT_VALUES @@ -2361,17 +2252,17 @@ LEAF(UnsafePutObject, UnsafeObjectOp) Value value() { return _value; } // generic - virtual void input_values_do(ValueVisitor* f) { UnsafeObjectOp::input_values_do(f); + virtual void input_values_do(ValueVisitor* f) { UnsafeOp::input_values_do(f); f->visit(&_value); } }; -LEAF(UnsafeGetAndSetObject, UnsafeObjectOp) +LEAF(UnsafeGetAndSet, UnsafeOp) private: Value _value; // Value to be stored bool _is_add; public: - UnsafeGetAndSetObject(BasicType basic_type, Value object, Value offset, Value value, bool is_add) - : UnsafeObjectOp(basic_type, object, offset, false, false) + UnsafeGetAndSet(BasicType basic_type, Value object, Value offset, Value value, bool is_add) + : UnsafeOp(basic_type, object, offset, false, false) , _value(value) , _is_add(is_add) { @@ -2383,7 +2274,7 @@ LEAF(UnsafeGetAndSetObject, UnsafeObjectOp) Value value() { return _value; } // generic - virtual void input_values_do(ValueVisitor* f) { UnsafeObjectOp::input_values_do(f); + virtual void input_values_do(ValueVisitor* f) { UnsafeOp::input_values_do(f); f->visit(&_value); } }; diff --git a/src/hotspot/share/c1/c1_InstructionPrinter.cpp b/src/hotspot/share/c1/c1_InstructionPrinter.cpp index a726cf196d64751ba9ff95147bba52fe6055c206..3e0708351a6cbfd65653700b97995dbb7a3f0f35 100644 --- a/src/hotspot/share/c1/c1_InstructionPrinter.cpp +++ b/src/hotspot/share/c1/c1_InstructionPrinter.cpp @@ -263,22 +263,6 @@ void InstructionPrinter::print_inline_level(BlockBegin* block) { void InstructionPrinter::print_unsafe_op(UnsafeOp* op, const char* name) { output()->print("%s", name); - output()->print(".("); -} - -void InstructionPrinter::print_unsafe_raw_op(UnsafeRawOp* op, const char* name) { - print_unsafe_op(op, name); - output()->print("base "); - print_value(op->base()); - if (op->has_index()) { - output()->print(", index "); print_value(op->index()); - output()->print(", log2_scale %d", op->log2_scale()); - } -} - - -void InstructionPrinter::print_unsafe_object_op(UnsafeObjectOp* op, const char* name) { - print_unsafe_op(op, name); print_value(op->object()); output()->print(", "); print_value(op->offset()); @@ -809,36 +793,20 @@ void 
InstructionPrinter::do_RoundFP(RoundFP* x) { print_value(x->input()); } - -void InstructionPrinter::do_UnsafeGetRaw(UnsafeGetRaw* x) { - print_unsafe_raw_op(x, "UnsafeGetRaw"); - output()->put(')'); -} - - -void InstructionPrinter::do_UnsafePutRaw(UnsafePutRaw* x) { - print_unsafe_raw_op(x, "UnsafePutRaw"); - output()->print(", value "); - print_value(x->value()); +void InstructionPrinter::do_UnsafeGet(UnsafeGet* x) { + print_unsafe_op(x, x->is_raw() ? "UnsafeGet (raw)" : "UnsafeGet"); output()->put(')'); } - -void InstructionPrinter::do_UnsafeGetObject(UnsafeGetObject* x) { - print_unsafe_object_op(x, "UnsafeGetObject"); - output()->put(')'); -} - - -void InstructionPrinter::do_UnsafePutObject(UnsafePutObject* x) { - print_unsafe_object_op(x, "UnsafePutObject"); +void InstructionPrinter::do_UnsafePut(UnsafePut* x) { + print_unsafe_op(x, "UnsafePut"); output()->print(", value "); print_value(x->value()); output()->put(')'); } -void InstructionPrinter::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { - print_unsafe_object_op(x, x->is_add()?"UnsafeGetAndSetObject (add)":"UnsafeGetAndSetObject"); +void InstructionPrinter::do_UnsafeGetAndSet(UnsafeGetAndSet* x) { + print_unsafe_op(x, x->is_add()?"UnsafeGetAndSet (add)":"UnsafeGetAndSet"); output()->print(", value "); print_value(x->value()); output()->put(')'); diff --git a/src/hotspot/share/c1/c1_InstructionPrinter.hpp b/src/hotspot/share/c1/c1_InstructionPrinter.hpp index 52800c24bcbffd01231acdb30ac9d747a6a3714a..0e5ba78bdc762f1e5558e1c02e72d4205fa5d4d9 100644 --- a/src/hotspot/share/c1/c1_InstructionPrinter.hpp +++ b/src/hotspot/share/c1/c1_InstructionPrinter.hpp @@ -74,8 +74,6 @@ class InstructionPrinter: public InstructionVisitor { void print_stack(ValueStack* stack); void print_inline_level(BlockBegin* block); void print_unsafe_op(UnsafeOp* op, const char* name); - void print_unsafe_raw_op(UnsafeRawOp* op, const char* name); - void print_unsafe_object_op(UnsafeObjectOp* op, const char* name); void print_phi(int i, Value v, BlockBegin* b); void print_alias(Value v); @@ -123,11 +121,9 @@ class InstructionPrinter: public InstructionVisitor { virtual void do_OsrEntry (OsrEntry* x); virtual void do_ExceptionObject(ExceptionObject* x); virtual void do_RoundFP (RoundFP* x); - virtual void do_UnsafeGetRaw (UnsafeGetRaw* x); - virtual void do_UnsafePutRaw (UnsafePutRaw* x); - virtual void do_UnsafeGetObject(UnsafeGetObject* x); - virtual void do_UnsafePutObject(UnsafePutObject* x); - virtual void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x); + virtual void do_UnsafeGet (UnsafeGet* x); + virtual void do_UnsafePut (UnsafePut* x); + virtual void do_UnsafeGetAndSet(UnsafeGetAndSet* x); virtual void do_ProfileCall (ProfileCall* x); virtual void do_ProfileReturnType (ProfileReturnType* x); virtual void do_ProfileInvoke (ProfileInvoke* x); diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 62cff4c7505285e52c4af1ed9d89ff5ecfa7d536..1e4529d7c2b418f20e14907be562eda86ff18036 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -403,7 +403,6 @@ void LIR_OpVisitState::visit(LIR_Op* op) { switch (op->code()) { // LIR_Op0 - case lir_backwardbranch_target: // result and info always invalid case lir_fpop_raw: // result and info always invalid case lir_breakpoint: // result and info always invalid case lir_membar: // result and info always invalid @@ -1637,7 +1636,6 @@ const char * LIR_Op::name() const { case lir_label: s = "label"; break; case lir_nop: s = "nop"; break; case 
lir_on_spin_wait: s = "on_spin_wait"; break; - case lir_backwardbranch_target: s = "backbranch"; break; case lir_std_entry: s = "std_entry"; break; case lir_osr_entry: s = "osr_entry"; break; case lir_fpop_raw: s = "fpop_raw"; break; @@ -1781,8 +1779,6 @@ const char * LIR_Op1::name() const { switch (move_kind()) { case lir_move_normal: return "move"; - case lir_move_unaligned: - return "unaligned move"; case lir_move_volatile: return "volatile_move"; case lir_move_wide: diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index f22e486bb48fc4d4909e37886b56ec234153eaab..43f400653b4e4bc7642ce3ecb0335ea6d8fb6b62 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -888,7 +888,6 @@ enum LIR_Code { , begin_op0 , lir_label , lir_nop - , lir_backwardbranch_target , lir_std_entry , lir_osr_entry , lir_fpop_raw @@ -1019,7 +1018,6 @@ enum LIR_PatchCode { enum LIR_MoveKind { lir_move_normal, lir_move_volatile, - lir_move_unaligned, lir_move_wide, lir_move_max_flag }; @@ -2074,9 +2072,6 @@ class LIR_List: public CompilationResourceObj { // result is a stack location for old backend and vreg for UseLinearScan // stack_loc_temp is an illegal register for old backend void roundfp(LIR_Opr reg, LIR_Opr stack_loc_temp, LIR_Opr result) { append(new LIR_OpRoundFP(reg, stack_loc_temp, result)); } - void unaligned_move(LIR_Address* src, LIR_Opr dst) { append(new LIR_Op1(lir_move, LIR_OprFact::address(src), dst, dst->type(), lir_patch_none, NULL, lir_move_unaligned)); } - void unaligned_move(LIR_Opr src, LIR_Address* dst) { append(new LIR_Op1(lir_move, src, LIR_OprFact::address(dst), src->type(), lir_patch_none, NULL, lir_move_unaligned)); } - void unaligned_move(LIR_Opr src, LIR_Opr dst) { append(new LIR_Op1(lir_move, src, dst, dst->type(), lir_patch_none, NULL, lir_move_unaligned)); } void move(LIR_Opr src, LIR_Opr dst, CodeEmitInfo* info = NULL) { append(new LIR_Op1(lir_move, src, dst, dst->type(), lir_patch_none, info)); } void move(LIR_Address* src, LIR_Opr dst, CodeEmitInfo* info = NULL) { append(new LIR_Op1(lir_move, LIR_OprFact::address(src), dst, src->type(), lir_patch_none, info)); } void move(LIR_Opr src, LIR_Address* dst, CodeEmitInfo* info = NULL) { append(new LIR_Op1(lir_move, src, LIR_OprFact::address(dst), dst->type(), lir_patch_none, info)); } diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index cc1e2014892612020134213fabfacd3ccf42c7b1..1dc3981ceaf073f0b816a0abb702d03c203bedbe 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -509,7 +509,6 @@ void LIR_Assembler::emit_op1(LIR_Op1* op) { } else { move_op(op->in_opr(), op->result_opr(), op->type(), op->patch_code(), op->info(), op->pop_fpu_stack(), - op->move_kind() == lir_move_unaligned, op->move_kind() == lir_move_wide); } break; @@ -771,7 +770,7 @@ void LIR_Assembler::roundfp_op(LIR_Opr src, LIR_Opr tmp, LIR_Opr dest, bool pop_ } -void LIR_Assembler::move_op(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool unaligned, bool wide) { +void LIR_Assembler::move_op(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) { if (src->is_register()) { if (dest->is_register()) { assert(patch_code == lir_patch_none && info == NULL, "no patching and info allowed here"); @@ -780,7 +779,7 @@ void LIR_Assembler::move_op(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch 
assert(patch_code == lir_patch_none && info == NULL, "no patching and info allowed here"); reg2stack(src, dest, type, pop_fpu_stack); } else if (dest->is_address()) { - reg2mem(src, dest, type, patch_code, info, pop_fpu_stack, wide, unaligned); + reg2mem(src, dest, type, patch_code, info, pop_fpu_stack, wide); } else { ShouldNotReachHere(); } @@ -809,8 +808,7 @@ void LIR_Assembler::move_op(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch } } else if (src->is_address()) { - mem2reg(src, dest, type, patch_code, info, wide, unaligned); - + mem2reg(src, dest, type, patch_code, info, wide); } else { ShouldNotReachHere(); } diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp index 02c79160d04166d2406287b0665062f2f7a095b6..683e921846242bb97a488334cabdf782fe3b97b1 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -169,12 +169,12 @@ class LIR_Assembler: public CompilationResourceObj { void reg2reg (LIR_Opr src, LIR_Opr dest); void reg2mem (LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, - bool pop_fpu_stack, bool wide, bool unaligned); + bool pop_fpu_stack, bool wide); void stack2reg (LIR_Opr src, LIR_Opr dest, BasicType type); void stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type); void mem2reg (LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, - CodeEmitInfo* info, bool wide, bool unaligned); + CodeEmitInfo* info, bool wide); void shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp); void shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest); @@ -214,7 +214,7 @@ class LIR_Assembler: public CompilationResourceObj { void roundfp_op(LIR_Opr src, LIR_Opr tmp, LIR_Opr dest, bool pop_fpu_stack); void move_op(LIR_Opr src, LIR_Opr result, BasicType type, - LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool unaligned, bool wide); + LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide); void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index f6a624a137efe466176c478e4c9af202fc0ec4c5..c8f2eb3d835066323cb25a6645333c963e913b38 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -647,7 +647,7 @@ void LIRGenerator::new_instance(LIR_Opr dst, ciInstanceKlass* klass, bool is_unr assert(klass->is_loaded(), "must be loaded"); // allocate space for instance - assert(klass->size_helper() >= 0, "illegal instance size"); + assert(klass->size_helper() > 0, "illegal instance size"); const int instance_size = align_object_size(klass->size_helper()); __ allocate_object(dst, scratch1, scratch2, scratch3, scratch4, oopDesc::header_size(), instance_size, klass_reg, !klass->is_initialized(), slow_path); @@ -2088,189 +2088,8 @@ void LIRGenerator::do_RoundFP(RoundFP* x) { } } -// Here UnsafeGetRaw may have x->base() and x->index() be int or long -// on both 64 and 32 bits. Expecting x->base() to be always long on 64bit. 
-void LIRGenerator::do_UnsafeGetRaw(UnsafeGetRaw* x) { - LIRItem base(x->base(), this); - LIRItem idx(this); - base.load_item(); - if (x->has_index()) { - idx.set_instruction(x->index()); - idx.load_nonconstant(); - } - - LIR_Opr reg = rlock_result(x, x->basic_type()); - - int log2_scale = 0; - if (x->has_index()) { - log2_scale = x->log2_scale(); - } - - assert(!x->has_index() || idx.value() == x->index(), "should match"); - - LIR_Opr base_op = base.result(); - LIR_Opr index_op = idx.result(); -#ifndef _LP64 - if (base_op->type() == T_LONG) { - base_op = new_register(T_INT); - __ convert(Bytecodes::_l2i, base.result(), base_op); - } - if (x->has_index()) { - if (index_op->type() == T_LONG) { - LIR_Opr long_index_op = index_op; - if (index_op->is_constant()) { - long_index_op = new_register(T_LONG); - __ move(index_op, long_index_op); - } - index_op = new_register(T_INT); - __ convert(Bytecodes::_l2i, long_index_op, index_op); - } else { - assert(x->index()->type()->tag() == intTag, "must be"); - } - } - // At this point base and index should be all ints. - assert(base_op->type() == T_INT && !base_op->is_constant(), "base should be an non-constant int"); - assert(!x->has_index() || index_op->type() == T_INT, "index should be an int"); -#else - if (x->has_index()) { - if (index_op->type() == T_INT) { - if (!index_op->is_constant()) { - index_op = new_register(T_LONG); - __ convert(Bytecodes::_i2l, idx.result(), index_op); - } - } else { - assert(index_op->type() == T_LONG, "must be"); - if (index_op->is_constant()) { - index_op = new_register(T_LONG); - __ move(idx.result(), index_op); - } - } - } - // At this point base is a long non-constant - // Index is a long register or a int constant. - // We allow the constant to stay an int because that would allow us a more compact encoding by - // embedding an immediate offset in the address expression. If we have a long constant, we have to - // move it into a register first. 
- assert(base_op->type() == T_LONG && !base_op->is_constant(), "base must be a long non-constant"); - assert(!x->has_index() || (index_op->type() == T_INT && index_op->is_constant()) || - (index_op->type() == T_LONG && !index_op->is_constant()), "unexpected index type"); -#endif - - BasicType dst_type = x->basic_type(); - - LIR_Address* addr; - if (index_op->is_constant()) { - assert(log2_scale == 0, "must not have a scale"); - assert(index_op->type() == T_INT, "only int constants supported"); - addr = new LIR_Address(base_op, index_op->as_jint(), dst_type); - } else { -#ifdef X86 - addr = new LIR_Address(base_op, index_op, LIR_Address::Scale(log2_scale), 0, dst_type); -#elif defined(GENERATE_ADDRESS_IS_PREFERRED) - addr = generate_address(base_op, index_op, log2_scale, 0, dst_type); -#else - if (index_op->is_illegal() || log2_scale == 0) { - addr = new LIR_Address(base_op, index_op, dst_type); - } else { - LIR_Opr tmp = new_pointer_register(); - __ shift_left(index_op, log2_scale, tmp); - addr = new LIR_Address(base_op, tmp, dst_type); - } -#endif - } - - if (x->may_be_unaligned() && (dst_type == T_LONG || dst_type == T_DOUBLE)) { - __ unaligned_move(addr, reg); - } else { - if (dst_type == T_OBJECT && x->is_wide()) { - __ move_wide(addr, reg); - } else { - __ move(addr, reg); - } - } -} - - -void LIRGenerator::do_UnsafePutRaw(UnsafePutRaw* x) { - int log2_scale = 0; - BasicType type = x->basic_type(); - - if (x->has_index()) { - log2_scale = x->log2_scale(); - } - - LIRItem base(x->base(), this); - LIRItem value(x->value(), this); - LIRItem idx(this); - - base.load_item(); - if (x->has_index()) { - idx.set_instruction(x->index()); - idx.load_item(); - } - - if (type == T_BYTE || type == T_BOOLEAN) { - value.load_byte_item(); - } else { - value.load_item(); - } - - set_no_result(x); - - LIR_Opr base_op = base.result(); - LIR_Opr index_op = idx.result(); - -#ifdef GENERATE_ADDRESS_IS_PREFERRED - LIR_Address* addr = generate_address(base_op, index_op, log2_scale, 0, x->basic_type()); -#else -#ifndef _LP64 - if (base_op->type() == T_LONG) { - base_op = new_register(T_INT); - __ convert(Bytecodes::_l2i, base.result(), base_op); - } - if (x->has_index()) { - if (index_op->type() == T_LONG) { - index_op = new_register(T_INT); - __ convert(Bytecodes::_l2i, idx.result(), index_op); - } - } - // At this point base and index should be all ints and not constants - assert(base_op->type() == T_INT && !base_op->is_constant(), "base should be an non-constant int"); - assert(!x->has_index() || (index_op->type() == T_INT && !index_op->is_constant()), "index should be an non-constant int"); -#else - if (x->has_index()) { - if (index_op->type() == T_INT) { - index_op = new_register(T_LONG); - __ convert(Bytecodes::_i2l, idx.result(), index_op); - } - } - // At this point base and index are long and non-constant - assert(base_op->type() == T_LONG && !base_op->is_constant(), "base must be a non-constant long"); - assert(!x->has_index() || (index_op->type() == T_LONG && !index_op->is_constant()), "index must be a non-constant long"); -#endif - - if (log2_scale != 0) { - // temporary fix (platform dependent code without shift on Intel would be better) - // TODO: ARM also allows embedded shift in the address - LIR_Opr tmp = new_pointer_register(); - if (TwoOperandLIRForm) { - __ move(index_op, tmp); - index_op = tmp; - } - __ shift_left(index_op, log2_scale, tmp); - if (!TwoOperandLIRForm) { - index_op = tmp; - } - } - - LIR_Address* addr = new LIR_Address(base_op, index_op, x->basic_type()); -#endif // 
!GENERATE_ADDRESS_IS_PREFERRED - __ move(value.result(), addr); -} - - -void LIRGenerator::do_UnsafeGetObject(UnsafeGetObject* x) { +void LIRGenerator::do_UnsafeGet(UnsafeGet* x) { BasicType type = x->basic_type(); LIRItem src(x->object(), this); LIRItem off(x->offset(), this); @@ -2291,12 +2110,28 @@ void LIRGenerator::do_UnsafeGetObject(UnsafeGetObject* x) { } LIR_Opr result = rlock_result(x, type); - access_load_at(decorators, type, - src, off.result(), result); + if (!x->is_raw()) { + access_load_at(decorators, type, src, off.result(), result); + } else { + // Currently it is only used in GraphBuilder::setup_osr_entry_block. + // It reads the value from [src + offset] directly. +#ifdef _LP64 + LIR_Opr offset = new_register(T_LONG); + __ convert(Bytecodes::_i2l, off.result(), offset); +#else + LIR_Opr offset = off.result(); +#endif + LIR_Address* addr = new LIR_Address(src.result(), offset, type); + if (is_reference_type(type)) { + __ move_wide(addr, result); + } else { + __ move(addr, result); + } + } } -void LIRGenerator::do_UnsafePutObject(UnsafePutObject* x) { +void LIRGenerator::do_UnsafePut(UnsafePut* x) { BasicType type = x->basic_type(); LIRItem src(x->object(), this); LIRItem off(x->offset(), this); @@ -2322,7 +2157,7 @@ void LIRGenerator::do_UnsafePutObject(UnsafePutObject* x) { access_store_at(decorators, type, src, off.result(), data.result()); } -void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { +void LIRGenerator::do_UnsafeGetAndSet(UnsafeGetAndSet* x) { BasicType type = x->basic_type(); LIRItem src(x->object(), this); LIRItem off(x->offset(), this); @@ -2855,11 +2690,7 @@ void LIRGenerator::invoke_load_arguments(Invoke* x, LIRItemList* args, const LIR if (addr->type() == T_OBJECT) { __ move_wide(param->result(), addr); } else - if (addr->type() == T_LONG || addr->type() == T_DOUBLE) { - __ unaligned_move(param->result(), addr); - } else { - __ move(param->result(), addr); - } + __ move(param->result(), addr); } } @@ -3368,7 +3199,7 @@ void LIRGenerator::do_ProfileInvoke(ProfileInvoke* x) { } void LIRGenerator::increment_backedge_counter_conditionally(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeEmitInfo* info, int left_bci, int right_bci, int bci) { - if (compilation()->count_backedges()) { + if (compilation()->is_profiling()) { #if defined(X86) && !defined(_LP64) // BEWARE! On 32-bit x86 cmp clobbers its left argument so we need a temp copy. 
LIR_Opr left_copy = new_register(left->type()); @@ -3661,11 +3492,7 @@ LIR_Opr LIRGenerator::call_runtime(BasicTypeArray* signature, LIR_OprList* args, // __ move(arg, tmp); // arg = tmp; // } - if (addr->type() == T_LONG || addr->type() == T_DOUBLE) { - __ unaligned_move(arg, addr); - } else { - __ move(arg, addr); - } + __ move(arg, addr); } } @@ -3703,11 +3530,7 @@ LIR_Opr LIRGenerator::call_runtime(BasicTypeArray* signature, LIRItemList* args, } else { LIR_Address* addr = loc->as_address_ptr(); arg->load_for_store(addr->type()); - if (addr->type() == T_LONG || addr->type() == T_DOUBLE) { - __ unaligned_move(arg->result(), addr); - } else { - __ move(arg->result(), addr); - } + __ move(arg->result(), addr); } } @@ -3756,10 +3579,3 @@ LIR_Opr LIRGenerator::mask_boolean(LIR_Opr array, LIR_Opr value, CodeEmitInfo*& value = value_fixed; return value; } - -LIR_Opr LIRGenerator::maybe_mask_boolean(StoreIndexed* x, LIR_Opr array, LIR_Opr value, CodeEmitInfo*& null_check_info) { - if (x->check_boolean()) { - value = mask_boolean(array, value, null_check_info); - } - return value; -} diff --git a/src/hotspot/share/c1/c1_LIRGenerator.hpp b/src/hotspot/share/c1/c1_LIRGenerator.hpp index 442f66ee861e82d962942a048ae80491a9dbeac1..cefadc9b11443d1eea0ce9c2b9c86d1c55b60880 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.hpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.hpp @@ -397,18 +397,18 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { int bci, bool backedge, bool notify); void increment_event_counter(CodeEmitInfo* info, LIR_Opr step, int bci, bool backedge); void increment_invocation_counter(CodeEmitInfo *info) { - if (compilation()->count_invocations()) { + if (compilation()->is_profiling()) { increment_event_counter(info, LIR_OprFact::intConst(InvocationCounter::count_increment), InvocationEntryBci, false); } } void increment_backedge_counter(CodeEmitInfo* info, int bci) { - if (compilation()->count_backedges()) { + if (compilation()->is_profiling()) { increment_event_counter(info, LIR_OprFact::intConst(InvocationCounter::count_increment), bci, true); } } void increment_backedge_counter_conditionally(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeEmitInfo* info, int left_bci, int right_bci, int bci); void increment_backedge_counter(CodeEmitInfo* info, LIR_Opr step, int bci) { - if (compilation()->count_backedges()) { + if (compilation()->is_profiling()) { increment_event_counter(info, step, bci, true); } } @@ -428,9 +428,6 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { void do_root (Instruction* instr); void walk (Instruction* instr); - void bind_block_entry(BlockBegin* block); - void start_block(BlockBegin* block); - LIR_Opr new_register(BasicType type); LIR_Opr new_register(Value value) { return new_register(as_BasicType(value->type())); } LIR_Opr new_register(ValueType* type) { return new_register(as_BasicType(type)); } @@ -469,7 +466,6 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { void do_SwitchRanges(SwitchRangeArray* x, LIR_Opr value, BlockBegin* default_sux); #ifdef JFR_HAVE_INTRINSICS - void do_ClassIDIntrinsic(Intrinsic* x); void do_getEventWriter(Intrinsic* x); #endif @@ -482,7 +478,6 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { void profile_parameters(Base* x); void profile_parameters_at_call(ProfileCall* x); LIR_Opr mask_boolean(LIR_Opr array, LIR_Opr value, CodeEmitInfo*& null_check_info); - LIR_Opr maybe_mask_boolean(StoreIndexed* x, LIR_Opr array, LIR_Opr value, CodeEmitInfo*& 
null_check_info); public: Compilation* compilation() const { return _compilation; } @@ -581,11 +576,9 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { virtual void do_OsrEntry (OsrEntry* x); virtual void do_ExceptionObject(ExceptionObject* x); virtual void do_RoundFP (RoundFP* x); - virtual void do_UnsafeGetRaw (UnsafeGetRaw* x); - virtual void do_UnsafePutRaw (UnsafePutRaw* x); - virtual void do_UnsafeGetObject(UnsafeGetObject* x); - virtual void do_UnsafePutObject(UnsafePutObject* x); - virtual void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x); + virtual void do_UnsafeGet (UnsafeGet* x); + virtual void do_UnsafePut (UnsafePut* x); + virtual void do_UnsafeGetAndSet(UnsafeGetAndSet* x); virtual void do_ProfileCall (ProfileCall* x); virtual void do_ProfileReturnType (ProfileReturnType* x); virtual void do_ProfileInvoke (ProfileInvoke* x); diff --git a/src/hotspot/share/c1/c1_Optimizer.cpp b/src/hotspot/share/c1/c1_Optimizer.cpp index b8d4ceae19af059baa7bb6469dfe8754ab2a6159..e5c5061e334a7a21f7dc55bb36dca9d393e98021 100644 --- a/src/hotspot/share/c1/c1_Optimizer.cpp +++ b/src/hotspot/share/c1/c1_Optimizer.cpp @@ -26,7 +26,7 @@ #include "c1/c1_Canonicalizer.hpp" #include "c1/c1_Optimizer.hpp" #include "c1/c1_ValueMap.hpp" -#include "c1/c1_ValueSet.inline.hpp" +#include "c1/c1_ValueSet.hpp" #include "c1/c1_ValueStack.hpp" #include "memory/resourceArea.hpp" #include "utilities/bitMap.inline.hpp" @@ -529,11 +529,9 @@ public: void do_OsrEntry (OsrEntry* x); void do_ExceptionObject(ExceptionObject* x); void do_RoundFP (RoundFP* x); - void do_UnsafeGetRaw (UnsafeGetRaw* x); - void do_UnsafePutRaw (UnsafePutRaw* x); - void do_UnsafeGetObject(UnsafeGetObject* x); - void do_UnsafePutObject(UnsafePutObject* x); - void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x); + void do_UnsafeGet (UnsafeGet* x); + void do_UnsafePut (UnsafePut* x); + void do_UnsafeGetAndSet(UnsafeGetAndSet* x); void do_ProfileCall (ProfileCall* x); void do_ProfileReturnType (ProfileReturnType* x); void do_ProfileInvoke (ProfileInvoke* x); @@ -714,11 +712,9 @@ void NullCheckVisitor::do_Base (Base* x) {} void NullCheckVisitor::do_OsrEntry (OsrEntry* x) {} void NullCheckVisitor::do_ExceptionObject(ExceptionObject* x) { nce()->handle_ExceptionObject(x); } void NullCheckVisitor::do_RoundFP (RoundFP* x) {} -void NullCheckVisitor::do_UnsafeGetRaw (UnsafeGetRaw* x) {} -void NullCheckVisitor::do_UnsafePutRaw (UnsafePutRaw* x) {} -void NullCheckVisitor::do_UnsafeGetObject(UnsafeGetObject* x) {} -void NullCheckVisitor::do_UnsafePutObject(UnsafePutObject* x) {} -void NullCheckVisitor::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) {} +void NullCheckVisitor::do_UnsafeGet (UnsafeGet* x) {} +void NullCheckVisitor::do_UnsafePut (UnsafePut* x) {} +void NullCheckVisitor::do_UnsafeGetAndSet(UnsafeGetAndSet* x) {} void NullCheckVisitor::do_ProfileCall (ProfileCall* x) { nce()->clear_last_explicit_null_check(); nce()->handle_ProfileCall(x); } void NullCheckVisitor::do_ProfileReturnType (ProfileReturnType* x) { nce()->handle_ProfileReturnType(x); } diff --git a/src/hotspot/share/c1/c1_RangeCheckElimination.cpp b/src/hotspot/share/c1/c1_RangeCheckElimination.cpp index 198b5afeb7b755161e6c51725807cdb6d5a2737f..fc5df4a420133f139b97ba55a0a7d425fb0ea59e 100644 --- a/src/hotspot/share/c1/c1_RangeCheckElimination.cpp +++ b/src/hotspot/share/c1/c1_RangeCheckElimination.cpp @@ -1001,7 +1001,7 @@ void RangeCheckEliminator::calc_bounds(BlockBegin *block, BlockBegin *loop_heade } else { // Has no upper bound Instruction *instr = 
ai->length(); - if (instr != NULL) instr = ai->array(); + if (instr == NULL) instr = ai->array(); update_bound(pushed, ai->index(), Instruction::lss, instr, 0); } } diff --git a/src/hotspot/share/c1/c1_RangeCheckElimination.hpp b/src/hotspot/share/c1/c1_RangeCheckElimination.hpp index f6d5168eda5688f705112c9a2063c0423159f610..a00163c14349404826ce22155b8e0033e4f6ef73 100644 --- a/src/hotspot/share/c1/c1_RangeCheckElimination.hpp +++ b/src/hotspot/share/c1/c1_RangeCheckElimination.hpp @@ -130,8 +130,6 @@ public: void do_MonitorEnter (MonitorEnter* x) { /* nothing to do */ }; void do_MonitorExit (MonitorExit* x) { /* nothing to do */ }; void do_Invoke (Invoke* x) { /* nothing to do */ }; - void do_UnsafePutRaw (UnsafePutRaw* x) { /* nothing to do */ }; - void do_UnsafePutObject(UnsafePutObject* x) { /* nothing to do */ }; void do_Intrinsic (Intrinsic* x) { /* nothing to do */ }; void do_Local (Local* x) { /* nothing to do */ }; void do_LoadField (LoadField* x) { /* nothing to do */ }; @@ -160,9 +158,9 @@ public: void do_OsrEntry (OsrEntry* x) { /* nothing to do */ }; void do_ExceptionObject(ExceptionObject* x) { /* nothing to do */ }; void do_RoundFP (RoundFP* x) { /* nothing to do */ }; - void do_UnsafeGetRaw (UnsafeGetRaw* x) { /* nothing to do */ }; - void do_UnsafeGetObject(UnsafeGetObject* x) { /* nothing to do */ }; - void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { /* nothing to do */ }; + void do_UnsafePut (UnsafePut* x) { /* nothing to do */ }; + void do_UnsafeGet (UnsafeGet* x) { /* nothing to do */ }; + void do_UnsafeGetAndSet(UnsafeGetAndSet* x) { /* nothing to do */ }; void do_ProfileCall (ProfileCall* x) { /* nothing to do */ }; void do_ProfileReturnType (ProfileReturnType* x) { /* nothing to do */ }; void do_ProfileInvoke (ProfileInvoke* x) { /* nothing to do */ }; diff --git a/src/hotspot/share/c1/c1_Runtime1.cpp b/src/hotspot/share/c1/c1_Runtime1.cpp index fe84de26c6d1131698d111ce6deb982ebc9cb930..072b3d3900a72b4b55252d782ccebf2a0c00697f 100644 --- a/src/hotspot/share/c1/c1_Runtime1.cpp +++ b/src/hotspot/share/c1/c1_Runtime1.cpp @@ -59,7 +59,6 @@ #include "oops/oop.inline.hpp" #include "prims/jvmtiExport.hpp" #include "runtime/atomic.hpp" -#include "runtime/biasedLocking.hpp" #include "runtime/fieldDescriptor.inline.hpp" #include "runtime/frame.inline.hpp" #include "runtime/handles.inline.hpp" @@ -348,8 +347,11 @@ const char* Runtime1::name_for_address(address entry) { JRT_ENTRY(void, Runtime1::new_instance(JavaThread* current, Klass* klass)) - NOT_PRODUCT(_new_instance_slowcase_cnt++;) - +#ifndef PRODUCT + if (PrintC1Statistics) { + _new_instance_slowcase_cnt++; + } +#endif assert(klass->is_klass(), "not a class"); Handle holder(current, klass->klass_holder()); // keep the klass alive InstanceKlass* h = InstanceKlass::cast(klass); @@ -363,7 +365,11 @@ JRT_END JRT_ENTRY(void, Runtime1::new_type_array(JavaThread* current, Klass* klass, jint length)) - NOT_PRODUCT(_new_type_array_slowcase_cnt++;) +#ifndef PRODUCT + if (PrintC1Statistics) { + _new_type_array_slowcase_cnt++; + } +#endif // Note: no handle for klass needed since they are not used // anymore after new_typeArray() and no GC can happen before. // (This may have to change if this code changes!) 
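Every slow-path counter in the c1_Runtime1.cpp hunks of this patch follows the same conversion: the unconditional NOT_PRODUCT(counter++) becomes an increment guarded by PrintC1Statistics, so non-product builds only pay for the bookkeeping when statistics were actually requested. A minimal sketch of the pattern, with _counter standing in for any of the fields touched in this file:

#ifndef PRODUCT
  if (PrintC1Statistics) {   // previously: NOT_PRODUCT(_counter++;)
    _counter++;              // counted only when -XX:+PrintC1Statistics is set
  }
#endif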
@@ -381,8 +387,11 @@ JRT_END JRT_ENTRY(void, Runtime1::new_object_array(JavaThread* current, Klass* array_klass, jint length)) - NOT_PRODUCT(_new_object_array_slowcase_cnt++;) - +#ifndef PRODUCT + if (PrintC1Statistics) { + _new_object_array_slowcase_cnt++; + } +#endif // Note: no handle for klass needed since they are not used // anymore after new_objArray() and no GC can happen before. // (This may have to change if this code changes!) @@ -400,8 +409,11 @@ JRT_END JRT_ENTRY(void, Runtime1::new_multi_array(JavaThread* current, Klass* klass, int rank, jint* dims)) - NOT_PRODUCT(_new_multi_array_slowcase_cnt++;) - +#ifndef PRODUCT + if (PrintC1Statistics) { + _new_multi_array_slowcase_cnt++; + } +#endif assert(klass->is_klass(), "not a class"); assert(rank >= 1, "rank must be nonzero"); Handle holder(current, klass->klass_holder()); // keep the klass alive @@ -653,7 +665,11 @@ address Runtime1::exception_handler_for_pc(JavaThread* current) { JRT_ENTRY(void, Runtime1::throw_range_check_exception(JavaThread* current, int index, arrayOopDesc* a)) - NOT_PRODUCT(_throw_range_check_exception_count++;) +#ifndef PRODUCT + if (PrintC1Statistics) { + _throw_range_check_exception_count++; + } +#endif const int len = 35; assert(len < strlen("Index %d out of bounds for length %d"), "Must allocate more space for message."); char message[2 * jintAsStringSize + len]; @@ -663,7 +679,11 @@ JRT_END JRT_ENTRY(void, Runtime1::throw_index_exception(JavaThread* current, int index)) - NOT_PRODUCT(_throw_index_exception_count++;) +#ifndef PRODUCT + if (PrintC1Statistics) { + _throw_index_exception_count++; + } +#endif char message[16]; sprintf(message, "%d", index); SharedRuntime::throw_and_post_jvmti_exception(current, vmSymbols::java_lang_IndexOutOfBoundsException(), message); @@ -671,19 +691,31 @@ JRT_END JRT_ENTRY(void, Runtime1::throw_div0_exception(JavaThread* current)) - NOT_PRODUCT(_throw_div0_exception_count++;) +#ifndef PRODUCT + if (PrintC1Statistics) { + _throw_div0_exception_count++; + } +#endif SharedRuntime::throw_and_post_jvmti_exception(current, vmSymbols::java_lang_ArithmeticException(), "/ by zero"); JRT_END JRT_ENTRY(void, Runtime1::throw_null_pointer_exception(JavaThread* current)) - NOT_PRODUCT(_throw_null_pointer_exception_count++;) +#ifndef PRODUCT + if (PrintC1Statistics) { + _throw_null_pointer_exception_count++; + } +#endif SharedRuntime::throw_and_post_jvmti_exception(current, vmSymbols::java_lang_NullPointerException()); JRT_END JRT_ENTRY(void, Runtime1::throw_class_cast_exception(JavaThread* current, oopDesc* object)) - NOT_PRODUCT(_throw_class_cast_exception_count++;) +#ifndef PRODUCT + if (PrintC1Statistics) { + _throw_class_cast_exception_count++; + } +#endif ResourceMark rm(current); char* message = SharedRuntime::generate_class_cast_message(current, object->klass()); SharedRuntime::throw_and_post_jvmti_exception(current, vmSymbols::java_lang_ClassCastException(), message); @@ -691,14 +723,22 @@ JRT_END JRT_ENTRY(void, Runtime1::throw_incompatible_class_change_error(JavaThread* current)) - NOT_PRODUCT(_throw_incompatible_class_change_error_count++;) +#ifndef PRODUCT + if (PrintC1Statistics) { + _throw_incompatible_class_change_error_count++; + } +#endif ResourceMark rm(current); SharedRuntime::throw_and_post_jvmti_exception(current, vmSymbols::java_lang_IncompatibleClassChangeError()); JRT_END JRT_BLOCK_ENTRY(void, Runtime1::monitorenter(JavaThread* current, oopDesc* obj, BasicObjectLock* lock)) - NOT_PRODUCT(_monitorenter_slowcase_cnt++;) +#ifndef PRODUCT + if (PrintC1Statistics) 
{ + _monitorenter_slowcase_cnt++; + } +#endif if (!UseFastLocking) { lock->set_obj(obj); } @@ -708,7 +748,11 @@ JRT_END JRT_LEAF(void, Runtime1::monitorexit(JavaThread* current, BasicObjectLock* lock)) - NOT_PRODUCT(_monitorexit_slowcase_cnt++;) +#ifndef PRODUCT + if (PrintC1Statistics) { + _monitorexit_slowcase_cnt++; + } +#endif assert(current->last_Java_sp(), "last_Java_sp must be set"); oop obj = lock->obj(); assert(oopDesc::is_oop(obj), "must be NULL or an object"); @@ -860,7 +904,11 @@ static Klass* resolve_field_return_klass(const methodHandle& caller, int bci, TR // patch only naturally aligned words, as single, full-word writes. JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, Runtime1::StubID stub_id )) - NOT_PRODUCT(_patch_code_slowcase_cnt++;) +#ifndef PRODUCT + if (PrintC1Statistics) { + _patch_code_slowcase_cnt++; + } +#endif ResourceMark rm(current); RegisterMap reg_map(current, false); @@ -1255,7 +1303,11 @@ JRT_END #else // DEOPTIMIZE_WHEN_PATCHING void Runtime1::patch_code(JavaThread* current, Runtime1::StubID stub_id) { - NOT_PRODUCT(_patch_code_slowcase_cnt++); +#ifndef PRODUCT + if (PrintC1Statistics) { + _patch_code_slowcase_cnt++; + } +#endif // Enable WXWrite: the function is called by c1 stub as a runtime function // (see another implementation above). diff --git a/src/hotspot/share/c1/c1_ValueMap.cpp b/src/hotspot/share/c1/c1_ValueMap.cpp index a0a012ce81386ba3735e6b03aaa6ac2224d71239..f1961663179b547211a2b6389a36f7fe826d1624 100644 --- a/src/hotspot/share/c1/c1_ValueMap.cpp +++ b/src/hotspot/share/c1/c1_ValueMap.cpp @@ -26,7 +26,7 @@ #include "c1/c1_Canonicalizer.hpp" #include "c1/c1_IR.hpp" #include "c1/c1_ValueMap.hpp" -#include "c1/c1_ValueSet.inline.hpp" +#include "c1/c1_ValueSet.hpp" #include "c1/c1_ValueStack.hpp" #ifndef PRODUCT diff --git a/src/hotspot/share/c1/c1_ValueMap.hpp b/src/hotspot/share/c1/c1_ValueMap.hpp index 52f85d4b812372209dde345df83430313d86d2a2..00034c24ed16aa5c2886d6a596ca0866c873a95a 100644 --- a/src/hotspot/share/c1/c1_ValueMap.hpp +++ b/src/hotspot/share/c1/c1_ValueMap.hpp @@ -154,11 +154,9 @@ class ValueNumberingVisitor: public InstructionVisitor { void do_MonitorEnter (MonitorEnter* x) { kill_memory(); } void do_MonitorExit (MonitorExit* x) { kill_memory(); } void do_Invoke (Invoke* x) { kill_memory(); } - void do_UnsafePutRaw (UnsafePutRaw* x) { kill_memory(); } - void do_UnsafePutObject(UnsafePutObject* x) { kill_memory(); } - void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { kill_memory(); } - void do_UnsafeGetRaw (UnsafeGetRaw* x) { /* nothing to do */ } - void do_UnsafeGetObject(UnsafeGetObject* x) { + void do_UnsafePut (UnsafePut* x) { kill_memory(); } + void do_UnsafeGetAndSet(UnsafeGetAndSet* x) { kill_memory(); } + void do_UnsafeGet (UnsafeGet* x) { if (x->is_volatile()) { // the JMM requires this kill_memory(); } diff --git a/src/hotspot/share/c1/c1_ValueSet.hpp b/src/hotspot/share/c1/c1_ValueSet.hpp index 71b67f2401732288d08a383bfdbce6bc6d4ae799..afd8d081dc5d2b8be381f94c5863fb9eb5224b69 100644 --- a/src/hotspot/share/c1/c1_ValueSet.hpp +++ b/src/hotspot/share/c1/c1_ValueSet.hpp @@ -28,6 +28,7 @@ #include "c1/c1_Instruction.hpp" #include "memory/allocation.hpp" #include "utilities/bitMap.hpp" +#include "utilities/bitMap.inline.hpp" // A ValueSet is a simple abstraction on top of a BitMap representing // a set of Instructions. 
Currently it assumes that the number of @@ -39,17 +40,21 @@ class ValueSet: public CompilationResourceObj { ResourceBitMap _map; public: - ValueSet(); + ValueSet() : _map(Instruction::number_of_instructions()) {} - ValueSet* copy(); - bool contains(Value x); - void put (Value x); - void remove (Value x); - bool set_intersect(ValueSet* other); - void set_union(ValueSet* other); - void clear (); - void set_from(ValueSet* other); - bool equals (ValueSet* other); + ValueSet* copy() { + ValueSet* res = new ValueSet(); + res->_map.set_from(_map); + return res; + } + bool contains(Value x) { return _map.at(x->id()); } + void put(Value x) { _map.set_bit(x->id()); } + void remove(Value x) { _map.clear_bit(x->id()); } + bool set_intersect(ValueSet* other) { return _map.set_intersection_with_result(other->_map); } + void set_union(ValueSet* other) { _map.set_union(other->_map); } + void clear() { _map.clear(); } + void set_from(ValueSet* other) { _map.set_from(other->_map); } + bool equals(ValueSet* other) { return _map.is_same(other->_map); } }; #endif // SHARE_C1_C1_VALUESET_HPP diff --git a/src/hotspot/share/c1/c1_ValueSet.inline.hpp b/src/hotspot/share/c1/c1_ValueSet.inline.hpp deleted file mode 100644 index 4daed86b561f89c08ed38e428bfb532c3145621d..0000000000000000000000000000000000000000 --- a/src/hotspot/share/c1/c1_ValueSet.inline.hpp +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#ifndef SHARE_C1_C1_VALUESET_INLINE_HPP -#define SHARE_C1_C1_VALUESET_INLINE_HPP - -#include "c1/c1_ValueSet.hpp" - -#include "c1/c1_Instruction.hpp" -#include "utilities/bitMap.inline.hpp" - -inline ValueSet::ValueSet() : _map(Instruction::number_of_instructions()) { -} - -inline ValueSet* ValueSet::copy() { - ValueSet* res = new ValueSet(); - res->_map.set_from(_map); - return res; -} - -inline bool ValueSet::contains(Value x) { - return _map.at(x->id()); -} - -inline void ValueSet::put(Value x) { - _map.set_bit(x->id()); -} - -inline void ValueSet::remove(Value x) { - _map.clear_bit(x->id()); -} - -inline bool ValueSet::set_intersect(ValueSet* other) { - return _map.set_intersection_with_result(other->_map); -} - -inline void ValueSet::set_union(ValueSet* other) { - _map.set_union(other->_map); -} - -inline void ValueSet::clear() { - _map.clear(); -} - -inline void ValueSet::set_from(ValueSet* other) { - _map.set_from(other->_map); -} - -inline bool ValueSet::equals(ValueSet* other) { - return _map.is_same(other->_map); -} - -#endif // SHARE_C1_C1_VALUESET_INLINE_HPP diff --git a/src/hotspot/share/c1/c1_ValueType.cpp b/src/hotspot/share/c1/c1_ValueType.cpp index 6de787f9a0901d3d205b41b480eb8c5ebd861c40..f3135eada63b5de697a2ad7d7caa65ad6accf1cb 100644 --- a/src/hotspot/share/c1/c1_ValueType.cpp +++ b/src/hotspot/share/c1/c1_ValueType.cpp @@ -80,12 +80,6 @@ ValueType* ValueType::meet(ValueType* y) const { } -ValueType* ValueType::join(ValueType* y) const { - Unimplemented(); - return NULL; -} - - ciType* ObjectConstant::exact_type() const { ciObject* c = constant_value(); return (c != NULL && !c->is_null_object()) ? c->klass() : NULL; diff --git a/src/hotspot/share/c1/c1_ValueType.hpp b/src/hotspot/share/c1/c1_ValueType.hpp index 05dfd483bc735271b2994a530724c1855428ccb3..9eb4d99743062f9fc36fa022a1a5d82e80eb2d3f 100644 --- a/src/hotspot/share/c1/c1_ValueType.hpp +++ b/src/hotspot/share/c1/c1_ValueType.hpp @@ -34,7 +34,6 @@ class ValueType; class VoidType; class IntType; class IntConstant; -class IntInterval; class LongType; class LongConstant; class FloatType; @@ -53,8 +52,6 @@ class ClassType; class ClassConstant; class MethodType; class MethodConstant; -class MethodDataType; -class MethodDataConstant; class AddressType; class AddressConstant; class IllegalType; @@ -132,7 +129,6 @@ class ValueType: public CompilationResourceObj { bool is_instance() { return as_InstanceType() != NULL; } bool is_class() { return as_ClassType() != NULL; } bool is_method() { return as_MethodType() != NULL; } - bool is_method_data() { return as_MethodDataType() != NULL; } bool is_address() { return as_AddressType() != NULL; } bool is_illegal() { return tag() == illegalTag; } @@ -155,10 +151,8 @@ class ValueType: public CompilationResourceObj { virtual ClassType* as_ClassType() { return NULL; } virtual MetadataType* as_MetadataType() { return NULL; } virtual MethodType* as_MethodType() { return NULL; } - virtual MethodDataType* as_MethodDataType() { return NULL; } virtual AddressType* as_AddressType() { return NULL; } virtual IllegalType* as_IllegalType() { return NULL; } - virtual IntConstant* as_IntConstant() { return NULL; } virtual LongConstant* as_LongConstant() { return NULL; } virtual FloatConstant* as_FloatConstant() { return NULL; } @@ -167,14 +161,12 @@ class ValueType: public CompilationResourceObj { virtual InstanceConstant* as_InstanceConstant(){ return NULL; } virtual ClassConstant* as_ClassConstant() { return NULL; } virtual MethodConstant* as_MethodConstant() { return NULL; } - 
virtual MethodDataConstant* as_MethodDataConstant() { return NULL; } virtual ArrayConstant* as_ArrayConstant() { return NULL; } virtual StableArrayConstant* as_StableArrayConstant() { return NULL; } virtual AddressConstant* as_AddressConstant() { return NULL; } // type operations ValueType* meet(ValueType* y) const; - ValueType* join(ValueType* y) const; // debugging void print(outputStream* s = tty) { s->print("%s", name()); } @@ -215,25 +207,6 @@ class IntConstant: public IntType { }; -class IntInterval: public IntType { - private: - jint _beg; - jint _end; - - public: - IntInterval(jint beg, jint end) { - assert(beg <= end, "illegal interval"); - _beg = beg; - _end = end; - } - - jint beg() const { return _beg; } - jint end() const { return _end; } - - virtual bool is_interval() const { return true; } -}; - - class LongType: public ValueType { public: LongType(): ValueType(longTag, 2) {} @@ -451,28 +424,6 @@ class MethodConstant: public MethodType { }; -class MethodDataType: public MetadataType { - public: - virtual MethodDataType* as_MethodDataType() { return this; } -}; - - -class MethodDataConstant: public MethodDataType { - private: - ciMethodData* _value; - - public: - MethodDataConstant(ciMethodData* value) { _value = value; } - - ciMethodData* value() const { return _value; } - - virtual bool is_constant() const { return true; } - - virtual MethodDataConstant* as_MethodDataConstant() { return this; } - virtual ciMetadata* constant_value() const { return _value; } -}; - - class AddressType: public ValueType { public: AddressType(): ValueType(addressTag, 1) {} diff --git a/src/hotspot/share/c1/c1_globals.hpp b/src/hotspot/share/c1/c1_globals.hpp index e41091ebfcd08a8cbdcf57d5ad6ec986ad754e4c..2b5de079a0c9410e490dafb9d2bb9d60ad21c056 100644 --- a/src/hotspot/share/c1/c1_globals.hpp +++ b/src/hotspot/share/c1/c1_globals.hpp @@ -197,9 +197,6 @@ develop(bool, CommentedAssembly, trueInDebug, \ "Show extra info in PrintNMethods output") \ \ - develop(bool, LIRTracePeephole, false, \ - "Trace peephole optimizer") \ - \ develop(bool, LIRTraceExecution, false, \ "add LIR code which logs the execution of blocks") \ \ @@ -294,12 +291,6 @@ develop(bool, TraceFPURegisterUsage, false, \ "Trace usage of FPU registers at start of blocks (intel only)") \ \ - develop(bool, OptimizeUnsafes, true, \ - "Optimize raw unsafe ops") \ - \ - develop(bool, PrintUnsafeOptimization, false, \ - "Print optimization of raw unsafe ops") \ - \ develop(intx, InstructionCountCutoff, 37000, \ "If GraphBuilder adds this many instructions, bails out") \ range(0, max_jint) \ diff --git a/src/hotspot/share/cds/archiveBuilder.cpp b/src/hotspot/share/cds/archiveBuilder.cpp index 699926fcfe0d78c2d76796668df23aaf00009084..cb5c0aeb8c78008f044e0d009bddbd102fa38bbb 100644 --- a/src/hotspot/share/cds/archiveBuilder.cpp +++ b/src/hotspot/share/cds/archiveBuilder.cpp @@ -115,20 +115,16 @@ public: _builder(builder), _dumped_obj(dumped_obj), _start_idx(start_idx) {} bool do_bit(BitMap::idx_t bit_offset) { - uintx FLAG_MASK = 0x03; // See comments around MetaspaceClosure::FLAG_MASK size_t field_offset = size_t(bit_offset - _start_idx) * sizeof(address); address* ptr_loc = (address*)(_dumped_obj + field_offset); - uintx old_p_and_bits = (uintx)(*ptr_loc); - uintx flag_bits = (old_p_and_bits & FLAG_MASK); - address old_p = (address)(old_p_and_bits & (~FLAG_MASK)); + address old_p = *ptr_loc; address new_p = _builder->get_dumped_addr(old_p); - uintx new_p_and_bits = ((uintx)new_p) | flag_bits; log_trace(cds)("Ref: [" PTR_FORMAT "] -> " 
PTR_FORMAT " => " PTR_FORMAT, p2i(ptr_loc), p2i(old_p), p2i(new_p)); - ArchivePtrMarker::set_and_mark_pointer(ptr_loc, (address)(new_p_and_bits)); + ArchivePtrMarker::set_and_mark_pointer(ptr_loc, new_p); return true; // keep iterating the bitmap } }; @@ -160,10 +156,7 @@ ArchiveBuilder::ArchiveBuilder() : _ro_region("ro", MAX_SHARED_DELTA), _rw_src_objs(), _ro_src_objs(), - _src_obj_table(INITIAL_TABLE_SIZE), - _num_instance_klasses(0), - _num_obj_array_klasses(0), - _num_type_array_klasses(0), + _src_obj_table(INITIAL_TABLE_SIZE, MAX_TABLE_SIZE), _total_closed_heap_region_size(0), _total_open_heap_region_size(0), _estimated_metaspaceobj_bytes(0), @@ -190,6 +183,9 @@ ArchiveBuilder::~ArchiveBuilder() { delete _klasses; delete _symbols; delete _special_refs; + if (_shared_rs.is_reserved()) { + _shared_rs.release(); + } } bool ArchiveBuilder::is_dumping_full_module_graph() { @@ -219,16 +215,8 @@ bool ArchiveBuilder::gather_klass_and_symbol(MetaspaceClosure::Ref* ref, bool re assert(klass->is_klass(), "must be"); if (!is_excluded(klass)) { _klasses->append(klass); - if (klass->is_instance_klass()) { - _num_instance_klasses ++; - } else if (klass->is_objArray_klass()) { - _num_obj_array_klasses ++; - } else { - assert(klass->is_typeArray_klass(), "sanity"); - _num_type_array_klasses ++; - } } - // See RunTimeSharedClassInfo::get_for() + // See RunTimeClassInfo::get_for() _estimated_metaspaceobj_bytes += align_up(BytesPerWord, SharedSpaceObjectAlignment); } else if (ref->msotype() == MetaspaceObj::SymbolType) { // Make sure the symbol won't be GC'ed while we are dumping the archive. @@ -255,12 +243,6 @@ void ArchiveBuilder::gather_klasses_and_symbols() { #endif doit.finish(); - log_info(cds)("Number of classes %d", _num_instance_klasses + _num_obj_array_klasses + _num_type_array_klasses); - log_info(cds)(" instance classes = %5d", _num_instance_klasses); - log_info(cds)(" obj array classes = %5d", _num_obj_array_klasses); - log_info(cds)(" type array classes = %5d", _num_type_array_klasses); - log_info(cds)(" symbols = %5d", _symbols->length()); - if (DumpSharedSpaces) { // To ensure deterministic contents in the static archive, we need to ensure that // we iterate the MetaspaceObjs in a deterministic order. 
It doesn't matter where @@ -319,7 +301,7 @@ void ArchiveBuilder::sort_klasses() { } size_t ArchiveBuilder::estimate_archive_size() { - // size of the symbol table and two dictionaries, plus the RunTimeSharedClassInfo's + // size of the symbol table and two dictionaries, plus the RunTimeClassInfo's size_t symbol_table_est = SymbolTable::estimate_size_for_archive(); size_t dictionary_est = SystemDictionaryShared::estimate_size_for_archive(); _estimated_hashtable_bytes = symbol_table_est + dictionary_est; @@ -463,9 +445,9 @@ bool ArchiveBuilder::gather_one_source_obj(MetaspaceClosure::Ref* enclosing_ref, FollowMode follow_mode = get_follow_mode(ref); SourceObjInfo src_info(ref, read_only, follow_mode); bool created; - SourceObjInfo* p = _src_obj_table.add_if_absent(src_obj, src_info, &created); + SourceObjInfo* p = _src_obj_table.put_if_absent(src_obj, src_info, &created); if (created) { - if (_src_obj_table.maybe_grow(MAX_TABLE_SIZE)) { + if (_src_obj_table.maybe_grow()) { log_info(cds, hashtables)("Expanded _src_obj_table table to %d", _src_obj_table.table_size()); } } @@ -632,8 +614,8 @@ void ArchiveBuilder::make_shallow_copy(DumpRegion *dump_region, SourceObjInfo* s oldtop = dump_region->top(); if (ref->msotype() == MetaspaceObj::ClassType) { // Save a pointer immediate in front of an InstanceKlass, so - // we can do a quick lookup from InstanceKlass* -> RunTimeSharedClassInfo* - // without building another hashtable. See RunTimeSharedClassInfo::get_for() + // we can do a quick lookup from InstanceKlass* -> RunTimeClassInfo* + // without building another hashtable. See RunTimeClassInfo::get_for() // in systemDictionaryShared.cpp. Klass* klass = (Klass*)src; if (klass->is_instance_klass()) { @@ -659,7 +641,7 @@ void ArchiveBuilder::make_shallow_copy(DumpRegion *dump_region, SourceObjInfo* s } address ArchiveBuilder::get_dumped_addr(address src_obj) const { - SourceObjInfo* p = _src_obj_table.lookup(src_obj); + SourceObjInfo* p = _src_obj_table.get(src_obj); assert(p != NULL, "must be"); return p->dumped_addr(); @@ -728,31 +710,85 @@ void ArchiveBuilder::relocate_vm_classes() { } void ArchiveBuilder::make_klasses_shareable() { + int num_instance_klasses = 0; + int num_boot_klasses = 0; + int num_platform_klasses = 0; + int num_app_klasses = 0; + int num_hidden_klasses = 0; + int num_unlinked_klasses = 0; + int num_unregistered_klasses = 0; + int num_obj_array_klasses = 0; + int num_type_array_klasses = 0; + for (int i = 0; i < klasses()->length(); i++) { + const char* type; + const char* unlinked = ""; + const char* hidden = ""; Klass* k = klasses()->at(i); k->remove_java_mirror(); if (k->is_objArray_klass()) { // InstanceKlass and TypeArrayKlass will in turn call remove_unshareable_info // on their array classes. + num_obj_array_klasses ++; + type = "array"; } else if (k->is_typeArray_klass()) { + num_type_array_klasses ++; + type = "array"; k->remove_unshareable_info(); } else { assert(k->is_instance_klass(), " must be"); + num_instance_klasses ++; InstanceKlass* ik = InstanceKlass::cast(k); if (DynamicDumpSharedSpaces) { // For static dump, class loader type are already set. 
ik->assign_class_loader_type(); } + if (ik->is_shared_boot_class()) { + type = "boot"; + num_boot_klasses ++; + } else if (ik->is_shared_platform_class()) { + type = "plat"; + num_platform_klasses ++; + } else if (ik->is_shared_app_class()) { + type = "app"; + num_app_klasses ++; + } else { + assert(ik->is_shared_unregistered_class(), "must be"); + type = "unreg"; + num_unregistered_klasses ++; + } + + if (!ik->is_linked()) { + num_unlinked_klasses ++; + unlinked = " ** unlinked"; + } + + if (ik->is_hidden()) { + num_hidden_klasses ++; + hidden = " ** hidden"; + } MetaspaceShared::rewrite_nofast_bytecodes_and_calculate_fingerprints(Thread::current(), ik); ik->remove_unshareable_info(); + } - if (log_is_enabled(Debug, cds, class)) { - ResourceMark rm; - log_debug(cds, class)("klasses[%4d] = " PTR_FORMAT " %s", i, p2i(to_requested(ik)), ik->external_name()); - } + if (log_is_enabled(Debug, cds, class)) { + ResourceMark rm; + log_debug(cds, class)("klasses[%5d] = " PTR_FORMAT " %-5s %s%s%s", i, p2i(to_requested(k)), type, k->external_name(), hidden, unlinked); } } + + log_info(cds)("Number of classes %d", num_instance_klasses + num_obj_array_klasses + num_type_array_klasses); + log_info(cds)(" instance classes = %5d", num_instance_klasses); + log_info(cds)(" boot = %5d", num_boot_klasses); + log_info(cds)(" app = %5d", num_app_klasses); + log_info(cds)(" platform = %5d", num_platform_klasses); + log_info(cds)(" unregistered = %5d", num_unregistered_klasses); + log_info(cds)(" (hidden) = %5d", num_hidden_klasses); + log_info(cds)(" (unlinked) = %5d", num_unlinked_klasses); + log_info(cds)(" obj array classes = %5d", num_obj_array_klasses); + log_info(cds)(" type array classes = %5d", num_type_array_klasses); + log_info(cds)(" symbols = %5d", _symbols->length()); } uintx ArchiveBuilder::buffer_to_offset(address p) const { @@ -1077,25 +1113,21 @@ void ArchiveBuilder::write_archive(FileMapInfo* mapinfo, bitmap_size_in_bytes); if (closed_heap_regions != NULL) { - _total_closed_heap_region_size = mapinfo->write_archive_heap_regions( + _total_closed_heap_region_size = mapinfo->write_heap_regions( closed_heap_regions, closed_heap_oopmaps, - MetaspaceShared::first_closed_archive_heap_region, - MetaspaceShared::max_closed_archive_heap_region); - _total_open_heap_region_size = mapinfo->write_archive_heap_regions( + MetaspaceShared::first_closed_heap_region, + MetaspaceShared::max_num_closed_heap_regions); + _total_open_heap_region_size = mapinfo->write_heap_regions( open_heap_regions, open_heap_oopmaps, - MetaspaceShared::first_open_archive_heap_region, - MetaspaceShared::max_open_archive_heap_region); + MetaspaceShared::first_open_heap_region, + MetaspaceShared::max_num_open_heap_regions); } print_region_stats(mapinfo, closed_heap_regions, open_heap_regions); mapinfo->set_requested_base((char*)MetaspaceShared::requested_base_address()); - if (mapinfo->header()->magic() == CDS_DYNAMIC_ARCHIVE_MAGIC) { - mapinfo->set_header_base_archive_name_size(strlen(Arguments::GetSharedArchivePath()) + 1); - mapinfo->set_header_base_archive_is_default(FLAG_IS_DEFAULT(SharedArchiveFile)); - } mapinfo->set_header_crc(mapinfo->compute_header_crc()); // After this point, we should not write any data into mapinfo->header() since this // would corrupt its checksum we have calculated before. 
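The make_klasses_shareable() hunk above moves the class-count bookkeeping out of ArchiveBuilder's fields and into locals computed while unshareable state is stripped, and extends it with loader-type, hidden, and unlinked counts. A condensed sketch of the per-InstanceKlass classification, using the same predicates and names as the patch:

// Condensed from the hunk above; ik is the InstanceKlass being processed.
if (ik->is_shared_boot_class())          { type = "boot";  num_boot_klasses++;         }
else if (ik->is_shared_platform_class()) { type = "plat";  num_platform_klasses++;     }
else if (ik->is_shared_app_class())      { type = "app";   num_app_klasses++;          }
else /* unregistered */                  { type = "unreg"; num_unregistered_klasses++; }
if (!ik->is_linked()) { num_unlinked_klasses++; unlinked = " ** unlinked"; }
if (ik->is_hidden())  { num_hidden_klasses++;   hidden   = " ** hidden";   }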
@@ -1152,12 +1184,12 @@ void ArchiveBuilder::print_bitmap_region_stats(size_t size, size_t total_size) { size, size/double(total_size)*100.0, size); } -void ArchiveBuilder::print_heap_region_stats(GrowableArray *heap_mem, +void ArchiveBuilder::print_heap_region_stats(GrowableArray* regions, const char *name, size_t total_size) { - int arr_len = heap_mem == NULL ? 0 : heap_mem->length(); + int arr_len = regions == NULL ? 0 : regions->length(); for (int i = 0; i < arr_len; i++) { - char* start = (char*)heap_mem->at(i).start(); - size_t size = heap_mem->at(i).byte_size(); + char* start = (char*)regions->at(i).start(); + size_t size = regions->at(i).byte_size(); char* top = start + size; log_debug(cds)("%s%d space: " SIZE_FORMAT_W(9) " [ %4.1f%% of total] out of " SIZE_FORMAT_W(9) " bytes [100.0%% used] at " INTPTR_FORMAT, name, i, size, size/double(total_size)*100.0, size, p2i(start)); diff --git a/src/hotspot/share/cds/archiveBuilder.hpp b/src/hotspot/share/cds/archiveBuilder.hpp index 6f29c8112871f67abac05dab7c498981325fdc62..1057d05b0836647cd1410df9996efc95d233e6fa 100644 --- a/src/hotspot/share/cds/archiveBuilder.hpp +++ b/src/hotspot/share/cds/archiveBuilder.hpp @@ -33,7 +33,7 @@ #include "runtime/os.hpp" #include "utilities/bitMap.hpp" #include "utilities/growableArray.hpp" -#include "utilities/hashtable.hpp" +#include "utilities/resizeableResourceHash.hpp" #include "utilities/resourceHash.hpp" struct ArchiveHeapOopmapInfo; @@ -179,8 +179,8 @@ private: class SrcObjTableCleaner { public: - bool do_entry(address key, const SourceObjInfo* value) { - delete value->ref(); + bool do_entry(address key, const SourceObjInfo& value) { + delete value.ref(); return true; } }; @@ -199,15 +199,12 @@ private: SourceObjList _rw_src_objs; // objs to put in rw region SourceObjList _ro_src_objs; // objs to put in ro region - KVHashtable _src_obj_table; + ResizeableResourceHashtable _src_obj_table; GrowableArray* _klasses; GrowableArray* _symbols; GrowableArray* _special_refs; // statistics - int _num_instance_klasses; - int _num_obj_array_klasses; - int _num_type_array_klasses; DumpAllocStats _alloc_stats; size_t _total_closed_heap_region_size; size_t _total_open_heap_region_size; @@ -216,7 +213,7 @@ private: GrowableArray* closed_heap_regions, GrowableArray* open_heap_regions); void print_bitmap_region_stats(size_t size, size_t total_size); - void print_heap_region_stats(GrowableArray *heap_mem, + void print_heap_region_stats(GrowableArray* regions, const char *name, size_t total_size); // For global access. 
diff --git a/src/hotspot/share/cds/archiveUtils.cpp b/src/hotspot/share/cds/archiveUtils.cpp index bd279d35777abe335f97d41b03da637f380aede2..8c273c06a8d0b431a9591420c0577bb18808a371 100644 --- a/src/hotspot/share/cds/archiveUtils.cpp +++ b/src/hotspot/share/cds/archiveUtils.cpp @@ -39,6 +39,7 @@ #include "oops/compressedOops.inline.hpp" #include "runtime/arguments.hpp" #include "utilities/bitMap.inline.hpp" +#include "utilities/formatBuffer.hpp" CHeapBitMap* ArchivePtrMarker::_ptrmap = NULL; VirtualSpace* ArchivePtrMarker::_vs; @@ -261,8 +262,7 @@ void WriteClosure::do_oop(oop* o) { if (*o == NULL) { _dump_region->append_intptr_t(0); } else { - assert(HeapShared::is_heap_object_archiving_allowed(), - "Archiving heap object is not allowed"); + assert(HeapShared::can_write(), "sanity"); _dump_region->append_intptr_t( (intptr_t)CompressedOops::encode_not_null(*o)); } @@ -307,13 +307,11 @@ void ReadClosure::do_tag(int tag) { void ReadClosure::do_oop(oop *p) { narrowOop o = CompressedOops::narrow_oop_cast(nextPtr()); - if (CompressedOops::is_null(o) || !HeapShared::open_archive_heap_region_mapped()) { + if (CompressedOops::is_null(o) || !HeapShared::is_fully_available()) { *p = NULL; } else { - assert(HeapShared::is_heap_object_archiving_allowed(), - "Archived heap object is not allowed"); - assert(HeapShared::open_archive_heap_region_mapped(), - "Open archive heap region is not mapped"); + assert(HeapShared::can_use(), "sanity"); + assert(HeapShared::is_fully_available(), "must be"); *p = HeapShared::decode_from_archive(o); } } @@ -329,23 +327,24 @@ void ReadClosure::do_region(u_char* start, size_t size) { } } -fileStream* ClassListWriter::_classlist_file = NULL; - void ArchiveUtils::log_to_classlist(BootstrapInfo* bootstrap_specifier, TRAPS) { if (ClassListWriter::is_enabled()) { if (SystemDictionaryShared::is_supported_invokedynamic(bootstrap_specifier)) { - ResourceMark rm(THREAD); const constantPoolHandle& pool = bootstrap_specifier->pool(); - int pool_index = bootstrap_specifier->bss_index(); - ClassListWriter w; - w.stream()->print("%s %s", LAMBDA_PROXY_TAG, pool->pool_holder()->name()->as_C_string()); - CDSIndyInfo cii; - ClassListParser::populate_cds_indy_info(pool, pool_index, &cii, CHECK); - GrowableArray* indy_items = cii.items(); - for (int i = 0; i < indy_items->length(); i++) { - w.stream()->print(" %s", indy_items->at(i)); + if (SystemDictionaryShared::is_builtin_loader(pool->pool_holder()->class_loader_data())) { + // Currently lambda proxy classes are supported only for the built-in loaders. + ResourceMark rm(THREAD); + int pool_index = bootstrap_specifier->bss_index(); + ClassListWriter w; + w.stream()->print("%s %s", LAMBDA_PROXY_TAG, pool->pool_holder()->name()->as_C_string()); + CDSIndyInfo cii; + ClassListParser::populate_cds_indy_info(pool, pool_index, &cii, CHECK); + GrowableArray* indy_items = cii.items(); + for (int i = 0; i < indy_items->length(); i++) { + w.stream()->print(" %s", indy_items->at(i)); + } + w.stream()->cr(); } - w.stream()->cr(); } } } diff --git a/src/hotspot/share/cds/archiveUtils.hpp b/src/hotspot/share/cds/archiveUtils.hpp index cdb3d99ab5350ef7397b7e41c323935b2382eebd..588ad1b6da921152f1caaf4827504286d2e969ba 100644 --- a/src/hotspot/share/cds/archiveUtils.hpp +++ b/src/hotspot/share/cds/archiveUtils.hpp @@ -71,6 +71,11 @@ public: static CHeapBitMap* ptrmap() { return _ptrmap; } + + static void reset_map_and_vs() { + _ptrmap = nullptr; + _vs = nullptr; + } }; // SharedDataRelocator is used to shift pointers in the CDS archive. 
diff --git a/src/hotspot/share/cds/cdsConstants.cpp b/src/hotspot/share/cds/cdsConstants.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5d8948b0ee0612cb21ac17547dd2b6f0b92a0969 --- /dev/null +++ b/src/hotspot/share/cds/cdsConstants.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2014, 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" + +#include "cds.h" +#include "cds/cdsConstants.hpp" +#include "cds/dynamicArchive.hpp" +#include "cds/filemap.hpp" +#include "utilities/globalDefinitions.hpp" + +CDSConst CDSConstants::offsets[] = { + { "GenericCDSFileMapHeader::_magic", offset_of(GenericCDSFileMapHeader, _magic) }, + { "GenericCDSFileMapHeader::_crc", offset_of(GenericCDSFileMapHeader, _crc) }, + { "GenericCDSFileMapHeader::_version", offset_of(GenericCDSFileMapHeader, _version) }, + { "GenericCDSFileMapHeader::_header_size", offset_of(GenericCDSFileMapHeader, _header_size) }, + { "GenericCDSFileMapHeader::_base_archive_path_offset", offset_of(GenericCDSFileMapHeader, _base_archive_path_offset) }, + { "GenericCDSFileMapHeader::_base_archive_name_size", offset_of(GenericCDSFileMapHeader, _base_archive_name_size) }, + { "CDSFileMapHeaderBase::_space[0]", offset_of(CDSFileMapHeaderBase, _space) }, + { "FileMapHeader::_jvm_ident", offset_of(FileMapHeader, _jvm_ident) }, + { "CDSFileMapRegion::_crc", offset_of(CDSFileMapRegion, _crc) }, + { "CDSFileMapRegion::_used", offset_of(CDSFileMapRegion, _used) }, + { "DynamicArchiveHeader::_base_region_crc", offset_of(DynamicArchiveHeader, _base_region_crc) } +}; + +CDSConst CDSConstants::constants[] = { + { "static_magic", (size_t)CDS_ARCHIVE_MAGIC }, + { "dynamic_magic", (size_t)CDS_DYNAMIC_ARCHIVE_MAGIC }, + { "int_size", sizeof(int) }, + { "CDSFileMapRegion_size", sizeof(CDSFileMapRegion) }, + { "static_file_header_size", sizeof(FileMapHeader) }, + { "dynamic_archive_header_size", sizeof(DynamicArchiveHeader) }, + { "size_t_size", sizeof(size_t) } +}; + +size_t CDSConstants::get_cds_offset(const char* name) { + for (int i = 0; i < (int)ARRAY_SIZE(offsets); i++) { + if (strcmp(name, offsets[i]._name) == 0) { + return offsets[i]._value; + } + } + return -1; +} + +size_t CDSConstants::get_cds_constant(const char* name) { + for (int i = 0; i < (int)ARRAY_SIZE(constants); i++) { + if (strcmp(name, constants[i]._name) == 0) { + return constants[i]._value; + } + } + return -1; +} diff --git a/src/hotspot/share/cds/cdsoffsets.hpp b/src/hotspot/share/cds/cdsConstants.hpp similarity index 66% rename from 
src/hotspot/share/cds/cdsoffsets.hpp rename to src/hotspot/share/cds/cdsConstants.hpp index 4598404791b2ce53ab4681a22d7f4cea70b350f9..d116fbf47aec7ca2646a5b993258a0af0d947326 100644 --- a/src/hotspot/share/cds/cdsoffsets.hpp +++ b/src/hotspot/share/cds/cdsConstants.hpp @@ -22,26 +22,23 @@ * */ -#ifndef SHARE_CDS_CDSOFFSETS_HPP -#define SHARE_CDS_CDSOFFSETS_HPP +#ifndef SHARE_CDS_CDSCONSTANTS_HPP +#define SHARE_CDS_CDSCONSTANTS_HPP -#include "memory/allocation.hpp" +#include "memory/allStatic.hpp" +#include "utilities/globalDefinitions.hpp" -class CDSOffsets: public CHeapObj { +typedef struct { + const char* _name; + size_t _value; +} CDSConst; + +class CDSConstants : AllStatic { private: - char* _name; - int _offset; - CDSOffsets* _next; - static CDSOffsets* _all; // sole list for cds + static CDSConst offsets[]; + static CDSConst constants[]; public: - CDSOffsets(const char* name, int offset, CDSOffsets* next); - - char* get_name() const { return _name; } - int get_offset() const { return _offset; } - CDSOffsets* next() const { return _next; } - void add_end(CDSOffsets* n); - - static int find_offset(const char* name); + static size_t get_cds_constant(const char* name); + static size_t get_cds_offset(const char* name); }; - -#endif // SHARE_CDS_CDSOFFSETS_HPP +#endif // SHARE_CDS_CDSCONSTANTS_HPP diff --git a/src/hotspot/share/cds/cdsProtectionDomain.cpp b/src/hotspot/share/cds/cdsProtectionDomain.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e5b06d0c2bb964764a4abbbeeb8b4aa305128e18 --- /dev/null +++ b/src/hotspot/share/cds/cdsProtectionDomain.cpp @@ -0,0 +1,338 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "cds/cdsProtectionDomain.hpp" +#include "classfile/classLoader.hpp" +#include "classfile/classLoaderExt.hpp" +#include "classfile/javaClasses.hpp" +#include "classfile/moduleEntry.hpp" +#include "classfile/symbolTable.hpp" +#include "classfile/systemDictionaryShared.hpp" +#include "classfile/vmClasses.hpp" +#include "classfile/vmSymbols.hpp" +#include "memory/oopFactory.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "oops/instanceKlass.hpp" +#include "oops/symbol.hpp" +#include "runtime/javaCalls.hpp" + +OopHandle CDSProtectionDomain::_shared_protection_domains; +OopHandle CDSProtectionDomain::_shared_jar_urls; +OopHandle CDSProtectionDomain::_shared_jar_manifests; + +// Initializes the java.lang.Package and java.security.ProtectionDomain objects associated with +// the given InstanceKlass. +// Returns the ProtectionDomain for the InstanceKlass. +Handle CDSProtectionDomain::init_security_info(Handle class_loader, InstanceKlass* ik, PackageEntry* pkg_entry, TRAPS) { + Handle pd; + + if (ik != NULL) { + int index = ik->shared_classpath_index(); + assert(index >= 0, "Sanity"); + SharedClassPathEntry* ent = FileMapInfo::shared_path(index); + Symbol* class_name = ik->name(); + + if (ent->is_modules_image()) { + // For shared app/platform classes originated from the run-time image: + // The ProtectionDomains are cached in the corresponding ModuleEntries + // for fast access by the VM. + // all packages from module image are already created during VM bootstrap in + // Modules::define_module(). + assert(pkg_entry != NULL, "archived class in module image cannot be from unnamed package"); + ModuleEntry* mod_entry = pkg_entry->module(); + pd = get_shared_protection_domain(class_loader, mod_entry, CHECK_(pd)); + } else { + // For shared app/platform classes originated from JAR files on the class path: + // Each of the 3 SystemDictionaryShared::_shared_xxx arrays has the same length + // as the shared classpath table in the shared archive (see + // FileMap::_shared_path_table in filemap.hpp for details). + // + // If a shared InstanceKlass k is loaded from the class path, let + // + // index = k->shared_classpath_index(): + // + // FileMap::_shared_path_table[index] identifies the JAR file that contains k. + // + // k's protection domain is: + // + // ProtectionDomain pd = _shared_protection_domains[index]; + // + // and k's Package is initialized using + // + // manifest = _shared_jar_manifests[index]; + // url = _shared_jar_urls[index]; + // define_shared_package(class_name, class_loader, manifest, url, CHECK_(pd)); + // + // Note that if an element of these 3 _shared_xxx arrays is NULL, it will be initialized by + // the corresponding SystemDictionaryShared::get_shared_xxx() function. + Handle manifest = get_shared_jar_manifest(index, CHECK_(pd)); + Handle url = get_shared_jar_url(index, CHECK_(pd)); + int index_offset = index - ClassLoaderExt::app_class_paths_start_index(); + if (index_offset < PackageEntry::max_index_for_defined_in_class_path()) { + if (pkg_entry == NULL || !pkg_entry->is_defined_by_cds_in_class_path(index_offset)) { + // define_shared_package only needs to be called once for each package in a jar specified + // in the shared class path. 
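+          // (Illustration with hypothetical values: for a class loaded from
+          //  the second classpath entry, index_offset identifies that entry,
+          //  and the call below defines its package from
+          //  _shared_jar_manifests[index] / _shared_jar_urls[index]; the
+          //  set_defined_by_cds_in_class_path() call records that this has
+          //  been done once for the package.)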
+          define_shared_package(class_name, class_loader, manifest, url, CHECK_(pd));
+          if (pkg_entry != NULL) {
+            pkg_entry->set_defined_by_cds_in_class_path(index_offset);
+          }
+        }
+      } else {
+        define_shared_package(class_name, class_loader, manifest, url, CHECK_(pd));
+      }
+      pd = get_shared_protection_domain(class_loader, index, url, CHECK_(pd));
+    }
+  }
+  return pd;
+}
+
+Handle CDSProtectionDomain::get_package_name(Symbol* class_name, TRAPS) {
+  ResourceMark rm(THREAD);
+  Handle pkgname_string;
+  TempNewSymbol pkg = ClassLoader::package_from_class_name(class_name);
+  if (pkg != NULL) { // Package prefix found
+    const char* pkgname = pkg->as_klass_external_name();
+    pkgname_string = java_lang_String::create_from_str(pkgname,
+                                                       CHECK_(pkgname_string));
+  }
+  return pkgname_string;
+}
+
+PackageEntry* CDSProtectionDomain::get_package_entry_from_class(InstanceKlass* ik, Handle class_loader) {
+  PackageEntry* pkg_entry = ik->package();
+  if (MetaspaceShared::use_full_module_graph() && ik->is_shared() && pkg_entry != NULL) {
+    assert(MetaspaceShared::is_in_shared_metaspace(pkg_entry), "must be");
+    assert(!ik->is_shared_unregistered_class(), "unexpected archived package entry for an unregistered class");
+    assert(ik->module()->is_named(), "unexpected archived package entry for a class in an unnamed module");
+    return pkg_entry;
+  }
+  TempNewSymbol pkg_name = ClassLoader::package_from_class_name(ik->name());
+  if (pkg_name != NULL) {
+    pkg_entry = SystemDictionaryShared::class_loader_data(class_loader)->packages()->lookup_only(pkg_name);
+  } else {
+    pkg_entry = NULL;
+  }
+  return pkg_entry;
+}
+
+// Define Package for shared app classes from JAR file and also checks for
+// package sealing (all done in Java code)
+// See http://docs.oracle.com/javase/tutorial/deployment/jar/sealman.html
+void CDSProtectionDomain::define_shared_package(Symbol* class_name,
+                                                Handle class_loader,
+                                                Handle manifest,
+                                                Handle url,
+                                                TRAPS) {
+  assert(SystemDictionary::is_system_class_loader(class_loader()), "unexpected class loader");
+  // get_package_name() returns a NULL handle if the class is in unnamed package
+  Handle pkgname_string = get_package_name(class_name, CHECK);
+  if (pkgname_string.not_null()) {
+    Klass* app_classLoader_klass = vmClasses::jdk_internal_loader_ClassLoaders_AppClassLoader_klass();
+    JavaValue result(T_OBJECT);
+    JavaCallArguments args(3);
+    args.set_receiver(class_loader);
+    args.push_oop(pkgname_string);
+    args.push_oop(manifest);
+    args.push_oop(url);
+    JavaCalls::call_virtual(&result, app_classLoader_klass,
+                            vmSymbols::defineOrCheckPackage_name(),
+                            vmSymbols::defineOrCheckPackage_signature(),
+                            &args,
+                            CHECK);
+  }
+}
+
+Handle CDSProtectionDomain::create_jar_manifest(const char* manifest_chars, size_t size, TRAPS) {
+  typeArrayOop buf = oopFactory::new_byteArray((int)size, CHECK_NH);
+  typeArrayHandle bufhandle(THREAD, buf);
+  ArrayAccess<>::arraycopy_from_native(reinterpret_cast<const jbyte*>(manifest_chars),
+                                       buf, typeArrayOopDesc::element_offset<jbyte>(0), size);
+  Handle bais = JavaCalls::construct_new_instance(vmClasses::ByteArrayInputStream_klass(),
+                                                  vmSymbols::byte_array_void_signature(),
+                                                  bufhandle, CHECK_NH);
+  // manifest = new Manifest(ByteArrayInputStream)
+  Handle manifest = JavaCalls::construct_new_instance(vmClasses::Jar_Manifest_klass(),
+                                                      vmSymbols::input_stream_void_signature(),
+                                                      bais, CHECK_NH);
+  return manifest;
+}
+
+Handle CDSProtectionDomain::get_shared_jar_manifest(int shared_path_index, TRAPS) {
+  Handle manifest;
+  if (shared_jar_manifest(shared_path_index) == NULL) {
+
SharedClassPathEntry* ent = FileMapInfo::shared_path(shared_path_index); + size_t size = (size_t)ent->manifest_size(); + if (size == 0) { + return Handle(); + } + + // ByteArrayInputStream bais = new ByteArrayInputStream(buf); + const char* src = ent->manifest(); + assert(src != NULL, "No Manifest data"); + manifest = create_jar_manifest(src, size, CHECK_NH); + atomic_set_shared_jar_manifest(shared_path_index, manifest()); + } + manifest = Handle(THREAD, shared_jar_manifest(shared_path_index)); + assert(manifest.not_null(), "sanity"); + return manifest; +} + +Handle CDSProtectionDomain::get_shared_jar_url(int shared_path_index, TRAPS) { + Handle url_h; + if (shared_jar_url(shared_path_index) == NULL) { + JavaValue result(T_OBJECT); + const char* path = FileMapInfo::shared_path_name(shared_path_index); + Handle path_string = java_lang_String::create_from_str(path, CHECK_(url_h)); + Klass* classLoaders_klass = + vmClasses::jdk_internal_loader_ClassLoaders_klass(); + JavaCalls::call_static(&result, classLoaders_klass, + vmSymbols::toFileURL_name(), + vmSymbols::toFileURL_signature(), + path_string, CHECK_(url_h)); + + atomic_set_shared_jar_url(shared_path_index, result.get_oop()); + } + + url_h = Handle(THREAD, shared_jar_url(shared_path_index)); + assert(url_h.not_null(), "sanity"); + return url_h; +} + +// Get the ProtectionDomain associated with the CodeSource from the classloader. +Handle CDSProtectionDomain::get_protection_domain_from_classloader(Handle class_loader, + Handle url, TRAPS) { + // CodeSource cs = new CodeSource(url, null); + Handle cs = JavaCalls::construct_new_instance(vmClasses::CodeSource_klass(), + vmSymbols::url_code_signer_array_void_signature(), + url, Handle(), CHECK_NH); + + // protection_domain = SecureClassLoader.getProtectionDomain(cs); + Klass* secureClassLoader_klass = vmClasses::SecureClassLoader_klass(); + JavaValue obj_result(T_OBJECT); + JavaCalls::call_virtual(&obj_result, class_loader, secureClassLoader_klass, + vmSymbols::getProtectionDomain_name(), + vmSymbols::getProtectionDomain_signature(), + cs, CHECK_NH); + return Handle(THREAD, obj_result.get_oop()); +} + +// Returns the ProtectionDomain associated with the JAR file identified by the url. +Handle CDSProtectionDomain::get_shared_protection_domain(Handle class_loader, + int shared_path_index, + Handle url, + TRAPS) { + Handle protection_domain; + if (shared_protection_domain(shared_path_index) == NULL) { + Handle pd = get_protection_domain_from_classloader(class_loader, url, THREAD); + atomic_set_shared_protection_domain(shared_path_index, pd()); + } + + // Acquire from the cache because if another thread beats the current one to + // set the shared protection_domain and the atomic_set fails, the current thread + // needs to get the updated protection_domain from the cache. + protection_domain = Handle(THREAD, shared_protection_domain(shared_path_index)); + assert(protection_domain.not_null(), "sanity"); + return protection_domain; +} + +// Returns the ProtectionDomain associated with the moduleEntry. 
+Handle CDSProtectionDomain::get_shared_protection_domain(Handle class_loader, + ModuleEntry* mod, TRAPS) { + ClassLoaderData *loader_data = mod->loader_data(); + if (mod->shared_protection_domain() == NULL) { + Symbol* location = mod->location(); + if (location != NULL) { + Handle location_string = java_lang_String::create_from_symbol( + location, CHECK_NH); + Handle url; + JavaValue result(T_OBJECT); + if (location->starts_with("jrt:/")) { + url = JavaCalls::construct_new_instance(vmClasses::URL_klass(), + vmSymbols::string_void_signature(), + location_string, CHECK_NH); + } else { + Klass* classLoaders_klass = + vmClasses::jdk_internal_loader_ClassLoaders_klass(); + JavaCalls::call_static(&result, classLoaders_klass, vmSymbols::toFileURL_name(), + vmSymbols::toFileURL_signature(), + location_string, CHECK_NH); + url = Handle(THREAD, result.get_oop()); + } + + Handle pd = get_protection_domain_from_classloader(class_loader, url, + CHECK_NH); + mod->set_shared_protection_domain(loader_data, pd); + } + } + + Handle protection_domain(THREAD, mod->shared_protection_domain()); + assert(protection_domain.not_null(), "sanity"); + return protection_domain; +} + +void CDSProtectionDomain::atomic_set_array_index(OopHandle array, int index, oop o) { + // Benign race condition: array.obj_at(index) may already be filled in. + // The important thing here is that all threads pick up the same result. + // It doesn't matter which racing thread wins, as long as only one + // result is used by all threads, and all future queries. + ((objArrayOop)array.resolve())->atomic_compare_exchange_oop(index, o, NULL); +} + +oop CDSProtectionDomain::shared_protection_domain(int index) { + return ((objArrayOop)_shared_protection_domains.resolve())->obj_at(index); +} + +void CDSProtectionDomain::allocate_shared_protection_domain_array(int size, TRAPS) { + if (_shared_protection_domains.resolve() == NULL) { + oop spd = oopFactory::new_objArray( + vmClasses::ProtectionDomain_klass(), size, CHECK); + _shared_protection_domains = OopHandle(Universe::vm_global(), spd); + } +} + +oop CDSProtectionDomain::shared_jar_url(int index) { + return ((objArrayOop)_shared_jar_urls.resolve())->obj_at(index); +} + +void CDSProtectionDomain::allocate_shared_jar_url_array(int size, TRAPS) { + if (_shared_jar_urls.resolve() == NULL) { + oop sju = oopFactory::new_objArray( + vmClasses::URL_klass(), size, CHECK); + _shared_jar_urls = OopHandle(Universe::vm_global(), sju); + } +} + +oop CDSProtectionDomain::shared_jar_manifest(int index) { + return ((objArrayOop)_shared_jar_manifests.resolve())->obj_at(index); +} + +void CDSProtectionDomain::allocate_shared_jar_manifest_array(int size, TRAPS) { + if (_shared_jar_manifests.resolve() == NULL) { + oop sjm = oopFactory::new_objArray( + vmClasses::Jar_Manifest_klass(), size, CHECK); + _shared_jar_manifests = OopHandle(Universe::vm_global(), sjm); + } +} diff --git a/src/hotspot/share/cds/cdsProtectionDomain.hpp b/src/hotspot/share/cds/cdsProtectionDomain.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1e048a96285c59f5801abc65575fda4813270de3 --- /dev/null +++ b/src/hotspot/share/cds/cdsProtectionDomain.hpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARED_CDS_CDSPROTECTIONDOMAIN_HPP
+#define SHARED_CDS_CDSPROTECTIONDOMAIN_HPP
+#include "oops/oopHandle.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/thread.hpp"
+#include "classfile/moduleEntry.hpp"
+
+class InstanceKlass;
+class Symbol;
+class PackageEntry;
+class ModuleEntry;
+
+// CDS security
+class CDSProtectionDomain : AllStatic {
+  // See init_security_info for more info.
+  static OopHandle _shared_protection_domains;
+  static OopHandle _shared_jar_urls;
+  static OopHandle _shared_jar_manifests;
+
+public:
+  // Package handling:
+  //
+  // 1. For named modules in the runtime image
+  //    BOOT classes: Reuses the existing JVM_GetSystemPackage(s) interfaces
+  //                  to get packages in named modules for shared classes.
+  //                  Package for non-shared classes in named module is also
+  //                  handled using JVM_GetSystemPackage(s).
+  //
+  //    APP  classes: VM calls ClassLoaders.AppClassLoader::definePackage(String, Module)
+  //                  to define package for shared app classes from named
+  //                  modules.
+  //
+  //    PLATFORM classes: VM calls ClassLoaders.PlatformClassLoader::definePackage(String, Module)
+  //                      to define package for shared platform classes from named
+  //                      modules.
+  //
+  // 2. For unnamed modules
+  //    BOOT classes: Reuses the existing JVM_GetSystemPackage(s) interfaces to
+  //                  get packages for shared boot classes in unnamed modules.
+  //
+  //    APP  classes: VM calls ClassLoaders.AppClassLoader::defineOrCheckPackage()
+  //                  with the manifest and url from archived data.
+  //
+  //    PLATFORM classes: No package is defined.
+  //
+  // The following two define_shared_package() functions are used to define
+  // package for shared APP and PLATFORM classes.
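+  //
+  // Illustrative (hypothetical) Java-side shape of the unnamed-module APP
+  // case, matching the defineOrCheckPackage name/signature used by
+  // define_shared_package() in cdsProtectionDomain.cpp:
+  //
+  //   Package p = appLoader.defineOrCheckPackage(pkgName, manifest, url);
+  //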
+ static Handle get_package_name(Symbol* class_name, TRAPS); + static PackageEntry* get_package_entry_from_class(InstanceKlass* ik, Handle class_loader); + static void define_shared_package(Symbol* class_name, + Handle class_loader, + Handle manifest, + Handle url, + TRAPS); + static Handle create_jar_manifest(const char* man, size_t size, TRAPS); + static Handle get_shared_jar_manifest(int shared_path_index, TRAPS); + static Handle get_shared_jar_url(int shared_path_index, TRAPS); + static Handle get_protection_domain_from_classloader(Handle class_loader, + Handle url, TRAPS); + static Handle get_shared_protection_domain(Handle class_loader, + int shared_path_index, + Handle url, + TRAPS); + static Handle get_shared_protection_domain(Handle class_loader, + ModuleEntry* mod, TRAPS); + static void atomic_set_array_index(OopHandle array, int index, oop o); + static oop shared_protection_domain(int index); + static void allocate_shared_protection_domain_array(int size, TRAPS); + static oop shared_jar_url(int index); + static void allocate_shared_jar_url_array(int size, TRAPS); + static oop shared_jar_manifest(int index); + static void allocate_shared_jar_manifest_array(int size, TRAPS); + static Handle init_security_info(Handle class_loader, InstanceKlass* ik, PackageEntry* pkg_entry, TRAPS); + + static void allocate_shared_data_arrays(int size, TRAPS) { + allocate_shared_protection_domain_array(size, CHECK); + allocate_shared_jar_url_array(size, CHECK); + allocate_shared_jar_manifest_array(size, CHECK); + } + static void atomic_set_shared_protection_domain(int index, oop pd) { + atomic_set_array_index(_shared_protection_domains, index, pd); + } + static void atomic_set_shared_jar_url(int index, oop url) { + atomic_set_array_index(_shared_jar_urls, index, url); + } + static void atomic_set_shared_jar_manifest(int index, oop man) { + atomic_set_array_index(_shared_jar_manifests, index, man); + } +}; + +#endif // SHARED_CDS_CDSPROTECTIONDOMAIN_HPP diff --git a/src/hotspot/share/cds/cdsoffsets.cpp b/src/hotspot/share/cds/cdsoffsets.cpp deleted file mode 100644 index 9d93900aa67ba6174b417a4d048becfb2ecd6e2e..0000000000000000000000000000000000000000 --- a/src/hotspot/share/cds/cdsoffsets.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2014, 2021, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#include "precompiled.hpp" -#include "cds/cdsoffsets.hpp" -#include "cds/dynamicArchive.hpp" -#include "cds/filemap.hpp" -#include "runtime/os.hpp" -#include "memory/allocation.hpp" -#include "memory/allocation.inline.hpp" -#include "utilities/macros.hpp" - -CDSOffsets::CDSOffsets(const char* name, int offset, CDSOffsets* next) { - _name = NEW_C_HEAP_ARRAY(char, strlen(name) + 1, mtInternal); - strcpy(_name, name); - _offset = offset; - _next = next; -} - -CDSOffsets* CDSOffsets::_all = NULL; -#define ADD_NEXT(list, name, value) \ - list->add_end(new CDSOffsets(name, value, NULL)) - -#define CREATE_OFFSET_MAPS \ - _all = new CDSOffsets("size_t_size", sizeof(size_t), NULL); \ - ADD_NEXT(_all, "int_size", sizeof(int)); \ - ADD_NEXT(_all, "FileMapHeader::_magic", offset_of(FileMapHeader, _magic)); \ - ADD_NEXT(_all, "FileMapHeader::_crc", offset_of(FileMapHeader, _crc)); \ - ADD_NEXT(_all, "FileMapHeader::_version", offset_of(FileMapHeader, _version)); \ - ADD_NEXT(_all, "FileMapHeader::_jvm_ident", offset_of(FileMapHeader, _jvm_ident)); \ - ADD_NEXT(_all, "FileMapHeader::_space[0]", offset_of(FileMapHeader, _space)); \ - ADD_NEXT(_all, "CDSFileMapRegion::_crc", offset_of(CDSFileMapRegion, _crc)); \ - ADD_NEXT(_all, "CDSFileMapRegion::_used", offset_of(CDSFileMapRegion, _used)); \ - ADD_NEXT(_all, "file_header_size", sizeof(FileMapHeader)); \ - ADD_NEXT(_all, "DynamicArchiveHeader::_base_region_crc", offset_of(DynamicArchiveHeader, _base_region_crc)); \ - ADD_NEXT(_all, "CDSFileMapRegion_size", sizeof(CDSFileMapRegion)); - -int CDSOffsets::find_offset(const char* name) { - if (_all == NULL) { - CREATE_OFFSET_MAPS - } - CDSOffsets* it = _all; - while(it) { - if (!strcmp(name, it->get_name())) { - return it->get_offset(); - } - it = it->next(); - } - return -1; // not found -} - -void CDSOffsets::add_end(CDSOffsets* n) { - CDSOffsets* p = this; - while(p && p->_next) { p = p->_next; } - p->_next = n; -} diff --git a/src/hotspot/share/cds/classListParser.cpp b/src/hotspot/share/cds/classListParser.cpp index 5cc764fe9f187378577230159df9b9c281ec9a7c..9fa59284c5fe43fb7e06757cff8aa4fc31bbf3a8 100644 --- a/src/hotspot/share/cds/classListParser.cpp +++ b/src/hotspot/share/cds/classListParser.cpp @@ -29,6 +29,7 @@ #include "cds/classListParser.hpp" #include "cds/lambdaFormInvokers.hpp" #include "cds/metaspaceShared.hpp" +#include "cds/unregisteredClasses.hpp" #include "classfile/classLoaderExt.hpp" #include "classfile/javaClasses.inline.hpp" #include "classfile/symbolTable.hpp" @@ -54,7 +55,7 @@ volatile Thread* ClassListParser::_parsing_thread = NULL; ClassListParser* ClassListParser::_instance = NULL; -ClassListParser::ClassListParser(const char* file) : _id2klass_table(INITIAL_TABLE_SIZE) { +ClassListParser::ClassListParser(const char* file) : _id2klass_table(INITIAL_TABLE_SIZE, MAX_TABLE_SIZE) { _classlist_file = file; _file = NULL; // Use os::open() because neither fopen() nor os::fopen() @@ -120,6 +121,13 @@ int ClassListParser::parse(TRAPS) { return 0; // THROW } + ResourceMark rm(THREAD); + char* ex_msg = (char*)""; + oop message = java_lang_Throwable::message(PENDING_EXCEPTION); + if (message != NULL) { + ex_msg = java_lang_String::as_utf8_string(message); + } + log_warning(cds)("%s: %s", PENDING_EXCEPTION->klass()->external_name(), ex_msg); // We might have an invalid class name or an bad class. Warn about it // and keep going to the next line. 
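   // (Example of the new warning for a hypothetical malformed entry:
   //  "[warning][cds] java.lang.ClassNotFoundException: no.such.Class")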
CLEAR_PENDING_EXCEPTION; @@ -458,7 +466,7 @@ InstanceKlass* ClassListParser::load_class_from_source(Symbol* class_name, TRAPS THROW_NULL(vmSymbols::java_lang_ClassNotFoundException()); } - InstanceKlass* k = ClassLoaderExt::load_class(class_name, _source, CHECK_NULL); + InstanceKlass* k = UnregisteredClasses::load_class(class_name, _source, CHECK_NULL); if (k->local_interfaces()->length() != _interfaces->length()) { print_specified_interfaces(); print_actual_interfaces(k); @@ -466,16 +474,14 @@ InstanceKlass* ClassListParser::load_class_from_source(Symbol* class_name, TRAPS _interfaces->length(), k->local_interfaces()->length()); } + assert(k->is_shared_unregistered_class(), "must be"); + bool added = SystemDictionaryShared::add_unregistered_class(THREAD, k); if (!added) { // We allow only a single unregistered class for each unique name. error("Duplicated class %s", _class_name); } - // This tells JVM_FindLoadedClass to not find this class. - k->set_shared_classpath_index(UNREGISTERED_INDEX); - k->clear_shared_class_loader_type(); - return k; } @@ -507,7 +513,7 @@ void ClassListParser::populate_cds_indy_info(const constantPoolHandle &pool, int } } -bool ClassListParser::is_matching_cp_entry(constantPoolHandle &pool, int cp_index, TRAPS) { +bool ClassListParser::is_matching_cp_entry(const constantPoolHandle &pool, int cp_index, TRAPS) { ResourceMark rm(THREAD); CDSIndyInfo cii; populate_cds_indy_info(pool, cp_index, &cii, CHECK_0); @@ -643,11 +649,14 @@ Klass* ClassListParser::load_current_class(Symbol* class_name_symbol, TRAPS) { InstanceKlass* ik = InstanceKlass::cast(klass); int id = this->id(); SystemDictionaryShared::update_shared_entry(ik, id); - InstanceKlass** old_ptr = table()->lookup(id); - if (old_ptr != NULL) { + bool created; + id2klass_table()->put_if_absent(id, ik, &created); + if (!created) { error("Duplicated ID %d for class %s", id, _class_name); } - table()->add(id, ik); + if (id2klass_table()->maybe_grow()) { + log_info(cds, hashtables)("Expanded id2klass_table() to %d", id2klass_table()->table_size()); + } } return klass; @@ -658,7 +667,7 @@ bool ClassListParser::is_loading_from_source() { } InstanceKlass* ClassListParser::lookup_class_by_id(int id) { - InstanceKlass** klass_ptr = table()->lookup(id); + InstanceKlass** klass_ptr = id2klass_table()->get(id); if (klass_ptr == NULL) { error("Class ID %d has not been defined", id); } diff --git a/src/hotspot/share/cds/classListParser.hpp b/src/hotspot/share/cds/classListParser.hpp index 9929cdca60992d7e5722321ffb4addeeebee5657..684605d9688a2e5cd3f9005edb1c65f7ea93f85e 100644 --- a/src/hotspot/share/cds/classListParser.hpp +++ b/src/hotspot/share/cds/classListParser.hpp @@ -28,11 +28,12 @@ #include "utilities/exceptions.hpp" #include "utilities/globalDefinitions.hpp" #include "utilities/growableArray.hpp" -#include "utilities/hashtable.inline.hpp" +#include "utilities/resizeableResourceHash.hpp" #define LAMBDA_PROXY_TAG "@lambda-proxy" #define LAMBDA_FORM_TAG "@lambda-form-invoker" +class constantPoolHandle; class Thread; class CDSIndyInfo { @@ -66,7 +67,9 @@ public: }; class ClassListParser : public StackObj { - typedef KVHashtable ID2KlassTable; + // Must be C_HEAP allocated -- we don't want nested resource allocations. 
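+  // (Rationale sketch: the parser runs under nested ResourceMarks, and a
+  //  RESOURCE_AREA-backed table could be released while id -> klass entries
+  //  are still in use; C_HEAP storage keeps the table valid for the whole
+  //  parse.)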
+ typedef ResizeableResourceHashtable ID2KlassTable; enum { _unspecified = -999, @@ -80,7 +83,9 @@ class ClassListParser : public StackObj { _line_buf_size = _max_allowed_line_len + _line_buf_extra }; - static const int INITIAL_TABLE_SIZE = 1987; + // Use a small initial size in debug build to test resizing logic + static const int INITIAL_TABLE_SIZE = DEBUG_ONLY(17) NOT_DEBUG(1987); + static const int MAX_TABLE_SIZE = 61333; static volatile Thread* _parsing_thread; // the thread that created _instance static ClassListParser* _instance; // the singleton. const char* _classlist_file; @@ -106,13 +111,13 @@ class ClassListParser : public StackObj { bool parse_int_option(const char* option_name, int* value); bool parse_uint_option(const char* option_name, int* value); InstanceKlass* load_class_from_source(Symbol* class_name, TRAPS); - ID2KlassTable* table() { + ID2KlassTable* id2klass_table() { return &_id2klass_table; } InstanceKlass* lookup_class_by_id(int id); void print_specified_interfaces(); void print_actual_interfaces(InstanceKlass *ik); - bool is_matching_cp_entry(constantPoolHandle &pool, int cp_index, TRAPS); + bool is_matching_cp_entry(const constantPoolHandle &pool, int cp_index, TRAPS); void resolve_indy(JavaThread* current, Symbol* class_name_symbol); void resolve_indy_impl(Symbol* class_name_symbol, TRAPS); @@ -161,7 +166,7 @@ public: return _super; } void check_already_loaded(const char* which, int id) { - if (_id2klass_table.lookup(id) == NULL) { + if (!id2klass_table()->contains(id)) { error("%s id %d is not yet loaded", which, id); } } diff --git a/src/hotspot/share/cds/classListWriter.cpp b/src/hotspot/share/cds/classListWriter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..45421457a633a99755d46ec9dfd0ed42e9302993 --- /dev/null +++ b/src/hotspot/share/cds/classListWriter.cpp @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "cds/classListWriter.hpp" +#include "classfile/classFileStream.hpp" +#include "classfile/classLoader.hpp" +#include "classfile/classLoaderData.hpp" +#include "classfile/moduleEntry.hpp" +#include "classfile/systemDictionaryShared.hpp" +#include "memory/resourceArea.hpp" +#include "oops/instanceKlass.hpp" +#include "runtime/mutexLocker.hpp" + +fileStream* ClassListWriter::_classlist_file = NULL; + +void ClassListWriter::init() { + // For -XX:DumpLoadedClassList= option + if (DumpLoadedClassList != NULL) { + const char* list_name = make_log_name(DumpLoadedClassList, NULL); + _classlist_file = new(ResourceObj::C_HEAP, mtInternal) + fileStream(list_name); + _classlist_file->print_cr("# NOTE: Do not modify this file."); + _classlist_file->print_cr("#"); + _classlist_file->print_cr("# This file is generated via the -XX:DumpLoadedClassList= option"); + _classlist_file->print_cr("# and is used at CDS archive dump time (see -Xshare:dump)."); + _classlist_file->print_cr("#"); + FREE_C_HEAP_ARRAY(char, list_name); + } +} + +void ClassListWriter::write(const InstanceKlass* k, const ClassFileStream* cfs) { + assert(is_enabled(), "must be"); + + if (!ClassLoader::has_jrt_entry()) { + warning("DumpLoadedClassList and CDS are not supported in exploded build"); + DumpLoadedClassList = NULL; + return; + } + + ClassListWriter w; + write_to_stream(k, w.stream(), cfs); +} + +class ClassListWriter::IDTable : public ResourceHashtable< + const InstanceKlass*, int, + 15889, // prime number + ResourceObj::C_HEAP> {}; + +ClassListWriter::IDTable* ClassListWriter::_id_table = NULL; +int ClassListWriter::_total_ids = 0; + +int ClassListWriter::get_id(const InstanceKlass* k) { + assert_locked(); + if (_id_table == NULL) { + _id_table = new (ResourceObj::C_HEAP, mtClass)IDTable(); + } + bool created; + int* v = _id_table->put_if_absent(k, &created); + if (created) { + *v = _total_ids++; + } + return *v; +} + +bool ClassListWriter::has_id(const InstanceKlass* k) { + assert_locked(); + if (_id_table != NULL) { + return _id_table->get(k) != NULL; + } else { + return false; + } +} + +void ClassListWriter::handle_class_unloading(const InstanceKlass* klass) { + assert_locked(); + if (_id_table != NULL) { + _id_table->remove(klass); + } +} + +void ClassListWriter::write_to_stream(const InstanceKlass* k, outputStream* stream, const ClassFileStream* cfs) { + assert_locked(); + ClassLoaderData* loader_data = k->class_loader_data(); + + if (!SystemDictionaryShared::is_builtin_loader(loader_data)) { + if (cfs == NULL || strncmp(cfs->source(), "file:", 5) != 0) { + return; + } + if (!SystemDictionaryShared::add_unregistered_class(Thread::current(), (InstanceKlass*)k)) { + return; + } + } + + + { + InstanceKlass* super = k->java_super(); + if (super != NULL && !has_id(super)) { + return; + } + + Array* interfaces = k->local_interfaces(); + int len = interfaces->length(); + for (int i = 0; i < len; i++) { + InstanceKlass* intf = interfaces->at(i); + if (!has_id(intf)) { + return; + } + } + } + + if (k->is_hidden()) { + return; + } + + if (k->module()->is_patched()) { + return; + } + + ResourceMark rm; + stream->print("%s id: %d", k->name()->as_C_string(), get_id(k)); + if (!SystemDictionaryShared::is_builtin_loader(loader_data)) { + InstanceKlass* super = k->java_super(); + assert(super != NULL, "must be"); + stream->print(" super: %d", get_id(super)); + + Array* interfaces = k->local_interfaces(); + int len = interfaces->length(); + if (len > 0) { + stream->print(" interfaces:"); + for 
(int i = 0; i < len; i++) { + InstanceKlass* intf = interfaces->at(i); + stream->print(" %d", get_id(intf)); + } + } + +#ifdef _WINDOWS + // "file:/C:/dir/foo.jar" -> "C:/dir/foo.jar" + stream->print(" source: %s", cfs->source() + 6); +#else + // "file:/dir/foo.jar" -> "/dir/foo.jar" + stream->print(" source: %s", cfs->source() + 5); +#endif + } + + stream->cr(); + stream->flush(); +} + +void ClassListWriter::delete_classlist() { + if (_classlist_file != NULL) { + delete _classlist_file; + } +} diff --git a/src/hotspot/share/cds/classListWriter.hpp b/src/hotspot/share/cds/classListWriter.hpp index 6ece57ef4298aa72caee6f32250b0f1d05b2fa29..db7265b120986f22a135ad17d83c0660081ce01f 100644 --- a/src/hotspot/share/cds/classListWriter.hpp +++ b/src/hotspot/share/cds/classListWriter.hpp @@ -29,54 +29,44 @@ #include "runtime/thread.hpp" #include "utilities/ostream.hpp" -class ClassListWriter { - friend const char* make_log_name(const char* log_name, const char* force_directory); +class ClassFileStream; +class ClassListWriter { +#if INCLUDE_CDS + class IDTable; static fileStream* _classlist_file; + static IDTable* _id_table; + static int _total_ids; MutexLocker _locker; + + static int get_id(const InstanceKlass* k); + static bool has_id(const InstanceKlass* k); + static void assert_locked() { assert_lock_strong(ClassListFile_lock); } public: -#if INCLUDE_CDS ClassListWriter() : _locker(Thread::current(), ClassListFile_lock, Mutex::_no_safepoint_check_flag) {} -#else - ClassListWriter() : _locker(Thread::current(), NULL, Mutex::_no_safepoint_check_flag) {} -#endif outputStream* stream() { return _classlist_file; } + void handle_class_unloading(const InstanceKlass* klass); + static bool is_enabled() { -#if INCLUDE_CDS return _classlist_file != NULL && _classlist_file->is_open(); + } + #else +public: + static bool is_enabled() { return false; -#endif } +#endif // INCLUDE_CDS - static void init() { -#if INCLUDE_CDS - // For -XX:DumpLoadedClassList= option - if (DumpLoadedClassList != NULL) { - const char* list_name = make_log_name(DumpLoadedClassList, NULL); - _classlist_file = new(ResourceObj::C_HEAP, mtInternal) - fileStream(list_name); - _classlist_file->print_cr("# NOTE: Do not modify this file."); - _classlist_file->print_cr("#"); - _classlist_file->print_cr("# This file is generated via the -XX:DumpLoadedClassList= option"); - _classlist_file->print_cr("# and is used at CDS archive dump time (see -Xshare:dump)."); - _classlist_file->print_cr("#"); - FREE_C_HEAP_ARRAY(char, list_name); - } -#endif - } - static void delete_classlist() { -#if INCLUDE_CDS - if (_classlist_file != NULL) { - delete _classlist_file; - } -#endif - } + static void init() NOT_CDS_RETURN; + static void write(const InstanceKlass* k, const ClassFileStream* cfs) NOT_CDS_RETURN; + static void write_to_stream(const InstanceKlass* k, outputStream* stream, const ClassFileStream* cfs = NULL) NOT_CDS_RETURN; + static void delete_classlist() NOT_CDS_RETURN; }; #endif // SHARE_CDS_CLASSLISTWRITER_HPP diff --git a/src/hotspot/share/cds/dumpAllocStats.hpp b/src/hotspot/share/cds/dumpAllocStats.hpp index c897c1738ec82bf2e5285a53bd350d47ed06bcda..fb178df99e88bc1f85f530c1b41c9f5bb9bebaa8 100644 --- a/src/hotspot/share/cds/dumpAllocStats.hpp +++ b/src/hotspot/share/cds/dumpAllocStats.hpp @@ -30,7 +30,7 @@ // This is for dumping detailed statistics for the allocations // in the shared spaces. 
-class DumpAllocStats : public ResourceObj { +class DumpAllocStats : public StackObj { public: // Here's poor man's enum inheritance diff --git a/src/hotspot/share/cds/dumpTimeClassInfo.cpp b/src/hotspot/share/cds/dumpTimeClassInfo.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5a9028d32255a740a74cf32f7795f10923a6e633 --- /dev/null +++ b/src/hotspot/share/cds/dumpTimeClassInfo.cpp @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "cds/archiveBuilder.hpp" +#include "cds/dumpTimeClassInfo.hpp" +#include "classfile/classLoader.hpp" +#include "classfile/classLoaderData.inline.hpp" +#include "classfile/systemDictionaryShared.hpp" +#include "memory/resourceArea.hpp" + +DumpTimeClassInfo DumpTimeClassInfo::clone() { + DumpTimeClassInfo clone; + clone._klass = _klass; + clone._nest_host = _nest_host; + clone._failed_verification = _failed_verification; + clone._is_archived_lambda_proxy = _is_archived_lambda_proxy; + clone._has_checked_exclusion = _has_checked_exclusion; + clone._id = _id; + clone._clsfile_size = _clsfile_size; + clone._clsfile_crc32 = _clsfile_crc32; + clone._excluded = _excluded; + clone._is_early_klass = _is_early_klass; + clone._verifier_constraints = NULL; + clone._verifier_constraint_flags = NULL; + clone._loader_constraints = NULL; + int clone_num_verifier_constraints = num_verifier_constraints(); + if (clone_num_verifier_constraints > 0) { + clone._verifier_constraints = new (ResourceObj::C_HEAP, mtClass) GrowableArray(clone_num_verifier_constraints, mtClass); + clone._verifier_constraint_flags = new (ResourceObj::C_HEAP, mtClass) GrowableArray(clone_num_verifier_constraints, mtClass); + for (int i = 0; i < clone_num_verifier_constraints; i++) { + clone._verifier_constraints->append(_verifier_constraints->at(i)); + clone._verifier_constraint_flags->append(_verifier_constraint_flags->at(i)); + } + } + int clone_num_loader_constraints = num_loader_constraints(); + if (clone_num_loader_constraints > 0) { + clone._loader_constraints = new (ResourceObj::C_HEAP, mtClass) GrowableArray(clone_num_loader_constraints, mtClass); + for (int i = 0; i < clone_num_loader_constraints; i++) { + clone._loader_constraints->append(_loader_constraints->at(i)); + } + } + return clone; +} + +void DumpTimeClassInfo::add_verification_constraint(InstanceKlass* k, Symbol* name, + Symbol* from_name, bool from_field_is_protected, bool from_is_array, bool from_is_object) { + if (_verifier_constraints == NULL) { + 
_verifier_constraints = new (ResourceObj::C_HEAP, mtClass) GrowableArray(4, mtClass); + } + if (_verifier_constraint_flags == NULL) { + _verifier_constraint_flags = new (ResourceObj::C_HEAP, mtClass) GrowableArray(4, mtClass); + } + GrowableArray* vc_array = _verifier_constraints; + for (int i = 0; i < vc_array->length(); i++) { + DTVerifierConstraint* p = vc_array->adr_at(i); + if (name == p->_name && from_name == p->_from_name) { + return; + } + } + DTVerifierConstraint cons(name, from_name); + vc_array->append(cons); + + GrowableArray* vcflags_array = _verifier_constraint_flags; + char c = 0; + c |= from_field_is_protected ? SystemDictionaryShared::FROM_FIELD_IS_PROTECTED : 0; + c |= from_is_array ? SystemDictionaryShared::FROM_IS_ARRAY : 0; + c |= from_is_object ? SystemDictionaryShared::FROM_IS_OBJECT : 0; + vcflags_array->append(c); + + if (log_is_enabled(Trace, cds, verification)) { + ResourceMark rm; + log_trace(cds, verification)("add_verification_constraint: %s: %s must be subclass of %s [0x%x] array len %d flags len %d", + k->external_name(), from_name->as_klass_external_name(), + name->as_klass_external_name(), c, vc_array->length(), vcflags_array->length()); + } +} + +static char get_loader_type_by(oop loader) { + assert(SystemDictionary::is_builtin_class_loader(loader), "Must be built-in loader"); + if (SystemDictionary::is_boot_class_loader(loader)) { + return (char)ClassLoader::BOOT_LOADER; + } else if (SystemDictionary::is_platform_class_loader(loader)) { + return (char)ClassLoader::PLATFORM_LOADER; + } else { + assert(SystemDictionary::is_system_class_loader(loader), "Class loader mismatch"); + return (char)ClassLoader::APP_LOADER; + } +} + +void DumpTimeClassInfo::record_linking_constraint(Symbol* name, Handle loader1, Handle loader2) { + assert(loader1 != loader2, "sanity"); + LogTarget(Info, class, loader, constraints) log; + if (_loader_constraints == NULL) { + _loader_constraints = new (ResourceObj::C_HEAP, mtClass) GrowableArray(4, mtClass); + } + char lt1 = get_loader_type_by(loader1()); + char lt2 = get_loader_type_by(loader2()); + DTLoaderConstraint lc(name, lt1, lt2); + for (int i = 0; i < _loader_constraints->length(); i++) { + DTLoaderConstraint dt = _loader_constraints->at(i); + if (lc.equals(dt)) { + if (log.is_enabled()) { + ResourceMark rm; + // Use loader[0]/loader[1] to be consistent with the logs in loaderConstraints.cpp + log.print("[CDS record loader constraint for class: %s constraint_name: %s loader[0]: %s loader[1]: %s already added]", + _klass->external_name(), name->as_C_string(), + ClassLoaderData::class_loader_data(loader1())->loader_name_and_id(), + ClassLoaderData::class_loader_data(loader2())->loader_name_and_id()); + } + return; + } + } + _loader_constraints->append(lc); + if (log.is_enabled()) { + ResourceMark rm; + // Use loader[0]/loader[1] to be consistent with the logs in loaderConstraints.cpp + log.print("[CDS record loader constraint for class: %s constraint_name: %s loader[0]: %s loader[1]: %s total %d]", + _klass->external_name(), name->as_C_string(), + ClassLoaderData::class_loader_data(loader1())->loader_name_and_id(), + ClassLoaderData::class_loader_data(loader2())->loader_name_and_id(), + _loader_constraints->length()); + } +} + +bool DumpTimeClassInfo::is_builtin() { + return SystemDictionaryShared::is_builtin(_klass); +} + +DumpTimeClassInfo* DumpTimeSharedClassTable::find_or_allocate_info_for(InstanceKlass* k, bool dump_in_progress) { + bool created = false; + DumpTimeClassInfo* p; + if (!dump_in_progress) { + p = 
put_if_absent(k, &created); + } else { + p = get(k); + } + if (created) { + assert(!SystemDictionaryShared::no_class_loading_should_happen(), + "no new classes can be loaded while dumping archive"); + p->_klass = k; + } else { + if (!dump_in_progress) { + assert(p->_klass == k, "Sanity"); + } + } + return p; +} + +class CountClassByCategory : StackObj { + DumpTimeSharedClassTable* _table; +public: + CountClassByCategory(DumpTimeSharedClassTable* table) : _table(table) {} + bool do_entry(InstanceKlass* k, DumpTimeClassInfo& info) { + if (!info.is_excluded()) { + if (info.is_builtin()) { + _table->inc_builtin_count(); + } else { + _table->inc_unregistered_count(); + } + } + return true; // keep on iterating + } +}; + +void DumpTimeSharedClassTable::update_counts() { + _builtin_count = 0; + _unregistered_count = 0; + CountClassByCategory counter(this); + iterate(&counter); +} diff --git a/src/hotspot/share/cds/dumpTimeClassInfo.hpp b/src/hotspot/share/cds/dumpTimeClassInfo.hpp new file mode 100644 index 0000000000000000000000000000000000000000..722849954fa47868728e665225cf91e3914759d0 --- /dev/null +++ b/src/hotspot/share/cds/dumpTimeClassInfo.hpp @@ -0,0 +1,199 @@ + +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#ifndef SHARED_CDS_DUMPTIMESHAREDCLASSINFO_HPP
+#define SHARED_CDS_DUMPTIMESHAREDCLASSINFO_HPP
+#include "cds/archiveBuilder.hpp"
+#include "cds/archiveUtils.hpp"
+#include "cds/metaspaceShared.hpp"
+#include "classfile/compactHashtable.hpp"
+#include "memory/metaspaceClosure.hpp"
+#include "oops/instanceKlass.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "utilities/growableArray.hpp"
+
+class Method;
+class Symbol;
+
+class DumpTimeClassInfo: public CHeapObj<mtClass> {
+  bool _excluded;
+  bool _is_early_klass;
+  bool _has_checked_exclusion;
+public:
+  struct DTLoaderConstraint {
+    Symbol* _name;
+    char _loader_type1;
+    char _loader_type2;
+    DTLoaderConstraint(Symbol* name, char l1, char l2) : _name(name), _loader_type1(l1), _loader_type2(l2) {
+      _name->increment_refcount();
+    }
+    DTLoaderConstraint() : _name(NULL), _loader_type1('0'), _loader_type2('0') {}
+    bool equals(const DTLoaderConstraint& t) {
+      return t._name == _name &&
+             ((t._loader_type1 == _loader_type1 && t._loader_type2 == _loader_type2) ||
+              (t._loader_type2 == _loader_type1 && t._loader_type1 == _loader_type2));
+    }
+  };
+
+  struct DTVerifierConstraint {
+    Symbol* _name;
+    Symbol* _from_name;
+    DTVerifierConstraint() : _name(NULL), _from_name(NULL) {}
+    DTVerifierConstraint(Symbol* n, Symbol* fn) : _name(n), _from_name(fn) {
+      _name->increment_refcount();
+      _from_name->increment_refcount();
+    }
+  };
+
+  InstanceKlass* _klass;
+  InstanceKlass* _nest_host;
+  bool _failed_verification;
+  bool _is_archived_lambda_proxy;
+  int _id;
+  int _clsfile_size;
+  int _clsfile_crc32;
+  GrowableArray<DTVerifierConstraint>* _verifier_constraints;
+  GrowableArray<char>* _verifier_constraint_flags;
+  GrowableArray<DTLoaderConstraint>* _loader_constraints;
+
+  DumpTimeClassInfo() {
+    _klass = NULL;
+    _nest_host = NULL;
+    _failed_verification = false;
+    _is_archived_lambda_proxy = false;
+    _has_checked_exclusion = false;
+    _id = -1;
+    _clsfile_size = -1;
+    _clsfile_crc32 = -1;
+    _excluded = false;
+    _is_early_klass = JvmtiExport::is_early_phase();
+    _verifier_constraints = NULL;
+    _verifier_constraint_flags = NULL;
+    _loader_constraints = NULL;
+  }
+
+  void add_verification_constraint(InstanceKlass* k, Symbol* name,
+         Symbol* from_name, bool from_field_is_protected, bool from_is_array, bool from_is_object);
+  void record_linking_constraint(Symbol* name, Handle loader1, Handle loader2);
+
+  bool is_builtin();
+
+  int num_verifier_constraints() {
+    if (_verifier_constraint_flags != NULL) {
+      return _verifier_constraint_flags->length();
+    } else {
+      return 0;
+    }
+  }
+
+  int num_loader_constraints() {
+    if (_loader_constraints != NULL) {
+      return _loader_constraints->length();
+    } else {
+      return 0;
+    }
+  }
+
+  void metaspace_pointers_do(MetaspaceClosure* it) {
+    it->push(&_klass);
+    it->push(&_nest_host);
+    if (_verifier_constraints != NULL) {
+      for (int i = 0; i < _verifier_constraints->length(); i++) {
+        DTVerifierConstraint* cons = _verifier_constraints->adr_at(i);
+        it->push(&cons->_name);
+        it->push(&cons->_from_name);
+      }
+    }
+    if (_loader_constraints != NULL) {
+      for (int i = 0; i < _loader_constraints->length(); i++) {
+        DTLoaderConstraint* lc = _loader_constraints->adr_at(i);
+        it->push(&lc->_name);
+      }
+    }
+  }
+
+  bool is_excluded() {
+    // _klass may become NULL due to DynamicArchiveBuilder::set_to_null
+    return _excluded || _failed_verification || _klass == NULL;
+  }
+
+  // Was this class loaded while JvmtiExport::is_early_phase()==true
+  bool is_early_klass() {
+    return _is_early_klass;
+  }
+
+  // simple accessors
+  void set_excluded() { _excluded = true; }
+  bool
has_checked_exclusion() const { return _has_checked_exclusion; } + void set_has_checked_exclusion() { _has_checked_exclusion = true; } + bool failed_verification() const { return _failed_verification; } + void set_failed_verification() { _failed_verification = true; } + InstanceKlass* nest_host() const { return _nest_host; } + void set_nest_host(InstanceKlass* nest_host) { _nest_host = nest_host; } + DumpTimeClassInfo clone(); +}; + + +inline unsigned DumpTimeSharedClassTable_hash(InstanceKlass* const& k) { + if (DumpSharedSpaces) { + // Deterministic archive contents + uintx delta = k->name() - MetaspaceShared::symbol_rs_base(); + return primitive_hash(delta); + } else { + // Deterministic archive is not possible because classes can be loaded + // in multiple threads. + return primitive_hash(k); + } +} + +class DumpTimeSharedClassTable: public ResourceHashtable< + InstanceKlass*, + DumpTimeClassInfo, + 15889, // prime number + ResourceObj::C_HEAP, + mtClassShared, + &DumpTimeSharedClassTable_hash> +{ + int _builtin_count; + int _unregistered_count; +public: + DumpTimeSharedClassTable() { + _builtin_count = 0; + _unregistered_count = 0; + } + DumpTimeClassInfo* find_or_allocate_info_for(InstanceKlass* k, bool dump_in_progress); + void inc_builtin_count() { _builtin_count++; } + void inc_unregistered_count() { _unregistered_count++; } + void update_counts(); + int count_of(bool is_builtin) const { + if (is_builtin) { + return _builtin_count; + } else { + return _unregistered_count; + } + } +}; + +#endif // SHARED_CDS_DUMPTIMESHAREDCLASSINFO_HPP diff --git a/src/hotspot/share/cds/dynamicArchive.cpp b/src/hotspot/share/cds/dynamicArchive.cpp index 47c3642ff0ec34931f909a3bdf835b7c0ef14663..b3c18e318a85058ecd403885281010bd69dcfcc0 100644 --- a/src/hotspot/share/cds/dynamicArchive.cpp +++ b/src/hotspot/share/cds/dynamicArchive.cpp @@ -83,6 +83,7 @@ public: void init_header(); void release_header(); + void post_dump(); void sort_methods(); void sort_methods(InstanceKlass* ik) const; void remark_pointers_for_instance_klass(InstanceKlass* k, bool should_mark) const; @@ -112,6 +113,9 @@ public: MutexLocker ml(DumpTimeTable_lock, Mutex::_no_safepoint_check_flag); SystemDictionaryShared::check_excluded_classes(); + // save dumptime tables + SystemDictionaryShared::clone_dumptime_tables(); + init_header(); gather_source_objs(); reserve_buffer(); @@ -158,6 +162,11 @@ public: write_archive(serialized_data); release_header(); + post_dump(); + + // Restore dumptime tables + SystemDictionaryShared::restore_dumptime_tables(); + assert(_num_dump_regions_used == _total_dump_regions, "must be"); verify_universe("After CDS dynamic dump"); } @@ -171,14 +180,15 @@ public: void DynamicArchiveBuilder::init_header() { FileMapInfo* mapinfo = new FileMapInfo(false); assert(FileMapInfo::dynamic_info() == mapinfo, "must be"); + FileMapInfo* base_info = FileMapInfo::current_info(); + // header only be available after populate_header + mapinfo->populate_header(base_info->core_region_alignment()); _header = mapinfo->dynamic_header(); - FileMapInfo* base_info = FileMapInfo::current_info(); _header->set_base_header_crc(base_info->crc()); for (int i = 0; i < MetaspaceShared::n_regions; i++) { _header->set_base_region_crc(i, base_info->space_crc(i)); } - _header->populate(base_info, base_info->core_region_alignment()); } void DynamicArchiveBuilder::release_header() { @@ -194,6 +204,10 @@ void DynamicArchiveBuilder::release_header() { _header = NULL; } +void DynamicArchiveBuilder::post_dump() { + 
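+  // (Rationale, inferred from reset_map_and_vs() in archiveUtils.hpp: the
+  //  pointer bitmap and its VirtualSpace describe the dump-time buffer, which
+  //  is released once the dynamic archive has been written, so the stale
+  //  state is cleared here.)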
ArchivePtrMarker::reset_map_and_vs(); +} + void DynamicArchiveBuilder::sort_methods() { InstanceKlass::disable_method_binary_search(); for (int i = 0; i < klasses()->length(); i++) { @@ -257,8 +271,14 @@ void DynamicArchiveBuilder::sort_methods(InstanceKlass* ik) const { if (ik->default_methods() != NULL) { Method::sort_methods(ik->default_methods(), /*set_idnums=*/false, dynamic_dump_method_comparator); } - ik->vtable().initialize_vtable(); - ik->itable().initialize_itable(); + if (ik->is_linked()) { + // If the class has already been linked, we must relayout the i/v tables, whose order depends + // on the method sorting order. + // If the class is unlinked, we cannot layout the i/v tables yet. This is OK, as the + // i/v tables will be initialized at runtime after bytecode verification. + ik->vtable().initialize_vtable(); + ik->itable().initialize_itable(); + } // Set all the pointer marking bits after sorting. remark_pointers_for_instance_klass(ik, true); @@ -306,7 +326,7 @@ void DynamicArchiveBuilder::write_archive(char* serialized_data) { size_t file_size = pointer_delta(top, base, sizeof(char)); log_info(cds, dynamic)("Written dynamic archive " PTR_FORMAT " - " PTR_FORMAT - " [" SIZE_FORMAT " bytes header, " SIZE_FORMAT " bytes total]", + " [" UINT32_FORMAT " bytes header, " SIZE_FORMAT " bytes total]", p2i(base), p2i(top), _header->header_size(), file_size); log_info(cds, dynamic)("%d klasses; %d symbols", klasses()->length(), symbols()->length()); @@ -319,7 +339,7 @@ public: VMOp_Type type() const { return VMOp_PopulateDumpSharedSpace; } void doit() { ResourceMark rm; - if (SystemDictionaryShared::empty_dumptime_table()) { + if (SystemDictionaryShared::is_dumptime_table_empty()) { log_warning(cds, dynamic)("There is no class to be included in the dynamic archive."); return; } @@ -333,12 +353,12 @@ public: } }; -void DynamicArchive::prepare_for_dynamic_dumping_at_exit() { +void DynamicArchive::prepare_for_dynamic_dumping() { EXCEPTION_MARK; ResourceMark rm(THREAD); - MetaspaceShared::link_and_cleanup_shared_classes(THREAD); + MetaspaceShared::link_shared_classes(THREAD); if (HAS_PENDING_EXCEPTION) { - log_error(cds)("ArchiveClassesAtExit has failed"); + log_error(cds)("Dynamic dump has failed"); log_error(cds)("%s: %s", PENDING_EXCEPTION->klass()->external_name(), java_lang_String::as_utf8_string(java_lang_Throwable::message(PENDING_EXCEPTION))); // We cannot continue to dump the archive anymore. @@ -347,42 +367,37 @@ void DynamicArchive::prepare_for_dynamic_dumping_at_exit() { } } -bool DynamicArchive::_has_been_dumped_once = false; - void DynamicArchive::dump(const char* archive_name, TRAPS) { assert(UseSharedSpaces && RecordDynamicDumpInfo, "already checked in arguments.cpp?"); assert(ArchiveClassesAtExit == nullptr, "already checked in arguments.cpp?"); - // During dynamic archive dumping, some of the data structures are overwritten so - // we cannot dump the dynamic archive again. TODO: this should be fixed. - if (has_been_dumped_once()) { - THROW_MSG(vmSymbols::java_lang_RuntimeException(), - "Dynamic dump has been done, and should only be done once"); - } else { - // prevent multiple dumps. 
-    set_has_been_dumped_once();
-    ArchiveClassesAtExit = archive_name;
-    if (Arguments::init_shared_archive_paths()) {
-      dump();
-    } else {
-      ArchiveClassesAtExit = nullptr;
-      THROW_MSG(vmSymbols::java_lang_RuntimeException(),
-                "Could not setup SharedDynamicArchivePath");
+  ArchiveClassesAtExit = archive_name;
+  if (Arguments::init_shared_archive_paths()) {
+    prepare_for_dynamic_dumping();
+    if (DynamicDumpSharedSpaces) {
+      dump(CHECK);
     }
-    // prevent do dynamic dump at exit.
+  } else {
     ArchiveClassesAtExit = nullptr;
-    if (!Arguments::init_shared_archive_paths()) {
-      THROW_MSG(vmSymbols::java_lang_RuntimeException(),
-                "Could not restore SharedDynamicArchivePath");
-    }
+    THROW_MSG(vmSymbols::java_lang_RuntimeException(),
+              "Could not setup SharedDynamicArchivePath");
+  }
+  // Prevent another dynamic dump from running at exit.
+  ArchiveClassesAtExit = nullptr;
+  if (!Arguments::init_shared_archive_paths()) {
+    THROW_MSG(vmSymbols::java_lang_RuntimeException(),
+              "Could not restore SharedDynamicArchivePath");
   }
 }
 
-void DynamicArchive::dump() {
+void DynamicArchive::dump(TRAPS) {
   if (Arguments::GetSharedDynamicArchivePath() == NULL) {
     log_warning(cds, dynamic)("SharedDynamicArchivePath is not specified");
     return;
   }
+
+  // copy shared path table to saved.
+  FileMapInfo::clone_shared_path_table(CHECK);
+
   VM_PopulateDynamicDumpSharedSpace op;
   VMThread::execute(&op);
 }
diff --git a/src/hotspot/share/cds/dynamicArchive.hpp b/src/hotspot/share/cds/dynamicArchive.hpp
index 86d9be01f695236659228a49275bfda3ffcb4e53..0371b94849752aad06eaf08d38333b7291637113 100644
--- a/src/hotspot/share/cds/dynamicArchive.hpp
+++ b/src/hotspot/share/cds/dynamicArchive.hpp
@@ -38,7 +38,7 @@
 #if INCLUDE_CDS
 
 class DynamicArchiveHeader : public FileMapHeader {
-  friend class CDSOffsets;
+  friend class CDSConstants;
 private:
   int _base_header_crc;
   int _base_region_crc[MetaspaceShared::n_regions];
@@ -58,13 +58,10 @@ public:
 };
 
 class DynamicArchive : AllStatic {
-  static bool _has_been_dumped_once;
 public:
-  static void prepare_for_dynamic_dumping_at_exit();
+  static void prepare_for_dynamic_dumping();
   static void dump(const char* archive_name, TRAPS);
-  static void dump();
-  static bool has_been_dumped_once() { return _has_been_dumped_once; }
-  static void set_has_been_dumped_once() { _has_been_dumped_once = true; }
+  static void dump(TRAPS);
   static bool is_mapped() { return FileMapInfo::dynamic_info() != NULL; }
   static bool validate(FileMapInfo* dynamic_info);
 };
diff --git a/src/hotspot/share/cds/filemap.cpp b/src/hotspot/share/cds/filemap.cpp
index ccc88ce08e27dc84ff5c14680dc57784fc8a4c4b..1651aef99b08295f27b545a3f3a96acce258296e 100644
--- a/src/hotspot/share/cds/filemap.cpp
+++ b/src/hotspot/share/cds/filemap.cpp
@@ -54,6 +54,7 @@
 #include "oops/oop.inline.hpp"
 #include "prims/jvmtiExport.hpp"
 #include "runtime/arguments.hpp"
+#include "runtime/globals_extension.hpp"
 #include "runtime/java.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/os.hpp"
@@ -169,21 +170,13 @@ template <int N> static void get_header_version(char (&header_version) [N]) {
 
 FileMapInfo::FileMapInfo(bool is_static) {
   memset((void*)this, 0, sizeof(FileMapInfo));
   _is_static = is_static;
-  size_t header_size;
-  if (is_static) {
+  if (_is_static) {
     assert(_current_info == NULL, "must be singleton"); // not thread safe
     _current_info = this;
-    header_size = sizeof(FileMapHeader);
   } else {
     assert(_dynamic_archive_info == NULL, "must be singleton"); // not thread safe
     _dynamic_archive_info = this;
-    header_size = sizeof(DynamicArchiveHeader);
   }
-  _header =
(FileMapHeader*)os::malloc(header_size, mtInternal); - memset((void*)_header, 0, header_size); - _header->set_header_size(header_size); - _header->set_version(INVALID_CDS_ARCHIVE_VERSION); - _header->set_has_platform_or_app_classes(true); _file_offset = 0; _file_open = false; } @@ -199,20 +192,53 @@ FileMapInfo::~FileMapInfo() { } void FileMapInfo::populate_header(size_t core_region_alignment) { - header()->populate(this, core_region_alignment); -} - -void FileMapHeader::populate(FileMapInfo* mapinfo, size_t core_region_alignment) { - if (DynamicDumpSharedSpaces) { - _magic = CDS_DYNAMIC_ARCHIVE_MAGIC; + assert(_header == NULL, "Sanity check"); + size_t c_header_size; + size_t header_size; + size_t base_archive_name_size = 0; + size_t base_archive_path_offset = 0; + if (is_static()) { + c_header_size = sizeof(FileMapHeader); + header_size = c_header_size; } else { - _magic = CDS_ARCHIVE_MAGIC; + // dynamic header including base archive name for non-default base archive + c_header_size = sizeof(DynamicArchiveHeader); + header_size = c_header_size; + if (!FLAG_IS_DEFAULT(SharedArchiveFile)) { + base_archive_name_size = strlen(Arguments::GetSharedArchivePath()) + 1; + header_size += base_archive_name_size; + base_archive_path_offset = c_header_size; + } + } + _header = (FileMapHeader*)os::malloc(header_size, mtInternal); + memset((void*)_header, 0, header_size); + _header->populate(this, + core_region_alignment, + header_size, + base_archive_name_size, + base_archive_path_offset); +} + +void FileMapHeader::populate(FileMapInfo *info, size_t core_region_alignment, + size_t header_size, size_t base_archive_name_size, + size_t base_archive_path_offset) { + // 1. We require _generic_header._magic to be at the beginning of the file + // 2. FileMapHeader also assumes that _generic_header is at the beginning of the file + assert(offset_of(FileMapHeader, _generic_header) == 0, "must be"); + set_header_size((unsigned int)header_size); + set_base_archive_path_offset((unsigned int)base_archive_path_offset); + set_base_archive_name_size((unsigned int)base_archive_name_size); + set_magic(DynamicDumpSharedSpaces ? 
CDS_DYNAMIC_ARCHIVE_MAGIC : CDS_ARCHIVE_MAGIC); + set_version(CURRENT_CDS_ARCHIVE_VERSION); + + if (!info->is_static() && base_archive_name_size != 0) { + // copy base archive name + copy_base_archive_name(Arguments::GetSharedArchivePath()); } - _version = CURRENT_CDS_ARCHIVE_VERSION; _core_region_alignment = core_region_alignment; _obj_alignment = ObjectAlignmentInBytes; _compact_strings = CompactStrings; - if (HeapShared::is_heap_object_archiving_allowed()) { + if (DumpSharedSpaces && HeapShared::can_write()) { _narrow_oop_mode = CompressedOops::mode(); _narrow_oop_base = CompressedOops::base(); _narrow_oop_shift = CompressedOops::shift(); @@ -245,22 +271,29 @@ void FileMapHeader::populate(FileMapInfo* mapinfo, size_t core_region_alignment) _requested_base_address = (char*)SharedBaseAddress; _mapped_base_address = (char*)SharedBaseAddress; _allow_archiving_with_java_agent = AllowArchivingWithJavaAgent; - // the following 2 fields will be set in write_header for dynamic archive header - _base_archive_name_size = 0; - _base_archive_is_default = false; if (!DynamicDumpSharedSpaces) { - set_shared_path_table(mapinfo->_shared_path_table); + set_shared_path_table(info->_shared_path_table); CDS_JAVA_HEAP_ONLY(_heap_obj_roots = CompressedOops::encode(HeapShared::roots());) } } +void FileMapHeader::copy_base_archive_name(const char* archive) { + assert(base_archive_name_size() != 0, "_base_archive_name_size not set"); + assert(base_archive_path_offset() != 0, "_base_archive_path_offset not set"); + assert(header_size() > sizeof(*this), "_base_archive_name_size not included in header size?"); + memcpy((char*)this + base_archive_path_offset(), archive, base_archive_name_size()); +} + void FileMapHeader::print(outputStream* st) { ResourceMark rm; - st->print_cr("- magic: 0x%08x", _magic); - st->print_cr("- crc: 0x%08x", _crc); - st->print_cr("- version: %d", _version); + st->print_cr("- magic: 0x%08x", magic()); + st->print_cr("- crc: 0x%08x", crc()); + st->print_cr("- version: %d", version()); + st->print_cr("- header_size: " UINT32_FORMAT, header_size()); + st->print_cr("- base_archive_path_offset: " UINT32_FORMAT, base_archive_path_offset()); + st->print_cr("- base_archive_name_size: " UINT32_FORMAT, base_archive_name_size()); for (int i = 0; i < NUM_CDS_REGIONS; i++) { FileMapRegion* si = space_at(i); @@ -268,7 +301,6 @@ void FileMapHeader::print(outputStream* st) { } st->print_cr("============ end regions ======== "); - st->print_cr("- header_size: " SIZE_FORMAT, _header_size); st->print_cr("- core_region_alignment: " SIZE_FORMAT, _core_region_alignment); st->print_cr("- obj_alignment: %d", _obj_alignment); st->print_cr("- narrow_oop_base: " INTPTR_FORMAT, p2i(_narrow_oop_base)); @@ -283,9 +315,7 @@ void FileMapHeader::print(outputStream* st) { st->print_cr("- cloned_vtables_offset: " SIZE_FORMAT_HEX, _cloned_vtables_offset); st->print_cr("- serialized_data_offset: " SIZE_FORMAT_HEX, _serialized_data_offset); st->print_cr("- heap_end: " INTPTR_FORMAT, p2i(_heap_end)); - st->print_cr("- base_archive_is_default: %d", _base_archive_is_default); st->print_cr("- jvm_ident: %s", _jvm_ident); - st->print_cr("- base_archive_name_size: " SIZE_FORMAT, _base_archive_name_size); st->print_cr("- shared_path_table_offset: " SIZE_FORMAT_HEX, _shared_path_table_offset); st->print_cr("- shared_path_table_size: %d", _shared_path_table_size); st->print_cr("- app_class_paths_start_index: %d", _app_class_paths_start_index); @@ -478,6 +508,24 @@ void FileMapInfo::copy_shared_path_table(ClassLoaderData* loader_data, 
TRAPS) { for (int i = 0; i < _shared_path_table.size(); i++) { _saved_shared_path_table.path_at(i)->copy_from(shared_path(i), loader_data, CHECK); } + _saved_shared_path_table_array = array; +} + +void FileMapInfo::clone_shared_path_table(TRAPS) { + Arguments::assert_is_dumping_archive(); + + ClassLoaderData* loader_data = ClassLoaderData::the_null_class_loader_data(); + ClassPathEntry* jrt = ClassLoader::get_jrt_entry(); + + assert(jrt != NULL, + "No modular java runtime image present when allocating the CDS classpath entry table"); + + if (_saved_shared_path_table_array != NULL) { + MetadataFactory::free_array(loader_data, _saved_shared_path_table_array); + _saved_shared_path_table_array = NULL; + } + + copy_shared_path_table(loader_data, CHECK); } void FileMapInfo::allocate_shared_path_table(TRAPS) { @@ -503,8 +551,7 @@ void FileMapInfo::allocate_shared_path_table(TRAPS) { } assert(i == _shared_path_table.size(), "number of shared path entry mismatch"); - - copy_shared_path_table(loader_data, CHECK); + clone_shared_path_table(CHECK); } int FileMapInfo::add_shared_classpaths(int i, const char* which, ClassPathEntry *cpe, TRAPS) { @@ -995,122 +1042,169 @@ void FileMapInfo::validate_non_existent_class_paths() { } } +// a utility class for checking file header +class FileHeaderHelper { + int _fd; + GenericCDSFileMapHeader _header; + +public: + FileHeaderHelper() { + _fd = -1; + } + + ~FileHeaderHelper() { + if (_fd != -1) { + os::close(_fd); + } + } + + bool initialize(const char* archive_name) { + _fd = os::open(archive_name, O_RDONLY | O_BINARY, 0); + if (_fd < 0) { + return false; + } + return initialize(_fd); + } + + // for an already opened file, do not set _fd + bool initialize(int fd) { + assert(fd != -1, "Archive should be opened"); + size_t size = sizeof(GenericCDSFileMapHeader); + lseek(fd, 0, SEEK_SET); + size_t n = os::read(fd, (void*)&_header, (unsigned int)size); + if (n != size) { + vm_exit_during_initialization("Unable to read generic CDS file map header from shared archive"); + return false; + } + return true; + } + + GenericCDSFileMapHeader* get_generic_file_header() { + return &_header; + } + + bool read_base_archive_name(char** target) { + assert(_fd != -1, "Archive should be open"); + size_t name_size = (size_t)_header._base_archive_name_size; + assert(name_size != 0, "For non-default base archive, name size should be non-zero!"); + *target = NEW_C_HEAP_ARRAY(char, name_size, mtInternal); + lseek(_fd, _header._base_archive_path_offset, SEEK_SET); // position to correct offset. + size_t n = os::read(_fd, *target, (unsigned int)name_size); + if (n != name_size) { + log_info(cds)("Unable to read base archive name from archive"); + FREE_C_HEAP_ARRAY(char, *target); + return false; + } + if (!os::file_exists(*target)) { + log_info(cds)("Base archive %s does not exist", *target); + FREE_C_HEAP_ARRAY(char, *target); + return false; + } + return true; + } +}; + bool FileMapInfo::check_archive(const char* archive_name, bool is_static) { - int fd = os::open(archive_name, O_RDONLY | O_BINARY, 0); - if (fd < 0) { + FileHeaderHelper file_helper; + if (!file_helper.initialize(archive_name)) { // do not vm_exit_during_initialization here because Arguments::init_shared_archive_paths() // requires a shared archive name. The open_for_read() function will log a message regarding // failure in opening a shared archive. return false; } - size_t sz = is_static ? 
sizeof(FileMapHeader) : sizeof(DynamicArchiveHeader); - void* header = os::malloc(sz, mtInternal); - memset(header, 0, sz); - size_t n = os::read(fd, header, (unsigned int)sz); - if (n != sz) { - os::free(header); - os::close(fd); - vm_exit_during_initialization("Unable to read header from shared archive", archive_name); - return false; - } + GenericCDSFileMapHeader* header = file_helper.get_generic_file_header(); if (is_static) { - FileMapHeader* static_header = (FileMapHeader*)header; - if (static_header->magic() != CDS_ARCHIVE_MAGIC) { - os::free(header); - os::close(fd); + if (header->_magic != CDS_ARCHIVE_MAGIC) { vm_exit_during_initialization("Not a base shared archive", archive_name); return false; } + if (header->_base_archive_path_offset != 0) { + log_info(cds)("_base_archive_path_offset should be 0"); + log_info(cds)("_base_archive_path_offset = " UINT32_FORMAT, header->_base_archive_path_offset); + return false; + } } else { - DynamicArchiveHeader* dynamic_header = (DynamicArchiveHeader*)header; - if (dynamic_header->magic() != CDS_DYNAMIC_ARCHIVE_MAGIC) { - os::free(header); - os::close(fd); + if (header->_magic != CDS_DYNAMIC_ARCHIVE_MAGIC) { vm_exit_during_initialization("Not a top shared archive", archive_name); return false; } + unsigned int name_size = header->_base_archive_name_size; + unsigned int path_offset = header->_base_archive_path_offset; + unsigned int header_size = header->_header_size; + if (path_offset + name_size != header_size) { + log_info(cds)("_header_size should be equal to _base_archive_path_offset plus _base_archive_name_size"); + log_info(cds)(" _base_archive_name_size = " UINT32_FORMAT, name_size); + log_info(cds)(" _base_archive_path_offset = " UINT32_FORMAT, path_offset); + log_info(cds)(" _header_size = " UINT32_FORMAT, header_size); + return false; + } + char* base_name = NULL; + if (!file_helper.read_base_archive_name(&base_name)) { + return false; + } + FREE_C_HEAP_ARRAY(char, base_name); } - os::free(header); - os::close(fd); return true; } bool FileMapInfo::get_base_archive_name_from_header(const char* archive_name, - int* size, char** base_archive_name) { - int fd = os::open(archive_name, O_RDONLY | O_BINARY, 0); - if (fd < 0) { - *size = 0; + char** base_archive_name) { + FileHeaderHelper file_helper; + if (!file_helper.initialize(archive_name)) { return false; } - - // read the header as a dynamic archive header - size_t sz = sizeof(DynamicArchiveHeader); - DynamicArchiveHeader* dynamic_header = (DynamicArchiveHeader*)os::malloc(sz, mtInternal); - size_t n = os::read(fd, dynamic_header, (unsigned int)sz); - if (n != sz) { - fail_continue("Unable to read the file header."); - os::free(dynamic_header); - os::close(fd); + GenericCDSFileMapHeader* header = file_helper.get_generic_file_header(); + if (header->_magic != CDS_DYNAMIC_ARCHIVE_MAGIC) { + // Not a dynamic header, no need to proceed further. return false; } - if (dynamic_header->magic() != CDS_DYNAMIC_ARCHIVE_MAGIC) { - // Not a dynamic header, no need to proceed further. 
-    *size = 0;
-    os::free(dynamic_header);
-    os::close(fd);
+
+  if ((header->_base_archive_name_size == 0 && header->_base_archive_path_offset != 0) ||
+      (header->_base_archive_name_size != 0 && header->_base_archive_path_offset == 0)) {
+    fail_continue("Default base archive not set correctly");
     return false;
   }
 
-  if (dynamic_header->base_archive_is_default()) {
+  if (header->_base_archive_name_size == 0 &&
+      header->_base_archive_path_offset == 0) {
     *base_archive_name = Arguments::get_default_shared_archive_path();
   } else {
     // read the base archive name
-    size_t name_size = dynamic_header->base_archive_name_size();
-    if (name_size == 0) {
-      os::free(dynamic_header);
-      os::close(fd);
-      return false;
-    }
-    *base_archive_name = NEW_C_HEAP_ARRAY(char, name_size, mtInternal);
-    n = os::read(fd, *base_archive_name, (unsigned int)name_size);
-    if (n != name_size) {
-      fail_continue("Unable to read the base archive name from the header.");
-      FREE_C_HEAP_ARRAY(char, *base_archive_name);
+    if (!file_helper.read_base_archive_name(base_archive_name)) {
       *base_archive_name = NULL;
-      os::free(dynamic_header);
-      os::close(fd);
       return false;
     }
   }
-
-  os::free(dynamic_header);
-  os::close(fd);
   return true;
 }
 
 // Read the FileMapInfo information from the file.
 
 bool FileMapInfo::init_from_file(int fd) {
-  size_t sz = is_static() ? sizeof(FileMapHeader) : sizeof(DynamicArchiveHeader);
-  size_t n = os::read(fd, header(), (unsigned int)sz);
-  if (n != sz) {
+  FileHeaderHelper file_helper;
+  if (!file_helper.initialize(fd)) {
     fail_continue("Unable to read the file header.");
     return false;
   }
-
-  if (!Arguments::has_jimage()) {
-    FileMapInfo::fail_continue("The shared archive file cannot be used with an exploded module build.");
-    return false;
-  }
+  GenericCDSFileMapHeader* gen_header = file_helper.get_generic_file_header();
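+  // Only the fixed-size GenericCDSFileMapHeader has been read so far. The full
+  // header, whose total size (including any embedded base archive path) is
+  // recorded in _header_size, is allocated and re-read below.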
   unsigned int expected_magic = is_static() ? CDS_ARCHIVE_MAGIC : CDS_DYNAMIC_ARCHIVE_MAGIC;
-  if (header()->magic() != expected_magic) {
+  if (gen_header->_magic != expected_magic) {
     log_info(cds)("_magic expected: 0x%08x", expected_magic);
-    log_info(cds)(" actual: 0x%08x", header()->magic());
+    log_info(cds)(" actual: 0x%08x", gen_header->_magic);
     FileMapInfo::fail_continue("The shared archive file has a bad magic number.");
     return false;
   }
 
+  _header = (FileMapHeader*)os::malloc(gen_header->_header_size, mtInternal);
+  lseek(fd, 0, SEEK_SET); // reset to the beginning of the archive
+  size_t size = gen_header->_header_size;
+  size_t n = os::read(fd, (void*)_header, (unsigned int)size);
+  if (n != size) {
+    fail_continue("Failed to read file header from the top archive file\n");
+    return false;
+  }
+
   if (header()->version() != CURRENT_CDS_ARCHIVE_VERSION) {
     log_info(cds)("_version expected: %d", CURRENT_CDS_ARCHIVE_VERSION);
     log_info(cds)(" actual: %d", header()->version());
@@ -1118,11 +1212,17 @@ bool FileMapInfo::init_from_file(int fd) {
     return false;
   }
 
-  if (header()->header_size() != sz) {
-    log_info(cds)("_header_size expected: " SIZE_FORMAT, sz);
-    log_info(cds)(" actual: " SIZE_FORMAT, header()->header_size());
-    FileMapInfo::fail_continue("The shared archive file has an incorrect header size.");
-    return false;
+  unsigned int base_offset = header()->base_archive_path_offset();
+  unsigned int name_size = header()->base_archive_name_size();
+  unsigned int header_size = header()->header_size();
+  if (base_offset != 0 && name_size != 0) {
+    if (header_size != base_offset + name_size) {
+      log_info(cds)("_header_size: " UINT32_FORMAT, header_size);
+      log_info(cds)("base_archive_name_size: " UINT32_FORMAT, name_size);
+      log_info(cds)("base_archive_path_offset: " UINT32_FORMAT, base_offset);
+      FileMapInfo::fail_continue("The shared archive file has an incorrect header size.");
+      return false;
+    }
   }
 
   const char* actual_ident = header()->jvm_ident();
@@ -1152,7 +1252,7 @@ bool FileMapInfo::init_from_file(int fd) {
     }
   }
 
-  _file_offset = n + header()->base_archive_name_size(); // accounts for the size of _base_archive_name
+  _file_offset = header()->header_size(); // accounts for the size of _base_archive_name
 
   if (is_static()) {
     // just checking the last region is sufficient since the archive is written
@@ -1236,16 +1336,12 @@ void FileMapInfo::open_for_write(const char* path) {
   // Seek past the header. We will write the header after all regions are written
   // and their CRCs computed.
   size_t header_bytes = header()->header_size();
-  if (header()->magic() == CDS_DYNAMIC_ARCHIVE_MAGIC) {
-    header_bytes += strlen(Arguments::GetSharedArchivePath()) + 1;
-  }
 
   header_bytes = align_up(header_bytes, MetaspaceShared::core_region_alignment());
   _file_offset = header_bytes;
   seek_to_position(_file_offset);
 }
 
-
 // Write the header to the file, seek to the next allocation boundary.
 
 void FileMapInfo::write_header() {
@@ -1253,13 +1349,6 @@ void FileMapInfo::write_header() {
   seek_to_position(_file_offset);
   assert(is_file_position_aligned(), "must be");
   write_bytes(header(), header()->header_size());
-
-  if (header()->magic() == CDS_DYNAMIC_ARCHIVE_MAGIC) {
-    char* base_archive_name = (char*)Arguments::GetSharedArchivePath();
-    if (base_archive_name != NULL) {
-      write_bytes(base_archive_name, header()->base_archive_name_size());
-    }
-  }
 }
 
 size_t FileMapRegion::used_aligned() const {
@@ -1389,13 +1478,13 @@ char* FileMapInfo::write_bitmap_region(const CHeapBitMap* ptrmap,
 // Write out the given archive heap memory regions.  GC code combines multiple
 // consecutive archive GC regions into one MemRegion whenever possible and
-// produces the 'heap_mem' array.
+// produces the 'regions' array.
 //
 // If the archive heap memory size is smaller than a single dump time GC region
 // size, there is only one MemRegion in the array.
 //
 // If the archive heap memory size is bigger than one dump time GC region size,
-// the 'heap_mem' array may contain more than one consolidated MemRegions. When
+// the 'regions' array may contain more than one consolidated MemRegions. When
 // the first/bottom archive GC region is a partial GC region (with the empty
 // portion at the higher address within the region), one MemRegion is used for
 // the bottom partial archive GC region. The rest of the consecutive archive
@@ -1418,13 +1507,13 @@ char* FileMapInfo::write_bitmap_region(const CHeapBitMap* ptrmap,
 //   ^^^
 //    |
 //    +-- gap
-size_t FileMapInfo::write_archive_heap_regions(GrowableArray<MemRegion> *heap_mem,
-                                               GrowableArray<ArchiveHeapOopmapInfo> *oopmaps,
-                                               int first_region_id, int max_num_regions) {
+size_t FileMapInfo::write_heap_regions(GrowableArray<MemRegion>* regions,
+                                       GrowableArray<ArchiveHeapOopmapInfo>* oopmaps,
+                                       int first_region_id, int max_num_regions) {
   assert(max_num_regions <= 2, "Only support maximum 2 memory regions");
 
-  int arr_len = heap_mem == NULL ? 0 : heap_mem->length();
-  if(arr_len > max_num_regions) {
+  int arr_len = regions == NULL ? 0 : regions->length();
+  if (arr_len > max_num_regions) {
     fail_stop("Unable to write archive heap memory regions: "
               "number of memory regions exceeds maximum due to fragmentation. "
               "Please increase java heap size "
@@ -1437,8 +1526,8 @@ size_t FileMapInfo::write_heap_regions(GrowableArray<MemRegion>* regions,
     char* start = NULL;
     size_t size = 0;
     if (i < arr_len) {
-      start = (char*)heap_mem->at(i).start();
-      size = heap_mem->at(i).byte_size();
+      start = (char*)regions->at(i).start();
+      size = regions->at(i).byte_size();
       total_size += size;
     }
 
@@ -1580,21 +1669,27 @@ MapArchiveResult FileMapInfo::map_regions(int regions[], int num_regions, char*
   return MAP_ARCHIVE_SUCCESS;
 }
 
-bool FileMapInfo::read_region(int i, char* base, size_t size) {
-  assert(MetaspaceShared::use_windows_memory_mapping(), "used by windows only");
+bool FileMapInfo::read_region(int i, char* base, size_t size, bool do_commit) {
   FileMapRegion* si = space_at(i);
-  log_info(cds)("Commit %s region #%d at base " INTPTR_FORMAT " top " INTPTR_FORMAT " (%s)%s",
-                is_static() ? "static " : "dynamic", i, p2i(base), p2i(base + size),
-                shared_region_name[i], si->allow_exec() ? " exec" : "");
-  if (!os::commit_memory(base, size, si->allow_exec())) {
-    log_error(cds)("Failed to commit %s region #%d (%s)", is_static() ? "static " : "dynamic",
-                   i, shared_region_name[i]);
-    return false;
+  if (do_commit) {
+    log_info(cds)("Commit %s region #%d at base " INTPTR_FORMAT " top " INTPTR_FORMAT " (%s)%s",
+                  is_static() ? "static " : "dynamic", i, p2i(base), p2i(base + size),
+                  shared_region_name[i], si->allow_exec() ? " exec" : "");
+    if (!os::commit_memory(base, size, si->allow_exec())) {
+      log_error(cds)("Failed to commit %s region #%d (%s)", is_static() ?
"static " : "dynamic", + i, shared_region_name[i]); + return false; + } } if (lseek(_fd, (long)si->file_offset(), SEEK_SET) != (int)si->file_offset() || read_bytes(base, size) != size) { return false; } + + if (VerifySharedSpaces && !region_crc_check(base, si->used(), si->crc())) { + return false; + } + return true; } @@ -1625,7 +1720,7 @@ MapArchiveResult FileMapInfo::map_region(int i, intx addr_delta, char* mapped_ba // that covers all the FileMapRegions to ensure all regions can be mapped. However, Windows // can't mmap into a ReservedSpace, so we just os::read() the data. We're going to patch all the // regions anyway, so there's no benefit for mmap anyway. - if (!read_region(i, requested_addr, size)) { + if (!read_region(i, requested_addr, size, /* do_commit = */ true)) { log_info(cds)("Failed to read %s shared space into reserved space at " INTPTR_FORMAT, shared_region_name[i], p2i(requested_addr)); return MAP_ARCHIVE_OTHER_FAILURE; // oom or I/O error. @@ -1749,14 +1844,14 @@ address FileMapInfo::decode_start_address(FileMapRegion* spc, bool with_current_ } } -static MemRegion *closed_archive_heap_ranges = NULL; -static MemRegion *open_archive_heap_ranges = NULL; -static int num_closed_archive_heap_ranges = 0; -static int num_open_archive_heap_ranges = 0; +static MemRegion *closed_heap_regions = NULL; +static MemRegion *open_heap_regions = NULL; +static int num_closed_heap_regions = 0; +static int num_open_heap_regions = 0; #if INCLUDE_CDS_JAVA_HEAP bool FileMapInfo::has_heap_regions() { - return (space_at(MetaspaceShared::first_closed_archive_heap_region)->used() > 0); + return (space_at(MetaspaceShared::first_closed_heap_region)->used() > 0); } // Returns the address range of the archived heap regions computed using the @@ -1767,7 +1862,7 @@ MemRegion FileMapInfo::get_heap_regions_range_with_current_oop_encoding_mode() { address start = (address) max_uintx; address end = NULL; - for (int i = MetaspaceShared::first_closed_archive_heap_region; + for (int i = MetaspaceShared::first_closed_heap_region; i <= MetaspaceShared::last_valid_region; i++) { FileMapRegion* si = space_at(i); @@ -1787,27 +1882,28 @@ MemRegion FileMapInfo::get_heap_regions_range_with_current_oop_encoding_mode() { return MemRegion((HeapWord*)start, (HeapWord*)end); } -// -// Map the closed and open archive heap objects to the runtime java heap. -// -// The shared objects are mapped at (or close to ) the java heap top in -// closed archive regions. The mapped objects contain no out-going -// references to any other java heap regions. GC does not write into the -// mapped closed archive heap region. -// -// The open archive heap objects are mapped below the shared objects in -// the runtime java heap. The mapped open archive heap data only contains -// references to the shared objects and open archive objects initially. -// During runtime execution, out-going references to any other java heap -// regions may be added. GC may mark and update references in the mapped -// open archive objects. -void FileMapInfo::map_heap_regions_impl() { - if (!HeapShared::is_heap_object_archiving_allowed()) { - log_info(cds)("CDS heap data is being ignored. 
UseG1GC, " - "UseCompressedOops and UseCompressedClassPointers are required."); - return; +void FileMapInfo::map_or_load_heap_regions() { + bool success = false; + + if (can_use_heap_regions()) { + if (HeapShared::can_map()) { + success = map_heap_regions(); + } else if (HeapShared::can_load()) { + success = HeapShared::load_heap_regions(this); + } else { + log_info(cds)("Cannot use CDS heap data. UseEpsilonGC, UseG1GC or UseSerialGC are required."); + } + } + + if (!success) { + MetaspaceShared::disable_full_module_graph(); } +} +bool FileMapInfo::can_use_heap_regions() { + if (!has_heap_regions()) { + return false; + } if (JvmtiExport::should_post_class_file_load_hook() && JvmtiExport::has_early_class_hook_env()) { ShouldNotReachHere(); // CDS should have been disabled. // The archived objects are mapped at JVM start-up, but we don't know if @@ -1842,9 +1938,27 @@ void FileMapInfo::map_heap_regions_impl() { if (narrow_klass_base() != CompressedKlassPointers::base() || narrow_klass_shift() != CompressedKlassPointers::shift()) { log_info(cds)("CDS heap data cannot be used because the archive was created with an incompatible narrow klass encoding mode."); - return; + return false; } + return true; +} + +// +// Map the closed and open archive heap objects to the runtime java heap. +// +// The shared objects are mapped at (or close to ) the java heap top in +// closed archive regions. The mapped objects contain no out-going +// references to any other java heap regions. GC does not write into the +// mapped closed archive heap region. +// +// The open archive heap objects are mapped below the shared objects in +// the runtime java heap. The mapped open archive heap data only contains +// references to the shared objects and open archive objects initially. +// During runtime execution, out-going references to any other java heap +// regions may be added. GC may mark and update references in the mapped +// open archive objects. +void FileMapInfo::map_heap_regions_impl() { if (narrow_oop_mode() != CompressedOops::mode() || narrow_oop_base() != CompressedOops::base() || narrow_oop_shift() != CompressedOops::shift()) { @@ -1882,7 +1996,7 @@ void FileMapInfo::map_heap_regions_impl() { log_info(cds)("CDS heap data relocation delta = " INTX_FORMAT " bytes", delta); HeapShared::init_narrow_oop_decoding(narrow_oop_base() + delta, narrow_oop_shift()); - FileMapRegion* si = space_at(MetaspaceShared::first_closed_archive_heap_region); + FileMapRegion* si = space_at(MetaspaceShared::first_closed_heap_region); address relocated_closed_heap_region_bottom = start_address_as_decoded_from_archive(si); if (!is_aligned(relocated_closed_heap_region_bottom, HeapRegion::GrainBytes)) { // Align the bottom of the closed archive heap regions at G1 region boundary. @@ -1901,43 +2015,42 @@ void FileMapInfo::map_heap_regions_impl() { assert(is_aligned(relocated_closed_heap_region_bottom, HeapRegion::GrainBytes), "must be"); - // Map the closed_archive_heap regions, GC does not write into the regions. - if (map_heap_data(&closed_archive_heap_ranges, - MetaspaceShared::first_closed_archive_heap_region, - MetaspaceShared::max_closed_archive_heap_region, - &num_closed_archive_heap_ranges)) { - HeapShared::set_closed_archive_heap_region_mapped(); - - // Now, map open_archive heap regions, GC can write into the regions. 
- if (map_heap_data(&open_archive_heap_ranges, - MetaspaceShared::first_open_archive_heap_region, - MetaspaceShared::max_open_archive_heap_region, - &num_open_archive_heap_ranges, - true /* open */)) { - HeapShared::set_open_archive_heap_region_mapped(); + // Map the closed heap regions: GC does not write into these regions. + if (map_heap_regions(MetaspaceShared::first_closed_heap_region, + MetaspaceShared::max_num_closed_heap_regions, + /*is_open_archive=*/ false, + &closed_heap_regions, &num_closed_heap_regions)) { + HeapShared::set_closed_regions_mapped(); + + // Now, map the open heap regions: GC can write into these regions. + if (map_heap_regions(MetaspaceShared::first_open_heap_region, + MetaspaceShared::max_num_open_heap_regions, + /*is_open_archive=*/ true, + &open_heap_regions, &num_open_heap_regions)) { + HeapShared::set_open_regions_mapped(); HeapShared::set_roots(header()->heap_obj_roots()); } } } -void FileMapInfo::map_heap_regions() { - if (has_heap_regions()) { - map_heap_regions_impl(); - } +bool FileMapInfo::map_heap_regions() { + map_heap_regions_impl(); - if (!HeapShared::closed_archive_heap_region_mapped()) { - assert(closed_archive_heap_ranges == NULL && - num_closed_archive_heap_ranges == 0, "sanity"); + if (!HeapShared::closed_regions_mapped()) { + assert(closed_heap_regions == NULL && + num_closed_heap_regions == 0, "sanity"); } - if (!HeapShared::open_archive_heap_region_mapped()) { - assert(open_archive_heap_ranges == NULL && num_open_archive_heap_ranges == 0, "sanity"); - MetaspaceShared::disable_full_module_graph(); + if (!HeapShared::open_regions_mapped()) { + assert(open_heap_regions == NULL && num_open_heap_regions == 0, "sanity"); + return false; + } else { + return true; } } -bool FileMapInfo::map_heap_data(MemRegion **heap_mem, int first, - int max, int* num, bool is_open_archive) { +bool FileMapInfo::map_heap_regions(int first, int max, bool is_open_archive, + MemRegion** regions_ret, int* num_regions_ret) { MemRegion* regions = MemRegion::create_array(max, mtInternal); struct Cleanup { @@ -1949,7 +2062,7 @@ bool FileMapInfo::map_heap_data(MemRegion **heap_mem, int first, } cleanup(regions, max); FileMapRegion* si; - int region_num = 0; + int num_regions = 0; for (int i = first; i < first + max; i++) { @@ -1957,26 +2070,26 @@ bool FileMapInfo::map_heap_data(MemRegion **heap_mem, int first, size_t size = si->used(); if (size > 0) { HeapWord* start = (HeapWord*)start_address_as_decoded_from_archive(si); - regions[region_num] = MemRegion(start, size / HeapWordSize); - region_num ++; + regions[num_regions] = MemRegion(start, size / HeapWordSize); + num_regions ++; log_info(cds)("Trying to map heap data: region[%d] at " INTPTR_FORMAT ", size = " SIZE_FORMAT_W(8) " bytes", i, p2i(start), size); } } - if (region_num == 0) { + if (num_regions == 0) { return false; // no archived java heap data } - // Check that ranges are within the java heap - if (!G1CollectedHeap::heap()->check_archive_addresses(regions, region_num)) { + // Check that regions are within the java heap + if (!G1CollectedHeap::heap()->check_archive_addresses(regions, num_regions)) { log_info(cds)("UseSharedSpaces: Unable to allocate region, range is not within java heap."); return false; } // allocate from java heap if (!G1CollectedHeap::heap()->alloc_archive_regions( - regions, region_num, is_open_archive)) { + regions, num_regions, is_open_archive)) { log_info(cds)("UseSharedSpaces: Unable to allocate region, java heap range is already in use."); return false; } @@ -1984,7 +2097,7 @@ bool 
FileMapInfo::map_heap_data(MemRegion **heap_mem, int first,
   // Map the archived heap data. No need to call MemTracker::record_virtual_memory_type()
   // for mapped regions as they are part of the reserved java heap, which is
   // already recorded.
-  for (int i = 0; i < region_num; i++) {
+  for (int i = 0; i < num_regions; i++) {
     si = space_at(first + i);
     char* addr = (char*)regions[i].start();
     char* base = os::map_memory(_fd, _full_path, si->file_offset(),
@@ -1992,7 +2105,7 @@ bool FileMapInfo::map_heap_data(MemRegion **heap_mem, int first,
                                 si->allow_exec());
     if (base == NULL || base != addr) {
       // dealloc the regions from java heap
-      dealloc_archive_heap_regions(regions, region_num);
+      dealloc_heap_regions(regions, num_regions);
       log_info(cds)("UseSharedSpaces: Unable to map at required address in java heap. "
                     INTPTR_FORMAT ", size = " SIZE_FORMAT " bytes",
                     p2i(addr), regions[i].byte_size());
@@ -2001,7 +2114,7 @@ bool FileMapInfo::map_heap_data(MemRegion **heap_mem, int first,
 
     if (VerifySharedSpaces && !region_crc_check(addr, regions[i].byte_size(), si->crc())) {
       // dealloc the regions from java heap
-      dealloc_archive_heap_regions(regions, region_num);
+      dealloc_heap_regions(regions, num_regions);
       log_info(cds)("UseSharedSpaces: mapped heap regions are corrupt");
       return false;
     }
@@ -2009,36 +2122,36 @@ bool FileMapInfo::map_heap_data(MemRegion **heap_mem, int first,
 
   cleanup._aborted = false;
   // the shared heap data is mapped successfully
-  *heap_mem = regions;
-  *num = region_num;
+  *regions_ret = regions;
+  *num_regions_ret = num_regions;
   return true;
 }
 
-void FileMapInfo::patch_archived_heap_embedded_pointers() {
+void FileMapInfo::patch_heap_embedded_pointers() {
   if (!_heap_pointers_need_patching) {
     return;
   }
 
   log_info(cds)("patching heap embedded pointers");
-  patch_archived_heap_embedded_pointers(closed_archive_heap_ranges,
-                                        num_closed_archive_heap_ranges,
-                                        MetaspaceShared::first_closed_archive_heap_region);
+  patch_heap_embedded_pointers(closed_heap_regions,
+                               num_closed_heap_regions,
+                               MetaspaceShared::first_closed_heap_region);
 
-  patch_archived_heap_embedded_pointers(open_archive_heap_ranges,
-                                        num_open_archive_heap_ranges,
-                                        MetaspaceShared::first_open_archive_heap_region);
+  patch_heap_embedded_pointers(open_heap_regions,
+                               num_open_heap_regions,
+                               MetaspaceShared::first_open_heap_region);
 }
 
-void FileMapInfo::patch_archived_heap_embedded_pointers(MemRegion* ranges, int num_ranges,
-                                                        int first_region_idx) {
+void FileMapInfo::patch_heap_embedded_pointers(MemRegion* regions, int num_regions,
+                                               int first_region_idx) {
   char* bitmap_base = map_bitmap_region();
   if (bitmap_base == NULL) {
     return;
   }
-  for (int i=0; i<num_ranges; i++) {
-    FileMapRegion* si = space_at(first_region_idx + i);
-    HeapShared::patch_archived_heap_embedded_pointers(
-      ranges[i],
-      (address)(space_at(MetaspaceShared::bm)->mapped_base()) + si->oopmap_offset(),
-      si->oopmap_size_in_bits());
+  for (int i=0; i<num_regions; i++) {
+    FileMapRegion* si = space_at(first_region_idx + i);
+    HeapShared::patch_embedded_pointers(
+      regions[i],
+      (address)(space_at(MetaspaceShared::bm)->mapped_base()) + si->oopmap_offset(),
+      si->oopmap_size_in_bits());
   }
 }
 
@@ -2049,19 +2162,19 @@ void FileMapInfo::patch_archived_heap_embedded_pointers(MemRegion* ranges, int num_ranges,
 void FileMapInfo::fixup_mapped_heap_regions() {
   assert(vmClasses::Object_klass_loaded(), "must be");
   // If any closed regions were found, call the fill routine to make them parseable.
-  // Note that closed_archive_heap_ranges may be non-NULL even if no ranges were found.
-  if (num_closed_archive_heap_ranges != 0) {
-    assert(closed_archive_heap_ranges != NULL,
-           "Null closed_archive_heap_ranges array with non-zero count");
-    G1CollectedHeap::heap()->fill_archive_regions(closed_archive_heap_ranges,
-                                                  num_closed_archive_heap_ranges);
+  // Note that closed_heap_regions may be non-NULL even if no regions were found.
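+  // (G1's fill routine plugs the unused tail of each archive region with filler
+  // objects, so subsequent heap iteration never sees unparseable memory.)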
+  if (num_closed_heap_regions != 0) {
+    assert(closed_heap_regions != NULL,
+           "Null closed_heap_regions array with non-zero count");
+    G1CollectedHeap::heap()->fill_archive_regions(closed_heap_regions,
+                                                  num_closed_heap_regions);
   }
 
   // do the same for mapped open archive heap regions
-  if (num_open_archive_heap_ranges != 0) {
-    assert(open_archive_heap_ranges != NULL, "NULL open_archive_heap_ranges array with non-zero count");
-    G1CollectedHeap::heap()->fill_archive_regions(open_archive_heap_ranges,
-                                                  num_open_archive_heap_ranges);
+  if (num_open_heap_regions != 0) {
+    assert(open_heap_regions != NULL, "NULL open_heap_regions array with non-zero count");
+    G1CollectedHeap::heap()->fill_archive_regions(open_heap_regions,
+                                                  num_open_heap_regions);
 
     // Populate the open archive regions' G1BlockOffsetTableParts. That ensures
     // fast G1BlockOffsetTablePart::block_start operations for any given address
@@ -2072,15 +2185,15 @@ void FileMapInfo::fixup_mapped_heap_regions() {
     // regions, because objects in closed archive regions never reference objects
    // outside the closed archive regions and they are immutable. So we never
    // need their BOT during garbage collection.
-    G1CollectedHeap::heap()->populate_archive_regions_bot_part(open_archive_heap_ranges,
-                                                               num_open_archive_heap_ranges);
+    G1CollectedHeap::heap()->populate_archive_regions_bot_part(open_heap_regions,
+                                                               num_open_heap_regions);
   }
 }
 
 // dealloc the archive regions from java heap
-void FileMapInfo::dealloc_archive_heap_regions(MemRegion* regions, int num) {
+void FileMapInfo::dealloc_heap_regions(MemRegion* regions, int num) {
   if (num > 0) {
-    assert(regions != NULL, "Null archive ranges array with non-zero count");
+    assert(regions != NULL, "Null archive regions array with non-zero count");
     G1CollectedHeap::heap()->dealloc_archive_regions(regions, num);
   }
 }
@@ -2152,6 +2265,7 @@ FileMapInfo* FileMapInfo::_dynamic_archive_info = NULL;
 bool FileMapInfo::_heap_pointers_need_patching = false;
 SharedPathTable FileMapInfo::_shared_path_table;
 SharedPathTable FileMapInfo::_saved_shared_path_table;
+Array<u8>* FileMapInfo::_saved_shared_path_table_array = NULL;
 bool FileMapInfo::_validating_shared_path_table = false;
 bool FileMapInfo::_memory_mapping_failed = false;
 GrowableArray<const char*>* FileMapInfo::_non_existent_class_paths = NULL;
@@ -2178,6 +2292,11 @@ bool FileMapInfo::initialize() {
     return false;
   }
 
+  if (!Arguments::has_jimage()) {
+    FileMapInfo::fail_continue("The shared archive file cannot be used with an exploded module build.");
+    return false;
+  }
+
   if (!open_for_read()) {
     return false;
   }
@@ -2216,9 +2335,9 @@ void FileMapHeader::set_as_offset(char* p, size_t *offset) {
 
 int FileMapHeader::compute_crc() {
   char* start = (char*)this;
-  // start computing from the field after _crc
-  char* buf = (char*)&_crc + sizeof(_crc);
-  size_t sz = _header_size - (buf - start);
+  // start computing from the field after _crc to the end of the base archive name.
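+  // header_size() now covers the base archive path appended at the end of the
+  // header, so the embedded name is protected by the checksum as well.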
+ char* buf = (char*)&(_generic_header._crc) + sizeof(_generic_header._crc); + size_t sz = header_size() - (buf - start); int crc = ClassLoader::crc32(0, buf, (jint)sz); return crc; } @@ -2336,17 +2455,17 @@ void FileMapInfo::stop_sharing_and_unmap(const char* msg) { FileMapInfo *map_info = FileMapInfo::current_info(); if (map_info) { map_info->fail_continue("%s", msg); - for (int i = 0; i < MetaspaceShared::num_non_heap_spaces; i++) { + for (int i = 0; i < MetaspaceShared::num_non_heap_regions; i++) { if (!HeapShared::is_heap_region(i)) { map_info->unmap_region(i); } } // Dealloc the archive heap regions only without unmapping. The regions are part // of the java heap. Unmapping of the heap regions are managed by GC. - map_info->dealloc_archive_heap_regions(open_archive_heap_ranges, - num_open_archive_heap_ranges); - map_info->dealloc_archive_heap_regions(closed_archive_heap_ranges, - num_closed_archive_heap_ranges); + map_info->dealloc_heap_regions(open_heap_regions, + num_open_heap_regions); + map_info->dealloc_heap_regions(closed_heap_regions, + num_closed_heap_regions); } else if (DumpSharedSpaces) { fail_stop("%s", msg); } diff --git a/src/hotspot/share/cds/filemap.hpp b/src/hotspot/share/cds/filemap.hpp index 13cb5f4acf2d4bffbf69296f10eb1849c2f14d7e..7708bbe7fa585a12d90c40e9071d702b155ceb33 100644 --- a/src/hotspot/share/cds/filemap.hpp +++ b/src/hotspot/share/cds/filemap.hpp @@ -137,6 +137,7 @@ public: class FileMapRegion: private CDSFileMapRegion { +public: void assert_is_heap_region() const { assert(_is_heap_region, "must be heap region"); } @@ -144,7 +145,6 @@ class FileMapRegion: private CDSFileMapRegion { assert(!_is_heap_region, "must not be heap region"); } -public: static FileMapRegion* cast(CDSFileMapRegion* p) { return (FileMapRegion*)p; } @@ -180,11 +180,10 @@ public: }; class FileMapHeader: private CDSFileMapHeaderBase { - friend class CDSOffsets; + friend class CDSConstants; friend class VMStructs; - size_t _header_size; - +private: // The following fields record the states of the VM during dump time. // They are compared with the runtime states to see if the archive // can be used. @@ -202,7 +201,6 @@ class FileMapHeader: private CDSFileMapHeaderBase { size_t _serialized_data_offset; // Data accessed using {ReadClosure,WriteClosure}::serialize() address _heap_begin; // heap begin at dump time. address _heap_end; // heap end at dump time. - bool _base_archive_is_default; // indicates if the base archive is the system default one bool _has_non_jar_in_classpath; // non-jar file entry exists in classpath // The following fields are all sanity checks for whether this archive @@ -210,9 +208,6 @@ class FileMapHeader: private CDSFileMapHeaderBase { // invoked with. char _jvm_ident[JVM_IDENT_MAX]; // identifier string of the jvm that created this dump - // size of the base archive name including NULL terminator - size_t _base_archive_name_size; - // The following is a table of all the boot/app/module path entries that were used // during dumping. At run time, we validate these entries according to their // SharedClassPathEntry::_type. 
See: @@ -243,17 +238,21 @@ class FileMapHeader: private CDSFileMapHeaderBase { } void set_as_offset(char* p, size_t *offset); public: - // Accessors -- fields declared in CDSFileMapHeaderBase - unsigned int magic() const { return _magic; } - int crc() const { return _crc; } - int version() const { return _version; } - - void set_crc(int crc_value) { _crc = crc_value; } - void set_version(int v) { _version = v; } - - // Accessors -- fields declared in FileMapHeader + // Accessors -- fields declared in GenericCDSFileMapHeader + unsigned int magic() const { return _generic_header._magic; } + int crc() const { return _generic_header._crc; } + int version() const { return _generic_header._version; } + unsigned int header_size() const { return _generic_header._header_size; } + unsigned int base_archive_path_offset() const { return _generic_header._base_archive_path_offset; } + unsigned int base_archive_name_size() const { return _generic_header._base_archive_name_size; } + + void set_magic(unsigned int m) { _generic_header._magic = m; } + void set_crc(int crc_value) { _generic_header._crc = crc_value; } + void set_version(int v) { _generic_header._version = v; } + void set_header_size(unsigned int s) { _generic_header._header_size = s; } + void set_base_archive_path_offset(unsigned int s) { _generic_header._base_archive_path_offset = s; } + void set_base_archive_name_size(unsigned int s) { _generic_header._base_archive_name_size = s; } - size_t header_size() const { return _header_size; } size_t core_region_alignment() const { return _core_region_alignment; } int obj_alignment() const { return _obj_alignment; } address narrow_oop_base() const { return _narrow_oop_base; } @@ -267,9 +266,7 @@ public: char* serialized_data() const { return from_mapped_offset(_serialized_data_offset); } address heap_begin() const { return _heap_begin; } address heap_end() const { return _heap_end; } - bool base_archive_is_default() const { return _base_archive_is_default; } const char* jvm_ident() const { return _jvm_ident; } - size_t base_archive_name_size() const { return _base_archive_name_size; } char* requested_base_address() const { return _requested_base_address; } char* mapped_base_address() const { return _mapped_base_address; } bool has_platform_or_app_classes() const { return _has_platform_or_app_classes; } @@ -287,12 +284,10 @@ public: void set_has_platform_or_app_classes(bool v) { _has_platform_or_app_classes = v; } void set_cloned_vtables(char* p) { set_as_offset(p, &_cloned_vtables_offset); } void set_serialized_data(char* p) { set_as_offset(p, &_serialized_data_offset); } - void set_base_archive_name_size(size_t s) { _base_archive_name_size = s; } - void set_base_archive_is_default(bool b) { _base_archive_is_default = b; } - void set_header_size(size_t s) { _header_size = s; } void set_ptrmap_size_in_bits(size_t s) { _ptrmap_size_in_bits = s; } void set_mapped_base_address(char* p) { _mapped_base_address = p; } void set_heap_obj_roots(narrowOop r) { _heap_obj_roots = r; } + void copy_base_archive_name(const char* name); void set_shared_path_table(SharedPathTable table) { set_as_offset((char*)table.table(), &_shared_path_table_offset); @@ -317,8 +312,8 @@ public: return FileMapRegion::cast(&_space[i]); } - void populate(FileMapInfo* info, size_t core_region_alignment); - + void populate(FileMapInfo *info, size_t core_region_alignment, size_t header_size, + size_t base_archive_name_size, size_t base_archive_path_offset); static bool is_valid_region(int region) { return (0 <= region && region < 
NUM_CDS_REGIONS);
   }
 
@@ -346,6 +341,7 @@ private:
   // TODO: Probably change the following to be non-static
   static SharedPathTable _shared_path_table;
   static SharedPathTable _saved_shared_path_table;
+  static Array<u8>* _saved_shared_path_table_array;  // remember the table array for cleanup
   static bool _validating_shared_path_table;
 
   // FileMapHeader describes the shared space data in the file to be
@@ -362,12 +358,13 @@ public:
   static bool get_base_archive_name_from_header(const char* archive_name,
-                                                int* size, char** base_archive_name);
+                                                char** base_archive_name);
   static bool check_archive(const char* archive_name, bool is_static);
   static SharedPathTable shared_path_table() {
     return _shared_path_table;
   }
   static SharedPathTable saved_shared_path_table() {
+    assert(_saved_shared_path_table.size() >= 0, "Sanity check");
     return _saved_shared_path_table;
   }
 
@@ -396,9 +393,6 @@ public:
   int narrow_klass_shift() const { return header()->narrow_klass_shift(); }
   size_t core_region_alignment() const { return header()->core_region_alignment(); }
 
-  void set_header_base_archive_name_size(size_t size) { header()->set_base_archive_name_size(size); }
-  void set_header_base_archive_is_default(bool is_default) { header()->set_base_archive_is_default(is_default); }
-
   CompressedOops::Mode narrow_oop_mode() const { return header()->narrow_oop_mode(); }
   jshort app_module_paths_start_index() const { return header()->app_module_paths_start_index(); }
   jshort app_class_paths_start_index() const { return header()->app_class_paths_start_index(); }
@@ -419,6 +413,8 @@ public:
   void set_requested_base(char* b) { header()->set_requested_base(b); }
   char* requested_base_address() const { return header()->requested_base_address(); }
 
+  narrowOop heap_obj_roots() const { return header()->heap_obj_roots(); }
+
   class DynamicArchiveHeader* dynamic_header() const {
     assert(!is_static(), "must be");
     return (DynamicArchiveHeader*)header();
@@ -458,21 +454,23 @@ public:
                             GrowableArray<ArchiveHeapOopmapInfo>* closed_oopmaps,
                             GrowableArray<ArchiveHeapOopmapInfo>* open_oopmaps,
                             size_t &size_in_bytes);
-  size_t write_archive_heap_regions(GrowableArray<MemRegion> *heap_mem,
-                                    GrowableArray<ArchiveHeapOopmapInfo> *oopmaps,
-                                    int first_region_id, int max_num_regions);
+  size_t write_heap_regions(GrowableArray<MemRegion>* regions,
+                            GrowableArray<ArchiveHeapOopmapInfo>* oopmaps,
+                            int first_region_id, int max_num_regions);
   void write_bytes(const void* buffer, size_t count);
   void write_bytes_aligned(const void* buffer, size_t count);
   size_t read_bytes(void* buffer, size_t count);
   MapArchiveResult map_regions(int regions[], int num_regions, char* mapped_base_address, ReservedSpace rs);
   void unmap_regions(int regions[], int num_regions);
-  void map_heap_regions() NOT_CDS_JAVA_HEAP_RETURN;
+  void map_or_load_heap_regions() NOT_CDS_JAVA_HEAP_RETURN;
   void fixup_mapped_heap_regions() NOT_CDS_JAVA_HEAP_RETURN;
-  void patch_archived_heap_embedded_pointers() NOT_CDS_JAVA_HEAP_RETURN;
-  void patch_archived_heap_embedded_pointers(MemRegion* ranges, int num_ranges,
-                                             int first_region_idx) NOT_CDS_JAVA_HEAP_RETURN;
+  void patch_heap_embedded_pointers() NOT_CDS_JAVA_HEAP_RETURN;
+  void patch_heap_embedded_pointers(MemRegion* regions, int num_regions,
+                                    int first_region_idx) NOT_CDS_JAVA_HEAP_RETURN;
   bool has_heap_regions() NOT_CDS_JAVA_HEAP_RETURN_(false);
   MemRegion get_heap_regions_range_with_current_oop_encoding_mode() NOT_CDS_JAVA_HEAP_RETURN_(MemRegion());
+  bool read_region(int i, char* base, size_t size, bool do_commit);
+  char* map_bitmap_region();
   void unmap_region(int i);
   bool verify_region_checksum(int i);
   void close();
@@ -497,6 +495,7 @@ public:
   static
void allocate_shared_path_table(TRAPS); static void copy_shared_path_table(ClassLoaderData* loader_data, TRAPS); + static void clone_shared_path_table(TRAPS); static int add_shared_classpaths(int i, const char* which, ClassPathEntry *cpe, TRAPS); static void check_nonempty_dir_in_shared_path_table(); bool validate_shared_path_table(); @@ -567,29 +566,32 @@ public: GrowableArray* rp_array) NOT_CDS_RETURN_(false); bool validate_boot_class_paths() NOT_CDS_RETURN_(false); bool validate_app_class_paths(int shared_app_paths_len) NOT_CDS_RETURN_(false); - bool map_heap_data(MemRegion **heap_mem, int first, int max, int* num, - bool is_open = false) NOT_CDS_JAVA_HEAP_RETURN_(false); + bool map_heap_regions(int first, int max, bool is_open_archive, + MemRegion** regions_ret, int* num_regions_ret) NOT_CDS_JAVA_HEAP_RETURN_(false); bool region_crc_check(char* buf, size_t size, int expected_crc) NOT_CDS_RETURN_(false); - void dealloc_archive_heap_regions(MemRegion* regions, int num) NOT_CDS_JAVA_HEAP_RETURN; + void dealloc_heap_regions(MemRegion* regions, int num) NOT_CDS_JAVA_HEAP_RETURN; + bool can_use_heap_regions(); + bool load_heap_regions() NOT_CDS_JAVA_HEAP_RETURN_(false); + bool map_heap_regions() NOT_CDS_JAVA_HEAP_RETURN_(false); void map_heap_regions_impl() NOT_CDS_JAVA_HEAP_RETURN; - char* map_bitmap_region(); MapArchiveResult map_region(int i, intx addr_delta, char* mapped_base_address, ReservedSpace rs); - bool read_region(int i, char* base, size_t size); bool relocate_pointers_in_core_regions(intx addr_delta); static size_t set_oopmaps_offset(GrowableArray *oopmaps, size_t curr_size); static size_t write_oopmaps(GrowableArray *oopmaps, size_t curr_offset, char* buffer); + address decode_start_address(FileMapRegion* spc, bool with_current_oop_encoding_mode); + // The starting address of spc, as calculated with CompressedOop::decode_non_null() address start_address_as_decoded_with_current_oop_encoding_mode(FileMapRegion* spc) { return decode_start_address(spc, true); } - +public: // The starting address of spc, as calculated with HeapShared::decode_from_archive() address start_address_as_decoded_from_archive(FileMapRegion* spc) { return decode_start_address(spc, false); } - address decode_start_address(FileMapRegion* spc, bool with_current_oop_encoding_mode); +private: #if INCLUDE_JVMTI static ClassPathEntry** _classpath_entries_for_jvmti; diff --git a/src/hotspot/share/cds/heapShared.cpp b/src/hotspot/share/cds/heapShared.cpp index 3e50dd7506b2765bab7844911a44a6f057d103a9..7d88f802f90110a846df386ecf765a2be1adcf40 100644 --- a/src/hotspot/share/cds/heapShared.cpp +++ b/src/hotspot/share/cds/heapShared.cpp @@ -38,6 +38,7 @@ #include "classfile/systemDictionaryShared.hpp" #include "classfile/vmClasses.hpp" #include "classfile/vmSymbols.hpp" +#include "gc/shared/collectedHeap.hpp" #include "gc/shared/gcLocker.hpp" #include "gc/shared/gcVMOperations.hpp" #include "logging/log.hpp" @@ -67,13 +68,25 @@ #if INCLUDE_CDS_JAVA_HEAP -bool HeapShared::_closed_archive_heap_region_mapped = false; -bool HeapShared::_open_archive_heap_region_mapped = false; -bool HeapShared::_archive_heap_region_fixed = false; +bool HeapShared::_closed_regions_mapped = false; +bool HeapShared::_open_regions_mapped = false; +bool HeapShared::_is_loaded = false; address HeapShared::_narrow_oop_base; int HeapShared::_narrow_oop_shift; DumpedInternedStrings *HeapShared::_dumped_interned_strings = NULL; +uintptr_t HeapShared::_loaded_heap_bottom = 0; +uintptr_t HeapShared::_loaded_heap_top = 0; +uintptr_t 
HeapShared::_dumptime_base_0 = UINTPTR_MAX; +uintptr_t HeapShared::_dumptime_base_1 = UINTPTR_MAX; +uintptr_t HeapShared::_dumptime_base_2 = UINTPTR_MAX; +uintptr_t HeapShared::_dumptime_base_3 = UINTPTR_MAX; +uintptr_t HeapShared::_dumptime_top = 0; +intx HeapShared::_runtime_offset_0 = 0; +intx HeapShared::_runtime_offset_1 = 0; +intx HeapShared::_runtime_offset_2 = 0; +intx HeapShared::_runtime_offset_3 = 0; +bool HeapShared::_loading_failed = false; // // If you add new entries to the following tables, you should know what you're doing! // @@ -117,16 +130,27 @@ GrowableArrayCHeap* HeapShared::_pending_roots = NULL; narrowOop HeapShared::_roots_narrow; OopHandle HeapShared::_roots; +#ifdef ASSERT +bool HeapShared::is_archived_object_during_dumptime(oop p) { + assert(HeapShared::can_write(), "must be"); + assert(DumpSharedSpaces, "this function is only used with -Xshare:dump"); + return Universe::heap()->is_archived_object(p); +} +#endif + //////////////////////////////////////////////////////////////// // // Java heap object archiving support // //////////////////////////////////////////////////////////////// -void HeapShared::fixup_mapped_heap_regions() { - FileMapInfo *mapinfo = FileMapInfo::current_info(); - mapinfo->fixup_mapped_heap_regions(); - set_archive_heap_region_fixed(); +void HeapShared::fixup_regions() { + FileMapInfo* mapinfo = FileMapInfo::current_info(); if (is_mapped()) { + mapinfo->fixup_mapped_heap_regions(); + } else if (_loading_failed) { + fill_failed_loaded_region(); + } + if (is_fully_available()) { _roots = OopHandle(Universe::vm_global(), decode_from_archive(_roots_narrow)); if (!MetaspaceShared::use_full_module_graph()) { // Need to remove all the archived java.lang.Module objects from HeapShared::roots(). @@ -137,8 +161,6 @@ void HeapShared::fixup_mapped_heap_regions() { } unsigned HeapShared::oop_hash(oop const& p) { - assert(!p->mark().has_bias_pattern(), - "this object should never have been locked"); // so identity_hash won't safepoin unsigned hash = (unsigned)p->identity_hash(); return hash; } @@ -201,7 +223,7 @@ int HeapShared::append_root(oop obj) { objArrayOop HeapShared::roots() { if (DumpSharedSpaces) { assert(Thread::current() == (Thread*)VMThread::vm_thread(), "should be in vm thread"); - if (!is_heap_object_archiving_allowed()) { + if (!HeapShared::can_write()) { return NULL; } } else { @@ -215,7 +237,7 @@ objArrayOop HeapShared::roots() { void HeapShared::set_roots(narrowOop roots) { assert(UseSharedSpaces, "runtime only"); - assert(open_archive_heap_region_mapped(), "must be"); + assert(is_fully_available(), "must be"); _roots_narrow = roots; } @@ -240,7 +262,7 @@ oop HeapShared::get_root(int index, bool clear) { void HeapShared::clear_root(int index) { assert(index >= 0, "sanity"); assert(UseSharedSpaces, "must be"); - if (open_archive_heap_region_mapped()) { + if (is_fully_available()) { if (log_is_enabled(Debug, cds, heap)) { oop old = roots()->obj_at(index); log_debug(cds, heap)("Clearing root %d: was " PTR_FORMAT, index, p2i(old)); @@ -249,7 +271,7 @@ void HeapShared::clear_root(int index) { } } -oop HeapShared::archive_heap_object(oop obj) { +oop HeapShared::archive_object(oop obj) { assert(DumpSharedSpaces, "dump-time only"); oop ao = find_archived_heap_object(obj); @@ -317,7 +339,7 @@ void HeapShared::archive_klass_objects() { } void HeapShared::run_full_gc_in_vm_thread() { - if (is_heap_object_archiving_allowed()) { + if (HeapShared::can_write()) { // Avoid fragmentation while archiving heap objects. 
// We do this inside a safepoint, so that no further allocation can happen after GC // has finished. @@ -335,8 +357,8 @@ void HeapShared::run_full_gc_in_vm_thread() { } } -void HeapShared::archive_java_heap_objects(GrowableArray* closed, - GrowableArray* open) { +void HeapShared::archive_objects(GrowableArray* closed_regions, + GrowableArray* open_regions) { G1HeapVerifier::verify_ready_for_archiving(); @@ -349,10 +371,10 @@ void HeapShared::archive_java_heap_objects(GrowableArray* closed, log_info(cds)("Heap range = [" PTR_FORMAT " - " PTR_FORMAT "]", p2i(CompressedOops::begin()), p2i(CompressedOops::end())); log_info(cds)("Dumping objects to closed archive heap region ..."); - copy_closed_archive_heap_objects(closed); + copy_closed_objects(closed_regions); log_info(cds)("Dumping objects to open archive heap region ..."); - copy_open_archive_heap_objects(open); + copy_open_objects(open_regions); destroy_archived_object_cache(); } @@ -360,9 +382,8 @@ void HeapShared::archive_java_heap_objects(GrowableArray* closed, G1HeapVerifier::verify_archive_regions(); } -void HeapShared::copy_closed_archive_heap_objects( - GrowableArray * closed_archive) { - assert(is_heap_object_archiving_allowed(), "Cannot archive java heap objects"); +void HeapShared::copy_closed_objects(GrowableArray* closed_regions) { + assert(HeapShared::can_write(), "must be"); G1CollectedHeap::heap()->begin_archive_alloc_range(); @@ -374,13 +395,12 @@ void HeapShared::copy_closed_archive_heap_objects( true /* is_closed_archive */, false /* is_full_module_graph */); - G1CollectedHeap::heap()->end_archive_alloc_range(closed_archive, + G1CollectedHeap::heap()->end_archive_alloc_range(closed_regions, os::vm_allocation_granularity()); } -void HeapShared::copy_open_archive_heap_objects( - GrowableArray * open_archive) { - assert(is_heap_object_archiving_allowed(), "Cannot archive java heap objects"); +void HeapShared::copy_open_objects(GrowableArray* open_regions) { + assert(HeapShared::can_write(), "must be"); G1CollectedHeap::heap()->begin_archive_alloc_range(true /* open */); @@ -402,7 +422,7 @@ void HeapShared::copy_open_archive_heap_objects( copy_roots(); - G1CollectedHeap::heap()->end_archive_alloc_range(open_archive, + G1CollectedHeap::heap()->end_archive_alloc_range(open_regions, os::vm_allocation_granularity()); } @@ -416,11 +436,7 @@ void HeapShared::copy_roots() { memset(mem, 0, size * BytesPerWord); { // This is copied from MemAllocator::finish - if (UseBiasedLocking) { - oopDesc::set_mark(mem, k->prototype_header()); - } else { - oopDesc::set_mark(mem, markWord::prototype()); - } + oopDesc::set_mark(mem, markWord::prototype()); oopDesc::release_set_klass(mem, k); } { @@ -658,7 +674,7 @@ void HeapShared::serialize_subgraph_info_table_header(SerializeClosure* soc) { } static void verify_the_heap(Klass* k, const char* which) { - if (VerifyArchivedFields) { + if (VerifyArchivedFields > 0) { ResourceMark rm; log_info(cds, heap)("Verify heap %s initializing static field(s) in %s", which, k->external_name()); @@ -666,15 +682,20 @@ static void verify_the_heap(Klass* k, const char* which) { VM_Verify verify_op; VMThread::execute(&verify_op); - if (!FLAG_IS_DEFAULT(VerifyArchivedFields)) { - // If VerifyArchivedFields has a non-default value (e.g., specified on the command-line), do - // more expensive checks. 
- if (is_init_completed()) { - FlagSetting fs1(VerifyBeforeGC, true); - FlagSetting fs2(VerifyDuringGC, true); - FlagSetting fs3(VerifyAfterGC, true); - Universe::heap()->collect(GCCause::_java_lang_system_gc); - } + if (VerifyArchivedFields > 1 && is_init_completed()) { + // At this time, the oop->klass() of some archived objects in the heap may not + // have been loaded into the system dictionary yet. Nevertheless, oop->klass() should + // have enough information (object size, oop maps, etc) so that a GC can be safely + // performed. + // + // -XX:VerifyArchivedFields=2 forces a GC to happen at such an early stage + // to check for GC safety. + log_info(cds, heap)("Trigger GC %s initializing static field(s) in %s", + which, k->external_name()); + FlagSetting fs1(VerifyBeforeGC, true); + FlagSetting fs2(VerifyDuringGC, true); + FlagSetting fs3(VerifyAfterGC, true); + Universe::heap()->collect(GCCause::_java_lang_system_gc); } } } @@ -686,7 +707,7 @@ static void verify_the_heap(Klass* k, const char* which) { // ClassFileLoadHook is enabled, it's possible for this class to be dynamically replaced. In // this case, we will not load the ArchivedKlassSubGraphInfoRecord and will clear its roots. void HeapShared::resolve_classes(JavaThread* THREAD) { - if (!is_mapped()) { + if (!is_fully_available()) { return; // nothing to do } resolve_classes_for_subgraphs(closed_archive_subgraph_entry_fields, @@ -724,7 +745,7 @@ void HeapShared::resolve_classes_for_subgraph_of(Klass* k, JavaThread* THREAD) { } void HeapShared::initialize_from_archived_subgraph(Klass* k, JavaThread* THREAD) { - if (!is_mapped()) { + if (!is_fully_available()) { return; // nothing to do } @@ -883,7 +904,7 @@ class WalkOopAndArchiveClosure: public BasicOopIterateClosure { template <class T> void do_oop_work(T *p) { oop obj = RawAccess<>::oop_load(p); if (!CompressedOops::is_null(obj)) { - assert(!HeapShared::is_archived_object(obj), + assert(!HeapShared::is_archived_object_during_dumptime(obj), "original objects must not point to archived objects"); size_t field_delta = pointer_delta(p, _orig_referencing_obj, sizeof(char)); @@ -902,7 +923,7 @@ class WalkOopAndArchiveClosure: public BasicOopIterateClosure { oop archived = HeapShared::archive_reachable_objects_from( _level + 1, _subgraph_info, obj, _is_closed_archive); assert(archived != NULL, "VM should have exited with unarchivable objects for _level > 1"); - assert(HeapShared::is_archived_object(archived), "must be"); + assert(HeapShared::is_archived_object_during_dumptime(archived), "must be"); if (!_record_klasses_only) { // Update the reference in the archived copy of the referencing object. @@ -914,7 +935,7 @@ class WalkOopAndArchiveClosure: public BasicOopIterateClosure { } }; -void HeapShared::check_closed_archive_heap_region_object(InstanceKlass* k) { +void HeapShared::check_closed_region_object(InstanceKlass* k) { // Check fields in the object for (JavaFieldStream fs(k); !fs.done(); fs.next()) { if (!fs.access_flags().is_static()) { @@ -958,7 +979,7 @@ oop HeapShared::archive_reachable_objects_from(int level, oop orig_obj, bool is_closed_archive) { assert(orig_obj != NULL, "must be"); - assert(!is_archived_object(orig_obj), "sanity"); + assert(!is_archived_object_during_dumptime(orig_obj), "sanity"); if (!JavaClasses::is_supported_for_archiving(orig_obj)) { // This object has injected fields that cannot be supported easily, so we disallow them for now.
@@ -996,7 +1017,7 @@ oop HeapShared::archive_reachable_objects_from(int level, bool record_klasses_only = (archived_obj != NULL); if (archived_obj == NULL) { ++_num_new_archived_objs; - archived_obj = archive_heap_object(orig_obj); + archived_obj = archive_object(orig_obj); if (archived_obj == NULL) { // Skip archiving the sub-graph referenced from the current entry field. ResourceMark rm; @@ -1024,7 +1045,7 @@ oop HeapShared::archive_reachable_objects_from(int level, // class_data will be restored explicitly at run time. guarantee(orig_obj == SystemDictionary::java_platform_loader() || orig_obj == SystemDictionary::java_system_loader() || - java_lang_ClassLoader::loader_data_raw(orig_obj) == NULL, "must be"); + java_lang_ClassLoader::loader_data(orig_obj) == NULL, "must be"); java_lang_ClassLoader::release_set_loader_data(archived_obj, NULL); } } @@ -1037,7 +1058,7 @@ oop HeapShared::archive_reachable_objects_from(int level, subgraph_info, orig_obj, archived_obj); orig_obj->oop_iterate(&walker); if (is_closed_archive && orig_k->is_instance_klass()) { - check_closed_archive_heap_region_object(InstanceKlass::cast(orig_k)); + check_closed_region_object(InstanceKlass::cast(orig_k)); } return archived_obj; } @@ -1176,10 +1197,10 @@ void HeapShared::verify_reachable_objects_from(oop obj, bool is_archived) { set_has_been_seen_during_subgraph_recording(obj); if (is_archived) { - assert(is_archived_object(obj), "must be"); + assert(is_archived_object_during_dumptime(obj), "must be"); assert(find_archived_heap_object(obj) == NULL, "must be"); } else { - assert(!is_archived_object(obj), "must be"); + assert(!is_archived_object_during_dumptime(obj), "must be"); assert(find_archived_heap_object(obj) != NULL, "must be"); } @@ -1279,7 +1300,7 @@ void HeapShared::init_subgraph_entry_fields(ArchivableStaticFieldInfo fields[], } void HeapShared::init_subgraph_entry_fields(TRAPS) { - assert(is_heap_object_archiving_allowed(), "Sanity check"); + assert(HeapShared::can_write(), "must be"); _dump_time_subgraph_info_table = new (ResourceObj::C_HEAP, mtClass)DumpTimeKlassSubGraphInfoTable(); init_subgraph_entry_fields(closed_archive_subgraph_entry_fields, num_closed_archive_subgraph_entry_fields, @@ -1295,7 +1316,7 @@ void HeapShared::init_subgraph_entry_fields(TRAPS) { } void HeapShared::init_for_dumping(TRAPS) { - if (is_heap_object_archiving_allowed()) { + if (HeapShared::can_write()) { _dumped_interned_strings = new (ResourceObj::C_HEAP, mtClass)DumpedInternedStrings(); init_subgraph_entry_fields(CHECK); } @@ -1439,8 +1460,10 @@ class PatchEmbeddedPointers: public BitMapClosure { } }; -void HeapShared::patch_archived_heap_embedded_pointers(MemRegion region, address oopmap, - size_t oopmap_size_in_bits) { +// Patch all the non-null pointers that are embedded in the archived heap objects +// in this region +void HeapShared::patch_embedded_pointers(MemRegion region, address oopmap, + size_t oopmap_size_in_bits) { BitMapView bm((BitMap::bm_word_t*)oopmap, oopmap_size_in_bits); #ifndef PRODUCT @@ -1453,4 +1476,293 @@ void HeapShared::patch_archived_heap_embedded_pointers(MemRegion region, address bm.iterate(&patcher); } +// The CDS archive remembers each heap object by its address at dump time, but +// the heap object may be loaded at a different address at run time. This structure is used +// to translate the dump time addresses for all objects in FileMapInfo::space_at(region_index) +// to their runtime addresses. 
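// A minimal sketch of the lookup this struct enables (simplified from the
// init_loaded_regions()/sort_loaded_regions() code below; the real code caches
// the bases and offsets in HeapShared statics instead of looping):
//
//   static uintptr_t runtime_address(uintptr_t p, LoadedArchiveHeapRegion* r, int n) {
//     for (int i = n - 1; i >= 0; i--) {     // regions sorted by _dumptime_base
//       if (p >= r[i]._dumptime_base) {
//         return p + r[i]._runtime_offset;   // relocate into the loaded copy
//       }
//     }
//     return p;                              // not inside any loaded region
//   }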
+struct LoadedArchiveHeapRegion { + int _region_index; // index for FileMapInfo::space_at(index) + size_t _region_size; // number of bytes in this region + uintptr_t _dumptime_base; // The dump-time (decoded) address of the first object in this region + intx _runtime_offset; // If an object's dump time address P is within this region, its + // runtime address is P + _runtime_offset + + static int comparator(const void* a, const void* b) { + LoadedArchiveHeapRegion* reg_a = (LoadedArchiveHeapRegion*)a; + LoadedArchiveHeapRegion* reg_b = (LoadedArchiveHeapRegion*)b; + if (reg_a->_dumptime_base < reg_b->_dumptime_base) { + return -1; + } else if (reg_a->_dumptime_base == reg_b->_dumptime_base) { + return 0; + } else { + return 1; + } + } + + uintptr_t top() { + return _dumptime_base + _region_size; + } +}; + +void HeapShared::init_loaded_heap_relocation(LoadedArchiveHeapRegion* loaded_regions, + int num_loaded_regions) { + _dumptime_base_0 = loaded_regions[0]._dumptime_base; + _dumptime_base_1 = loaded_regions[1]._dumptime_base; + _dumptime_base_2 = loaded_regions[2]._dumptime_base; + _dumptime_base_3 = loaded_regions[3]._dumptime_base; + _dumptime_top = loaded_regions[num_loaded_regions-1].top(); + + _runtime_offset_0 = loaded_regions[0]._runtime_offset; + _runtime_offset_1 = loaded_regions[1]._runtime_offset; + _runtime_offset_2 = loaded_regions[2]._runtime_offset; + _runtime_offset_3 = loaded_regions[3]._runtime_offset; + + assert(2 <= num_loaded_regions && num_loaded_regions <= 4, "must be"); + if (num_loaded_regions < 4) { + _dumptime_base_3 = UINTPTR_MAX; + } + if (num_loaded_regions < 3) { + _dumptime_base_2 = UINTPTR_MAX; + } +} + +bool HeapShared::can_load() { + return Universe::heap()->can_load_archived_objects(); +} + +template <int NUM_LOADED_REGIONS> +class PatchLoadedRegionPointers: public BitMapClosure { + narrowOop* _start; + intx _offset_0; + intx _offset_1; + intx _offset_2; + intx _offset_3; + uintptr_t _base_0; + uintptr_t _base_1; + uintptr_t _base_2; + uintptr_t _base_3; + uintptr_t _top; + + static_assert(MetaspaceShared::max_num_heap_regions == 4, "can't handle more than 4 regions"); + static_assert(NUM_LOADED_REGIONS >= 2, "we have at least 2 loaded regions"); + static_assert(NUM_LOADED_REGIONS <= 4, "we have at most 4 loaded regions"); + + public: + PatchLoadedRegionPointers(narrowOop* start, LoadedArchiveHeapRegion* loaded_regions) + : _start(start), + _offset_0(loaded_regions[0]._runtime_offset), + _offset_1(loaded_regions[1]._runtime_offset), + _offset_2(loaded_regions[2]._runtime_offset), + _offset_3(loaded_regions[3]._runtime_offset), + _base_0(loaded_regions[0]._dumptime_base), + _base_1(loaded_regions[1]._dumptime_base), + _base_2(loaded_regions[2]._dumptime_base), + _base_3(loaded_regions[3]._dumptime_base) { + _top = loaded_regions[NUM_LOADED_REGIONS-1].top(); + } + + bool do_bit(size_t offset) { + narrowOop* p = _start + offset; + narrowOop v = *p; + assert(!CompressedOops::is_null(v), "null oops should have been filtered out at dump time"); + uintptr_t o = cast_from_oop<uintptr_t>(HeapShared::decode_from_archive(v)); + assert(_base_0 <= o && o < _top, "must be"); + + + // We usually have only 2 regions for the default archive. Use a template to avoid unnecessary comparisons.
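// NUM_LOADED_REGIONS is a compile-time constant, so for the common two-region
// archive the two comparisons guarded by it below fold to false and only the
// _base_1 test survives in the generated code.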
+ if (NUM_LOADED_REGIONS > 3 && o >= _base_3) { + o += _offset_3; + } else if (NUM_LOADED_REGIONS > 2 && o >= _base_2) { + o += _offset_2; + } else if (o >= _base_1) { + o += _offset_1; + } else { + o += _offset_0; + } + HeapShared::assert_in_loaded_heap(o); + RawAccess<IS_NOT_NULL>::oop_store(p, cast_to_oop(o)); + return true; + } +}; + +int HeapShared::init_loaded_regions(FileMapInfo* mapinfo, LoadedArchiveHeapRegion* loaded_regions, + MemRegion& archive_space) { + size_t total_bytes = 0; + int num_loaded_regions = 0; + for (int i = MetaspaceShared::first_archive_heap_region; + i <= MetaspaceShared::last_archive_heap_region; i++) { + FileMapRegion* r = mapinfo->space_at(i); + r->assert_is_heap_region(); + if (r->used() > 0) { + assert(is_aligned(r->used(), HeapWordSize), "must be"); + total_bytes += r->used(); + LoadedArchiveHeapRegion* ri = &loaded_regions[num_loaded_regions++]; + ri->_region_index = i; + ri->_region_size = r->used(); + ri->_dumptime_base = (uintptr_t)mapinfo->start_address_as_decoded_from_archive(r); + } + } + + assert(is_aligned(total_bytes, HeapWordSize), "must be"); + size_t word_size = total_bytes / HeapWordSize; + HeapWord* buffer = Universe::heap()->allocate_loaded_archive_space(word_size); + if (buffer == nullptr) { + return 0; + } + + archive_space = MemRegion(buffer, word_size); + _loaded_heap_bottom = (uintptr_t)archive_space.start(); + _loaded_heap_top = _loaded_heap_bottom + total_bytes; + + return num_loaded_regions; +} + +void HeapShared::sort_loaded_regions(LoadedArchiveHeapRegion* loaded_regions, int num_loaded_regions, + uintptr_t buffer) { + // Find the relocation offset of the pointers in each region + qsort(loaded_regions, num_loaded_regions, sizeof(LoadedArchiveHeapRegion), + LoadedArchiveHeapRegion::comparator); + + uintptr_t p = buffer; + for (int i = 0; i < num_loaded_regions; i++) { + // This region will be loaded at p, so all objects inside this + // region will be shifted by ri->_runtime_offset + LoadedArchiveHeapRegion* ri = &loaded_regions[i]; + ri->_runtime_offset = p - ri->_dumptime_base; + p += ri->_region_size; + } + assert(p == _loaded_heap_top, "must be"); +} + +bool HeapShared::load_regions(FileMapInfo* mapinfo, LoadedArchiveHeapRegion* loaded_regions, + int num_loaded_regions, uintptr_t buffer) { + uintptr_t bitmap_base = (uintptr_t)mapinfo->map_bitmap_region(); + uintptr_t load_address = buffer; + for (int i = 0; i < num_loaded_regions; i++) { + LoadedArchiveHeapRegion* ri = &loaded_regions[i]; + FileMapRegion* r = mapinfo->space_at(ri->_region_index); + + if (!mapinfo->read_region(ri->_region_index, (char*)load_address, r->used(), /* do_commit = */ false)) { + // There's no easy way to free the buffer, so we will fill it with zero later + // in fill_failed_loaded_region(), and it will eventually be GC'ed. + log_warning(cds)("Loading of heap region %d has failed. Archived objects are disabled", i);
Archived objects are disabled", i); + _loading_failed = true; + return false; + } + log_info(cds)("Loaded heap region #%d at base " INTPTR_FORMAT " top " INTPTR_FORMAT + " size " SIZE_FORMAT_W(6) " delta " INTX_FORMAT, + ri->_region_index, load_address, load_address + ri->_region_size, + ri->_region_size, ri->_runtime_offset); + + uintptr_t oopmap = bitmap_base + r->oopmap_offset(); + BitMapView bm((BitMap::bm_word_t*)oopmap, r->oopmap_size_in_bits()); + + if (num_loaded_regions == 4) { + PatchLoadedRegionPointers<4> patcher((narrowOop*)load_address, loaded_regions); + bm.iterate(&patcher); + } else if (num_loaded_regions == 3) { + PatchLoadedRegionPointers<3> patcher((narrowOop*)load_address, loaded_regions); + bm.iterate(&patcher); + } else { + assert(num_loaded_regions == 2, "must be"); + PatchLoadedRegionPointers<2> patcher((narrowOop*)load_address, loaded_regions); + bm.iterate(&patcher); + } + + load_address += r->used(); + } + + return true; +} + +bool HeapShared::load_heap_regions(FileMapInfo* mapinfo) { + init_narrow_oop_decoding(mapinfo->narrow_oop_base(), mapinfo->narrow_oop_shift()); + + LoadedArchiveHeapRegion loaded_regions[MetaspaceShared::max_num_heap_regions]; + memset(loaded_regions, 0, sizeof(loaded_regions)); + + MemRegion archive_space; + int num_loaded_regions = init_loaded_regions(mapinfo, loaded_regions, archive_space); + if (num_loaded_regions <= 0) { + return false; + } + sort_loaded_regions(loaded_regions, num_loaded_regions, (uintptr_t)archive_space.start()); + if (!load_regions(mapinfo, loaded_regions, num_loaded_regions, (uintptr_t)archive_space.start())) { + assert(_loading_failed, "must be"); + return false; + } + + init_loaded_heap_relocation(loaded_regions, num_loaded_regions); + _is_loaded = true; + set_roots(mapinfo->heap_obj_roots()); + + return true; +} + +class VerifyLoadedHeapEmbeddedPointers: public BasicOopIterateClosure { + ResourceHashtable* _table; + + public: + VerifyLoadedHeapEmbeddedPointers(ResourceHashtable* table) : _table(table) {} + + virtual void do_oop(narrowOop* p) { + // This should be called before the loaded regions are modified, so all the embedded pointers + // must be NULL, or must point to a valid object in the loaded regions. 
+ narrowOop v = *p; + if (!CompressedOops::is_null(v)) { + oop o = CompressedOops::decode_not_null(v); + uintptr_t u = cast_from_oop<uintptr_t>(o); + HeapShared::assert_in_loaded_heap(u); + guarantee(_table->contains(u), "must point to beginning of object in loaded archived regions"); + } + } + virtual void do_oop(oop* p) { + ShouldNotReachHere(); + } +}; + +void HeapShared::finish_initialization() { + if (is_loaded()) { + HeapWord* bottom = (HeapWord*)_loaded_heap_bottom; + HeapWord* top = (HeapWord*)_loaded_heap_top; + + MemRegion archive_space = MemRegion(bottom, top); + Universe::heap()->complete_loaded_archive_space(archive_space); + } + + if (VerifyArchivedFields <= 0 || !is_loaded()) { + return; + } + + log_info(cds, heap)("Verify all oops and pointers in loaded heap"); + + ResourceMark rm; + ResourceHashtable<uintptr_t, bool> table; + VerifyLoadedHeapEmbeddedPointers verifier(&table); + HeapWord* bottom = (HeapWord*)_loaded_heap_bottom; + HeapWord* top = (HeapWord*)_loaded_heap_top; + + for (HeapWord* p = bottom; p < top; ) { + oop o = cast_to_oop(p); + table.put(cast_from_oop<uintptr_t>(o), true); + p += o->size(); + } + + for (HeapWord* p = bottom; p < top; ) { + oop o = cast_to_oop(p); + o->oop_iterate(&verifier); + p += o->size(); + } +} + +void HeapShared::fill_failed_loaded_region() { + assert(_loading_failed, "must be"); + if (_loaded_heap_bottom != 0) { + assert(_loaded_heap_top != 0, "must be"); + HeapWord* bottom = (HeapWord*)_loaded_heap_bottom; + HeapWord* top = (HeapWord*)_loaded_heap_top; + Universe::heap()->fill_with_objects(bottom, top - bottom); + } +} + #endif // INCLUDE_CDS_JAVA_HEAP diff --git a/src/hotspot/share/cds/heapShared.hpp b/src/hotspot/share/cds/heapShared.hpp index 74de74d6c9266b18aa867db9dcae70bd4c06ca82..0a673c51e864f7cd3d668f6e65d4f8d154848a17 100644 --- a/src/hotspot/share/cds/heapShared.hpp +++ b/src/hotspot/share/cds/heapShared.hpp @@ -42,6 +42,7 @@ #if INCLUDE_CDS_JAVA_HEAP class DumpedInternedStrings; +class FileMapInfo; struct ArchivableStaticFieldInfo { const char* klass_name; @@ -138,37 +139,98 @@ class ArchivedKlassSubGraphInfoRecord { }; #endif // INCLUDE_CDS_JAVA_HEAP +struct LoadedArchiveHeapRegion; + class HeapShared: AllStatic { friend class VerifySharedOopClosure; - private: +public: + // At runtime, heap regions in the CDS archive can be used in two different ways, + // depending on the GC type: + // - Mapped: (G1 only) the regions are directly mapped into the Java heap + // - Loaded: At VM start-up, the objects in the heap regions are copied into the + // Java heap. This is easier to implement than mapping but + // slightly less efficient, as the embedded pointers need to be relocated. + static bool can_use() { return can_map() || can_load(); } + + // Can this VM write heap regions into the CDS archive? Currently only G1+compressed{oops,cp} + static bool can_write() { + CDS_JAVA_HEAP_ONLY(return (UseG1GC && UseCompressedOops && UseCompressedClassPointers);) + NOT_CDS_JAVA_HEAP(return false;) + } + + // Can this VM map archived heap regions? Currently only G1+compressed{oops,cp} + static bool can_map() { + CDS_JAVA_HEAP_ONLY(return (UseG1GC && UseCompressedOops && UseCompressedClassPointers);) + NOT_CDS_JAVA_HEAP(return false;) + } + static bool is_mapped() { + return closed_regions_mapped() && open_regions_mapped(); + } + + // Can this VM load the objects from archived heap regions into the heap at start-up?
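// (As wired up in heapShared.cpp above, can_load() simply delegates to
//  Universe::heap()->can_load_archived_objects(), so each collector opts in
//  to the loading path individually.)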
+ static bool can_load() NOT_CDS_JAVA_HEAP_RETURN_(false); + static void finish_initialization() NOT_CDS_JAVA_HEAP_RETURN; + static bool is_loaded() { + CDS_JAVA_HEAP_ONLY(return _is_loaded;) + NOT_CDS_JAVA_HEAP(return false;) + } + + static bool are_archived_strings_available() { + return is_loaded() || closed_regions_mapped(); + } + static bool are_archived_mirrors_available() { + return is_fully_available(); + } + static bool is_fully_available() { + return is_loaded() || is_mapped(); + } + +private: #if INCLUDE_CDS_JAVA_HEAP - static bool _closed_archive_heap_region_mapped; - static bool _open_archive_heap_region_mapped; - static bool _archive_heap_region_fixed; + static bool _closed_regions_mapped; + static bool _open_regions_mapped; + static bool _is_loaded; static DumpedInternedStrings *_dumped_interned_strings; + // Support for loaded archived heap. These are cached values from + // the LoadedArchiveHeapRegions. + static uintptr_t _dumptime_base_0; + static uintptr_t _dumptime_base_1; + static uintptr_t _dumptime_base_2; + static uintptr_t _dumptime_base_3; + static uintptr_t _dumptime_top; + static intx _runtime_offset_0; + static intx _runtime_offset_1; + static intx _runtime_offset_2; + static intx _runtime_offset_3; + static uintptr_t _loaded_heap_bottom; + static uintptr_t _loaded_heap_top; + static bool _loading_failed; + public: - static bool oop_equals(oop const& p1, oop const& p2) { - return p1 == p2; - } static unsigned oop_hash(oop const& p); static unsigned string_oop_hash(oop const& string) { return java_lang_String::hash_code(string); } + static bool load_heap_regions(FileMapInfo* mapinfo); + static void assert_in_loaded_heap(uintptr_t o) { + assert(is_in_loaded_heap(o), "must be"); + } + private: + static bool is_in_loaded_heap(uintptr_t o) { + return (_loaded_heap_bottom <= o && o < _loaded_heap_top); + } + typedef ResourceHashtable<oop, oop, + ResourceObj::C_HEAP, + mtClassShared, + HeapShared::oop_hash> ArchivedObjectCache; static ArchivedObjectCache* _archived_object_cache; - static bool klass_equals(Klass* const& p1, Klass* const& p2) { - return primitive_equals<Klass*>(p1, p2); - } - static unsigned klass_hash(Klass* const& klass) { // Generate deterministic hashcode even if SharedBaseAddress is changed due to ASLR. return primitive_hash<address>(address(klass) - SharedBaseAddress);
@@ -176,10 +238,10 @@ private: class DumpTimeKlassSubGraphInfoTable : public ResourceHashtable<Klass*, KlassSubGraphInfo, + ResourceObj::C_HEAP, + mtClassShared, + HeapShared::klass_hash> { public: int _count; }; @@ -200,7 +262,7 @@ private: static DumpTimeKlassSubGraphInfoTable* _dump_time_subgraph_info_table; static RunTimeKlassSubGraphInfoTable _run_time_subgraph_info_table; - static void check_closed_archive_heap_region_object(InstanceKlass* k); + static void check_closed_region_object(InstanceKlass* k); static void archive_object_subgraphs(ArchivableStaticFieldInfo fields[], int num, @@ -231,10 +293,10 @@ private: static int _narrow_oop_shift; typedef ResourceHashtable<oop, bool, + ResourceObj::C_HEAP, + mtClassShared, + HeapShared::oop_hash> SeenObjectsTable; static SeenObjectsTable *_seen_objects_table; @@ -282,6 +344,16 @@ private: resolve_or_init_classes_for_subgraph_of(Klass* k, bool do_init, TRAPS); static void resolve_or_init(Klass* k, bool do_init, TRAPS); static void init_archived_fields_for(Klass* k, const ArchivedKlassSubGraphInfoRecord* record); + + static int init_loaded_regions(FileMapInfo* mapinfo, LoadedArchiveHeapRegion* loaded_regions, + MemRegion& archive_space); + static void sort_loaded_regions(LoadedArchiveHeapRegion* loaded_regions, int num_loaded_regions, + uintptr_t buffer); + static bool load_regions(FileMapInfo* mapinfo, LoadedArchiveHeapRegion* loaded_regions, + int num_loaded_regions, uintptr_t buffer); + static void init_loaded_heap_relocation(LoadedArchiveHeapRegion* reloc_info, + int num_loaded_regions); + static void fill_failed_loaded_region(); public: static void reset_archived_object_states(TRAPS); static void create_archived_object_cache() { @@ -297,21 +369,14 @@ private: } static oop find_archived_heap_object(oop obj); - static oop archive_heap_object(oop obj); + static oop archive_object(oop obj); static void archive_klass_objects(); - static void set_archive_heap_region_fixed() { - _archive_heap_region_fixed = true; - } - static bool archive_heap_region_fixed() { - return _archive_heap_region_fixed; - } - - static void archive_java_heap_objects(GrowableArray<MemRegion> *closed, - GrowableArray<MemRegion> *open); - static void copy_closed_archive_heap_objects(GrowableArray<MemRegion> * closed_archive); - static void copy_open_archive_heap_objects(GrowableArray<MemRegion> * open_archive); + static void archive_objects(GrowableArray<MemRegion>* closed_regions, + GrowableArray<MemRegion>* open_regions); + static void copy_closed_objects(GrowableArray<MemRegion>* closed_regions); + static void copy_open_objects(GrowableArray<MemRegion>* open_regions); static oop archive_reachable_objects_from(int level, KlassSubGraphInfo* subgraph_info, @@ -351,40 +416,32 @@ private: public: static void run_full_gc_in_vm_thread() NOT_CDS_JAVA_HEAP_RETURN; - static bool is_heap_object_archiving_allowed() { - CDS_JAVA_HEAP_ONLY(return (UseG1GC && UseCompressedOops && UseCompressedClassPointers);) - NOT_CDS_JAVA_HEAP(return false;) - } - static bool is_heap_region(int idx) { - CDS_JAVA_HEAP_ONLY(return (idx >= MetaspaceShared::first_closed_archive_heap_region && - idx <= MetaspaceShared::last_open_archive_heap_region);) + CDS_JAVA_HEAP_ONLY(return (idx >= MetaspaceShared::first_closed_heap_region && + idx <= MetaspaceShared::last_open_heap_region);) NOT_CDS_JAVA_HEAP_RETURN_(false); } - static void set_closed_archive_heap_region_mapped() { - CDS_JAVA_HEAP_ONLY(_closed_archive_heap_region_mapped = true;) + static void set_closed_regions_mapped() { + CDS_JAVA_HEAP_ONLY(_closed_regions_mapped = true;) NOT_CDS_JAVA_HEAP_RETURN; } - 
static bool closed_archive_heap_region_mapped() { - CDS_JAVA_HEAP_ONLY(return _closed_archive_heap_region_mapped;) + static bool closed_regions_mapped() { + CDS_JAVA_HEAP_ONLY(return _closed_regions_mapped;) NOT_CDS_JAVA_HEAP_RETURN_(false); } - static void set_open_archive_heap_region_mapped() { - CDS_JAVA_HEAP_ONLY(_open_archive_heap_region_mapped = true;) + static void set_open_regions_mapped() { + CDS_JAVA_HEAP_ONLY(_open_regions_mapped = true;) NOT_CDS_JAVA_HEAP_RETURN; } - static bool open_archive_heap_region_mapped() { - CDS_JAVA_HEAP_ONLY(return _open_archive_heap_region_mapped;) + static bool open_regions_mapped() { + CDS_JAVA_HEAP_ONLY(return _open_regions_mapped;) NOT_CDS_JAVA_HEAP_RETURN_(false); } - static bool is_mapped() { - return closed_archive_heap_region_mapped() && open_archive_heap_region_mapped(); - } - static void fixup_mapped_heap_regions() NOT_CDS_JAVA_HEAP_RETURN; + static void fixup_regions() NOT_CDS_JAVA_HEAP_RETURN; - inline static bool is_archived_object(oop p) NOT_CDS_JAVA_HEAP_RETURN_(false); + static bool is_archived_object_during_dumptime(oop p) NOT_CDS_JAVA_HEAP_RETURN_(false); static void resolve_classes(JavaThread* THREAD) NOT_CDS_JAVA_HEAP_RETURN; static void initialize_from_archived_subgraph(Klass* k, JavaThread* THREAD) NOT_CDS_JAVA_HEAP_RETURN; @@ -397,8 +454,8 @@ private: static void init_narrow_oop_decoding(address base, int shift) NOT_CDS_JAVA_HEAP_RETURN; - static void patch_archived_heap_embedded_pointers(MemRegion mem, address oopmap, - size_t oopmap_in_bits) NOT_CDS_JAVA_HEAP_RETURN; + static void patch_embedded_pointers(MemRegion region, address oopmap, + size_t oopmap_in_bits) NOT_CDS_JAVA_HEAP_RETURN; static void init_for_dumping(TRAPS) NOT_CDS_JAVA_HEAP_RETURN; static void write_subgraph_info_table() NOT_CDS_JAVA_HEAP_RETURN; @@ -408,10 +465,10 @@ private: #if INCLUDE_CDS_JAVA_HEAP class DumpedInternedStrings : public ResourceHashtable<oop, bool, + ResourceObj::C_HEAP, + mtClassShared, + HeapShared::string_oop_hash> {}; #endif diff --git a/src/hotspot/share/cds/heapShared.inline.hpp b/src/hotspot/share/cds/heapShared.inline.hpp index c3eeee060d4715d628cbd3828e378fcac2eddfab..25f53aa02886d3c2f15a733009a9e6ede8711a13 100644 --- a/src/hotspot/share/cds/heapShared.inline.hpp +++ b/src/hotspot/share/cds/heapShared.inline.hpp @@ -26,20 +26,28 @@ #define SHARE_CDS_HEAPSHARED_INLINE_HPP #include "cds/heapShared.hpp" - -#include "gc/shared/collectedHeap.inline.hpp" #include "oops/compressedOops.inline.hpp" #include "utilities/align.hpp" #if INCLUDE_CDS_JAVA_HEAP -bool HeapShared::is_archived_object(oop p) { - return Universe::heap()->is_archived_object(p); -} - inline oop HeapShared::decode_from_archive(narrowOop v) { assert(!CompressedOops::is_null(v), "narrow oop value can never be zero"); - oop result = cast_to_oop((uintptr_t)_narrow_oop_base + ((uintptr_t)v << _narrow_oop_shift)); + uintptr_t p = ((uintptr_t)_narrow_oop_base) + ((uintptr_t)v << _narrow_oop_shift); + if (p >= _dumptime_base_0) { + assert(p < _dumptime_top, "must be"); + if (p >= _dumptime_base_3) { + p += _runtime_offset_3; + } else if (p >= _dumptime_base_2) { + p += _runtime_offset_2; + } else if (p >= _dumptime_base_1) { + p += _runtime_offset_1; + } else { + p += _runtime_offset_0; + } + } + + oop result = cast_to_oop((uintptr_t)p); assert(is_object_aligned(result), "address not aligned: " INTPTR_FORMAT, p2i((void*) result)); return result; } diff --git a/src/hotspot/share/cds/lambdaProxyClassDictionary.cpp b/src/hotspot/share/cds/lambdaProxyClassDictionary.cpp new file mode 100644 
index 0000000000000000000000000000000000000000..78917ebaae3f7b2d07209cec288fee3d02446486 --- /dev/null +++ b/src/hotspot/share/cds/lambdaProxyClassDictionary.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "cds/archiveBuilder.hpp" +#include "cds/lambdaProxyClassDictionary.hpp" +#include "classfile/systemDictionaryShared.hpp" + +DumpTimeLambdaProxyClassInfo DumpTimeLambdaProxyClassInfo::clone() { + DumpTimeLambdaProxyClassInfo res; + res._proxy_klasses = NULL; + if (_proxy_klasses != NULL && _proxy_klasses->length() > 0) { + int num_proxy_klasses = _proxy_klasses->length(); + res._proxy_klasses = new (ResourceObj::C_HEAP, mtClassShared) GrowableArray(num_proxy_klasses, mtClassShared); + for (int i = 0; i < num_proxy_klasses; i++) { + res._proxy_klasses->append(_proxy_klasses->at(i)); + } + } + return res; +} + +void LambdaProxyClassKey::mark_pointers() { + ArchivePtrMarker::mark_pointer(&_caller_ik); + ArchivePtrMarker::mark_pointer(&_instantiated_method_type); + ArchivePtrMarker::mark_pointer(&_invoked_name); + ArchivePtrMarker::mark_pointer(&_invoked_type); + ArchivePtrMarker::mark_pointer(&_member_method); + ArchivePtrMarker::mark_pointer(&_method_type); +} + +unsigned int LambdaProxyClassKey::hash() const { + return SystemDictionaryShared::hash_for_shared_dictionary((address)_caller_ik) + + SystemDictionaryShared::hash_for_shared_dictionary((address)_invoked_name) + + SystemDictionaryShared::hash_for_shared_dictionary((address)_invoked_type) + + SystemDictionaryShared::hash_for_shared_dictionary((address)_method_type) + + SystemDictionaryShared::hash_for_shared_dictionary((address)_instantiated_method_type); +} diff --git a/src/hotspot/share/cds/lambdaProxyClassDictionary.hpp b/src/hotspot/share/cds/lambdaProxyClassDictionary.hpp new file mode 100644 index 0000000000000000000000000000000000000000..55c49923f8263c752ecce075cc5b1839e7352f4a --- /dev/null +++ b/src/hotspot/share/cds/lambdaProxyClassDictionary.hpp @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARED_CDS_LAMBDAPROXYCLASSINFO_HPP +#define SHARED_CDS_LAMBDAPROXYCLASSINFO_HPP +#include "cds/metaspaceShared.hpp" +#include "classfile/javaClasses.hpp" +#include "utilities/growableArray.hpp" +#include "utilities/resourceHash.hpp" + +class InstanceKlass; +class Method; +class Symbol; + +class LambdaProxyClassKey { + InstanceKlass* _caller_ik; + Symbol* _invoked_name; + Symbol* _invoked_type; + Symbol* _method_type; + Method* _member_method; + Symbol* _instantiated_method_type; + +public: + LambdaProxyClassKey(InstanceKlass* caller_ik, + Symbol* invoked_name, + Symbol* invoked_type, + Symbol* method_type, + Method* member_method, + Symbol* instantiated_method_type) : + _caller_ik(caller_ik), + _invoked_name(invoked_name), + _invoked_type(invoked_type), + _method_type(method_type), + _member_method(member_method), + _instantiated_method_type(instantiated_method_type) {} + + void metaspace_pointers_do(MetaspaceClosure* it) { + it->push(&_caller_ik); + it->push(&_invoked_name); + it->push(&_invoked_type); + it->push(&_method_type); + it->push(&_member_method); + it->push(&_instantiated_method_type); + } + + bool equals(LambdaProxyClassKey const& other) const { + return _caller_ik == other._caller_ik && + _invoked_name == other._invoked_name && + _invoked_type == other._invoked_type && + _method_type == other._method_type && + _member_method == other._member_method && + _instantiated_method_type == other._instantiated_method_type; + } + + void mark_pointers(); + unsigned int hash() const; + + static unsigned int dumptime_hash(Symbol* sym) { + if (sym == NULL) { + // _invoked_name may be NULL + return 0; + } + return java_lang_String::hash_code((const jbyte*)sym->bytes(), sym->utf8_length()); + } + + unsigned int dumptime_hash() const { + return dumptime_hash(_caller_ik->name()) + + dumptime_hash(_invoked_name) + + dumptime_hash(_invoked_type) + + dumptime_hash(_method_type) + + dumptime_hash(_instantiated_method_type); + } + + static inline unsigned int DUMPTIME_HASH(LambdaProxyClassKey const& key) { + return (key.dumptime_hash()); + } + + static inline bool DUMPTIME_EQUALS( + LambdaProxyClassKey const& k1, LambdaProxyClassKey const& k2) { + return (k1.equals(k2)); + } + + InstanceKlass* caller_ik() const { return _caller_ik; } +}; + +class DumpTimeLambdaProxyClassInfo { +public: + GrowableArray<InstanceKlass*>* _proxy_klasses; + DumpTimeLambdaProxyClassInfo() : _proxy_klasses(NULL) {} + void add_proxy_klass(InstanceKlass* proxy_klass) { + if (_proxy_klasses == NULL) { + _proxy_klasses = new (ResourceObj::C_HEAP, mtClassShared) GrowableArray<InstanceKlass*>(5, mtClassShared); + } + assert(_proxy_klasses != NULL, "sanity"); + _proxy_klasses->append(proxy_klass); + } + + void metaspace_pointers_do(MetaspaceClosure* it) { + for (int i=0; i<_proxy_klasses->length(); i++) { + 
it->push(_proxy_klasses->adr_at(i)); + } + } + DumpTimeLambdaProxyClassInfo clone(); // copy ctor will cause implicitly-declared +}; + +class RunTimeLambdaProxyClassInfo { + LambdaProxyClassKey _key; + InstanceKlass* _proxy_klass_head; +public: + RunTimeLambdaProxyClassInfo(LambdaProxyClassKey key, InstanceKlass* proxy_klass_head) : + _key(key), _proxy_klass_head(proxy_klass_head) {} + + InstanceKlass* proxy_klass_head() const { return _proxy_klass_head; } + + // Used by LambdaProxyClassDictionary to implement OffsetCompactHashtable::EQUALS + static inline bool EQUALS( + const RunTimeLambdaProxyClassInfo* value, LambdaProxyClassKey* key, int len_unused) { + return (value->_key.equals(*key)); + } + void init(LambdaProxyClassKey& key, DumpTimeLambdaProxyClassInfo& info) { + _key = key; + _key.mark_pointers(); + _proxy_klass_head = info._proxy_klasses->at(0); + ArchivePtrMarker::mark_pointer(&_proxy_klass_head); + } + + unsigned int hash() const { + return _key.hash(); + } + LambdaProxyClassKey key() const { + return _key; + } +}; + +class DumpTimeLambdaProxyClassDictionary + : public ResourceHashtable { +public: + DumpTimeLambdaProxyClassDictionary() : _count(0) {} + int _count; +}; + +class LambdaProxyClassDictionary : public OffsetCompactHashtable< + LambdaProxyClassKey*, + const RunTimeLambdaProxyClassInfo*, + RunTimeLambdaProxyClassInfo::EQUALS> {}; + +#endif // SHARED_CDS_LAMBDAPROXYCLASSINFO_HPP diff --git a/src/hotspot/share/cds/metaspaceShared.cpp b/src/hotspot/share/cds/metaspaceShared.cpp index c8ec44aee06c2f12bca772cbfec3a06e49e10585..17e27a5372f6a65fced5c0f6a7af3a94c1c486af 100644 --- a/src/hotspot/share/cds/metaspaceShared.cpp +++ b/src/hotspot/share/cds/metaspaceShared.cpp @@ -25,6 +25,8 @@ #include "precompiled.hpp" #include "jvm_io.h" #include "cds/archiveBuilder.hpp" +#include "cds/cdsProtectionDomain.hpp" +#include "cds/classListWriter.hpp" #include "cds/classListParser.hpp" #include "cds/cppVtables.hpp" #include "cds/dumpAllocStats.hpp" @@ -69,10 +71,12 @@ #include "runtime/sharedRuntime.hpp" #include "runtime/vmThread.hpp" #include "runtime/vmOperations.hpp" +#include "services/memTracker.hpp" #include "utilities/align.hpp" #include "utilities/bitMap.inline.hpp" #include "utilities/ostream.hpp" #include "utilities/defaultStream.hpp" +#include "utilities/resourceHash.hpp" #if INCLUDE_G1GC #include "gc/g1/g1CollectedHeap.inline.hpp" #endif @@ -112,7 +116,7 @@ bool MetaspaceShared::_use_full_module_graph = true; // [5] SymbolTable, StringTable, SystemDictionary, and a few other read-only data // are copied into the ro region as read-only tables. // -// The ca0/ca1 and oa0/oa1 regions are populated inside HeapShared::archive_java_heap_objects. +// The ca0/ca1 and oa0/oa1 regions are populated inside HeapShared::archive_objects. // Their layout is independent of the rw/ro regions. 
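// For orientation (indices per the renamed enum in metaspaceShared.hpp,
// further down in this patch): 0 = rw, 1 = ro, 2 = bm, 3..4 = closed heap
// regions (ca0, ca1), 5..6 = open heap regions (oa0, oa1).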
static DumpRegion _symbol_region("symbols"); @@ -146,16 +150,43 @@ static bool shared_base_valid(char* shared_base) { } class DumpClassListCLDClosure : public CLDClosure { + static const int INITIAL_TABLE_SIZE = 1987; + static const int MAX_TABLE_SIZE = 61333; + fileStream *_stream; + ResizeableResourceHashtable _dumped_classes; + + void dump(InstanceKlass* ik) { + bool created; + _dumped_classes.put_if_absent(ik, &created); + if (!created) { + return; + } + if (_dumped_classes.maybe_grow(MAX_TABLE_SIZE)) { + log_info(cds, hashtables)("Expanded _dumped_classes table to %d", _dumped_classes.table_size()); + } + if (ik->java_super()) { + dump(ik->java_super()); + } + Array* interfaces = ik->local_interfaces(); + int len = interfaces->length(); + for (int i = 0; i < len; i++) { + dump(interfaces->at(i)); + } + ClassListWriter::write_to_stream(ik, _stream); + } + public: - DumpClassListCLDClosure(fileStream* f) : CLDClosure() { _stream = f; } + DumpClassListCLDClosure(fileStream* f) + : CLDClosure(), _dumped_classes(INITIAL_TABLE_SIZE) { + _stream = f; + } + void do_cld(ClassLoaderData* cld) { for (Klass* klass = cld->klasses(); klass != NULL; klass = klass->next_link()) { if (klass->is_instance_klass()) { - InstanceKlass* ik = InstanceKlass::cast(klass); - if (ik->is_shareable()) { - _stream->print_cr("%s", ik->name()->as_C_string()); - } + dump(InstanceKlass::cast(klass)); } } } @@ -165,6 +196,7 @@ void MetaspaceShared::dump_loaded_classes(const char* file_name, TRAPS) { fileStream stream(file_name, "w"); if (stream.is_open()) { MutexLocker lock(ClassLoaderDataGraph_lock); + MutexLocker lock2(ClassListFile_lock, Mutex::_no_safepoint_check_flag); DumpClassListCLDClosure collect_classes(&stream); ClassLoaderDataGraph::loaded_cld_do(&collect_classes); } else { @@ -246,7 +278,7 @@ void MetaspaceShared::post_initialize(TRAPS) { if (UseSharedSpaces) { int size = FileMapInfo::get_number_of_shared_paths(); if (size > 0) { - SystemDictionaryShared::allocate_shared_data_arrays(size, CHECK); + CDSProtectionDomain::allocate_shared_data_arrays(size, CHECK); if (!DynamicDumpSharedSpaces) { FileMapInfo* info; if (FileMapInfo::dynamic_info() == NULL) { @@ -335,6 +367,11 @@ void MetaspaceShared::serialize(SerializeClosure* soc) { soc->do_tag(typeArrayOopDesc::base_offset_in_bytes(T_BYTE)); soc->do_tag(sizeof(Symbol)); + // Need to do this first, as subsequent steps may call virtual functions + // in archived Metadata objects. + CppVtables::serialize(soc); + soc->do_tag(--tag); + // Dump/restore miscellaneous metadata. 
JavaClasses::serialize_offsets(soc); Universe::serialize(soc); @@ -356,9 +393,6 @@ void MetaspaceShared::serialize(SerializeClosure* soc) { SystemDictionaryShared::serialize_vm_classes(soc); soc->do_tag(--tag); - CppVtables::serialize(soc); - soc->do_tag(--tag); - CDS_JAVA_HEAP_ONLY(ClassLoaderDataShared::serialize(soc);) LambdaFormInvokers::serialize(soc); @@ -390,7 +424,7 @@ static void rewrite_nofast_bytecode(const methodHandle& method) { void MetaspaceShared::rewrite_nofast_bytecodes_and_calculate_fingerprints(Thread* thread, InstanceKlass* ik) { for (int i = 0; i < ik->methods()->length(); i++) { methodHandle m(thread, ik->methods()->at(i)); - if (ik->can_be_verified_at_dumptime()) { + if (ik->can_be_verified_at_dumptime() && ik->is_linked()) { rewrite_nofast_bytecode(m); } Fingerprinter fp(m); @@ -401,15 +435,15 @@ void MetaspaceShared::rewrite_nofast_bytecodes_and_calculate_fingerprints(Thread class VM_PopulateDumpSharedSpace : public VM_GC_Operation { private: - GrowableArray *_closed_archive_heap_regions; - GrowableArray *_open_archive_heap_regions; + GrowableArray *_closed_heap_regions; + GrowableArray *_open_heap_regions; - GrowableArray *_closed_archive_heap_oopmaps; - GrowableArray *_open_archive_heap_oopmaps; + GrowableArray *_closed_heap_oopmaps; + GrowableArray *_open_heap_oopmaps; void dump_java_heap_objects(GrowableArray* klasses) NOT_CDS_JAVA_HEAP_RETURN; - void dump_archive_heap_oopmaps() NOT_CDS_JAVA_HEAP_RETURN; - void dump_archive_heap_oopmaps(GrowableArray* regions, + void dump_heap_oopmaps() NOT_CDS_JAVA_HEAP_RETURN; + void dump_heap_oopmaps(GrowableArray* regions, GrowableArray* oopmaps); void dump_shared_symbol_table(GrowableArray* symbols) { log_info(cds)("Dumping symbol table ..."); @@ -421,10 +455,10 @@ public: VM_PopulateDumpSharedSpace() : VM_GC_Operation(0 /* total collections, ignored */, GCCause::_archive_time_gc), - _closed_archive_heap_regions(NULL), - _open_archive_heap_regions(NULL), - _closed_archive_heap_oopmaps(NULL), - _open_archive_heap_oopmaps(NULL) {} + _closed_heap_regions(NULL), + _open_heap_regions(NULL), + _closed_heap_oopmaps(NULL), + _open_heap_oopmaps(NULL) {} bool skip_operation() const { return false; } @@ -470,7 +504,7 @@ char* VM_PopulateDumpSharedSpace::dump_read_only_tables() { MetaspaceShared::serialize(&wc); // Write the bitmaps for patching the archive heap regions - dump_archive_heap_oopmaps(); + dump_heap_oopmaps(); return start; } @@ -528,10 +562,10 @@ void VM_PopulateDumpSharedSpace::doit() { mapinfo->set_cloned_vtables(cloned_vtables); mapinfo->open_for_write(); builder.write_archive(mapinfo, - _closed_archive_heap_regions, - _open_archive_heap_regions, - _closed_archive_heap_oopmaps, - _open_archive_heap_oopmaps); + _closed_heap_regions, + _open_heap_regions, + _closed_heap_oopmaps, + _open_heap_oopmaps); if (PrintSystemDictionaryAtExit) { SystemDictionary::print(); @@ -550,29 +584,45 @@ void VM_PopulateDumpSharedSpace::doit() { class CollectCLDClosure : public CLDClosure { GrowableArray _loaded_cld; + GrowableArray _loaded_cld_handles; // keep the CLDs alive + Thread* _current_thread; public: - CollectCLDClosure() {} + CollectCLDClosure(Thread* thread) : _current_thread(thread) {} ~CollectCLDClosure() { - for (int i = 0; i < _loaded_cld.length(); i++) { - ClassLoaderData* cld = _loaded_cld.at(i); - cld->dec_keep_alive(); + for (int i = 0; i < _loaded_cld_handles.length(); i++) { + _loaded_cld_handles.at(i).release(Universe::vm_global()); } } void do_cld(ClassLoaderData* cld) { - if (!cld->is_unloading()) { - 
cld->inc_keep_alive(); - _loaded_cld.append(cld); - } + assert(cld->is_alive(), "must be"); + _loaded_cld.append(cld); + _loaded_cld_handles.append(OopHandle(Universe::vm_global(), cld->holder_phantom())); } int nof_cld() const { return _loaded_cld.length(); } ClassLoaderData* cld_at(int index) { return _loaded_cld.at(index); } }; -bool MetaspaceShared::linking_required(InstanceKlass* ik) { - // For static CDS dump, do not link old classes. - // For dynamic CDS dump, only link classes loaded by the builtin class loaders. - return DumpSharedSpaces ? ik->can_be_verified_at_dumptime() : !ik->is_shared_unregistered_class(); +// Check if we can eagerly link this class at dump time, so we can avoid the +// runtime linking overhead (especially verification) +bool MetaspaceShared::may_be_eagerly_linked(InstanceKlass* ik) { + if (!ik->can_be_verified_at_dumptime()) { + // For old classes, try to leave them in the unlinked state, so + // we can still store them in the archive. They must be + // linked/verified at runtime. + return false; + } + if (DynamicDumpSharedSpaces && ik->is_shared_unregistered_class()) { + // Linking of unregistered classes at this stage may cause more + // classes to be resolved, resulting in calls to ClassLoader.loadClass() + // that may not be expected by custom class loaders. + // + // It's OK to do this for the built-in loaders as we know they can + // tolerate this. (Note that unregistered classes are loaded by the NULL + // loader during DumpSharedSpaces). + return false; + } + return true; } bool MetaspaceShared::link_class_for_cds(InstanceKlass* ik, TRAPS) { @@ -590,12 +640,11 @@ bool MetaspaceShared::link_class_for_cds(InstanceKlass* ik, TRAPS) { return res; } -void MetaspaceShared::link_and_cleanup_shared_classes(TRAPS) { - // Collect all loaded ClassLoaderData. - ResourceMark rm; - +void MetaspaceShared::link_shared_classes(TRAPS) { LambdaFormInvokers::regenerate_holder_classes(CHECK); - CollectCLDClosure collect_cld; + + // Collect all loaded ClassLoaderData. + CollectCLDClosure collect_cld(THREAD); { // ClassLoaderDataGraph::loaded_cld_do requires ClassLoaderDataGraph_lock. // We cannot link the classes while holding this lock (or else we may run into deadlock). @@ -612,7 +661,7 @@ void MetaspaceShared::link_and_cleanup_shared_classes(TRAPS) { for (Klass* klass = cld->klasses(); klass != NULL; klass = klass->next_link()) { if (klass->is_instance_klass()) { InstanceKlass* ik = InstanceKlass::cast(klass); - if (linking_required(ik)) { + if (may_be_eagerly_linked(ik)) { has_linked |= link_class_for_cds(ik, CHECK); } } @@ -700,7 +749,7 @@ void MetaspaceShared::preload_classes(TRAPS) { // Exercise the manifest processing code to ensure classes used by CDS at runtime // are always archived const char* dummy = "Manifest-Version: 1.0\n"; - SystemDictionaryShared::create_jar_manifest(dummy, strlen(dummy), CHECK); + CDSProtectionDomain::create_jar_manifest(dummy, strlen(dummy), CHECK); log_info(cds)("Loading classes to share: done."); log_info(cds)("Shared spaces: preloaded %d classes", class_count); @@ -724,7 +773,7 @@ void MetaspaceShared::preload_and_dump_impl(TRAPS) { // were not explicitly specified in the classlist. E.g., if an interface implemented by class K // fails verification, all other interfaces that were not specified in the classlist but // are implemented by K are not verified. 
- link_and_cleanup_shared_classes(CHECK); + link_shared_classes(CHECK); log_info(cds)("Rewriting and linking classes: done"); #if INCLUDE_CDS_JAVA_HEAP @@ -779,7 +828,7 @@ bool MetaspaceShared::try_link_class(JavaThread* current, InstanceKlass* ik) { #if INCLUDE_CDS_JAVA_HEAP void VM_PopulateDumpSharedSpace::dump_java_heap_objects(GrowableArray* klasses) { - if(!HeapShared::is_heap_object_archiving_allowed()) { + if(!HeapShared::can_write()) { log_info(cds)( "Archived java heap is not supported as UseG1GC, " "UseCompressedOops and UseCompressedClassPointers are required." @@ -807,27 +856,26 @@ void VM_PopulateDumpSharedSpace::dump_java_heap_objects(GrowableArray* k } // The closed and open archive heap space has maximum two regions. - // See FileMapInfo::write_archive_heap_regions() for details. - _closed_archive_heap_regions = new GrowableArray(2); - _open_archive_heap_regions = new GrowableArray(2); - HeapShared::archive_java_heap_objects(_closed_archive_heap_regions, - _open_archive_heap_regions); + // See FileMapInfo::write_heap_regions() for details. + _closed_heap_regions = new GrowableArray(2); + _open_heap_regions = new GrowableArray(2); + HeapShared::archive_objects(_closed_heap_regions, _open_heap_regions); ArchiveBuilder::OtherROAllocMark mark; HeapShared::write_subgraph_info_table(); } -void VM_PopulateDumpSharedSpace::dump_archive_heap_oopmaps() { - if (HeapShared::is_heap_object_archiving_allowed()) { - _closed_archive_heap_oopmaps = new GrowableArray(2); - dump_archive_heap_oopmaps(_closed_archive_heap_regions, _closed_archive_heap_oopmaps); +void VM_PopulateDumpSharedSpace::dump_heap_oopmaps() { + if (HeapShared::can_write()) { + _closed_heap_oopmaps = new GrowableArray(2); + dump_heap_oopmaps(_closed_heap_regions, _closed_heap_oopmaps); - _open_archive_heap_oopmaps = new GrowableArray(2); - dump_archive_heap_oopmaps(_open_archive_heap_regions, _open_archive_heap_oopmaps); + _open_heap_oopmaps = new GrowableArray(2); + dump_heap_oopmaps(_open_heap_regions, _open_heap_oopmaps); } } -void VM_PopulateDumpSharedSpace::dump_archive_heap_oopmaps(GrowableArray* regions, - GrowableArray* oopmaps) { +void VM_PopulateDumpSharedSpace::dump_heap_oopmaps(GrowableArray* regions, + GrowableArray* oopmaps) { for (int i=0; ilength(); i++) { ResourceBitMap oopmap = HeapShared::calculate_oopmap(regions->at(i)); size_t size_in_bits = oopmap.size(); @@ -1089,7 +1137,7 @@ MapArchiveResult MetaspaceShared::map_archives(FileMapInfo* static_mapinfo, File // map_heap_regions() compares the current narrow oop and klass encodings // with the archived ones, so it must be done after all encodings are determined. - static_mapinfo->map_heap_regions(); + static_mapinfo->map_or_load_heap_regions(); } }); log_info(cds)("optimized module handling: %s", MetaspaceShared::use_optimized_module_handling() ? "enabled" : "disabled"); @@ -1373,7 +1421,7 @@ void MetaspaceShared::initialize_shared_spaces() { FileMapInfo *static_mapinfo = FileMapInfo::current_info(); // Verify various attributes of the archive, plus initialize the - // shared string/symbol tables + // shared string/symbol tables. char* buffer = static_mapinfo->serialized_data(); intptr_t* array = (intptr_t*)buffer; ReadClosure rc(&array); @@ -1382,7 +1430,10 @@ void MetaspaceShared::initialize_shared_spaces() { // Initialize the run-time symbol table. SymbolTable::create_table(); - static_mapinfo->patch_archived_heap_embedded_pointers(); + // Finish up archived heap initialization. These must be + // done after ReadClosure. 
+ static_mapinfo->patch_heap_embedded_pointers(); + HeapShared::finish_initialization(); // Close the mapinfo file static_mapinfo->close(); @@ -1465,8 +1516,15 @@ bool MetaspaceShared::use_full_module_graph() { return true; } #endif - bool result = _use_optimized_module_handling && _use_full_module_graph && - (UseSharedSpaces || DumpSharedSpaces) && HeapShared::is_heap_object_archiving_allowed(); + bool result = _use_optimized_module_handling && _use_full_module_graph; + if (DumpSharedSpaces) { + result &= HeapShared::can_write(); + } else if (UseSharedSpaces) { + result &= HeapShared::can_use(); + } else { + result = false; + } + if (result && UseSharedSpaces) { // Classes used by the archived full module graph are loaded in JVMTI early phase. assert(!(JvmtiExport::should_post_class_file_load_hook() && JvmtiExport::has_early_class_hook_env()), diff --git a/src/hotspot/share/cds/metaspaceShared.hpp b/src/hotspot/share/cds/metaspaceShared.hpp index 553facc6134bfe25c6e34d75124fadccfd305525..74077d0fff05b582fca74a1d1c46e8760298de18 100644 --- a/src/hotspot/share/cds/metaspaceShared.hpp +++ b/src/hotspot/share/cds/metaspaceShared.hpp @@ -58,21 +58,25 @@ class MetaspaceShared : AllStatic { public: enum { // core archive spaces - rw = 0, // read-write shared space in the heap - ro = 1, // read-only shared space in the heap + rw = 0, // read-write shared space + ro = 1, // read-only shared space bm = 2, // relocation bitmaps (freed after file mapping is finished) num_core_region = 2, // rw and ro - num_non_heap_spaces = 3, // rw and ro and bm + num_non_heap_regions = 3, // rw and ro and bm - // mapped java heap regions - first_closed_archive_heap_region = bm + 1, - max_closed_archive_heap_region = 2, - last_closed_archive_heap_region = first_closed_archive_heap_region + max_closed_archive_heap_region - 1, - first_open_archive_heap_region = last_closed_archive_heap_region + 1, - max_open_archive_heap_region = 2, - last_open_archive_heap_region = first_open_archive_heap_region + max_open_archive_heap_region - 1, + // java heap regions + first_closed_heap_region = bm + 1, + max_num_closed_heap_regions = 2, + last_closed_heap_region = first_closed_heap_region + max_num_closed_heap_regions - 1, + first_open_heap_region = last_closed_heap_region + 1, + max_num_open_heap_regions = 2, + last_open_heap_region = first_open_heap_region + max_num_open_heap_regions - 1, + max_num_heap_regions = max_num_closed_heap_regions + max_num_open_heap_regions, - last_valid_region = last_open_archive_heap_region, + first_archive_heap_region = first_closed_heap_region, + last_archive_heap_region = last_open_heap_region, + + last_valid_region = last_open_heap_region, n_regions = last_valid_region + 1 // total number of regions }; @@ -101,19 +105,14 @@ public: _archive_loading_failed = true; } - static bool map_shared_spaces(FileMapInfo* mapinfo) NOT_CDS_RETURN_(false); static void initialize_shared_spaces() NOT_CDS_RETURN; // Return true if given address is in the shared metaspace regions (i.e., excluding any - // mapped shared heap regions.) + // mapped heap regions.) 
static bool is_in_shared_metaspace(const void* p) { return MetaspaceObj::is_shared((const MetaspaceObj*)p); } - static address shared_metaspace_top() { - return (address)MetaspaceObj::shared_metaspace_top(); - } - static void set_shared_metaspace_range(void* base, void *static_top, void* top) NOT_CDS_RETURN; // Return true if given address is in the shared region corresponding to the idx @@ -134,9 +133,9 @@ public: } static bool try_link_class(JavaThread* current, InstanceKlass* ik); - static void link_and_cleanup_shared_classes(TRAPS) NOT_CDS_RETURN; + static void link_shared_classes(TRAPS) NOT_CDS_RETURN; static bool link_class_for_cds(InstanceKlass* ik, TRAPS) NOT_CDS_RETURN_(false); - static bool linking_required(InstanceKlass* ik) NOT_CDS_RETURN_(false); + static bool may_be_eagerly_linked(InstanceKlass* ik) NOT_CDS_RETURN_(false); #if INCLUDE_CDS // Alignment for the 2 core CDS regions (RW/RO) only. diff --git a/src/hotspot/share/cds/runTimeClassInfo.cpp b/src/hotspot/share/cds/runTimeClassInfo.cpp new file mode 100644 index 0000000000000000000000000000000000000000..52fa94c119d9c12a05ccd9f02e9b577f6845d157 --- /dev/null +++ b/src/hotspot/share/cds/runTimeClassInfo.cpp @@ -0,0 +1,76 @@ + +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "cds/archiveBuilder.hpp" +#include "cds/runTimeClassInfo.hpp" + +void RunTimeClassInfo::init(DumpTimeClassInfo& info) { + ArchiveBuilder* builder = ArchiveBuilder::current(); + assert(builder->is_in_buffer_space(info._klass), "must be"); + _klass = info._klass; + if (!SystemDictionaryShared::is_builtin(_klass)) { + CrcInfo* c = crc(); + c->_clsfile_size = info._clsfile_size; + c->_clsfile_crc32 = info._clsfile_crc32; + } + _num_verifier_constraints = info.num_verifier_constraints(); + _num_loader_constraints = info.num_loader_constraints(); + int i; + if (_num_verifier_constraints > 0) { + RTVerifierConstraint* vf_constraints = verifier_constraints(); + char* flags = verifier_constraint_flags(); + for (i = 0; i < _num_verifier_constraints; i++) { + vf_constraints[i]._name = builder->any_to_offset_u4(info._verifier_constraints->at(i)._name); + vf_constraints[i]._from_name = builder->any_to_offset_u4(info._verifier_constraints->at(i)._from_name); + } + for (i = 0; i < _num_verifier_constraints; i++) { + flags[i] = info._verifier_constraint_flags->at(i); + } + } + + if (_num_loader_constraints > 0) { + RTLoaderConstraint* ld_constraints = loader_constraints(); + for (i = 0; i < _num_loader_constraints; i++) { + ld_constraints[i]._name = builder->any_to_offset_u4(info._loader_constraints->at(i)._name); + ld_constraints[i]._loader_type1 = info._loader_constraints->at(i)._loader_type1; + ld_constraints[i]._loader_type2 = info._loader_constraints->at(i)._loader_type2; + } + } + + if (_klass->is_hidden()) { + InstanceKlass* n_h = info.nest_host(); + set_nest_host(n_h); + } + ArchivePtrMarker::mark_pointer(&_klass); +} + +size_t RunTimeClassInfo::crc_size(InstanceKlass* klass) { + if (!SystemDictionaryShared::is_builtin(klass)) { + return sizeof(CrcInfo); + } else { + return 0; + } +} diff --git a/src/hotspot/share/cds/runTimeClassInfo.hpp b/src/hotspot/share/cds/runTimeClassInfo.hpp new file mode 100644 index 0000000000000000000000000000000000000000..adc828c4f88c28a9b3962913ef6e6d4d91a85713 --- /dev/null +++ b/src/hotspot/share/cds/runTimeClassInfo.hpp @@ -0,0 +1,226 @@ + +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARED_CDS_SHAREDCLASSINFO_HPP +#define SHARED_CDS_SHAREDCLASSINFO_HPP +#include "classfile/compactHashtable.hpp" +#include "classfile/javaClasses.hpp" +#include "classfile/systemDictionaryShared.hpp" +#include "cds/archiveBuilder.hpp" +#include "cds/archiveUtils.hpp" +#include "cds/metaspaceShared.hpp" +#include "memory/metaspaceClosure.hpp" +#include "oops/instanceKlass.hpp" +#include "prims/jvmtiExport.hpp" +#include "utilities/growableArray.hpp" +#include "utilities/resourceHash.hpp" + +class Method; +class Symbol; + +class RunTimeClassInfo { +public: + struct CrcInfo { + int _clsfile_size; + int _clsfile_crc32; + }; + + // This is different than DumpTimeClassInfo::DTVerifierConstraint. We use + // u4 instead of Symbol* to save space on 64-bit CPU. + struct RTVerifierConstraint { + u4 _name; + u4 _from_name; + Symbol* name() { return (Symbol*)(SharedBaseAddress + _name);} + Symbol* from_name() { return (Symbol*)(SharedBaseAddress + _from_name); } + }; + + struct RTLoaderConstraint { + u4 _name; + char _loader_type1; + char _loader_type2; + Symbol* constraint_name() { + return (Symbol*)(SharedBaseAddress + _name); + } + }; + + InstanceKlass* _klass; + int _num_verifier_constraints; + int _num_loader_constraints; + + // optional CrcInfo _crc; (only for UNREGISTERED classes) + // optional InstanceKlass* _nest_host + // optional RTLoaderConstraint _loader_constraint_types[_num_loader_constraints] + // optional RTVerifierConstraint _verifier_constraints[_num_verifier_constraints] + // optional char _verifier_constraint_flags[_num_verifier_constraints] + +private: + static size_t header_size_size() { + return sizeof(RunTimeClassInfo); + } + static size_t verifier_constraints_size(int num_verifier_constraints) { + return sizeof(RTVerifierConstraint) * num_verifier_constraints; + } + static size_t verifier_constraint_flags_size(int num_verifier_constraints) { + return sizeof(char) * num_verifier_constraints; + } + static size_t loader_constraints_size(int num_loader_constraints) { + return sizeof(RTLoaderConstraint) * num_loader_constraints; + } + static size_t nest_host_size(InstanceKlass* klass) { + if (klass->is_hidden()) { + return sizeof(InstanceKlass*); + } else { + return 0; + } + } + + static size_t crc_size(InstanceKlass* klass); +public: + static size_t byte_size(InstanceKlass* klass, int num_verifier_constraints, int num_loader_constraints) { + return header_size_size() + + crc_size(klass) + + nest_host_size(klass) + + loader_constraints_size(num_loader_constraints) + + verifier_constraints_size(num_verifier_constraints) + + verifier_constraint_flags_size(num_verifier_constraints); + } + +private: + size_t crc_offset() const { + return header_size_size(); + } + + size_t nest_host_offset() const { + return crc_offset() + crc_size(_klass); + } + + size_t loader_constraints_offset() const { + return nest_host_offset() + nest_host_size(_klass); + } + size_t verifier_constraints_offset() const { + return loader_constraints_offset() + loader_constraints_size(_num_loader_constraints); + } + size_t verifier_constraint_flags_offset() const { + return verifier_constraints_offset() + verifier_constraints_size(_num_verifier_constraints); + } + + void check_verifier_constraint_offset(int i) const { + assert(0 <= i && i < _num_verifier_constraints, "sanity"); + } + + void check_loader_constraint_offset(int i) const { + assert(0 <= i && i < _num_loader_constraints, "sanity"); + } + +public: + CrcInfo* crc() const { + assert(crc_size(_klass) > 0, "must be"); + return 
(CrcInfo*)(address(this) + crc_offset()); + } + RTVerifierConstraint* verifier_constraints() { + assert(_num_verifier_constraints > 0, "sanity"); + return (RTVerifierConstraint*)(address(this) + verifier_constraints_offset()); + } + RTVerifierConstraint* verifier_constraint_at(int i) { + check_verifier_constraint_offset(i); + return verifier_constraints() + i; + } + + char* verifier_constraint_flags() { + assert(_num_verifier_constraints > 0, "sanity"); + return (char*)(address(this) + verifier_constraint_flags_offset()); + } + + InstanceKlass** nest_host_addr() { + assert(_klass->is_hidden(), "sanity"); + return (InstanceKlass**)(address(this) + nest_host_offset()); + } + InstanceKlass* nest_host() { + return *nest_host_addr(); + } + void set_nest_host(InstanceKlass* k) { + *nest_host_addr() = k; + ArchivePtrMarker::mark_pointer((address*)nest_host_addr()); + } + + RTLoaderConstraint* loader_constraints() { + assert(_num_loader_constraints > 0, "sanity"); + return (RTLoaderConstraint*)(address(this) + loader_constraints_offset()); + } + + RTLoaderConstraint* loader_constraint_at(int i) { + check_loader_constraint_offset(i); + return loader_constraints() + i; + } + + void init(DumpTimeClassInfo& info); + + bool matches(int clsfile_size, int clsfile_crc32) const { + return crc()->_clsfile_size == clsfile_size && + crc()->_clsfile_crc32 == clsfile_crc32; + } + + char verifier_constraint_flag(int i) { + check_verifier_constraint_offset(i); + return verifier_constraint_flags()[i]; + } + +private: + // ArchiveBuilder::make_shallow_copy() has reserved a pointer immediately + // before archived InstanceKlasses. We can use this slot to do a quick + // lookup of InstanceKlass* -> RunTimeClassInfo* without + // building a new hashtable. + // + // info_pointer_addr(klass) --> 0x0100 RunTimeClassInfo* + // InstanceKlass* klass --> 0x0108 + // 0x0110 fields from Klass ... + static RunTimeClassInfo** info_pointer_addr(InstanceKlass* klass) { + return &((RunTimeClassInfo**)klass)[-1]; + } + +public: + static RunTimeClassInfo* get_for(InstanceKlass* klass) { + assert(klass->is_shared(), "don't call for non-shared class"); + return *info_pointer_addr(klass); + } + static void set_for(InstanceKlass* klass, RunTimeClassInfo* record) { + assert(ArchiveBuilder::current()->is_in_buffer_space(klass), "must be"); + assert(ArchiveBuilder::current()->is_in_buffer_space(record), "must be"); + *info_pointer_addr(klass) = record; + ArchivePtrMarker::mark_pointer(info_pointer_addr(klass)); + } + + // Used by RunTimeSharedDictionary to implement OffsetCompactHashtable::EQUALS + static inline bool EQUALS( + const RunTimeClassInfo* value, Symbol* key, int len_unused) { + return (value->_klass->name() == key); + } +}; + +class RunTimeSharedDictionary : public OffsetCompactHashtable< + Symbol*, + const RunTimeClassInfo*, + RunTimeClassInfo::EQUALS> {}; +#endif // SHARED_CDS_SHAREDCLASSINFO_HPP diff --git a/src/hotspot/share/cds/unregisteredClasses.cpp b/src/hotspot/share/cds/unregisteredClasses.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c0b20e9a9664b265e5274b7243823e4477b16002 --- /dev/null +++ b/src/hotspot/share/cds/unregisteredClasses.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "cds/unregisteredClasses.hpp" +#include "classfile/classFileStream.hpp" +#include "classfile/classLoader.inline.hpp" +#include "classfile/classLoaderExt.hpp" +#include "classfile/javaClasses.inline.hpp" +#include "classfile/symbolTable.hpp" +#include "classfile/systemDictionaryShared.hpp" +#include "classfile/vmSymbols.hpp" +#include "memory/oopFactory.hpp" +#include "memory/resourceArea.hpp" +#include "oops/instanceKlass.hpp" +#include "oops/oopHandle.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "services/threadService.hpp" + +// Load the class of the given name from the location given by path. The path is specified by +// the "source:" in the class list file (see classListParser.cpp), and can be a directory or +// a JAR file. +InstanceKlass* UnregisteredClasses::load_class(Symbol* name, const char* path, TRAPS) { + assert(name != NULL, "invariant"); + assert(DumpSharedSpaces, "this function is only used with -Xshare:dump"); + + { + PerfClassTraceTime vmtimer(ClassLoader::perf_sys_class_lookup_time(), + THREAD->get_thread_stat()->perf_timers_addr(), + PerfClassTraceTime::CLASS_LOAD); + } + + Symbol* path_symbol = SymbolTable::new_symbol(path); + Handle url_classloader = get_url_classloader(path_symbol, CHECK_NULL); + Handle ext_class_name = java_lang_String::externalize_classname(name, CHECK_NULL); + + JavaValue result(T_OBJECT); + JavaCallArguments args(2); + args.set_receiver(url_classloader); + args.push_oop(ext_class_name); + args.push_int(JNI_FALSE); + JavaCalls::call_virtual(&result, + vmClasses::URLClassLoader_klass(), + vmSymbols::loadClass_name(), + vmSymbols::string_boolean_class_signature(), + &args, + CHECK_NULL); + assert(result.get_type() == T_OBJECT, "just checking"); + oop obj = result.get_oop(); + return InstanceKlass::cast(java_lang_Class::as_Klass(obj)); +} + +class URLClassLoaderTable : public ResourceHashtable< + Symbol*, OopHandle, + 137, // prime number + ResourceObj::C_HEAP> {}; + +static URLClassLoaderTable* _url_classloader_table = NULL; + +Handle UnregisteredClasses::create_url_classloader(Symbol* path, TRAPS) { + ResourceMark rm(THREAD); + JavaValue result(T_OBJECT); + Handle path_string = java_lang_String::create_from_str(path->as_C_string(), CHECK_NH); + JavaCalls::call_static(&result, + vmClasses::jdk_internal_loader_ClassLoaders_klass(), + vmSymbols::toFileURL_name(), + vmSymbols::toFileURL_signature(), + path_string, CHECK_NH); + assert(result.get_type() == T_OBJECT, "just checking"); + oop url_h = result.get_oop(); + objArrayHandle urls = oopFactory::new_objArray_handle(vmClasses::URL_klass(), 1, CHECK_NH); + urls->obj_at_put(0, url_h); + + Handle url_classloader = JavaCalls::construct_new_instance( + vmClasses::URLClassLoader_klass(), + 
vmSymbols::url_array_classloader_void_signature(), + urls, Handle(), CHECK_NH); + return url_classloader; +} + +Handle UnregisteredClasses::get_url_classloader(Symbol* path, TRAPS) { + if (_url_classloader_table == NULL) { + _url_classloader_table = new (ResourceObj::C_HEAP, mtClass)URLClassLoaderTable(); + } + OopHandle* url_classloader_ptr = _url_classloader_table->get(path); + if (url_classloader_ptr != NULL) { + return Handle(THREAD, (*url_classloader_ptr).resolve()); + } else { + Handle url_classloader = create_url_classloader(path, CHECK_NH); + _url_classloader_table->put(path, OopHandle(Universe::vm_global(), url_classloader())); + path->increment_refcount(); + return url_classloader; + } +} diff --git a/src/hotspot/share/cds/unregisteredClasses.hpp b/src/hotspot/share/cds/unregisteredClasses.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a9f7dceaead62b2e1225e1a5a7806f2ba6b5348e --- /dev/null +++ b/src/hotspot/share/cds/unregisteredClasses.hpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#ifndef SHARE_CDS_UNREGISTEREDCLASSES_HPP
+#define SHARE_CDS_UNREGISTEREDCLASSES_HPP
+
+#include "runtime/handles.hpp"
+
+class UnregisteredClasses: AllStatic {
+public:
+  static InstanceKlass* load_class(Symbol* h_name, const char* path, TRAPS);
+
+private:
+  static Handle create_url_classloader(Symbol* path, TRAPS);
+  static Handle get_url_classloader(Symbol* path, TRAPS);
+};
+
+#endif // SHARE_CDS_UNREGISTEREDCLASSES_HPP
diff --git a/src/hotspot/share/ci/ciEnv.cpp b/src/hotspot/share/ci/ciEnv.cpp
index e8030859be31037915af259ff575f2f6fde59161..1d58cdb95250dbf7808d6ee6abfda8ebfbc2561c 100644
--- a/src/hotspot/share/ci/ciEnv.cpp
+++ b/src/hotspot/share/ci/ciEnv.cpp
@@ -35,6 +35,7 @@
 #include "ci/ciSymbols.hpp"
 #include "ci/ciUtilities.inline.hpp"
 #include "classfile/javaClasses.hpp"
+#include "classfile/javaClasses.inline.hpp"
 #include "classfile/symbolTable.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmClasses.hpp"
@@ -48,6 +49,7 @@
 #include "compiler/compileTask.hpp"
 #include "compiler/disassembler.hpp"
 #include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/bytecodeStream.hpp"
 #include "interpreter/linkResolver.hpp"
 #include "jfr/jfrEvents.hpp"
 #include "logging/log.hpp"
@@ -64,6 +66,7 @@
 #include "oops/oop.inline.hpp"
 #include "prims/jvmtiExport.hpp"
 #include "prims/methodHandles.hpp"
+#include "runtime/fieldDescriptor.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/init.hpp"
 #include "runtime/reflection.hpp"
@@ -166,8 +169,73 @@ ciEnv::ciEnv(CompileTask* task)
   _jvmti_can_access_local_variables = false;
   _jvmti_can_post_on_exceptions = false;
   _jvmti_can_pop_frame = false;
+
+  _dyno_klasses = NULL;
+  _dyno_locs = NULL;
+  _dyno_name[0] = '\0';
 }
 
+// Record components of a location descriptor string. Components are appended by the constructor and
+// removed by the destructor, like a stack, so scope matters. These location descriptors are used to
+// locate dynamic classes, and terminate at a Method* or oop field associated with a dynamic/hidden class.
+//
+// Example use:
+//
+// {
+//   RecordLocation fp(this, "field1");
+//   // location: "field1"
+//   { RecordLocation fp(this, " field2"); // location: "field1 field2" }
+//   // location: "field1"
+//   { RecordLocation fp(this, " field3"); // location: "field1 field3" }
+//   // location: "field1"
+// }
+// // location: ""
+//
+// Examples of actual locations
+//   @bci compiler/ciReplay/CiReplayBase$TestMain test (I)V 1 <appendix> argL0 ;
+//   // resolve invokedynamic at bci 1 of TestMain.test, then read field "argL0" from appendix
+//   @bci compiler/ciReplay/CiReplayBase$TestMain main ([Ljava/lang/String;)V 0 <appendix> form vmentry <vmtarget> ;
+//   // resolve invokedynamic at bci 0 of TestMain.main, then read field "form.vmentry.method.vmtarget" from appendix
+//   @cpi compiler/ciReplay/CiReplayBase$TestMain 56 form vmentry <vmtarget> ;
+//   // resolve MethodHandle at cpi 56 of TestMain, then read field "vmentry.method.vmtarget" from resolved MethodHandle
+class RecordLocation {
+private:
+  char* end;
+
+  ATTRIBUTE_PRINTF(3, 4)
+  void push(ciEnv* ci, const char* fmt, ...)
+  {
+    va_list args;
+    va_start(args, fmt);
+    push_va(ci, fmt, args);
+    va_end(args);
+  }
+
+public:
+  ATTRIBUTE_PRINTF(3, 0)
+  void push_va(ciEnv* ci, const char* fmt, va_list args) {
+    char *e = ci->_dyno_name + strlen(ci->_dyno_name);
+    char *m = ci->_dyno_name + ARRAY_SIZE(ci->_dyno_name) - 1;
+    os::vsnprintf(e, m - e, fmt, args);
+    assert(strlen(ci->_dyno_name) < (ARRAY_SIZE(ci->_dyno_name) - 1), "overflow");
+  }
+
+  // append a new component
+  ATTRIBUTE_PRINTF(3, 4)
+  RecordLocation(ciEnv* ci, const char* fmt, ...) {
+    end = ci->_dyno_name + strlen(ci->_dyno_name);
+    va_list args;
+    va_start(args, fmt);
+    push(ci, " ");
+    push_va(ci, fmt, args);
+    va_end(args);
+  }
+
+  // reset to previous state
+  ~RecordLocation() {
+    *end = '\0';
+  }
+};
+
 ciEnv::ciEnv(Arena* arena) : _ciEnv_arena(mtCompiler) {
   ASSERT_IN_VM;
@@ -222,6 +290,9 @@ ciEnv::ciEnv(Arena* arena) : _ciEnv_arena(mtCompiler) {
   _jvmti_can_access_local_variables = false;
   _jvmti_can_post_on_exceptions = false;
   _jvmti_can_pop_frame = false;
+
+  _dyno_klasses = NULL;
+  _dyno_locs = NULL;
 }
 
 ciEnv::~ciEnv() {
@@ -565,8 +636,15 @@ ciKlass* ciEnv::get_klass_by_index_impl(const constantPoolHandle& cpool,
   }
 
   // It is known to be accessible, since it was found in the constant pool.
+  ciKlass* ciKlass = get_klass(klass);
   is_accessible = true;
-  return get_klass(klass);
+#ifndef PRODUCT
+  if (ReplayCompiles && ciKlass == _unloaded_ciinstance_klass) {
+    // Klass was unresolved at replay dump time and therefore not accessible.
+    is_accessible = false;
+  }
+#endif
+  return ciKlass;
 }
 
 // ------------------------------------------------------------------
@@ -1190,10 +1268,340 @@ ciInstance* ciEnv::unloaded_ciinstance() {
 }
 
 // ------------------------------------------------------------------
-// ciEnv::dump_replay_data*
+// Replay support
+
+
+// Lookup location descriptor for the class, if any.
+// Returns false if not found.
+bool ciEnv::dyno_loc(const InstanceKlass* ik, const char *&loc) const {
+  bool found = false;
+  int pos = _dyno_klasses->find_sorted<const InstanceKlass*, klass_compare>(ik, found);
+  if (!found) {
+    return false;
+  }
+  loc = _dyno_locs->at(pos);
+  return found;
+}
+
+// Associate the current location descriptor with the given class and record for later lookup.
+void ciEnv::set_dyno_loc(const InstanceKlass* ik) {
+  const char *loc = os::strdup(_dyno_name);
+  bool found = false;
+  int pos = _dyno_klasses->find_sorted<const InstanceKlass*, klass_compare>(ik, found);
+  if (found) {
+    _dyno_locs->at_put(pos, loc);
+  } else {
+    _dyno_klasses->insert_before(pos, ik);
+    _dyno_locs->insert_before(pos, loc);
+  }
+}
+
+// Associate the current location descriptor with the given class and record for later lookup.
+// If it turns out that there are multiple locations for the given class, that conflict should
+// be handled here. Currently we choose the first location found.
+void ciEnv::record_best_dyno_loc(const InstanceKlass* ik) {
+  if (!ik->is_hidden()) {
+    return;
+  }
+  const char *loc0;
+  if (dyno_loc(ik, loc0)) {
+    // TODO: found multiple references, see if we can improve
+    if (Verbose) {
+      tty->print_cr("existing call site @ %s for %s",
+                    loc0, ik->external_name());
+    }
+  } else {
+    set_dyno_loc(ik);
+  }
+}
 
-// Don't change thread state and acquire any locks.
-// Safe to call from VM error reporter.
+// Look up the location descriptor for the given class and print it to the output stream.
+bool ciEnv::print_dyno_loc(outputStream* out, const InstanceKlass* ik) const {
+  const char *loc;
+  if (dyno_loc(ik, loc)) {
+    out->print("%s", loc);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+// Look up the location descriptor for the given class and return it as a string.
+// Returns NULL if no location is found.
+const char *ciEnv::dyno_name(const InstanceKlass* ik) const {
+  if (ik->is_hidden()) {
+    stringStream ss;
+    if (print_dyno_loc(&ss, ik)) {
+      ss.print(" ;"); // add terminator
+      const char* call_site = ss.as_string();
+      return call_site;
+    }
+  }
+  return NULL;
+}
+
+// Look up the location descriptor for the given class and return it as a string.
+// Returns the class name as a fallback if no location is found.
+const char *ciEnv::replay_name(ciKlass* k) const {
+  if (k->is_instance_klass()) {
+    return replay_name(k->as_instance_klass()->get_instanceKlass());
+  }
+  return k->name()->as_quoted_ascii();
+}
+
+// Look up the location descriptor for the given class and return it as a string.
+// Returns the class name as a fallback if no location is found.
+const char *ciEnv::replay_name(const InstanceKlass* ik) const {
+  const char* name = dyno_name(ik);
+  if (name != NULL) {
+    return name;
+  }
+  return ik->name()->as_quoted_ascii();
+}
+
+// Process a java.lang.invoke.MemberName object and record any dynamic locations.
+void ciEnv::record_member(Thread* thread, oop member) {
+  assert(java_lang_invoke_MemberName::is_instance(member), "!");
+  // Check MemberName.clazz field
+  oop clazz = java_lang_invoke_MemberName::clazz(member);
+  if (clazz->klass()->is_instance_klass()) {
+    RecordLocation fp(this, "clazz");
+    InstanceKlass* ik = InstanceKlass::cast(clazz->klass());
+    record_best_dyno_loc(ik);
+  }
+  // Check MemberName.method.vmtarget field
+  Method* vmtarget = java_lang_invoke_MemberName::vmtarget(member);
+  if (vmtarget != NULL) {
+    RecordLocation fp2(this, "<vmtarget>");
+    InstanceKlass* ik = vmtarget->method_holder();
+    record_best_dyno_loc(ik);
+  }
+}
+
+// Read an object field. Lookup is done by name only.
+static inline oop obj_field(oop obj, const char* name) {
+  return ciReplay::obj_field(obj, name);
+}
+
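// ----------------------------------------------------------------------------------------
// [Editor's sketch -- not part of the patch] RecordLocation above is a scoped component
// stack over ciEnv::_dyno_name: the constructor appends " <component>" and the destructor
// truncates it back off. A minimal standalone illustration of the same pattern, assuming
// nothing from HotSpot (ScopedComponent and g_path are hypothetical names):

#include <cstdio>
#include <cstring>

static char g_path[1025];                 // stands in for ciEnv::_dyno_name

struct ScopedComponent {                  // stands in for RecordLocation
  char* _end;                             // truncation point restored on scope exit
  explicit ScopedComponent(const char* comp) {
    _end = g_path + strlen(g_path);
    snprintf(_end, sizeof(g_path) - (_end - g_path), " %s", comp);
  }
  ~ScopedComponent() { *_end = '\0'; }    // pop this component, like ~RecordLocation()
};

int main() {
  ScopedComponent a("form");
  {
    ScopedComponent b("vmentry");
    printf("[%s]\n", g_path);             // prints "[ form vmentry]"
  }
  printf("[%s]\n", g_path);               // prints "[ form]"
  return 0;
}
// ----------------------------------------------------------------------------------------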
+// Process a java.lang.invoke.LambdaForm object and record any dynamic locations.
+void ciEnv::record_lambdaform(Thread* thread, oop form) {
+  assert(java_lang_invoke_LambdaForm::is_instance(form), "!");
+
+  {
+    // Check LambdaForm.vmentry field
+    oop member = java_lang_invoke_LambdaForm::vmentry(form);
+    RecordLocation fp0(this, "vmentry");
+    record_member(thread, member);
+  }
+
+  // Check LambdaForm.names array
+  objArrayOop names = (objArrayOop)obj_field(form, "names");
+  if (names != NULL) {
+    RecordLocation lp0(this, "names");
+    int len = names->length();
+    for (int i = 0; i < len; ++i) {
+      oop name = names->obj_at(i);
+      RecordLocation lp1(this, "%d", i);
+      // Check LambdaForm.names[i].function field
+      RecordLocation lp2(this, "function");
+      oop function = obj_field(name, "function");
+      if (function != NULL) {
+        // Check LambdaForm.names[i].function.member field
+        oop member = obj_field(function, "member");
+        if (member != NULL) {
+          RecordLocation lp3(this, "member");
+          record_member(thread, member);
+        }
+        // Check LambdaForm.names[i].function.resolvedHandle field
+        oop mh = obj_field(function, "resolvedHandle");
+        if (mh != NULL) {
+          RecordLocation lp3(this, "resolvedHandle");
+          record_mh(thread, mh);
+        }
+        // Check LambdaForm.names[i].function.invoker field
+        oop invoker = obj_field(function, "invoker");
+        if (invoker != NULL) {
+          RecordLocation lp3(this, "invoker");
+          record_mh(thread, invoker);
+        }
+      }
+    }
+  }
+}
+
+// Process a java.lang.invoke.MethodHandle object and record any dynamic locations.
+void ciEnv::record_mh(Thread* thread, oop mh) {
+  {
+    // Check MethodHandle.form field
+    oop form = java_lang_invoke_MethodHandle::form(mh);
+    RecordLocation fp(this, "form");
+    record_lambdaform(thread, form);
+  }
+  // Check DirectMethodHandle.member field
+  if (java_lang_invoke_DirectMethodHandle::is_instance(mh)) {
+    oop member = java_lang_invoke_DirectMethodHandle::member(mh);
+    RecordLocation fp(this, "member");
+    record_member(thread, member);
+  } else {
+    // Check .argL0 field
+    // Probably BoundMethodHandle.Species_L, but we only care if the field exists
+    oop arg = obj_field(mh, "argL0");
+    if (arg != NULL) {
+      RecordLocation fp(this, "argL0");
+      if (arg->klass()->is_instance_klass()) {
+        InstanceKlass* ik2 = InstanceKlass::cast(arg->klass());
+        record_best_dyno_loc(ik2);
+      }
+    }
+  }
+}
+
+// Process an object found at an invokedynamic/invokehandle call site and record any dynamic locations.
+// Types currently supported are MethodHandle and CallSite.
+// The object is typically the "appendix" object, or Bootstrap Method (BSM) object.
+void ciEnv::record_call_site_obj(Thread* thread, const constantPoolHandle& pool, const Handle obj)
+{
+  if (obj.not_null()) {
+    if (java_lang_invoke_MethodHandle::is_instance(obj())) {
+      record_mh(thread, obj());
+    } else if (java_lang_invoke_ConstantCallSite::is_instance(obj())) {
+      oop target = java_lang_invoke_CallSite::target(obj());
+      if (target->klass()->is_instance_klass()) {
+        RecordLocation fp(this, "target");
+        InstanceKlass* ik = InstanceKlass::cast(target->klass());
+        record_best_dyno_loc(ik);
+      }
+    }
+  }
+}
+
+// Process an adapter Method* found at an invokedynamic/invokehandle call site and record any dynamic locations.
+void ciEnv::record_call_site_method(Thread* thread, const constantPoolHandle& pool, Method* adapter) {
+  InstanceKlass* holder = adapter->method_holder();
+  if (!holder->is_hidden()) {
+    return;
+  }
+  RecordLocation fp(this, "<adapter>");
+  record_best_dyno_loc(holder);
+}
+
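// ----------------------------------------------------------------------------------------
// [Editor's sketch -- not part of the patch] Taken together, the record_* methods above
// key each hidden class by a whitespace-separated descriptor such as
//   "@bci Foo bar ()V 1 <appendix> argL0 ;"
// i.e. a call-site anchor, a chain of steps (<appendix>, <adapter>, <bsm>, <vmtarget>,
// field names, array indices), and a ";" terminator, matching what parse_cp_ref() in
// ciReplay.cpp consumes. A rough emitter under those assumptions (emit_bci_loc is a
// hypothetical helper, not part of ciEnv):

#include <cstddef>
#include <cstdio>

static void emit_bci_loc(char* out, size_t n, const char* klass, const char* method,
                         const char* sig, int bci, const char* steps) {
  // emit_bci_loc(buf, sizeof(buf), "Foo", "bar", "()V", 1, "<appendix> argL0")
  // writes "@bci Foo bar ()V 1 <appendix> argL0 ;"
  snprintf(out, n, "@bci %s %s %s %d %s ;", klass, method, sig, bci, steps);
}
// ----------------------------------------------------------------------------------------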
+// Process an invokedynamic call site and record any dynamic locations.
+void ciEnv::process_invokedynamic(const constantPoolHandle &cp, int indy_index, JavaThread* thread) {
+  ConstantPoolCacheEntry* cp_cache_entry = cp->invokedynamic_cp_cache_entry_at(indy_index);
+  if (cp_cache_entry->is_resolved(Bytecodes::_invokedynamic)) {
+    // process the adapter
+    Method* adapter = cp_cache_entry->f1_as_method();
+    record_call_site_method(thread, cp, adapter);
+    // process the appendix
+    Handle appendix(thread, cp_cache_entry->appendix_if_resolved(cp));
+    {
+      RecordLocation fp(this, "<appendix>");
+      record_call_site_obj(thread, cp, appendix);
+    }
+    // process the BSM
+    int pool_index = cp_cache_entry->constant_pool_index();
+    BootstrapInfo bootstrap_specifier(cp, pool_index, indy_index);
+    oop bsm_oop = cp->resolve_possibly_cached_constant_at(bootstrap_specifier.bsm_index(), thread);
+    Handle bsm(thread, bsm_oop);
+    {
+      RecordLocation fp(this, "<bsm>");
+      record_call_site_obj(thread, cp, bsm);
+    }
+  }
+}
+
+// Process an invokehandle call site and record any dynamic locations.
+void ciEnv::process_invokehandle(const constantPoolHandle &cp, int index, JavaThread* thread) {
+  const int holder_index = cp->klass_ref_index_at(index);
+  if (!cp->tag_at(holder_index).is_klass()) {
+    return; // not resolved
+  }
+  Klass* holder = ConstantPool::klass_at_if_loaded(cp, holder_index);
+  Symbol* name = cp->name_ref_at(index);
+  if (MethodHandles::is_signature_polymorphic_name(holder, name)) {
+    ConstantPoolCacheEntry* cp_cache_entry = cp->cache()->entry_at(cp->decode_cpcache_index(index));
+    if (cp_cache_entry->is_resolved(Bytecodes::_invokehandle)) {
+      // process the adapter
+      Method* adapter = cp_cache_entry->f1_as_method();
+      Handle appendix(thread, cp_cache_entry->appendix_if_resolved(cp));
+      record_call_site_method(thread, cp, adapter);
+      // process the appendix
+      {
+        RecordLocation fp(this, "<appendix>");
+        record_call_site_obj(thread, cp, appendix);
+      }
+    }
+  }
+}
+
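// ----------------------------------------------------------------------------------------
// [Editor's sketch -- not part of the patch] ciEnv keeps _dyno_klasses/_dyno_locs as two
// parallel arrays sorted by klass pointer, so dyno_loc() can binary-search and
// set_dyno_loc() can insert at the probe point (GrowableArray::find_sorted/insert_before
// above). The same shape with standard containers, assuming nothing from HotSpot
// (DynoMap is a hypothetical name):

#include <algorithm>
#include <cstdint>
#include <vector>

struct DynoMap {
  std::vector<uintptr_t>   keys;   // stands in for _dyno_klasses (kept sorted)
  std::vector<const char*> locs;   // stands in for _dyno_locs (parallel)

  const char* find(uintptr_t k) const {
    auto it = std::lower_bound(keys.begin(), keys.end(), k);
    return (it != keys.end() && *it == k) ? locs[it - keys.begin()] : nullptr;
  }
  void insert_if_absent(uintptr_t k, const char* loc) {
    auto it = std::lower_bound(keys.begin(), keys.end(), k);
    if (it != keys.end() && *it == k) return;  // first location wins, as in record_best_dyno_loc
    locs.insert(locs.begin() + (it - keys.begin()), loc);
    keys.insert(it, k);
  }
};
// ----------------------------------------------------------------------------------------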
+// Search the class hierarchy for dynamic classes reachable through dynamic call sites or
+// constant pool entries and record for future lookup.
+void ciEnv::find_dynamic_call_sites() {
+  _dyno_klasses = new (arena()) GrowableArray<const InstanceKlass*>(arena(), 100, 0, NULL);
+  _dyno_locs = new (arena()) GrowableArray<const char*>(arena(), 100, 0, NULL);
+
+  // Iterate over the class hierarchy
+  for (ClassHierarchyIterator iter(vmClasses::Object_klass()); !iter.done(); iter.next()) {
+    Klass* sub = iter.klass();
+    if (sub->is_instance_klass()) {
+      InstanceKlass *isub = InstanceKlass::cast(sub);
+      InstanceKlass* ik = isub;
+      if (!ik->is_linked()) {
+        continue;
+      }
+      if (ik->is_hidden()) {
+        continue;
+      }
+      JavaThread* thread = JavaThread::current();
+      const constantPoolHandle pool(thread, ik->constants());
+
+      // Look for invokedynamic/invokehandle call sites
+      for (int i = 0; i < ik->methods()->length(); ++i) {
+        Method* m = ik->methods()->at(i);
+
+        BytecodeStream bcs(methodHandle(thread, m));
+        while (!bcs.is_last_bytecode()) {
+          Bytecodes::Code opcode = bcs.next();
+          opcode = bcs.raw_code();
+          switch (opcode) {
+          case Bytecodes::_invokedynamic:
+          case Bytecodes::_invokehandle: {
+            RecordLocation fp(this, "@bci %s %s %s %d",
+                              ik->name()->as_quoted_ascii(),
+                              m->name()->as_quoted_ascii(), m->signature()->as_quoted_ascii(),
+                              bcs.bci());
+            if (opcode == Bytecodes::_invokedynamic) {
+              int index = bcs.get_index_u4();
+              process_invokedynamic(pool, index, thread);
+            } else {
+              assert(opcode == Bytecodes::_invokehandle, "new switch label added?");
+              int cp_cache_index = bcs.get_index_u2_cpcache();
+              process_invokehandle(pool, cp_cache_index, thread);
+            }
+            break;
+          }
+          default:
+            break;
+          }
+        }
+      }
+
+      // Look for MethodHandle constant pool entries
+      RecordLocation fp(this, "@cpi %s", ik->name()->as_quoted_ascii());
+      int len = pool->length();
+      for (int i = 0; i < len; ++i) {
+        if (pool->tag_at(i).is_method_handle()) {
+          bool found_it;
+          oop mh = pool->find_cached_constant_at(i, found_it, thread);
+          if (mh != NULL) {
+            RecordLocation fp(this, "%d", i);
+            record_mh(thread, mh);
+          }
+        }
+      }
+    }
+  }
+}
 
 void ciEnv::dump_compile_data(outputStream* out) {
   CompileTask* task = this->task();
@@ -1201,11 +1609,9 @@ void ciEnv::dump_compile_data(outputStream* out) {
   Method* method = task->method();
   int entry_bci = task->osr_bci();
   int comp_level = task->comp_level();
-  out->print("compile %s %s %s %d %d",
-             method->klass_name()->as_quoted_ascii(),
-             method->name()->as_quoted_ascii(),
-             method->signature()->as_quoted_ascii(),
-             entry_bci, comp_level);
+  out->print("compile ");
+  get_method(method)->dump_name_as_ascii(out);
+  out->print(" %d %d", entry_bci, comp_level);
   if (compiler_data() != NULL) {
     if (is_c2_compile(comp_level)) {
 #ifdef COMPILER2
@@ -1223,14 +1629,21 @@ void ciEnv::dump_compile_data(outputStream* out) {
     }
   }
 }
 
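// ----------------------------------------------------------------------------------------
// [Editor's note -- not part of the patch] With this change, dump_compile_data() above
// delegates the "<klass> <method> <signature>" portion of the line to
// ciMethod::dump_name_as_ascii(), so hidden-class holders are printed via their location
// descriptors instead of their unstable generated names. The emitted replay line keeps the
// shape
//   compile <klass> <method> <signature> <entry_bci> <comp_level> ...
// where <entry_bci> is -1 (InvocationEntryBci) for a standard, non-OSR compilation.
// ----------------------------------------------------------------------------------------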
-void ciEnv::dump_replay_data_unsafe(outputStream* out) {
+// Called from VM error reporter, so be careful.
+// Don't safepoint or acquire any locks.
+//
+void ciEnv::dump_replay_data_helper(outputStream* out) {
+  NoSafepointVerifier no_safepoint;
   ResourceMark rm;
+
 #if INCLUDE_JVMTI
   out->print_cr("JvmtiExport can_access_local_variables %d",     _jvmti_can_access_local_variables);
   out->print_cr("JvmtiExport can_hotswap_or_post_breakpoint %d", _jvmti_can_hotswap_or_post_breakpoint);
   out->print_cr("JvmtiExport can_post_on_exceptions %d",         _jvmti_can_post_on_exceptions);
 #endif // INCLUDE_JVMTI
+
+  find_dynamic_call_sites();
+
   GrowableArray<ciMetadata*>* objects = _factory->get_ci_metadata();
   out->print_cr("# %d ciObject found", objects->length());
   for (int i = 0; i < objects->length(); i++) {
@@ -1240,10 +1653,19 @@ void ciEnv::dump_replay_data_unsafe(outputStream* out) {
   out->flush();
 }
 
+// Called from VM error reporter, so be careful.
+// Don't safepoint or acquire any locks.
+//
+void ciEnv::dump_replay_data_unsafe(outputStream* out) {
+  GUARDED_VM_ENTRY(
+    dump_replay_data_helper(out);
+  )
+}
+
 void ciEnv::dump_replay_data(outputStream* out) {
   GUARDED_VM_ENTRY(
     MutexLocker ml(Compile_lock);
-    dump_replay_data_unsafe(out);
+    dump_replay_data_helper(out);
   )
 }
 
diff --git a/src/hotspot/share/ci/ciEnv.hpp b/src/hotspot/share/ci/ciEnv.hpp
index 3afe66ce1a6e648f4d354a7fbf9b1028fd0583d9..32dc5f51eaf9caaaad4c842e6fe441c52f81a841 100644
--- a/src/hotspot/share/ci/ciEnv.hpp
+++ b/src/hotspot/share/ci/ciEnv.hpp
@@ -27,6 +27,7 @@
 
 #include "ci/ciClassList.hpp"
 #include "ci/ciObjectFactory.hpp"
+#include "ci/ciReplay.hpp"
 #include "classfile/vmClassMacros.hpp"
 #include "code/debugInfoRec.hpp"
 #include "code/dependencies.hpp"
@@ -44,9 +45,9 @@ class OopMapSet;
 // to the VM.
 class ciEnv : StackObj {
   CI_PACKAGE_ACCESS_TO
-
   friend class CompileBroker;
   friend class Dependencies;  // for get_object, during logging
+  friend class RecordLocation;
   friend class PrepareExtraDataClosure;
 
 private:
@@ -187,6 +188,15 @@ private:
     if (o == NULL) {
       return NULL;
     } else {
+#ifndef PRODUCT
+      if (ReplayCompiles && o->is_klass()) {
+        Klass* k = (Klass*)o;
+        if (k->is_instance_klass() && ciReplay::is_klass_unresolved((InstanceKlass*)k)) {
+          // Klass was unresolved at replay dump time. Simulate this case.
+          return ciEnv::_unloaded_ciinstance_klass;
+        }
+      }
+#endif
       return _factory->get_metadata(o);
     }
   }
@@ -460,12 +470,49 @@ public:
   // RedefineClasses support
   void metadata_do(MetadataClosure* f) { _factory->metadata_do(f); }
 
+  // Replay support
+private:
+  static int klass_compare(const InstanceKlass* const &ik1, const InstanceKlass* const &ik2) {
+    if (ik1 > ik2) {
+      return 1;
+    } else if (ik1 < ik2) {
+      return -1;
+    } else {
+      return 0;
+    }
+  }
+  bool dyno_loc(const InstanceKlass* ik, const char *&loc) const;
+  void set_dyno_loc(const InstanceKlass* ik);
+  void record_best_dyno_loc(const InstanceKlass* ik);
+  bool print_dyno_loc(outputStream* out, const InstanceKlass* ik) const;
+
+  GrowableArray<const InstanceKlass*>* _dyno_klasses;
+  GrowableArray<const char*>* _dyno_locs;
+
+#define MAX_DYNO_NAME_LENGTH 1024
+  char _dyno_name[MAX_DYNO_NAME_LENGTH+1];
+
+public:
   // Dump the compilation replay data for the ciEnv to the stream.
void dump_replay_data(int compile_id); void dump_inline_data(int compile_id); void dump_replay_data(outputStream* out); void dump_replay_data_unsafe(outputStream* out); + void dump_replay_data_helper(outputStream* out); void dump_compile_data(outputStream* out); + + const char *dyno_name(const InstanceKlass* ik) const; + const char *replay_name(const InstanceKlass* ik) const; + const char *replay_name(ciKlass* i) const; + + void record_lambdaform(Thread* thread, oop obj); + void record_member(Thread* thread, oop obj); + void record_mh(Thread* thread, oop obj); + void record_call_site_obj(Thread* thread, const constantPoolHandle& pool, const Handle appendix); + void record_call_site_method(Thread* thread, const constantPoolHandle& pool, Method* adapter); + void process_invokedynamic(const constantPoolHandle &cp, int index, JavaThread* thread); + void process_invokehandle(const constantPoolHandle &cp, int index, JavaThread* thread); + void find_dynamic_call_sites(); }; #endif // SHARE_CI_CIENV_HPP diff --git a/src/hotspot/share/ci/ciInstanceKlass.cpp b/src/hotspot/share/ci/ciInstanceKlass.cpp index a9fa3855607f9553d22d6f2d658479e71e61d5ea..8557410f956a6a14c9eb6def1db7df5eab800f3b 100644 --- a/src/hotspot/share/ci/ciInstanceKlass.cpp +++ b/src/hotspot/share/ci/ciInstanceKlass.cpp @@ -205,12 +205,12 @@ ciConstantPoolCache* ciInstanceKlass::field_cache() { // ciInstanceKlass* ciInstanceKlass::get_canonical_holder(int offset) { #ifdef ASSERT - if (!(offset >= 0 && offset < layout_helper())) { + if (!(offset >= 0 && offset < layout_helper_size_in_bytes())) { tty->print("*** get_canonical_holder(%d) on ", offset); this->print(); tty->print_cr(" ***"); }; - assert(offset >= 0 && offset < layout_helper(), "offset must be tame"); + assert(offset >= 0 && offset < layout_helper_size_in_bytes(), "offset must be tame"); #endif if (offset < instanceOopDesc::base_offset_in_bytes()) { @@ -227,7 +227,9 @@ ciInstanceKlass* ciInstanceKlass::get_canonical_holder(int offset) { for (;;) { assert(self->is_loaded(), "must be loaded to have size"); ciInstanceKlass* super = self->super(); - if (super == NULL || super->nof_nonstatic_fields() == 0) { + if (super == NULL || + super->nof_nonstatic_fields() == 0 || + super->layout_helper_size_in_bytes() <= offset) { return self; } else { self = super; // return super->get_canonical_holder(offset) @@ -731,6 +733,9 @@ class StaticFinalFieldPrinter : public FieldClosure { } }; +const char *ciInstanceKlass::replay_name() const { + return CURRENT_ENV->replay_name(get_instanceKlass()); +} void ciInstanceKlass::dump_replay_data(outputStream* out) { ResourceMark rm; @@ -741,8 +746,18 @@ void ciInstanceKlass::dump_replay_data(outputStream* out) { // Try to record related loaded classes Klass* sub = ik->subklass(); while (sub != NULL) { - if (sub->is_instance_klass() && !sub->is_hidden()) { - out->print_cr("instanceKlass %s", sub->name()->as_quoted_ascii()); + if (sub->is_instance_klass()) { + InstanceKlass *isub = InstanceKlass::cast(sub); + if (isub->is_hidden()) { + const char *name = CURRENT_ENV->dyno_name(isub); + if (name != NULL) { + out->print_cr("instanceKlass %s # %s", name, sub->name()->as_quoted_ascii()); + } else { + out->print_cr("# instanceKlass %s", sub->name()->as_quoted_ascii()); + } + } else { + out->print_cr("instanceKlass %s", sub->name()->as_quoted_ascii()); + } } sub = sub->next_sibling(); } @@ -751,7 +766,8 @@ void ciInstanceKlass::dump_replay_data(outputStream* out) { // tags will be validated for things which shouldn't change and // classes will be 
resolved if the tags indicate that they were // resolved at compile time. - out->print("ciInstanceKlass %s %d %d %d", ik->name()->as_quoted_ascii(), + const char *name = replay_name(); + out->print("ciInstanceKlass %s %d %d %d", name, is_linked(), is_initialized(), cp->length()); for (int index = 1; index < cp->length(); index++) { out->print(" %d", cp->tags()->at(index)); @@ -760,7 +776,7 @@ void ciInstanceKlass::dump_replay_data(outputStream* out) { if (is_initialized()) { // Dump out the static final fields in case the compilation relies // on their value for correct replay. - StaticFinalFieldPrinter sffp(out, ik->name()->as_quoted_ascii()); + StaticFinalFieldPrinter sffp(out, name); ik->do_local_static_fields(&sffp); } } diff --git a/src/hotspot/share/ci/ciInstanceKlass.hpp b/src/hotspot/share/ci/ciInstanceKlass.hpp index 1e4a0a9ae6c38274cad0be2597b46d4ec07940f0..d77d9c4c2773f53afe86f98a4f61de205edf87dd 100644 --- a/src/hotspot/share/ci/ciInstanceKlass.hpp +++ b/src/hotspot/share/ci/ciInstanceKlass.hpp @@ -43,6 +43,7 @@ class ciInstanceKlass : public ciKlass { friend class ciExceptionHandler; friend class ciMethod; friend class ciField; + friend class ciReplay; private: enum SubklassValue { subklass_unknown, subklass_false, subklass_true }; @@ -165,6 +166,9 @@ public: return compute_shared_has_subklass(); } + jint layout_helper_size_in_bytes() { + return Klass::layout_helper_size_in_bytes(layout_helper()); + } jint size_helper() { return (Klass::layout_helper_size_in_bytes(layout_helper()) >> LogHeapWordSize); @@ -293,9 +297,14 @@ public: return !is_interface() && !is_abstract(); } + // Replay support + // Dump the current state of this klass for compilation replay. virtual void dump_replay_data(outputStream* out); + // Return stable class name suitable for replay file. + const char *replay_name() const; + #ifdef ASSERT bool debug_final_field_at(int offset); bool debug_stable_field_at(int offset); diff --git a/src/hotspot/share/ci/ciMethod.cpp b/src/hotspot/share/ci/ciMethod.cpp index 7d03cfb6aa5b479652eff0935697a3caea394623..862824c5b72f9bc75b0d42c01195c21fbf50780e 100644 --- a/src/hotspot/share/ci/ciMethod.cpp +++ b/src/hotspot/share/ci/ciMethod.cpp @@ -70,6 +70,7 @@ ciMethod::ciMethod(const methodHandle& h_m, ciInstanceKlass* holder) : _holder(holder) { assert(h_m() != NULL, "no null method"); + assert(_holder->get_instanceKlass() == h_m->method_holder(), ""); if (LogTouchedMethods) { h_m->log_touched(Thread::current()); @@ -880,17 +881,16 @@ ciKlass* ciMethod::get_declared_method_holder_at_bci(int bci) { // invocation counts in methods. int ciMethod::scale_count(int count, float prof_factor) { if (count > 0 && method_data() != NULL) { - int counter_life; + int counter_life = method_data()->invocation_count(); int method_life = interpreter_invocation_count(); - // In tiered the MDO's life is measured directly, so just use the snapshotted counters - counter_life = MAX2(method_data()->invocation_count(), method_data()->backedge_count()); - - // counter_life due to backedge_counter could be > method_life - if (counter_life > method_life) - counter_life = method_life; - if (0 < counter_life && counter_life <= method_life) { + if (method_life < counter_life) { // may happen because of the snapshot timing + method_life = counter_life; + } + if (counter_life > 0) { count = (int)((double)count * prof_factor * method_life / counter_life + 0.5); count = (count > 0) ? 
count : 1; + } else { + count = 1; } } return count; @@ -1289,17 +1289,25 @@ ciMethodBlocks *ciMethod::get_method_blocks() { #undef FETCH_FLAG_FROM_VM -void ciMethod::dump_name_as_ascii(outputStream* st) { - Method* method = get_Method(); +void ciMethod::dump_name_as_ascii(outputStream* st, Method* method) { st->print("%s %s %s", - method->klass_name()->as_quoted_ascii(), + CURRENT_ENV->replay_name(method->method_holder()), method->name()->as_quoted_ascii(), method->signature()->as_quoted_ascii()); } +void ciMethod::dump_name_as_ascii(outputStream* st) { + Method* method = get_Method(); + dump_name_as_ascii(st, method); +} + void ciMethod::dump_replay_data(outputStream* st) { ResourceMark rm; Method* method = get_Method(); + if (MethodHandles::is_signature_polymorphic_method(method)) { + // ignore for now + return; + } MethodCounters* mcs = method->method_counters(); st->print("ciMethod "); dump_name_as_ascii(st); diff --git a/src/hotspot/share/ci/ciMethod.hpp b/src/hotspot/share/ci/ciMethod.hpp index 7955e1752a95daea248c11afc47725142eebd358..926badd2f381f44c041ac40da8ca6fe591c59e9d 100644 --- a/src/hotspot/share/ci/ciMethod.hpp +++ b/src/hotspot/share/ci/ciMethod.hpp @@ -365,6 +365,7 @@ class ciMethod : public ciMetadata { bool can_be_statically_bound(ciInstanceKlass* context) const; // Replay data methods + static void dump_name_as_ascii(outputStream* st, Method* method); void dump_name_as_ascii(outputStream* st); void dump_replay_data(outputStream* st); diff --git a/src/hotspot/share/ci/ciMethodData.cpp b/src/hotspot/share/ci/ciMethodData.cpp index 8d71bbd5229e7f0424201102052dd762de7ae2b7..cdf85bb1b4757ce4ed21a3440210c28c8bb955f2 100644 --- a/src/hotspot/share/ci/ciMethodData.cpp +++ b/src/hotspot/share/ci/ciMethodData.cpp @@ -647,7 +647,8 @@ void ciMethodData::dump_replay_data_type_helper(outputStream* out, int round, in if (round == 0) { count++; } else { - out->print(" %d %s", (int)(dp_to_di(pdata->dp() + in_bytes(offset)) / sizeof(intptr_t)), k->name()->as_quoted_ascii()); + out->print(" %d %s", (int)(dp_to_di(pdata->dp() + in_bytes(offset)) / sizeof(intptr_t)), + CURRENT_ENV->replay_name(k)); } } } @@ -703,13 +704,9 @@ void ciMethodData::dump_replay_data(outputStream* out) { ResourceMark rm; MethodData* mdo = get_MethodData(); Method* method = mdo->method(); - Klass* holder = method->method_holder(); - out->print("ciMethodData %s %s %s %d %d", - holder->name()->as_quoted_ascii(), - method->name()->as_quoted_ascii(), - method->signature()->as_quoted_ascii(), - _state, - current_mileage()); + out->print("ciMethodData "); + ciMethod::dump_name_as_ascii(out, method); + out->print(" %d %d", _state, current_mileage()); // dump the contents of the MDO header as raw data unsigned char* orig = (unsigned char*)&_orig; diff --git a/src/hotspot/share/ci/ciObjectFactory.cpp b/src/hotspot/share/ci/ciObjectFactory.cpp index 979136316e957d2c2eec6032363a5fa8766f663e..0f53c9957ce66bcec084643479a05ca583450f5c 100644 --- a/src/hotspot/share/ci/ciObjectFactory.cpp +++ b/src/hotspot/share/ci/ciObjectFactory.cpp @@ -378,6 +378,7 @@ ciMetadata* ciObjectFactory::create_new_metadata(Metadata* o) { if (o->is_klass()) { Klass* k = (Klass*)o; if (k->is_instance_klass()) { + assert(!ReplayCompiles || ciReplay::no_replay_state() || !ciReplay::is_klass_unresolved((InstanceKlass*)k), "must be whitelisted for replay compilation"); return new (arena()) ciInstanceKlass(k); } else if (k->is_objArray_klass()) { return new (arena()) ciObjArrayKlass(k); diff --git a/src/hotspot/share/ci/ciReplay.cpp 
b/src/hotspot/share/ci/ciReplay.cpp
index c5e95626b88c9d51be9250b002c55f75045ac575..a5f69bec4eabddcbbcbdecce75b476930de2bf56 100644
--- a/src/hotspot/share/ci/ciReplay.cpp
+++ b/src/hotspot/share/ci/ciReplay.cpp
@@ -34,17 +34,22 @@
 #include "classfile/systemDictionary.hpp"
 #include "compiler/compilationPolicy.hpp"
 #include "compiler/compileBroker.hpp"
+#include "interpreter/linkResolver.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/oopFactory.hpp"
 #include "memory/resourceArea.hpp"
 #include "oops/constantPool.hpp"
+#include "oops/cpCache.inline.hpp"
+#include "oops/fieldStreams.inline.hpp"
 #include "oops/klass.inline.hpp"
 #include "oops/method.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "prims/jvmtiExport.hpp"
+#include "prims/methodHandles.hpp"
 #include "runtime/fieldDescriptor.inline.hpp"
 #include "runtime/globals_extension.hpp"
 #include "runtime/handles.inline.hpp"
+#include "runtime/jniHandles.inline.hpp"
 #include "runtime/java.hpp"
 #include "utilities/copy.hpp"
 #include "utilities/macros.hpp"
@@ -86,6 +91,11 @@ typedef struct _ciMethodRecord {
   int _backedge_counter;
 } ciMethodRecord;
 
+typedef struct _ciInstanceKlassRecord {
+  const InstanceKlass* _klass;
+  jobject _java_mirror; // Global handle to java mirror to prevent unloading
+} ciInstanceKlassRecord;
+
 typedef struct _ciInlineRecord {
   const char* _klass_name;
   const char* _method_name;
@@ -107,6 +117,7 @@ class CompileReplay : public StackObj {
 
   GrowableArray<ciMethodRecord*>     _ci_method_records;
   GrowableArray<ciMethodDataRecord*> _ci_method_data_records;
+  GrowableArray<ciInstanceKlassRecord*> _ci_instance_klass_records;
 
   // Use pointer because we may need to return inline records
   // without destroying them.
@@ -257,7 +268,7 @@ class CompileReplay : public StackObj {
     }
   }
 
-  const char* parse_escaped_string() {
+  char* parse_escaped_string() {
     char* result = parse_quoted_string();
     if (result != NULL) {
       unescape_string(result);
@@ -340,7 +351,7 @@ class CompileReplay : public StackObj {
   }
 
   // Parse a possibly quoted version of a symbol into a symbolOop
-  Symbol* parse_symbol(TRAPS) {
+  Symbol* parse_symbol() {
     const char* str = parse_escaped_string();
     if (str != NULL) {
       Symbol* sym = SymbolTable::new_symbol(str);
@@ -349,9 +360,180 @@ class CompileReplay : public StackObj {
     return NULL;
   }
 
+  bool parse_terminator() {
+    char* terminator = parse_string();
+    if (terminator != NULL && strcmp(terminator, ";") == 0) {
+      return true;
+    }
+    return false;
+  }
+
+  // Parse a special hidden klass location syntax
+  // syntax: @bci <klass> <name> <signature> <bci> <location>* ;
+  // syntax: @cpi <klass> <cpi> <location>* ;
+  Klass* parse_cp_ref(TRAPS) {
+    JavaThread* thread = THREAD;
+    oop obj = NULL;
+    char* ref = parse_string();
+    if (strcmp(ref, "bci") == 0) {
+      Method* m = parse_method(CHECK_NULL);
+      if (m == NULL) {
+        return NULL;
+      }
+
+      InstanceKlass* ik = m->method_holder();
+      const constantPoolHandle cp(Thread::current(), ik->constants());
+
+      // invokedynamic or invokehandle
+
+      methodHandle caller(Thread::current(), m);
+      int bci = parse_int("bci");
+      if (m->validate_bci(bci) != bci) {
+        report_error("bad bci");
+        return NULL;
+      }
+
+      ik->link_class(CHECK_NULL);
+
+      Bytecode_invoke bytecode(caller, bci);
+      int index = bytecode.index();
+
+      ConstantPoolCacheEntry* cp_cache_entry = NULL;
+      CallInfo callInfo;
+      Bytecodes::Code bc = bytecode.invoke_code();
+      LinkResolver::resolve_invoke(callInfo, Handle(), cp, index, bc, CHECK_NULL);
+      if (bytecode.is_invokedynamic()) {
+        cp_cache_entry = cp->invokedynamic_cp_cache_entry_at(index);
+        cp_cache_entry->set_dynamic_call(cp, callInfo);
+      } else if (bytecode.is_invokehandle()) {
+#ifdef ASSERT
+        Klass* holder = cp->klass_ref_at(index, CHECK_NULL);
+        Symbol* name = cp->name_ref_at(index);
+        assert(MethodHandles::is_signature_polymorphic_name(holder, name), "");
+#endif
+        cp_cache_entry = cp->cache()->entry_at(cp->decode_cpcache_index(index));
+        cp_cache_entry->set_method_handle(cp, callInfo);
+      } else {
+        report_error("no dynamic invoke found");
+        return NULL;
+      }
+      char* dyno_ref = parse_string();
+      if (strcmp(dyno_ref, "<appendix>") == 0) {
+        obj = cp_cache_entry->appendix_if_resolved(cp);
+      } else if (strcmp(dyno_ref, "<adapter>") == 0) {
+        if (!parse_terminator()) {
+          report_error("no dynamic invoke found");
+          return NULL;
+        }
+        Method* adapter = cp_cache_entry->f1_as_method();
+        if (adapter == NULL) {
+          report_error("no adapter found");
+          return NULL;
+        }
+        return adapter->method_holder();
+      } else if (strcmp(dyno_ref, "<bsm>") == 0) {
+        int pool_index = cp_cache_entry->constant_pool_index();
+        BootstrapInfo bootstrap_specifier(cp, pool_index, index);
+        obj = cp->resolve_possibly_cached_constant_at(bootstrap_specifier.bsm_index(), thread);
+      } else {
+        report_error("unrecognized token");
+        return NULL;
+      }
+    } else {
+      // constant pool ref (MethodHandle)
+      if (strcmp(ref, "cpi") != 0) {
+        report_error("unexpected token");
+        return NULL;
+      }
+
+      Klass* k = parse_klass(CHECK_NULL);
+      if (k == NULL) {
+        return NULL;
+      }
+      InstanceKlass* ik = InstanceKlass::cast(k);
+      const constantPoolHandle cp(Thread::current(), ik->constants());
+
+      int cpi = parse_int("cpi");
+
+      if (cpi >= cp->length()) {
+        report_error("bad cpi");
+        return NULL;
+      }
+      if (!cp->tag_at(cpi).is_method_handle()) {
+        report_error("no method handle found at cpi");
+        return NULL;
+      }
+      {
+        bool found_it;
+        obj = cp->find_cached_constant_at(cpi, found_it, thread);
+      }
+    }
+    Klass* k = NULL;
+    if (obj != NULL) {
+      skip_ws();
+      // loop: read fields
+      char* field = NULL;
+      do {
+        field = parse_string();
+        if (field == NULL) {
+          report_error("no field found");
+          return NULL;
+        }
+        if (strcmp(field, ";") == 0) {
+          break;
+        }
+        // raw Method*
+        if (strcmp(field, "<vmtarget>") == 0) {
+          Method* vmtarget = java_lang_invoke_MemberName::vmtarget(obj);
+          k = (vmtarget == NULL) ? NULL : vmtarget->method_holder();
+          if (k == NULL) {
+            report_error("null vmtarget found");
+            return NULL;
+          }
+          if (!parse_terminator()) {
+            report_error("missing terminator");
+            return NULL;
+          }
+          return k;
+        }
+        obj = ciReplay::obj_field(obj, field);
+        // array
+        if (obj != NULL && obj->is_objArray()) {
+          objArrayOop arr = (objArrayOop)obj;
+          int index = parse_int("index");
+          if (index >= arr->length()) {
+            report_error("bad array index");
+            return NULL;
+          }
+          obj = arr->obj_at(index);
+        }
+      } while (obj != NULL);
+      if (obj == NULL) {
+        report_error("null field found");
+        return NULL;
+      }
+      k = obj->klass();
+    }
+    return k;
+  }
+
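// ----------------------------------------------------------------------------------------
// [Editor's sketch -- not part of the patch] parse_cp_ref() above walks whitespace-
// separated tokens from the replay line until the ";" terminator, treating "<...>" tokens
// as special steps and everything else as a field name (or array index). A minimal
// tokenizer in that style, assuming nothing from CompileReplay (next_token is a
// hypothetical helper):

// Returns the next whitespace-delimited token in *p (NUL-terminated in place),
// or nullptr at end of line; advances *p past the token.
static char* next_token(char** p) {
  char* s = *p;
  while (*s == ' ' || *s == '\t') s++;
  if (*s == '\0') return nullptr;
  char* start = s;
  while (*s != '\0' && *s != ' ' && *s != '\t') s++;
  if (*s != '\0') *s++ = '\0';
  *p = s;
  return start;
}
// Usage: the tokens of "form vmentry <vmtarget> ;" come back one at a time; the caller
// stops when it sees ";".
// ----------------------------------------------------------------------------------------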
   // Parse a valid klass name and look it up
+  // syntax: <name>
+  // syntax: <constant pool ref>
   Klass* parse_klass(TRAPS) {
-    const char* str = parse_escaped_string();
+    skip_ws();
+    // check for constant pool object reference (for a dynamic/hidden class)
+    bool cp_ref = (*_bufptr == '@');
+    if (cp_ref) {
+      ++_bufptr;
+      Klass* k = parse_cp_ref(CHECK_NULL);
+      if (k != NULL && !k->is_hidden()) {
+        report_error("expected hidden class");
+        return NULL;
+      }
+      return k;
+    }
+    char* str = parse_escaped_string();
     Symbol* klass_name = SymbolTable::new_symbol(str);
     if (klass_name != NULL) {
       Klass* k = NULL;
@@ -389,8 +571,8 @@ class CompileReplay : public StackObj {
       report_error("Can't find holder klass");
       return NULL;
     }
-    Symbol* method_name = parse_symbol(CHECK_NULL);
-    Symbol* method_signature = parse_symbol(CHECK_NULL);
+    Symbol* method_name = parse_symbol();
+    Symbol* method_signature = parse_symbol();
     Method* m = k->find_method(method_name, method_signature);
     if (m == NULL) {
       report_error("Can't find method");
@@ -679,12 +861,26 @@ class CompileReplay : public StackObj {
   }
 
   // instanceKlass <name>
+  // instanceKlass <constant pool ref> # <original hidden class name>