diff --git a/.github/workflows/submit.yml b/.github/workflows/submit.yml index 99041df6e3b79cc23067a48ca0586ce98e73f2e0..9d19715a9862a7aed3803170feef90cd35ba527e 100644 --- a/.github/workflows/submit.yml +++ b/.github/workflows/submit.yml @@ -884,7 +884,7 @@ jobs: run: | New-Item -Force -ItemType directory -Path "$HOME\cygwin" & curl -L "https://www.cygwin.com/setup-x86_64.exe" -o "$HOME/cygwin/setup-x86_64.exe" - Start-Process -FilePath "$HOME\cygwin\setup-x86_64.exe" -ArgumentList "--quiet-mode --packages autoconf,make,zip,unzip --root $HOME\cygwin\cygwin64 --local-package-dir $HOME\cygwin\packages --site http://mirrors.kernel.org/sourceware/cygwin --no-desktop --no-shortcuts --no-startmenu --no-admin" -Wait -NoNewWindow + Start-Process -FilePath "$HOME\cygwin\setup-x86_64.exe" -ArgumentList "--quiet-mode --packages cygwin=3.2.0-1,autoconf,make,zip,unzip --root $HOME\cygwin\cygwin64 --local-package-dir $HOME\cygwin\packages --site http://mirrors.kernel.org/sourceware/cygwin --no-desktop --no-shortcuts --no-startmenu --no-admin" -Wait -NoNewWindow - name: Checkout the source uses: actions/checkout@v2 @@ -973,7 +973,7 @@ jobs: run: | New-Item -Force -ItemType directory -Path "$HOME\cygwin" & curl -L "https://www.cygwin.com/setup-x86_64.exe" -o "$HOME/cygwin/setup-x86_64.exe" - Start-Process -FilePath "$HOME\cygwin\setup-x86_64.exe" -ArgumentList "--quiet-mode --packages autoconf,make,zip,unzip --root $HOME\cygwin\cygwin64 --local-package-dir $HOME\cygwin\packages --site http://mirrors.kernel.org/sourceware/cygwin --no-desktop --no-shortcuts --no-startmenu --no-admin" -Wait -NoNewWindow + Start-Process -FilePath "$HOME\cygwin\setup-x86_64.exe" -ArgumentList "--quiet-mode --packages cygwin=3.2.0-1,autoconf,make,zip,unzip --root $HOME\cygwin\cygwin64 --local-package-dir $HOME\cygwin\packages --site http://mirrors.kernel.org/sourceware/cygwin --no-desktop --no-shortcuts --no-startmenu --no-admin" -Wait -NoNewWindow - name: Checkout the source uses: actions/checkout@v2 @@ -1145,7 +1145,7 @@ jobs: run: | New-Item -Force -ItemType directory -Path "$HOME\cygwin" & curl -L "https://www.cygwin.com/setup-x86_64.exe" -o "$HOME/cygwin/setup-x86_64.exe" - Start-Process -FilePath "$HOME\cygwin\setup-x86_64.exe" -ArgumentList "--quiet-mode --packages autoconf,make,zip,unzip --root $HOME\cygwin\cygwin64 --local-package-dir $HOME\cygwin\packages --site http://mirrors.kernel.org/sourceware/cygwin --no-desktop --no-shortcuts --no-startmenu --no-admin" -Wait -NoNewWindow + Start-Process -FilePath "$HOME\cygwin\setup-x86_64.exe" -ArgumentList "--quiet-mode --packages cygwin=3.2.0-1,autoconf,make,zip,unzip --root $HOME\cygwin\cygwin64 --local-package-dir $HOME\cygwin\packages --site http://mirrors.kernel.org/sourceware/cygwin --no-desktop --no-shortcuts --no-startmenu --no-admin" -Wait -NoNewWindow - name: Restore jtreg artifact id: jtreg_restore diff --git a/make/Hsdis.gmk b/make/Hsdis.gmk index 2253da906797293b3089c450688a76760c15df0d..02f09b320f095522d29e0e7d7e73438a10f2ee72 100644 --- a/make/Hsdis.gmk +++ b/make/Hsdis.gmk @@ -44,15 +44,44 @@ ifeq ($(call isTargetOs, windows), true) MINGW_BASE := x86_64-w64-mingw32 + MINGW_SYSROOT = $(shell $(MINGW_BASE)-gcc -print-sysroot) + ifeq ($(wildcard $(MINGW_SYSROOT)), ) + # Use fallback path + MINGW_SYSROOT := /usr/$(MINGW_BASE) + ifeq ($(wildcard $(MINGW_SYSROOT)), ) + $(error mingw sysroot not found) + endif + endif + $(eval $(call DefineNativeToolchain, TOOLCHAIN_MINGW, \ CC := $(MINGW_BASE)-gcc, \ LD := $(MINGW_BASE)-ld, \ OBJCOPY := $(MINGW_BASE)-objcopy, \ RC 
:= $(RC), \ - SYSROOT_CFLAGS := --sysroot=/usr/$(MINGW_BASE)/sys-root, \ - SYSROOT_LDFLAGS := --sysroot=/usr/$(MINGW_BASE)/sys-root, \ + SYSROOT_CFLAGS := --sysroot=$(MINGW_SYSROOT), \ + SYSROOT_LDFLAGS := --sysroot=$(MINGW_SYSROOT), \ )) + MINGW_SYSROOT_LIB_PATH := $(MINGW_SYSROOT)/mingw/lib + ifeq ($(wildcard $(MINGW_SYSROOT_LIB_PATH)), ) + # Try without mingw + MINGW_SYSROOT_LIB_PATH := $(MINGW_SYSROOT)/lib + ifeq ($(wildcard $(MINGW_SYSROOT_LIB_PATH)), ) + $(error mingw sysroot lib path not found) + endif + endif + + MINGW_VERSION = $(shell $(MINGW_BASE)-gcc -v 2>&1 | $(GREP) "gcc version" | $(CUT) -d " " -f 3) + MINGW_GCC_LIB_PATH := /usr/lib/gcc/$(MINGW_BASE)/$(MINGW_VERSION) + ifeq ($(wildcard $(MINGW_GCC_LIB_PATH)), ) + # Try using only major version number + MINGW_VERSION_MAJOR := $(firstword $(subst ., , $(MINGW_VERSION))) + MINGW_GCC_LIB_PATH := /usr/lib/gcc/$(MINGW_BASE)/$(MINGW_VERSION_MAJOR) + ifeq ($(wildcard $(MINGW_GCC_LIB_PATH)), ) + $(error mingw gcc lib path not found) + endif + endif + TOOLCHAIN_TYPE := gcc OPENJDK_TARGET_OS := linux CC_OUT_OPTION := -o$(SPACE) @@ -66,9 +95,8 @@ ifeq ($(call isTargetOs, windows), true) HSDIS_TOOLCHAIN := TOOLCHAIN_MINGW HSDIS_TOOLCHAIN_CFLAGS := - HSDIS_TOOLCHAIN_LDFLAGS := -L/usr/lib/gcc/$(MINGW_BASE)/9.2.0 \ - -L/usr/$(MINGW_BASE)/sys-root/mingw/lib - MINGW_DLLCRT := /usr/$(MINGW_BASE)/sys-root/mingw/lib/dllcrt2.o + HSDIS_TOOLCHAIN_LDFLAGS := -L$(MINGW_GCC_LIB_PATH) -L$(MINGW_SYSROOT_LIB_PATH) + MINGW_DLLCRT := $(MINGW_SYSROOT_LIB_PATH)/dllcrt2.o HSDIS_TOOLCHAIN_LIBS := $(MINGW_DLLCRT) -lmingw32 -lgcc -lgcc_eh -lmoldname \ -lmingwex -lmsvcrt -lpthread -ladvapi32 -lshell32 -luser32 -lkernel32 else diff --git a/make/autoconf/jdk-options.m4 b/make/autoconf/jdk-options.m4 index c937101c2c71af49c93377beb8cf4f7e35e50d6a..cef3a1c1bd77b815bbd51777dce9dc32c6b4f7f6 100644 --- a/make/autoconf/jdk-options.m4 +++ b/make/autoconf/jdk-options.m4 @@ -817,26 +817,53 @@ AC_DEFUN_ONCE([JDKOPT_SETUP_HSDIS], BINUTILS_DIR="$with_binutils" fi - AC_MSG_CHECKING([for binutils to use with hsdis]) - if test "x$BINUTILS_DIR" != x; then + binutils_system_error="" + HSDIS_LIBS="" + if test "x$BINUTILS_DIR" = xsystem; then + AC_CHECK_LIB(bfd, bfd_openr, [ HSDIS_LIBS="-lbfd" ], [ binutils_system_error="libbfd not found" ]) + AC_CHECK_LIB(opcodes, disassembler, [ HSDIS_LIBS="$HSDIS_LIBS -lopcodes" ], [ binutils_system_error="libopcodes not found" ]) + AC_CHECK_LIB(iberty, xmalloc, [ HSDIS_LIBS="$HSDIS_LIBS -liberty" ], [ binutils_system_error="libiberty not found" ]) + AC_CHECK_LIB(z, deflate, [ HSDIS_LIBS="$HSDIS_LIBS -lz" ], [ binutils_system_error="libz not found" ]) + elif test "x$BINUTILS_DIR" != x; then if test -e $BINUTILS_DIR/bfd/libbfd.a && \ test -e $BINUTILS_DIR/opcodes/libopcodes.a && \ test -e $BINUTILS_DIR/libiberty/libiberty.a; then - AC_MSG_RESULT([$BINUTILS_DIR]) HSDIS_CFLAGS="-I$BINUTILS_DIR/include -I$BINUTILS_DIR/bfd -DLIBARCH_$OPENJDK_TARGET_CPU_LEGACY_LIB" HSDIS_LIBS="$BINUTILS_DIR/bfd/libbfd.a $BINUTILS_DIR/opcodes/libopcodes.a $BINUTILS_DIR/libiberty/libiberty.a $BINUTILS_DIR/zlib/libz.a" - else - AC_MSG_RESULT([invalid]) - AC_MSG_ERROR([$BINUTILS_DIR does not contain a proper binutils installation]) fi - else - AC_MSG_RESULT([missing]) - AC_MSG_NOTICE([--with-hsdis=binutils requires specifying a binutils installation.]) - AC_MSG_NOTICE([Download binutils from https://www.gnu.org/software/binutils and unpack it,]) - AC_MSG_NOTICE([and point --with-binutils-src to the resulting directory, or use]) - AC_MSG_NOTICE([--with-binutils to point to a 
pre-built binutils installation.]) - AC_MSG_ERROR([Cannot continue]) fi + + AC_MSG_CHECKING([for binutils to use with hsdis]) + case "x$BINUTILS_DIR" in + xsystem) + if test "x$OPENJDK_TARGET_OS" != xlinux; then + AC_MSG_RESULT([invalid]) + AC_MSG_ERROR([binutils on system is supported for Linux only]) + elif test "x$binutils_system_error" = x; then + AC_MSG_RESULT([system]) + HSDIS_CFLAGS="-DSYSTEM_BINUTILS" + else + AC_MSG_RESULT([invalid]) + AC_MSG_ERROR([$binutils_system_error]) + fi + ;; + x) + AC_MSG_RESULT([missing]) + AC_MSG_NOTICE([--with-hsdis=binutils requires specifying a binutils installation.]) + AC_MSG_NOTICE([Download binutils from https://www.gnu.org/software/binutils and unpack it,]) + AC_MSG_NOTICE([and point --with-binutils-src to the resulting directory, or use]) + AC_MSG_NOTICE([--with-binutils to point to a pre-built binutils installation.]) + AC_MSG_ERROR([Cannot continue]) + ;; + *) + if test "x$HSDIS_LIBS" != x; then + AC_MSG_RESULT([$BINUTILS_DIR]) + else + AC_MSG_RESULT([invalid]) + AC_MSG_ERROR([$BINUTILS_DIR does not contain a proper binutils installation]) + fi + ;; + esac else AC_MSG_RESULT([invalid]) AC_MSG_ERROR([Incorrect hsdis backend "$with_hsdis"]) diff --git a/make/autoconf/jvm-features.m4 b/make/autoconf/jvm-features.m4 index 1f76c323129fd366a358b72f1e67c6a0a63b7568..906a2857877216e7fdc80d406230105fbba07fe2 100644 --- a/make/autoconf/jvm-features.m4 +++ b/make/autoconf/jvm-features.m4 @@ -307,7 +307,8 @@ AC_DEFUN_ONCE([JVM_FEATURES_CHECK_SHENANDOAHGC], JVM_FEATURES_CHECK_AVAILABILITY(shenandoahgc, [ AC_MSG_CHECKING([if platform is supported by Shenandoah]) if test "x$OPENJDK_TARGET_CPU_ARCH" = "xx86" || \ - test "x$OPENJDK_TARGET_CPU" = "xaarch64" ; then + test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ + test "x$OPENJDK_TARGET_CPU" = "xppc64le"; then AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no, $OPENJDK_TARGET_CPU]) diff --git a/make/conf/test-dependencies b/make/conf/test-dependencies index 1e53b8f10f1435e961aebad20dbfe08cd9a30682..b45de4180f954367a9b56a05b72ff0fa44a3a309 100644 --- a/make/conf/test-dependencies +++ b/make/conf/test-dependencies @@ -30,14 +30,14 @@ JTREG_VERSION=6.1 JTREG_BUILD=1 GTEST_VERSION=1.8.1 -LINUX_X64_BOOT_JDK_FILENAME=openjdk-17_linux-x64_bin.tar.gz -LINUX_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk17/0d483333a00540d886896bac774ff48b/35/GPL/openjdk-17_linux-x64_bin.tar.gz -LINUX_X64_BOOT_JDK_SHA256=aef49cc7aa606de2044302e757fa94c8e144818e93487081c4fd319ca858134b +LINUX_X64_BOOT_JDK_FILENAME=openjdk-17.0.1_linux-x64_bin.tar.gz +LINUX_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk17.0.1/2a2082e5a09d4267845be086888add4f/12/GPL/openjdk-17.0.1_linux-x64_bin.tar.gz +LINUX_X64_BOOT_JDK_SHA256=1c0a73cbb863aad579b967316bf17673b8f98a9bb938602a140ba2e5c38f880a -WINDOWS_X64_BOOT_JDK_FILENAME=openjdk-17_windows-x64_bin.zip -WINDOWS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk17/0d483333a00540d886896bac774ff48b/35/GPL/openjdk-17_windows-x64_bin.zip -WINDOWS_X64_BOOT_JDK_SHA256=e88b0df00021c9d266bb435c9a95fdc67d1948cce4518daf85c234907bd393c5 +WINDOWS_X64_BOOT_JDK_FILENAME=openjdk-17.0.1_windows-x64_bin.zip +WINDOWS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk17.0.1/2a2082e5a09d4267845be086888add4f/12/GPL/openjdk-17.0.1_windows-x64_bin.zip +WINDOWS_X64_BOOT_JDK_SHA256=329900a6673b237b502bdcf77bc334da34bc91355c5fd2d457fc00f53fd71ef1 -MACOS_X64_BOOT_JDK_FILENAME=openjdk-17_macos-x64_bin.tar.gz 
-MACOS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk17/0d483333a00540d886896bac774ff48b/35/GPL/openjdk-17_macos-x64_bin.tar.gz
-MACOS_X64_BOOT_JDK_SHA256=18e11cf9bbc6f584031e801b11ae05a233c32086f8e1b84eb8a1e9bb8e1f5d90
+MACOS_X64_BOOT_JDK_FILENAME=openjdk-17.0.1_macos-x64_bin.tar.gz
+MACOS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk17.0.1/2a2082e5a09d4267845be086888add4f/12/GPL/openjdk-17.0.1_macos-x64_bin.tar.gz
+MACOS_X64_BOOT_JDK_SHA256=6ccb35800e723cabe15af60e67099d1a07c111d2d3208aa75523614dde68bee1
diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk
index f9f1bb3868879e9fe5a95fa551e142ca7a6f4416..25c132729142e426262022a700f1e925edd09972 100644
--- a/make/hotspot/gensrc/GensrcAdlc.gmk
+++ b/make/hotspot/gensrc/GensrcAdlc.gmk
@@ -149,6 +149,7 @@ ifeq ($(call check-jvm-feature, compiler2), true)
   ifeq ($(call check-jvm-feature, shenandoahgc), true)
     AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
       $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \
+      $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU_ARCH).ad \
       )))
   endif

diff --git a/make/modules/java.desktop/lib/Awt2dLibraries.gmk b/make/modules/java.desktop/lib/Awt2dLibraries.gmk
index ef7eadae20672fe31d0e94be54b46807322fe862..a0c4082554626e942d200e73373a734eb99f8e3e 100644
--- a/make/modules/java.desktop/lib/Awt2dLibraries.gmk
+++ b/make/modules/java.desktop/lib/Awt2dLibraries.gmk
@@ -834,6 +834,19 @@ endif

################################################################################

+# MACOSX_METAL_VERSION_MIN specifies the lowest version of macOS
+# that Metal shaders should be compiled for. The Metal pipeline is
+# supported only on macOS >= 10.14; for versions < 10.14, even if the
+# Metal pipeline is enabled with -Dsun.java2d.metal=true, the runtime
+# forces the OpenGL pipeline instead. MACOSX_VERSION_MIN for aarch64
+# has always been > 10.14, so we continue to use MACOSX_VERSION_MIN
+# for aarch64.
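(As a usage illustration, not part of the patch: the pipeline is requested with java -Dsun.java2d.metal=true -jar App.jar, where App.jar stands for any Java2D application. On macOS releases below 10.14 rendering still falls back to OpenGL as the comment describes, which is why the shader compile below only needs to target 10.14.)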
+ifeq ($(OPENJDK_TARGET_CPU_ARCH), aarch64)
+  MACOSX_METAL_VERSION_MIN=$(MACOSX_VERSION_MIN)
+else
+  MACOSX_METAL_VERSION_MIN=10.14.0
+endif
+
 ifeq ($(call isTargetOs, macosx), true)
   SHADERS_SRC := $(TOPDIR)/src/java.desktop/macosx/native/libawt_lwawt/java2d/metal/shaders.metal
   SHADERS_SUPPORT_DIR := $(SUPPORT_OUTPUTDIR)/native/java.desktop/libosxui
@@ -845,7 +858,9 @@ ifeq ($(call isTargetOs, macosx), true)
       DEPS := $(SHADERS_SRC), \
       OUTPUT_FILE := $(SHADERS_AIR), \
       SUPPORT_DIR := $(SHADERS_SUPPORT_DIR), \
-      COMMAND := $(METAL) -c -std=osx-metal2.0 -o $(SHADERS_AIR) $(SHADERS_SRC), \
+      COMMAND := $(METAL) -c -std=osx-metal2.0 \
+          -mmacosx-version-min=$(MACOSX_METAL_VERSION_MIN) \
+          -o $(SHADERS_AIR) $(SHADERS_SRC), \
   ))

   $(eval $(call SetupExecute, metallib_shaders, \
diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index 108194c4f9ccfefd979eaf4717f1b3acaad20aa8..11c6ec7df0a26296f545c2a4bb2cdd315e40820c 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -2059,7 +2059,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad,
          "must move at least 1 register");

-  if (src_hi != OptoReg::Bad) {
+  if (src_hi != OptoReg::Bad && !bottom_type()->isa_vectmask()) {
     assert((src_lo&1)==0 && src_lo+1==src_hi &&
            (dst_lo&1)==0 && dst_lo+1==dst_hi,
            "expected aligned-adjacent pairs");
@@ -2074,7 +2074,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
     int src_offset = ra_->reg2offset(src_lo);
     int dst_offset = ra_->reg2offset(dst_lo);

-    if (bottom_type()->isa_vect() != NULL) {
+    if (bottom_type()->isa_vect() && !bottom_type()->isa_vectmask()) {
       uint ireg = ideal_reg();
       if (ireg == Op_VecA && cbuf) {
         C2_MacroAssembler _masm(cbuf);
@@ -2180,10 +2180,29 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), is64 ?
__ D : __ S, src_offset); + } else if (dst_lo_rc == rc_predicate) { + __ unspill_sve_predicate(as_PRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo), + Matcher::scalable_vector_reg_size(T_BYTE) >> 3); } else { // stack --> stack copy assert(dst_lo_rc == rc_stack, "spill to bad register class"); - __ unspill(rscratch1, is64, src_offset); - __ spill(rscratch1, is64, dst_offset); + if (ideal_reg() == Op_RegVectMask) { + __ spill_copy_sve_predicate_stack_to_stack(src_offset, dst_offset, + Matcher::scalable_vector_reg_size(T_BYTE) >> 3); + } else { + __ unspill(rscratch1, is64, src_offset); + __ spill(rscratch1, is64, dst_offset); + } + } + break; + case rc_predicate: + if (dst_lo_rc == rc_predicate) { + __ sve_mov(as_PRegister(Matcher::_regEncode[dst_lo]), as_PRegister(Matcher::_regEncode[src_lo])); + } else if (dst_lo_rc == rc_stack) { + __ spill_sve_predicate(as_PRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo), + Matcher::scalable_vector_reg_size(T_BYTE) >> 3); + } else { + assert(false, "bad src and dst rc_class combination."); + ShouldNotReachHere(); } break; default: @@ -2204,7 +2223,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo } else { st->print("%s", Matcher::regName[dst_lo]); } - if (bottom_type()->isa_vect() != NULL) { + if (bottom_type()->isa_vect() && !bottom_type()->isa_vectmask()) { int vsize = 0; switch (ideal_reg()) { case Op_VecD: @@ -2221,6 +2240,10 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo ShouldNotReachHere(); } st->print("\t# vector spill size = %d", vsize); + } else if (ideal_reg() == Op_RegVectMask) { + assert(Matcher::supports_scalable_vector(), "bad register type for spill"); + int vsize = Matcher::scalable_predicate_reg_slots() * 32; + st->print("\t# predicate spill size = %d", vsize); } else { st->print("\t# spill size = %d", is64 ? 64 : 32); } @@ -2373,6 +2396,8 @@ const bool Matcher::match_rule_supported(int opcode) { bool ret_value = true; switch (opcode) { + case Op_OnSpinWait: + return VM_Version::supports_on_spin_wait(); case Op_CacheWB: case Op_CacheWBPreSync: case Op_CacheWBPostSync: @@ -2380,6 +2405,18 @@ const bool Matcher::match_rule_supported(int opcode) { ret_value = false; } break; + case Op_LoadVectorMasked: + case Op_StoreVectorMasked: + case Op_LoadVectorGatherMasked: + case Op_StoreVectorScatterMasked: + case Op_MaskAll: + case Op_AndVMask: + case Op_OrVMask: + case Op_XorVMask: + if (UseSVE == 0) { + ret_value = false; + } + break; } return ret_value; // Per default match rules are supported. @@ -2428,6 +2465,15 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType return vector_size_supported(bt, vlen); } +const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { + // Only SVE supports masked operations. 
+ if (UseSVE == 0) { + return false; + } + return match_rule_supported(opcode) && + masked_op_sve_supported(opcode, vlen, bt); +} + const RegMask* Matcher::predicate_reg_mask(void) { return &_PR_REG_mask; } @@ -2639,12 +2685,59 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) { return true; } +bool can_combine_with_imm(Node* binary_node, Node* replicate_node) { + if (UseSVE == 0 || !VectorNode::is_invariant_vector(replicate_node)){ + return false; + } + Node* imm_node = replicate_node->in(1); + if (!imm_node->is_Con()) { + return false; + } + + const Type* t = imm_node->bottom_type(); + if (!(t->isa_int() || t->isa_long())) { + return false; + } + + switch (binary_node->Opcode()) { + case Op_AndV: + case Op_OrV: + case Op_XorV: { + Assembler::SIMD_RegVariant T = Assembler::elemType_to_regVariant(Matcher::vector_element_basic_type(binary_node)); + uint64_t value = t->isa_long() ? (uint64_t)imm_node->get_long() : (uint64_t)imm_node->get_int(); + return Assembler::operand_valid_for_sve_logical_immediate(Assembler::regVariant_to_elemBits(T), value); + } + case Op_AddVB: + return (imm_node->get_int() <= 255 && imm_node->get_int() >= -255); + case Op_AddVS: + case Op_AddVI: + return Assembler::operand_valid_for_sve_add_sub_immediate((int64_t)imm_node->get_int()); + case Op_AddVL: + return Assembler::operand_valid_for_sve_add_sub_immediate(imm_node->get_long()); + default: + return false; + } +} + +bool is_vector_arith_imm_pattern(Node* n, Node* m) { + if (n != NULL && m != NULL) { + return can_combine_with_imm(n, m); + } + return false; +} + // Should the matcher clone input 'm' of node 'n'? bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { - if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) - mstack.push(m, Visit); // m = ShiftCntV + // ShiftV src (ShiftCntV con) + // StoreVector (VectorStoreMask src) + // Binary src (Replicate con) + if (is_vshift_con_pattern(n, m) || + (UseSVE > 0 && m->Opcode() == Op_VectorStoreMask && n->Opcode() == Op_StoreVector) || + is_vector_arith_imm_pattern(n, m)) { + mstack.push(m, Visit); return true; } + return false; } @@ -3852,7 +3945,7 @@ encode %{ // Try to CAS m->owner from NULL to current thread. __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markWord::monitor_value)); __ cmpxchg(tmp, zr, rthread, Assembler::xword, /*acquire*/ true, - /*release*/ true, /*weak*/ false, noreg); // Sets flags for result + /*release*/ true, /*weak*/ false, rscratch1); // Sets flags for result // Store a non-null value into the box to avoid looking like a re-entrant // lock. 
The fast-path monitor unlock code checks for @@ -3861,6 +3954,15 @@ encode %{ __ mov(tmp, (address)markWord::unused_mark().value()); __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + __ br(Assembler::EQ, cont); // CAS success means locking succeeded + + __ cmp(rscratch1, rthread); + __ br(Assembler::NE, cont); // Check for recursive locking + + // Recursive lock case + __ increment(Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value), 1); + // flag == EQ still from the cmp above, checking if this is a reentrant lock + __ bind(cont); // flag == EQ indicates success // flag == NE indicates failure @@ -3904,11 +4006,20 @@ encode %{ __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); - __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner. - __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions - __ cmp(rscratch1, zr); // Sets flags for result + + Label notRecursive; + __ cmp(rscratch1, rthread); __ br(Assembler::NE, cont); + __ cbz(disp_hdr, notRecursive); + + // Recursive lock + __ sub(disp_hdr, disp_hdr, 1u); + __ str(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); + // flag == EQ was set in the ownership check above + __ b(cont); + + __ bind(notRecursive); __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0. @@ -4561,6 +4672,17 @@ operand immL8_shift8() interface(CONST_INTER); %} +// 8 bit integer valid for vector add sub immediate +operand immBAddSubV() +%{ + predicate(n->get_int() <= 255 && n->get_int() >= -255); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + // 32 bit integer valid for add sub immediate operand immIAddSub() %{ @@ -4571,8 +4693,39 @@ operand immIAddSub() interface(CONST_INTER); %} +// 32 bit integer valid for vector add sub immediate +operand immIAddSubV() +%{ + predicate(Assembler::operand_valid_for_sve_add_sub_immediate((int64_t)n->get_int())); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + // 32 bit unsigned integer valid for logical immediate -// TODO -- check this is right when e.g the mask is 0x80000000 + +operand immBLog() +%{ + predicate(Assembler::operand_valid_for_sve_logical_immediate(BitsPerByte, (uint64_t)n->get_int())); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immSLog() +%{ + predicate(Assembler::operand_valid_for_sve_logical_immediate(BitsPerShort, (uint64_t)n->get_int())); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + operand immILog() %{ predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (uint64_t)n->get_int())); @@ -4650,6 +4803,17 @@ operand immLAddSub() interface(CONST_INTER); %} +// 64 bit integer valid for addv subv immediate +operand immLAddSubV() +%{ + predicate(Assembler::operand_valid_for_sve_add_sub_immediate(n->get_long())); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + // 64 bit integer valid for logical immediate operand immLLog() %{ @@ -5503,6 +5667,7 @@ operand pReg() %{ constraint(ALLOC_IN_RC(pr_reg)); match(RegVectMask); + match(pRegGov); op_cost(0); format %{ %} interface(REG_INTER); @@ -8852,6 +9017,17 @@ 
instruct castVV(vReg dst) ins_pipe(pipe_class_empty); %} +instruct castVVMask(pRegGov dst) +%{ + match(Set dst (CastVV dst)); + + size(0); + format %{ "# castVV of $dst" %} + ins_encode(/* empty encoding */); + ins_cost(0); + ins_pipe(pipe_class_empty); +%} + // ============================================================================ // Atomic operation instructions // @@ -14333,6 +14509,18 @@ instruct signumF_reg(vRegF dst, vRegF src, vRegF zero, vRegF one) %{ ins_pipe(fp_uop_d); %} +instruct onspinwait() %{ + match(OnSpinWait); + ins_cost(INSN_COST); + + format %{ "onspinwait" %} + + ins_encode %{ + __ spin_wait(); + %} + ins_pipe(pipe_class_empty); +%} + // ============================================================================ // Logical Instructions diff --git a/src/hotspot/cpu/aarch64/aarch64_sve.ad b/src/hotspot/cpu/aarch64/aarch64_sve.ad index 1910ef42b255ec689691d8d1fe1006a52a31d369..8260459f2231fa7461f2fdc5d4d6b966b4bda69e 100644 --- a/src/hotspot/cpu/aarch64/aarch64_sve.ad +++ b/src/hotspot/cpu/aarch64/aarch64_sve.ad @@ -88,6 +88,7 @@ opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4); source_hpp %{ bool op_sve_supported(int opcode, int vlen, BasicType bt); + bool masked_op_sve_supported(int opcode, int vlen, BasicType bt); %} source %{ @@ -144,11 +145,7 @@ source %{ // Vector API specific case Op_VectorLoadShuffle: case Op_VectorRearrange: - if (vlen < 4 || length_in_bytes > MaxVectorSize) { - return false; - } else { - return true; - } + return vlen >= 4 && length_in_bytes <= MaxVectorSize; case Op_LoadVector: case Op_StoreVector: return Matcher::vector_size_supported(bt, vlen); @@ -158,6 +155,14 @@ source %{ // By default, we only support vector operations with no less than 8 bytes and 2 elements. return 8 <= length_in_bytes && length_in_bytes <= MaxVectorSize && vlen >= 2; } + + bool masked_op_sve_supported(int opcode, int vlen, BasicType bt) { + if (opcode == Op_VectorRearrange) { + return false; + } + return op_sve_supported(opcode, vlen, bt); + } + %} definitions %{ @@ -294,46 +299,252 @@ instruct storeV16_vreg(vReg src, vmem16 mem) // Only load/store values in the range of the memory_size. This is needed // when the memory_size is lower than the hardware supported max vector size. // And this might happen for Vector API mask vector load/store. 
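For context, a minimal Java-level sketch of the code shape that exercises these rules (assuming JDK 17 with the incubating Vector API enabled via --add-modules jdk.incubator.vector; the class and array names are illustrative). The masked fromArray/intoArray calls are what C2 turns into LoadVectorMasked/StoreVectorMasked nodes, and the loop tail's partial mask is the memory_size < MaxVectorSize case discussed above:

import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorSpecies;

public class MaskedCopy {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

    // Copy a[] into r[] in vector-width strides; indexInRange yields an
    // all-true mask for full strides and a partial mask for the tail.
    static void copy(int[] a, int[] r) {
        for (int i = 0; i < a.length; i += SPECIES.length()) {
            VectorMask<Integer> m = SPECIES.indexInRange(i, a.length);
            IntVector v = IntVector.fromArray(SPECIES, a, i, m); // masked load
            v.intoArray(r, i, m);                                // masked store
        }
    }
}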
-instruct loadV_partial(vReg dst, vmemA mem, pRegGov pTmp, rFlagsReg cr) %{ +instruct loadV_partial(vReg dst, vmemA mem, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() > 16 && n->as_LoadVector()->memory_size() < MaxVectorSize); match(Set dst (LoadVector mem)); - effect(TEMP pTmp, KILL cr); + effect(TEMP pgtmp, KILL cr); ins_cost(6 * SVE_COST); - format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" - "sve_ldr $dst, $pTmp, $mem\t# load vector predicated" %} + format %{ "sve_whilelo_zr_imm $pgtmp, vector_length\n\t" + "sve_ldr $dst, $pgtmp, $mem\t# load vector partial" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); - __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ elemType_to_regVariant(bt), + __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt), Matcher::vector_length(this)); FloatRegister dst_reg = as_FloatRegister($dst$$reg); loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, - as_PRegister($pTmp$$reg), bt, bt, $mem->opcode(), + as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} ins_pipe(pipe_slow); %} -instruct storeV_partial(vReg src, vmemA mem, pRegGov pTmp, rFlagsReg cr) %{ +instruct storeV_partial(vReg src, vmemA mem, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() > 16 && n->as_StoreVector()->memory_size() < MaxVectorSize); match(Set mem (StoreVector mem src)); - effect(TEMP pTmp, KILL cr); + effect(TEMP pgtmp, KILL cr); ins_cost(5 * SVE_COST); - format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" - "sve_str $src, $pTmp, $mem\t# store vector predicated" %} + format %{ "sve_whilelo_zr_imm $pgtmp, vector_length\n\t" + "sve_str $src, $pgtmp, $mem\t# store vector partial" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src); - __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ elemType_to_regVariant(bt), + __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt), Matcher::vector_length(this, $src)); FloatRegister src_reg = as_FloatRegister($src$$reg); loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, src_reg, - as_PRegister($pTmp$$reg), bt, bt, $mem->opcode(), + as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} + +// vector load/store - predicated + +instruct loadV_masked(vReg dst, vmemA mem, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->as_LoadVector()->memory_size() == MaxVectorSize); + match(Set dst (LoadVectorMasked mem pg)); + ins_cost(4 * SVE_COST); + format %{ "sve_ldr $dst, $pg, $mem\t# load vector predicated (sve)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, as_FloatRegister($dst$$reg), + as_PRegister($pg$$reg), bt, bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} + +instruct loadV_masked_partial(vReg dst, vmemA mem, pRegGov pg, pRegGov pgtmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_LoadVector()->memory_size() < MaxVectorSize); + match(Set dst (LoadVectorMasked mem pg)); + effect(TEMP pgtmp, KILL cr); + ins_cost(6 * SVE_COST); + format %{ "sve_ldr $dst, $pg, $mem\t# load vector predicated partial (sve)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ 
elemType_to_regVariant(bt), + Matcher::vector_length(this)); + __ sve_and(as_PRegister($pgtmp$$reg), as_PRegister($pgtmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, as_FloatRegister($dst$$reg), + as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} + +instruct storeV_masked(vReg src, vmemA mem, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->as_StoreVector()->memory_size() == MaxVectorSize); + match(Set mem (StoreVectorMasked mem (Binary src pg))); + ins_cost(4 * SVE_COST); + format %{ "sve_str $mem, $pg, $src\t# store vector predicated (sve)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($src$$reg), + as_PRegister($pg$$reg), bt, bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} + +instruct storeV_masked_partial(vReg src, vmemA mem, pRegGov pg, pRegGov pgtmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_StoreVector()->memory_size() < MaxVectorSize); + match(Set mem (StoreVectorMasked mem (Binary src pg))); + effect(TEMP pgtmp, KILL cr); + ins_cost(6 * SVE_COST); + format %{ "sve_str $mem, $pg, $src\t# store vector predicated partial (sve)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src); + __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt), + Matcher::vector_length(this, $src)); + __ sve_and(as_PRegister($pgtmp$$reg), as_PRegister($pgtmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($src$$reg), + as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} ins_pipe(pipe_slow); %} +// maskAll + +instruct vmaskAll_immI(pRegGov dst, immI src) %{ + predicate(UseSVE > 0); + match(Set dst (MaskAll src)); + ins_cost(SVE_COST); + format %{ "sve_ptrue/sve_pfalse $dst\t# mask all (sve) (B/H/S)" %} + ins_encode %{ + int con = (int)$src$$constant; + if (con == 0) { + __ sve_pfalse(as_PRegister($dst$$reg)); + } else { + assert(con == -1, "invalid constant value for mask"); + BasicType bt = Matcher::vector_element_basic_type(this); + __ sve_ptrue(as_PRegister($dst$$reg), __ elemType_to_regVariant(bt)); + } + %} + ins_pipe(pipe_slow); +%} + +instruct vmaskAllI(pRegGov dst, iRegIorL2I src, vReg tmp, rFlagsReg cr) %{ + predicate(UseSVE > 0); + match(Set dst (MaskAll src)); + effect(TEMP tmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_dup $tmp, $src\n\t" + "sve_cmpne $dst, $tmp, 0\t# mask all (sve) (B/H/S)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_dup(as_FloatRegister($tmp$$reg), size, as_Register($src$$reg)); + __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), size, ptrue, as_FloatRegister($tmp$$reg), 0); + %} + ins_pipe(pipe_slow); +%} + +instruct vmaskAll_immL(pRegGov dst, immL src) %{ + predicate(UseSVE > 0); + match(Set dst (MaskAll src)); + ins_cost(SVE_COST); + format %{ "sve_ptrue/sve_pfalse $dst\t# mask all (sve) (D)" %} + ins_encode %{ + long con = (long)$src$$constant; + if (con == 0) { + __ sve_pfalse(as_PRegister($dst$$reg)); + } else { + assert(con == -1, "invalid constant value for mask"); + BasicType bt = 
Matcher::vector_element_basic_type(this);
+      __ sve_ptrue(as_PRegister($dst$$reg), __ elemType_to_regVariant(bt));
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmaskAllL(pRegGov dst, iRegL src, vReg tmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0);
+  match(Set dst (MaskAll src));
+  effect(TEMP tmp, KILL cr);
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_dup $tmp, $src\n\t"
+            "sve_cmpne $dst, $tmp, 0\t# mask all (sve) (D)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+    __ sve_dup(as_FloatRegister($tmp$$reg), size, as_Register($src$$reg));
+    __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), size, ptrue, as_FloatRegister($tmp$$reg), 0);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// mask logical and/or/xor
+
+instruct vmask_and(pRegGov pd, pRegGov pn, pRegGov pm) %{
+  predicate(UseSVE > 0);
+  match(Set pd (AndVMask pn pm));
+  ins_cost(SVE_COST);
+  format %{ "sve_and $pd, $pn, $pm\t# predicate (sve)" %}
+  ins_encode %{
+    __ sve_and(as_PRegister($pd$$reg), ptrue,
+               as_PRegister($pn$$reg), as_PRegister($pm$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmask_or(pRegGov pd, pRegGov pn, pRegGov pm) %{
+  predicate(UseSVE > 0);
+  match(Set pd (OrVMask pn pm));
+  ins_cost(SVE_COST);
+  format %{ "sve_orr $pd, $pn, $pm\t# predicate (sve)" %}
+  ins_encode %{
+    __ sve_orr(as_PRegister($pd$$reg), ptrue,
+               as_PRegister($pn$$reg), as_PRegister($pm$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmask_xor(pRegGov pd, pRegGov pn, pRegGov pm) %{
+  predicate(UseSVE > 0);
+  match(Set pd (XorVMask pn pm));
+  ins_cost(SVE_COST);
+  format %{ "sve_eor $pd, $pn, $pm\t# predicate (sve)" %}
+  ins_encode %{
+    __ sve_eor(as_PRegister($pd$$reg), ptrue,
+               as_PRegister($pn$$reg), as_PRegister($pm$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// mask logical and_not
+
+instruct vmask_and_notI(pRegGov pd, pRegGov pn, pRegGov pm, immI_M1 m1) %{
+  predicate(UseSVE > 0);
+  match(Set pd (AndVMask pn (XorVMask pm (MaskAll m1))));
+  ins_cost(SVE_COST);
+  format %{ "sve_bic $pd, $pn, $pm\t# predicate (sve) (B/H/S)" %}
+  ins_encode %{
+    __ sve_bic(as_PRegister($pd$$reg), ptrue,
+               as_PRegister($pn$$reg), as_PRegister($pm$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmask_and_notL(pRegGov pd, pRegGov pn, pRegGov pm, immL_M1 m1) %{
+  predicate(UseSVE > 0);
+  match(Set pd (AndVMask pn (XorVMask pm (MaskAll m1))));
+  ins_cost(SVE_COST);
+  format %{ "sve_bic $pd, $pn, $pm\t# predicate (sve) (D)" %}
+  ins_encode %{
+    __ sve_bic(as_PRegister($pd$$reg), ptrue,
+               as_PRegister($pn$$reg), as_PRegister($pm$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // vector reinterpret

 instruct reinterpret(vReg dst) %{
@@ -348,11 +559,11 @@ instruct reinterpret(vReg dst) %{
   ins_pipe(pipe_class_empty);
 %}

-instruct reinterpretResize(vReg dst, vReg src, pRegGov pTmp, rFlagsReg cr) %{
+instruct reinterpretResize(vReg dst, vReg src, pRegGov pgtmp, rFlagsReg cr) %{
   predicate(UseSVE > 0 &&
             n->as_Vector()->length_in_bytes() != n->in(1)->bottom_type()->is_vect()->length_in_bytes());  // src != dst
   match(Set dst (VectorReinterpret src));
-  effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
+  effect(TEMP_DEF dst, TEMP pgtmp, KILL cr);
   ins_cost(3 * SVE_COST);
   format %{ "reinterpretResize $dst, $src\t# vector (sve)" %}
   ins_encode %{
@@ -362,19 +573,53 @@ instruct reinterpretResize(vReg dst, vReg src, pRegGov pTmp, rFlagsReg cr) %{
                                  length_in_bytes_src : length_in_bytes_dst;
     assert(length_in_bytes_src <= MaxVectorSize && length_in_bytes_dst <= MaxVectorSize,
            "invalid vector length");
-
__ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ B, length_in_bytes_resize); + __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ B, length_in_bytes_resize); __ sve_dup(as_FloatRegister($dst$$reg), __ B, 0); - __ sve_sel(as_FloatRegister($dst$$reg), __ B, as_PRegister($pTmp$$reg), + __ sve_sel(as_FloatRegister($dst$$reg), __ B, as_PRegister($pgtmp$$reg), as_FloatRegister($src$$reg), as_FloatRegister($dst$$reg)); %} ins_pipe(pipe_slow); %} +// vector mask reinterpret + +instruct vmask_reinterpret_same_esize(pRegGov dst_src) %{ + predicate(UseSVE > 0 && + n->as_Vector()->length() == n->in(1)->bottom_type()->is_vect()->length() && + n->as_Vector()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes()); + match(Set dst_src (VectorReinterpret dst_src)); + ins_cost(0); + format %{ "# vmask_reinterpret $dst_src\t# do nothing" %} + ins_encode %{ + // empty + %} + ins_pipe(pipe_class_empty); +%} + +instruct vmask_reinterpret_diff_esize(pRegGov dst, pRegGov src, vReg tmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_Vector()->length() != n->in(1)->bottom_type()->is_vect()->length() && + n->as_Vector()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes()); + match(Set dst (VectorReinterpret src)); + effect(TEMP tmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "# vmask_reinterpret $dst, $src\t# vector (sve)" %} + ins_encode %{ + BasicType from_bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SIMD_RegVariant from_size = __ elemType_to_regVariant(from_bt); + BasicType to_bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt); + __ sve_cpy(as_FloatRegister($tmp$$reg), from_size, as_PRegister($src$$reg), -1, false); + __ sve_cmp(Assembler::EQ, as_PRegister($dst$$reg), to_size, ptrue, as_FloatRegister($tmp$$reg), -1); + %} + ins_pipe(pipe_slow); +%} + // vector abs instruct vabsB(vReg dst, vReg src) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + !n->as_Vector()->is_predicated_vector()); match(Set dst (AbsVB src)); ins_cost(SVE_COST); format %{ "sve_abs $dst, $src\t# vector (sve) (B)" %} @@ -387,7 +632,7 @@ instruct vabsB(vReg dst, vReg src) %{ instruct vabsS(vReg dst, vReg src) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + !n->as_Vector()->is_predicated_vector()); match(Set dst (AbsVS src)); ins_cost(SVE_COST); format %{ "sve_abs $dst, $src\t# vector (sve) (H)" %} @@ -400,7 +645,7 @@ instruct vabsS(vReg dst, vReg src) %{ instruct vabsI(vReg dst, vReg src) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_INT); + !n->as_Vector()->is_predicated_vector()); match(Set dst (AbsVI src)); ins_cost(SVE_COST); format %{ "sve_abs $dst, $src\t# vector (sve) (S)" %} @@ -413,7 +658,7 @@ instruct vabsI(vReg dst, vReg src) %{ instruct vabsL(vReg dst, vReg src) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + !n->as_Vector()->is_predicated_vector()); match(Set dst (AbsVL src)); ins_cost(SVE_COST); format %{ "sve_abs $dst, $src\t# vector (sve) (D)" %} @@ -426,7 +671,7 @@ instruct vabsL(vReg dst, vReg src) %{ instruct vabsF(vReg dst, vReg src) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + !n->as_Vector()->is_predicated_vector()); match(Set dst (AbsVF src)); ins_cost(SVE_COST); format %{ "sve_fabs $dst, $src\t# vector (sve) (S)" %} @@ -439,7 +684,7 @@ instruct vabsF(vReg 
dst, vReg src) %{ instruct vabsD(vReg dst, vReg src) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + !n->as_Vector()->is_predicated_vector()); match(Set dst (AbsVD src)); ins_cost(SVE_COST); format %{ "sve_fabs $dst, $src\t# vector (sve) (D)" %} @@ -450,6 +695,86 @@ instruct vabsD(vReg dst, vReg src) %{ ins_pipe(pipe_slow); %} +// vector abs - predicated + +instruct vabsB_masked(vReg dst_src, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src (AbsVB dst_src pg)); + ins_cost(SVE_COST); + format %{ "sve_abs $dst_src, $pg, $dst_src\t# vector (sve) (B)" %} + ins_encode %{ + __ sve_abs(as_FloatRegister($dst_src$$reg), __ B, + as_PRegister($pg$$reg), + as_FloatRegister($dst_src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsS_masked(vReg dst_src, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src (AbsVS dst_src pg)); + ins_cost(SVE_COST); + format %{ "sve_abs $dst_src, $pg, $dst_src\t# vector (sve) (H)" %} + ins_encode %{ + __ sve_abs(as_FloatRegister($dst_src$$reg), __ H, + as_PRegister($pg$$reg), + as_FloatRegister($dst_src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsI_masked(vReg dst_src, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src (AbsVI dst_src pg)); + ins_cost(SVE_COST); + format %{ "sve_abs $dst_src, $pg, $dst_src\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_abs(as_FloatRegister($dst_src$$reg), __ S, + as_PRegister($pg$$reg), + as_FloatRegister($dst_src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsL_masked(vReg dst_src, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src (AbsVL dst_src pg)); + ins_cost(SVE_COST); + format %{ "sve_abs $dst_src, $pg, $dst_src\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_abs(as_FloatRegister($dst_src$$reg), __ D, + as_PRegister($pg$$reg), + as_FloatRegister($dst_src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsF_masked(vReg dst_src, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src (AbsVF dst_src pg)); + ins_cost(SVE_COST); + format %{ "sve_fabs $dst_src, $pg, $dst_src\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_fabs(as_FloatRegister($dst_src$$reg), __ S, + as_PRegister($pg$$reg), + as_FloatRegister($dst_src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsD_masked(vReg dst_src, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src (AbsVD dst_src pg)); + ins_cost(SVE_COST); + format %{ "sve_fabs $dst_src, $pg, $dst_src\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_fabs(as_FloatRegister($dst_src$$reg), __ D, + as_PRegister($pg$$reg), + as_FloatRegister($dst_src$$reg)); + %} + ins_pipe(pipe_slow); +%} + // vector add instruct vaddB(vReg dst, vReg src1, vReg src2) %{ @@ -530,1311 +855,2090 @@ instruct vaddD(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} -// vector and +// vector add - predicated -instruct vand(vReg dst, vReg src1, vReg src2) %{ +instruct vaddB_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ predicate(UseSVE > 0); - match(Set dst (AndV src1 src2)); + match(Set dst_src1 (AddVB (Binary dst_src1 src2) pg)); ins_cost(SVE_COST); - format %{ "sve_and $dst, $src1, $src2\t# vector (sve)" %} + format %{ "sve_add $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (B)" %} ins_encode %{ - __ sve_and(as_FloatRegister($dst$$reg), - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); + __ sve_add(as_FloatRegister($dst_src1$$reg), __ B, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -// vector or - -instruct vor(vReg dst, vReg src1, 
vReg src2) %{ +instruct vaddS_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ predicate(UseSVE > 0); - match(Set dst (OrV src1 src2)); + match(Set dst_src1 (AddVS (Binary dst_src1 src2) pg)); ins_cost(SVE_COST); - format %{ "sve_orr $dst, $src1, $src2\t# vector (sve)" %} + format %{ "sve_add $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (H)" %} ins_encode %{ - __ sve_orr(as_FloatRegister($dst$$reg), - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); + __ sve_add(as_FloatRegister($dst_src1$$reg), __ H, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -// vector xor - -instruct vxor(vReg dst, vReg src1, vReg src2) %{ +instruct vaddI_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ predicate(UseSVE > 0); - match(Set dst (XorV src1 src2)); + match(Set dst_src1 (AddVI (Binary dst_src1 src2) pg)); ins_cost(SVE_COST); - format %{ "sve_eor $dst, $src1, $src2\t# vector (sve)" %} + format %{ "sve_add $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %} ins_encode %{ - __ sve_eor(as_FloatRegister($dst$$reg), - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); + __ sve_add(as_FloatRegister($dst_src1$$reg), __ S, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -// vector not - -instruct vnotI(vReg dst, vReg src, immI_M1 m1) %{ +instruct vaddL_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ predicate(UseSVE > 0); - match(Set dst (XorV src (ReplicateB m1))); - match(Set dst (XorV src (ReplicateS m1))); - match(Set dst (XorV src (ReplicateI m1))); + match(Set dst_src1 (AddVL (Binary dst_src1 src2) pg)); ins_cost(SVE_COST); - format %{ "sve_not $dst, $src\t# vector (sve) B/H/S" %} + format %{ "sve_add $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %} ins_encode %{ - __ sve_not(as_FloatRegister($dst$$reg), __ D, - ptrue, as_FloatRegister($src$$reg)); + __ sve_add(as_FloatRegister($dst_src1$$reg), __ D, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -instruct vnotL(vReg dst, vReg src, immL_M1 m1) %{ +instruct vaddF_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ predicate(UseSVE > 0); - match(Set dst (XorV src (ReplicateL m1))); + match(Set dst_src1 (AddVF (Binary dst_src1 src2) pg)); ins_cost(SVE_COST); - format %{ "sve_not $dst, $src\t# vector (sve) D" %} + format %{ "sve_fadd $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %} ins_encode %{ - __ sve_not(as_FloatRegister($dst$$reg), __ D, - ptrue, as_FloatRegister($src$$reg)); + __ sve_fadd(as_FloatRegister($dst_src1$$reg), __ S, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} - -// vector and_not - -instruct vand_notI(vReg dst, vReg src1, vReg src2, immI_M1 m1) %{ +instruct vaddD_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ predicate(UseSVE > 0); - match(Set dst (AndV src1 (XorV src2 (ReplicateB m1)))); - match(Set dst (AndV src1 (XorV src2 (ReplicateS m1)))); - match(Set dst (AndV src1 (XorV src2 (ReplicateI m1)))); + match(Set dst_src1 (AddVD (Binary dst_src1 src2) pg)); ins_cost(SVE_COST); - format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) B/H/S" %} + format %{ "sve_fadd $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %} ins_encode %{ - __ sve_bic(as_FloatRegister($dst$$reg), - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); + __ sve_fadd(as_FloatRegister($dst_src1$$reg), __ D, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -instruct vand_notL(vReg dst, vReg src1, vReg src2, immL_M1 m1) %{ 
+// vector add reg imm (unpredicated) + +instruct vaddImmB(vReg dst_src, immBAddSubV con) %{ predicate(UseSVE > 0); - match(Set dst (AndV src1 (XorV src2 (ReplicateL m1)))); + match(Set dst_src (AddVB dst_src (ReplicateB con))); ins_cost(SVE_COST); - format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) D" %} + format %{ "sve_add $dst_src, $dst_src, $con\t # vector (sve) (B)" %} ins_encode %{ - __ sve_bic(as_FloatRegister($dst$$reg), - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); + int32_t val = $con$$constant; + if (val > 0){ + __ sve_add(as_FloatRegister($dst_src$$reg), __ B, val); + } else if (val < 0){ + __ sve_sub(as_FloatRegister($dst_src$$reg), __ B, -val); + } %} ins_pipe(pipe_slow); %} - -// vector float div - -instruct vdivF(vReg dst_src1, vReg src2) %{ +instruct vaddImmS(vReg dst_src, immIAddSubV con) %{ predicate(UseSVE > 0); - match(Set dst_src1 (DivVF dst_src1 src2)); + match(Set dst_src (AddVS dst_src (ReplicateS con))); ins_cost(SVE_COST); - format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) (S)" %} + format %{ "sve_add $dst_src, $dst_src, $con\t # vector (sve) (H)" %} ins_encode %{ - __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ S, - ptrue, as_FloatRegister($src2$$reg)); + int32_t val = $con$$constant; + if (val > 0){ + __ sve_add(as_FloatRegister($dst_src$$reg), __ H, val); + } else if (val < 0){ + __ sve_sub(as_FloatRegister($dst_src$$reg), __ H, -val); + } %} ins_pipe(pipe_slow); %} -instruct vdivD(vReg dst_src1, vReg src2) %{ +instruct vaddImmI(vReg dst_src, immIAddSubV con) %{ predicate(UseSVE > 0); - match(Set dst_src1 (DivVD dst_src1 src2)); + match(Set dst_src (AddVI dst_src (ReplicateI con))); ins_cost(SVE_COST); - format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) (D)" %} + format %{ "sve_add $dst_src, $dst_src, $con\t # vector (sve) (S)" %} ins_encode %{ - __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ D, - ptrue, as_FloatRegister($src2$$reg)); + int32_t val = $con$$constant; + if (val > 0){ + __ sve_add(as_FloatRegister($dst_src$$reg), __ S, val); + } else if (val < 0){ + __ sve_sub(as_FloatRegister($dst_src$$reg), __ S, -val); + } %} ins_pipe(pipe_slow); %} -// vector min/max - -instruct vmin(vReg dst_src1, vReg src2) %{ +instruct vaddImmL(vReg dst_src, immLAddSubV con) %{ predicate(UseSVE > 0); - match(Set dst_src1 (MinV dst_src1 src2)); + match(Set dst_src (AddVL dst_src (ReplicateL con))); ins_cost(SVE_COST); - format %{ "sve_min $dst_src1, $dst_src1, $src2\t # vector (sve)" %} + format %{ "sve_add $dst_src, $dst_src, $con\t # vector (sve) (D)" %} ins_encode %{ - BasicType bt = Matcher::vector_element_basic_type(this); - Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); - if (is_floating_point_type(bt)) { - __ sve_fmin(as_FloatRegister($dst_src1$$reg), size, - ptrue, as_FloatRegister($src2$$reg)); - } else { - assert(is_integral_type(bt), "Unsupported type"); - __ sve_smin(as_FloatRegister($dst_src1$$reg), size, - ptrue, as_FloatRegister($src2$$reg)); + int32_t val = $con$$constant; + if (val > 0){ + __ sve_add(as_FloatRegister($dst_src$$reg), __ D, val); + } else if (val < 0){ + __ sve_sub(as_FloatRegister($dst_src$$reg), __ D, -val); } %} ins_pipe(pipe_slow); %} -instruct vmax(vReg dst_src1, vReg src2) %{ +// vector binary op reg imm (unpredicated) + +instruct vandB(vReg dst_src, immBLog con) %{ predicate(UseSVE > 0); - match(Set dst_src1 (MaxV dst_src1 src2)); + match(Set dst_src (AndV dst_src (ReplicateB con))); ins_cost(SVE_COST); - format %{ "sve_max $dst_src1, $dst_src1, $src2\t # vector 
(sve)" %} + format %{ "sve_and $dst_src, $dst_src, $con\t # vector (sve) (B)" %} ins_encode %{ - BasicType bt = Matcher::vector_element_basic_type(this); - Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); - if (is_floating_point_type(bt)) { - __ sve_fmax(as_FloatRegister($dst_src1$$reg), size, - ptrue, as_FloatRegister($src2$$reg)); - } else { - assert(is_integral_type(bt), "Unsupported type"); - __ sve_smax(as_FloatRegister($dst_src1$$reg), size, - ptrue, as_FloatRegister($src2$$reg)); - } + __ sve_and(as_FloatRegister($dst_src$$reg), __ B, + (uint64_t)($con$$constant)); %} ins_pipe(pipe_slow); %} -// vector fmla - -// dst_src1 = dst_src1 + src2 * src3 -instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0); - match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); +instruct vandH(vReg dst_src, immSLog con) %{ + predicate(UseSVE > 0); + match(Set dst_src (AndV dst_src (ReplicateS con))); ins_cost(SVE_COST); - format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %} + format %{ "sve_and $dst_src, $dst_src, $con\t # vector (sve) (H)" %} ins_encode %{ - __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ S, - ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + __ sve_and(as_FloatRegister($dst_src$$reg), __ H, + (uint64_t)($con$$constant)); %} ins_pipe(pipe_slow); %} -// dst_src1 = dst_src1 + src2 * src3 -instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0); - match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); +instruct vandS(vReg dst_src, immILog con) %{ + predicate(UseSVE > 0); + match(Set dst_src (AndV dst_src (ReplicateI con))); ins_cost(SVE_COST); - format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %} + format %{ "sve_and $dst_src, $dst_src, $con\t # vector (sve) (S)" %} ins_encode %{ - __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ D, - ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + __ sve_and(as_FloatRegister($dst_src$$reg), __ S, + (uint64_t)($con$$constant)); %} ins_pipe(pipe_slow); %} -// vector fmls - -// dst_src1 = dst_src1 + -src2 * src3 -// dst_src1 = dst_src1 + src2 * -src3 -instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0); - match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); - match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); +instruct vandD(vReg dst_src, immLLog con) %{ + predicate(UseSVE > 0); + match(Set dst_src (AndV dst_src (ReplicateL con))); ins_cost(SVE_COST); - format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %} + format %{ "sve_and $dst_src, $dst_src, $con\t # vector (sve) (D)" %} ins_encode %{ - __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ S, - ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + __ sve_and(as_FloatRegister($dst_src$$reg), __ D, + (uint64_t)($con$$constant)); %} ins_pipe(pipe_slow); %} -// dst_src1 = dst_src1 + -src2 * src3 -// dst_src1 = dst_src1 + src2 * -src3 -instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0); - match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); - match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); +instruct vorB(vReg dst_src, immBLog con) %{ + predicate(UseSVE > 0); + match(Set dst_src (OrV dst_src (ReplicateB con))); ins_cost(SVE_COST); - format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %} + format %{ "sve_orr $dst_src, $dst_src, $con\t # vector (sve) (B)" %} ins_encode %{ - __ 
sve_fmls(as_FloatRegister($dst_src1$$reg), __ D, - ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + __ sve_orr(as_FloatRegister($dst_src$$reg), __ B, + (uint64_t)($con$$constant)); %} ins_pipe(pipe_slow); %} -// vector fnmla - -// dst_src1 = -dst_src1 + -src2 * src3 -// dst_src1 = -dst_src1 + src2 * -src3 -instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0); - match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); - match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); +instruct vorH(vReg dst_src, immSLog con) %{ + predicate(UseSVE > 0); + match(Set dst_src (OrV dst_src (ReplicateS con))); ins_cost(SVE_COST); - format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %} + format %{ "sve_orr $dst_src, $dst_src, $con\t # vector (sve) (H)" %} ins_encode %{ - __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ S, - ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + __ sve_orr(as_FloatRegister($dst_src$$reg), __ H, + (uint64_t)($con$$constant)); %} ins_pipe(pipe_slow); %} -// dst_src1 = -dst_src1 + -src2 * src3 -// dst_src1 = -dst_src1 + src2 * -src3 -instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0); - match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); - match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); +instruct vorS(vReg dst_src, immILog con) %{ + predicate(UseSVE > 0); + match(Set dst_src (OrV dst_src (ReplicateI con))); ins_cost(SVE_COST); - format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %} + format %{ "sve_orr $dst_src, $dst_src, $con\t # vector (sve) (S)" %} ins_encode %{ - __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ D, - ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + __ sve_orr(as_FloatRegister($dst_src$$reg), __ S, + (uint64_t)($con$$constant)); %} ins_pipe(pipe_slow); %} -// vector fnmls - -// dst_src1 = -dst_src1 + src2 * src3 -instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0); - match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); +instruct vorD(vReg dst_src, immLLog con) %{ + predicate(UseSVE > 0); + match(Set dst_src (OrV dst_src (ReplicateL con))); ins_cost(SVE_COST); - format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %} + format %{ "sve_orr $dst_src, $dst_src, $con\t # vector (sve) (D)" %} ins_encode %{ - __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ S, - ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + __ sve_orr(as_FloatRegister($dst_src$$reg), __ D, + (uint64_t)($con$$constant)); %} ins_pipe(pipe_slow); %} -// dst_src1 = -dst_src1 + src2 * src3 -instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0); - match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); +instruct vxorB(vReg dst_src, immBLog con) %{ + predicate(UseSVE > 0); + match(Set dst_src (XorV dst_src (ReplicateB con))); ins_cost(SVE_COST); - format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %} + format %{ "sve_eor $dst_src, $dst_src, $con\t # vector (sve) (B)" %} ins_encode %{ - __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ D, - ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + __ sve_eor(as_FloatRegister($dst_src$$reg), __ B, + (uint64_t)($con$$constant)); %} ins_pipe(pipe_slow); %} -// vector mla - -// dst_src1 = dst_src1 + src2 * src3 -instruct vmlaB(vReg dst_src1, vReg src2, vReg 
src3) -%{ +instruct vxorH(vReg dst_src, immSLog con) %{ predicate(UseSVE > 0); - match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); + match(Set dst_src (XorV dst_src (ReplicateS con))); ins_cost(SVE_COST); - format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (B)" %} + format %{ "sve_eor $dst_src, $dst_src, $con\t # vector (sve) (H)" %} ins_encode %{ - __ sve_mla(as_FloatRegister($dst_src1$$reg), __ B, - ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + __ sve_eor(as_FloatRegister($dst_src$$reg), __ H, + (uint64_t)($con$$constant)); %} ins_pipe(pipe_slow); %} -// dst_src1 = dst_src1 + src2 * src3 -instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) -%{ +instruct vxorS(vReg dst_src, immILog con) %{ predicate(UseSVE > 0); - match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); + match(Set dst_src (XorV dst_src (ReplicateI con))); ins_cost(SVE_COST); - format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (H)" %} + format %{ "sve_eor $dst_src, $dst_src, $con\t # vector (sve) (S)" %} ins_encode %{ - __ sve_mla(as_FloatRegister($dst_src1$$reg), __ H, - ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + __ sve_eor(as_FloatRegister($dst_src$$reg), __ S, + (uint64_t)($con$$constant)); %} ins_pipe(pipe_slow); %} -// dst_src1 = dst_src1 + src2 * src3 -instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) -%{ +instruct vxorD(vReg dst_src, immLLog con) %{ predicate(UseSVE > 0); - match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); + match(Set dst_src (XorV dst_src (ReplicateL con))); ins_cost(SVE_COST); - format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (S)" %} + format %{ "sve_eor $dst_src, $dst_src, $con\t # vector (sve) (D)" %} ins_encode %{ - __ sve_mla(as_FloatRegister($dst_src1$$reg), __ S, - ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + __ sve_eor(as_FloatRegister($dst_src$$reg), __ D, + (uint64_t)($con$$constant)); %} ins_pipe(pipe_slow); %} +// vector and -// dst_src1 = dst_src1 + src2 * src3 -instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) -%{ +instruct vand(vReg dst, vReg src1, vReg src2) %{ predicate(UseSVE > 0); - match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); + match(Set dst (AndV src1 src2)); ins_cost(SVE_COST); - format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (D)" %} + format %{ "sve_and $dst, $src1, $src2\t# vector (sve)" %} ins_encode %{ - __ sve_mla(as_FloatRegister($dst_src1$$reg), __ D, - ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + __ sve_and(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -// vector mls +// vector or -// dst_src1 = dst_src1 - src2 * src3 -instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) -%{ +instruct vor(vReg dst, vReg src1, vReg src2) %{ predicate(UseSVE > 0); - match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); + match(Set dst (OrV src1 src2)); ins_cost(SVE_COST); - format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (B)" %} + format %{ "sve_orr $dst, $src1, $src2\t# vector (sve)" %} ins_encode %{ - __ sve_mls(as_FloatRegister($dst_src1$$reg), __ B, - ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + __ sve_orr(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -// dst_src1 = dst_src1 - src2 * src3 -instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) -%{ +// vector xor + +instruct vxor(vReg dst, vReg src1, vReg src2) %{ predicate(UseSVE > 0); - 
match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); + match(Set dst (XorV src1 src2)); ins_cost(SVE_COST); - format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (H)" %} + format %{ "sve_eor $dst, $src1, $src2\t# vector (sve)" %} ins_encode %{ - __ sve_mls(as_FloatRegister($dst_src1$$reg), __ H, - ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + __ sve_eor(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -// dst_src1 = dst_src1 - src2 * src3 -instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) -%{ +// vector and - predicated + +instruct vand_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ predicate(UseSVE > 0); - match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); + match(Set dst_src1 (AndV (Binary dst_src1 src2) pg)); ins_cost(SVE_COST); - format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (S)" %} + format %{ "sve_and $dst_src1, $pg, $dst_src1, $src2\t # vector (sve)" %} ins_encode %{ - __ sve_mls(as_FloatRegister($dst_src1$$reg), __ S, - ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_and(as_FloatRegister($dst_src1$$reg), size, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -// dst_src1 = dst_src1 - src2 * src3 -instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) -%{ +// vector or - predicated + +instruct vor_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ predicate(UseSVE > 0); - match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); + match(Set dst_src1 (OrV (Binary dst_src1 src2) pg)); ins_cost(SVE_COST); - format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (D)" %} + format %{ "sve_orr $dst_src1, $pg, $dst_src1, $src2\t # vector (sve)" %} ins_encode %{ - __ sve_mls(as_FloatRegister($dst_src1$$reg), __ D, - ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_orr(as_FloatRegister($dst_src1$$reg), size, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} +// vector xor - predicated -// vector mul - -instruct vmulB(vReg dst_src1, vReg src2) %{ +instruct vxor_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ predicate(UseSVE > 0); - match(Set dst_src1 (MulVB dst_src1 src2)); + match(Set dst_src1 (XorV (Binary dst_src1 src2) pg)); ins_cost(SVE_COST); - format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (B)" %} + format %{ "sve_eor $dst_src1, $pg, $dst_src1, $src2\t # vector (sve)" %} ins_encode %{ - __ sve_mul(as_FloatRegister($dst_src1$$reg), __ B, - ptrue, as_FloatRegister($src2$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_eor(as_FloatRegister($dst_src1$$reg), size, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -instruct vmulS(vReg dst_src1, vReg src2) %{ +// vector not + +instruct vnotI(vReg dst, vReg src, immI_M1 m1) %{ predicate(UseSVE > 0); - match(Set dst_src1 (MulVS dst_src1 src2)); + match(Set dst (XorV src (ReplicateB m1))); + match(Set dst (XorV src (ReplicateS m1))); + match(Set dst (XorV src (ReplicateI m1))); ins_cost(SVE_COST); - format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (H)" %} + format %{ "sve_not $dst, $src\t# 
vector (sve) B/H/S" %} ins_encode %{ - __ sve_mul(as_FloatRegister($dst_src1$$reg), __ H, - ptrue, as_FloatRegister($src2$$reg)); + __ sve_not(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); %} ins_pipe(pipe_slow); %} -instruct vmulI(vReg dst_src1, vReg src2) %{ +instruct vnotL(vReg dst, vReg src, immL_M1 m1) %{ predicate(UseSVE > 0); - match(Set dst_src1 (MulVI dst_src1 src2)); + match(Set dst (XorV src (ReplicateL m1))); ins_cost(SVE_COST); - format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %} + format %{ "sve_not $dst, $src\t# vector (sve) D" %} ins_encode %{ - __ sve_mul(as_FloatRegister($dst_src1$$reg), __ S, - ptrue, as_FloatRegister($src2$$reg)); + __ sve_not(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); %} ins_pipe(pipe_slow); %} -instruct vmulL(vReg dst_src1, vReg src2) %{ +// vector and_not + +instruct vand_notI(vReg dst, vReg src1, vReg src2, immI_M1 m1) %{ predicate(UseSVE > 0); - match(Set dst_src1 (MulVL dst_src1 src2)); + match(Set dst (AndV src1 (XorV src2 (ReplicateB m1)))); + match(Set dst (AndV src1 (XorV src2 (ReplicateS m1)))); + match(Set dst (AndV src1 (XorV src2 (ReplicateI m1)))); ins_cost(SVE_COST); - format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %} + format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) B/H/S" %} ins_encode %{ - __ sve_mul(as_FloatRegister($dst_src1$$reg), __ D, - ptrue, as_FloatRegister($src2$$reg)); + __ sve_bic(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -instruct vmulF(vReg dst, vReg src1, vReg src2) %{ +instruct vand_notL(vReg dst, vReg src1, vReg src2, immL_M1 m1) %{ predicate(UseSVE > 0); - match(Set dst (MulVF src1 src2)); + match(Set dst (AndV src1 (XorV src2 (ReplicateL m1)))); ins_cost(SVE_COST); - format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (S)" %} + format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) D" %} ins_encode %{ - __ sve_fmul(as_FloatRegister($dst$$reg), __ S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); + __ sve_bic(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -instruct vmulD(vReg dst, vReg src1, vReg src2) %{ +// vector float div + +instruct vdivF(vReg dst_src1, vReg src2) %{ predicate(UseSVE > 0); - match(Set dst (MulVD src1 src2)); + match(Set dst_src1 (DivVF dst_src1 src2)); ins_cost(SVE_COST); - format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (D)" %} + format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) (S)" %} ins_encode %{ - __ sve_fmul(as_FloatRegister($dst$$reg), __ D, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); + __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -// vector fneg +instruct vdivD(vReg dst_src1, vReg src2) %{ + predicate(UseSVE > 0); + match(Set dst_src1 (DivVD dst_src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} -instruct vnegF(vReg dst, vReg src) %{ +// vector float div - predicated + +instruct vfdivF_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ predicate(UseSVE > 0); - match(Set dst (NegVF src)); + match(Set dst_src1 (DivVF (Binary dst_src1 src2) pg)); ins_cost(SVE_COST); - format %{ "sve_fneg $dst, $src\t# 
vector (sve) (S)" %} + format %{ "sve_fdiv $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %} ins_encode %{ - __ sve_fneg(as_FloatRegister($dst$$reg), __ S, - ptrue, as_FloatRegister($src$$reg)); + __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ S, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -instruct vnegD(vReg dst, vReg src) %{ +instruct vfdivD_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ predicate(UseSVE > 0); - match(Set dst (NegVD src)); + match(Set dst_src1 (DivVD (Binary dst_src1 src2) pg)); ins_cost(SVE_COST); - format %{ "sve_fneg $dst, $src\t# vector (sve) (D)" %} + format %{ "sve_fdiv $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %} ins_encode %{ - __ sve_fneg(as_FloatRegister($dst$$reg), __ D, - ptrue, as_FloatRegister($src$$reg)); + __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ D, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %} -// popcount vector +// vector min/max -instruct vpopcountI(vReg dst, vReg src) %{ +instruct vmin(vReg dst_src1, vReg src2) %{ predicate(UseSVE > 0); - match(Set dst (PopCountVI src)); - format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %} + match(Set dst_src1 (MinV dst_src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_min $dst_src1, $dst_src1, $src2\t # vector (sve)" %} ins_encode %{ - __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + if (is_floating_point_type(bt)) { + __ sve_fmin(as_FloatRegister($dst_src1$$reg), size, + ptrue, as_FloatRegister($src2$$reg)); + } else { + assert(is_integral_type(bt), "unsupported type"); + __ sve_smin(as_FloatRegister($dst_src1$$reg), size, + ptrue, as_FloatRegister($src2$$reg)); + } %} ins_pipe(pipe_slow); %} -// vector mask compare - -instruct vmaskcmp(vReg dst, vReg src1, vReg src2, immI cond, pRegGov pTmp, rFlagsReg cr) %{ +instruct vmax(vReg dst_src1, vReg src2) %{ predicate(UseSVE > 0); - match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); - effect(TEMP pTmp, KILL cr); - ins_cost(2 * SVE_COST); - format %{ "sve_cmp $pTmp, $src1, $src2\n\t" - "sve_cpy $dst, $pTmp, -1\t# vector mask cmp (sve)" %} + match(Set dst_src1 (MaxV dst_src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_max $dst_src1, $dst_src1, $src2\t # vector (sve)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); - __ sve_compare(as_PRegister($pTmp$$reg), bt, ptrue, as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg), (int)$cond$$constant); - __ sve_cpy(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt), - as_PRegister($pTmp$$reg), -1, false); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + if (is_floating_point_type(bt)) { + __ sve_fmax(as_FloatRegister($dst_src1$$reg), size, + ptrue, as_FloatRegister($src2$$reg)); + } else { + assert(is_integral_type(bt), "unsupported type"); + __ sve_smax(as_FloatRegister($dst_src1$$reg), size, + ptrue, as_FloatRegister($src2$$reg)); + } %} ins_pipe(pipe_slow); %} -// vector blend +// vector min/max - predicated -instruct vblend(vReg dst, vReg src1, vReg src2, vReg src3, pRegGov pTmp, rFlagsReg cr) %{ +instruct vmin_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ predicate(UseSVE > 0); - match(Set dst (VectorBlend (Binary src1 src2) src3)); - effect(TEMP pTmp, KILL cr); - ins_cost(2 * SVE_COST); - format %{ "sve_cmpeq $pTmp, $src3, -1\n\t" - "sve_sel $dst, $pTmp, $src2, $src1\t# vector 
blend (sve)" %} + match(Set dst_src1 (MinV (Binary dst_src1 src2) pg)); + ins_cost(SVE_COST); + format %{ "sve_min $dst_src1, $pg, $dst_src1, $src2\t# vector (sve)" %} ins_encode %{ - Assembler::SIMD_RegVariant size = - __ elemType_to_regVariant(Matcher::vector_element_basic_type(this)); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, - ptrue, as_FloatRegister($src3$$reg), -1); - __ sve_sel(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), - as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + if (is_floating_point_type(bt)) { + __ sve_fmin(as_FloatRegister($dst_src1$$reg), size, + as_PRegister($pg$$reg), as_FloatRegister($src2$$reg)); + } else { + assert(is_integral_type(bt), "unsupported type"); + __ sve_smin(as_FloatRegister($dst_src1$$reg), size, + as_PRegister($pg$$reg), as_FloatRegister($src2$$reg)); + } %} ins_pipe(pipe_slow); %} -// vector blend with compare - -instruct vblend_maskcmp(vReg dst, vReg src1, vReg src2, vReg src3, - vReg src4, pRegGov pTmp, immI cond, rFlagsReg cr) %{ +instruct vmax_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ predicate(UseSVE > 0); - match(Set dst (VectorBlend (Binary src1 src2) (VectorMaskCmp (Binary src3 src4) cond))); - effect(TEMP pTmp, KILL cr); - ins_cost(2 * SVE_COST); - format %{ "sve_cmp $pTmp, $src3, $src4\t# vector cmp (sve)\n\t" - "sve_sel $dst, $pTmp, $src2, $src1\t# vector blend (sve)" %} + match(Set dst_src1 (MaxV (Binary dst_src1 src2) pg)); + ins_cost(SVE_COST); + format %{ "sve_max $dst_src1, $pg, $dst_src1, $src2\t# vector (sve)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); - __ sve_compare(as_PRegister($pTmp$$reg), bt, ptrue, as_FloatRegister($src3$$reg), - as_FloatRegister($src4$$reg), (int)$cond$$constant); - __ sve_sel(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt), - as_PRegister($pTmp$$reg), as_FloatRegister($src2$$reg), - as_FloatRegister($src1$$reg)); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + if (is_floating_point_type(bt)) { + __ sve_fmax(as_FloatRegister($dst_src1$$reg), size, + as_PRegister($pg$$reg), as_FloatRegister($src2$$reg)); + } else { + assert(is_integral_type(bt), "unsupported type"); + __ sve_smax(as_FloatRegister($dst_src1$$reg), size, + as_PRegister($pg$$reg), as_FloatRegister($src2$$reg)); + } %} ins_pipe(pipe_slow); %} -// vector load mask +// vector fmla -instruct vloadmaskB(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); - match(Set dst (VectorLoadMask src)); +// dst_src1 = dst_src1 + src2 * src3 +instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA && UseSVE > 0); + match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); ins_cost(SVE_COST); - format %{ "sve_neg $dst, $src\t# vector load mask (B)" %} + format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %} ins_encode %{ - __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, as_FloatRegister($src$$reg)); + __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); %} ins_pipe(pipe_slow); %} -instruct vloadmaskS(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); - match(Set dst (VectorLoadMask src)); - ins_cost(2 * SVE_COST); - format %{ "sve_uunpklo $dst, H, $src\n\t" - "sve_neg $dst, $dst\t# vector load 
mask (B to H)" %} +// dst_src1 = dst_src1 + src2 * src3 +instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA && UseSVE > 0); + match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); + ins_cost(SVE_COST); + format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %} ins_encode %{ - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); - __ sve_neg(as_FloatRegister($dst$$reg), __ H, ptrue, as_FloatRegister($dst$$reg)); + __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); %} ins_pipe(pipe_slow); %} -instruct vloadmaskI(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && - (n->bottom_type()->is_vect()->element_basic_type() == T_INT || - n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); - match(Set dst (VectorLoadMask src)); - ins_cost(3 * SVE_COST); - format %{ "sve_uunpklo $dst, H, $src\n\t" - "sve_uunpklo $dst, S, $dst\n\t" - "sve_neg $dst, $dst\t# vector load mask (B to S)" %} +// vector fmla - predicated + +// dst_src1 = dst_src1 * src2 + src3 +instruct vfmlaF_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ + predicate(UseFMA && UseSVE > 0); + match(Set dst_src1 (FmaVF (Binary dst_src1 src2) (Binary src3 pg))); + ins_cost(SVE_COST); + format %{ "sve_fmad $dst_src1, $pg, $src2, $src3\t# vector (sve) (S)" %} ins_encode %{ - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg)); - __ sve_neg(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg)); + __ sve_fmad(as_FloatRegister($dst_src1$$reg), __ S, as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); %} ins_pipe(pipe_slow); %} -instruct vloadmaskL(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && - (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || - n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); - match(Set dst (VectorLoadMask src)); - ins_cost(4 * SVE_COST); - format %{ "sve_uunpklo $dst, H, $src\n\t" - "sve_uunpklo $dst, S, $dst\n\t" - "sve_uunpklo $dst, D, $dst\n\t" - "sve_neg $dst, $dst\t# vector load mask (B to D)" %} +// dst_src1 = dst_src1 * src2 + src3 +instruct vfmlaD_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ + predicate(UseFMA && UseSVE > 0); + match(Set dst_src1 (FmaVD (Binary dst_src1 src2) (Binary src3 pg))); + ins_cost(SVE_COST); + format %{ "sve_fmad $dst_src1, $pg, $src2, $src3\t# vector (sve) (D)" %} ins_encode %{ - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg)); - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg)); - __ sve_neg(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg)); + __ sve_fmad(as_FloatRegister($dst_src1$$reg), __ D, as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); %} ins_pipe(pipe_slow); %} -// vector store mask +// vector fmls -instruct vstoremaskB(vReg dst, vReg src, immI_1 size) %{ - predicate(UseSVE > 0); - match(Set dst (VectorStoreMask src size)); +// dst_src1 = dst_src1 + -src2 * src3 +// dst_src1 = dst_src1 + src2 * -src3 +instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA && UseSVE > 0); + match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); + match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); 
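
As context for the logical rules earlier in this section: the OrV/XorV-of-a-replicated-constant shapes, the three-register and/or/xor, and their predicated forms are the IR shapes C2 typically builds from Java Vector API calls. A minimal sketch, assuming a JDK with the incubating jdk.incubator.vector module enabled (--add-modules jdk.incubator.vector); the class and method names are illustrative only, not part of this patch:

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorOperators;

    class SveLogicalShapes {
        // OrV (ReplicateI con): matchable by a rule like vorS when the
        // constant fits SVE's logical-immediate encoding (the immILog
        // operand type is what gates this).
        static IntVector orImmediate(IntVector v) {
            return v.lanewise(VectorOperators.OR, 0x0F0F0F0F);
        }

        // XorV (Binary dst_src1 src2) pg: the predicated vxor_masked shape.
        static IntVector xorMasked(IntVector a, IntVector b, VectorMask<Integer> m) {
            return a.lanewise(VectorOperators.XOR, b, m);
        }

        // NOT expands to XorV src (ReplicateI -1), which vnotI folds into
        // a single sve_not.
        static IntVector not(IntVector v) {
            return v.lanewise(VectorOperators.NOT);
        }

        // AND_NOT (a & ~b) expands to AndV a (XorV b (ReplicateI -1)),
        // which vand_notI folds into a single sve_bic.
        static IntVector andNot(IntVector a, IntVector b) {
            return a.lanewise(VectorOperators.AND_NOT, b);
        }
    }
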
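Likewise, the predicated float division above maps onto masked lanewise division, and the vmin/vmax rules show a pattern this patch uses repeatedly: a single rule per opcode that picks sve_fmin/sve_fmax versus sve_smin/sve_smax from the element type at code-emission time. A sketch under the same assumptions:

    import jdk.incubator.vector.FloatVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorOperators;

    class SvePredicatedFloatShapes {
        // DivVF (Binary dst_src1 src2) pg: vfdivF_masked. SVE fdiv is
        // destructive, which is why the rule reuses dst_src1.
        static FloatVector divMasked(FloatVector a, FloatVector b, VectorMask<Float> m) {
            return a.div(b, m);
        }

        // MinV/MaxV: vmin/vmax dispatch on the element type, so one rule
        // serves both integral and floating-point vectors.
        static FloatVector clamp(FloatVector v, FloatVector lo, FloatVector hi) {
            return v.max(lo).min(hi);
        }

        // MaxV (Binary dst_src1 src2) pg: the predicated vmax_masked shape.
        static FloatVector maxMasked(FloatVector a, FloatVector b, VectorMask<Float> m) {
            return a.lanewise(VectorOperators.MAX, b, m);
        }
    }
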
ins_cost(SVE_COST); - format %{ "sve_neg $dst, $src\t# vector store mask (B)" %} + format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %} ins_encode %{ - __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, - as_FloatRegister($src$$reg)); + __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); %} ins_pipe(pipe_slow); %} -instruct vstoremaskS(vReg dst, vReg src, vReg tmp, immI_2 size) %{ - predicate(UseSVE > 0); - match(Set dst (VectorStoreMask src size)); - effect(TEMP_DEF dst, TEMP tmp); - ins_cost(3 * SVE_COST); - format %{ "sve_dup $tmp, H, 0\n\t" - "sve_uzp1 $dst, B, $src, $tmp\n\t" - "sve_neg $dst, B, $dst\t# vector store mask (sve) (H to B)" %} +// dst_src1 = dst_src1 + -src2 * src3 +// dst_src1 = dst_src1 + src2 * -src3 +instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA && UseSVE > 0); + match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); + match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); + ins_cost(SVE_COST); + format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, - as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); - __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, - as_FloatRegister($dst$$reg)); + __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector fnmla +// dst_src1 = -dst_src1 + -src2 * src3 +// dst_src1 = -dst_src1 + src2 * -src3 +instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA && UseSVE > 0); + match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); + match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); + ins_cost(SVE_COST); + format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %} + ins_encode %{ + __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); %} ins_pipe(pipe_slow); %} -instruct vstoremaskI(vReg dst, vReg src, vReg tmp, immI_4 size) %{ - predicate(UseSVE > 0); - match(Set dst (VectorStoreMask src size)); - effect(TEMP_DEF dst, TEMP tmp); - ins_cost(4 * SVE_COST); - format %{ "sve_dup $tmp, S, 0\n\t" - "sve_uzp1 $dst, H, $src, $tmp\n\t" - "sve_uzp1 $dst, B, $dst, $tmp\n\t" - "sve_neg $dst, B, $dst\t# vector store mask (sve) (S to B)" %} +// dst_src1 = -dst_src1 + -src2 * src3 +// dst_src1 = -dst_src1 + src2 * -src3 +instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA && UseSVE > 0); + match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); + match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); + ins_cost(SVE_COST); + format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %} + ins_encode %{ + __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector fnmls + +// dst_src1 = -dst_src1 + src2 * src3 +instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA && UseSVE > 0); + match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); + ins_cost(SVE_COST); + format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %} + ins_encode %{ + __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg), 
as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = -dst_src1 + src2 * src3
+instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{
+  predicate(UseFMA && UseSVE > 0);
+  match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
   ins_encode %{
-    __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
-    __ sve_uzp1(as_FloatRegister($dst$$reg), __ H,
-         as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
-    __ sve_uzp1(as_FloatRegister($dst$$reg), __ B,
-         as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
-    __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
-         as_FloatRegister($dst$$reg));
+    __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ D,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
   %}
   ins_pipe(pipe_slow);
 %}

-instruct vstoremaskL(vReg dst, vReg src, vReg tmp, immI_8 size) %{
+// vector mla
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaB(vReg dst_src1, vReg src2, vReg src3)
+%{
   predicate(UseSVE > 0);
-  match(Set dst (VectorStoreMask src size));
-  effect(TEMP_DEF dst, TEMP tmp);
-  ins_cost(5 * SVE_COST);
-  format %{ "sve_dup $tmp, D, 0\n\t"
-            "sve_uzp1 $dst, S, $src, $tmp\n\t"
-            "sve_uzp1 $dst, H, $dst, $tmp\n\t"
-            "sve_uzp1 $dst, B, $dst, $tmp\n\t"
-            "sve_neg $dst, B, $dst\t# vector store mask (sve) (D to B)" %}
+  match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mla $dst_src1, $src2, $src3\t # vector (sve) (B)" %}
   ins_encode %{
-    __ sve_dup(as_FloatRegister($tmp$$reg), __ D, 0);
-    __ sve_uzp1(as_FloatRegister($dst$$reg), __ S,
-         as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
-    __ sve_uzp1(as_FloatRegister($dst$$reg), __ H,
-         as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
-    __ sve_uzp1(as_FloatRegister($dst$$reg), __ B,
-         as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
-    __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
-         as_FloatRegister($dst$$reg));
+    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ B,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
   %}
   ins_pipe(pipe_slow);
 %}

-// load/store mask vector
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaS(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mla $dst_src1, $src2, $src3\t # vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ H,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}

-instruct vloadmask_loadV_byte(vReg dst, vmemA mem) %{
-  predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() == MaxVectorSize &&
-            type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) == 1);
-  match(Set dst (VectorLoadMask (LoadVector mem)));
-  ins_cost(5 * SVE_COST);
-  format %{ "sve_ld1b $dst, $mem\n\t"
-            "sve_neg $dst, $dst\t# load vector mask (sve)" %}
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaI(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mla $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
   ins_encode %{
-    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ S,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaL(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mla $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ D,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector mls
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsB(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mls $dst_src1, $src2, $src3\t # vector (sve) (B)" %}
+  ins_encode %{
+    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ B,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsS(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mls $dst_src1, $src2, $src3\t # vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ H,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsI(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ S,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsL(vReg dst_src1, vReg src2, vReg src3)
+%{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
+  ins_cost(SVE_COST);
+  format %{ "sve_mls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ D,
+         ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector mul
+
+instruct vmulB(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (MulVB dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (B)" %}
+  ins_encode %{
+    __ sve_mul(as_FloatRegister($dst_src1$$reg), __ B,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulS(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (MulVS dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_mul(as_FloatRegister($dst_src1$$reg), __ H,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulI(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (MulVI dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_mul(as_FloatRegister($dst_src1$$reg), __ S,
+         ptrue, as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulL(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (MulVL dst_src1 src2));
+  ins_cost(SVE_COST);
+  format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_mul(as_FloatRegister($dst_src1$$reg), __ D,
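
The fmla/fmls/fnmla/fnmls rules above all match FmaV nodes and differ only in where NegV appears; the Vector API produces those shapes from fma combined with neg. An illustrative sketch (these rules additionally require UseFMA; names are mine):

    import jdk.incubator.vector.FloatVector;

    class SveFmaShapes {
        // FmaVF acc (Binary x y), i.e. acc + x * y: vfmlaF -> sve_fmla.
        static FloatVector fmla(FloatVector acc, FloatVector x, FloatVector y) {
            return x.fma(y, acc);
        }

        // Negating a multiplicand gives FmaVF acc (Binary (NegVF x) y),
        // i.e. acc - x * y: vfmlsF -> sve_fmls. Negating acc as well
        // yields the fnmla/fnmls shapes.
        static FloatVector fmls(FloatVector acc, FloatVector x, FloatVector y) {
            return x.neg().fma(y, acc);
        }
    }
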
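The integer mla/mls rules in the same block fuse a multiply feeding an add or subtract into one predicated sve_mla/sve_mls, so no temporary vector register is needed. A hypothetical kernel that produces those node shapes:

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorSpecies;

    class SveMlaShapes {
        static final VectorSpecies<Integer> SP = IntVector.SPECIES_PREFERRED;

        // acc + x * y builds AddVI acc (MulVI x y): vmlaI -> sve_mla.
        // acc - x * y would build SubVI acc (MulVI x y): vmlsI -> sve_mls.
        static void fusedMultiplyAdd(int[] acc, int[] x, int[] y, int i) {
            IntVector a = IntVector.fromArray(SP, acc, i);
            IntVector v = IntVector.fromArray(SP, x, i);
            IntVector w = IntVector.fromArray(SP, y, i);
            a.add(v.mul(w)).intoArray(acc, i);
        }
    }
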
ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulF(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0); + match(Set dst (MulVF src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (S)" %} + ins_encode %{ + __ sve_fmul(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulD(vReg dst, vReg src1, vReg src2) %{ + predicate(UseSVE > 0); + match(Set dst (MulVD src1 src2)); + ins_cost(SVE_COST); + format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (D)" %} + ins_encode %{ + __ sve_fmul(as_FloatRegister($dst$$reg), __ D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector mul - predicated + +instruct vmulB_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src1 (MulVB (Binary dst_src1 src2) pg)); + ins_cost(SVE_COST); + format %{ "sve_mul $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (B)" %} + ins_encode %{ + __ sve_mul(as_FloatRegister($dst_src1$$reg), __ B, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulS_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src1 (MulVS (Binary dst_src1 src2) pg)); + ins_cost(SVE_COST); + format %{ "sve_mul $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (H)" %} + ins_encode %{ + __ sve_mul(as_FloatRegister($dst_src1$$reg), __ H, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulI_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src1 (MulVI (Binary dst_src1 src2) pg)); + ins_cost(SVE_COST); + format %{ "sve_mul $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_mul(as_FloatRegister($dst_src1$$reg), __ S, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulL_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src1 (MulVL (Binary dst_src1 src2) pg)); + ins_cost(SVE_COST); + format %{ "sve_mul $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_mul(as_FloatRegister($dst_src1$$reg), __ D, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulF_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src1 (MulVF (Binary dst_src1 src2) pg)); + ins_cost(SVE_COST); + format %{ "sve_fmul $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_fmul(as_FloatRegister($dst_src1$$reg), __ S, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulD_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src1 (MulVD (Binary dst_src1 src2) pg)); + ins_cost(SVE_COST); + format %{ "sve_fmul $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_fmul(as_FloatRegister($dst_src1$$reg), __ D, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector fneg + +instruct vnegF(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && + !n->as_Vector()->is_predicated_vector()); + match(Set dst (NegVF src)); + ins_cost(SVE_COST); + format %{ "sve_fneg $dst, $src\t# vector (sve) (S)" %} + ins_encode %{ + __ 
sve_fneg(as_FloatRegister($dst$$reg), __ S, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vnegD(vReg dst, vReg src) %{ + predicate(UseSVE > 0 && + !n->as_Vector()->is_predicated_vector()); + match(Set dst (NegVD src)); + ins_cost(SVE_COST); + format %{ "sve_fneg $dst, $src\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_fneg(as_FloatRegister($dst$$reg), __ D, + ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector fneg - predicated + +instruct vnegF_masked(vReg dst_src, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src (NegVF dst_src pg)); + ins_cost(SVE_COST); + format %{ "sve_fneg $dst_src, $pg, $dst_src\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_fneg(as_FloatRegister($dst_src$$reg), __ S, + as_PRegister($pg$$reg), + as_FloatRegister($dst_src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vnegD_masked(vReg dst_src, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src (NegVD dst_src pg)); + ins_cost(SVE_COST); + format %{ "sve_fneg $dst_src, $pg, $dst_src\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_fneg(as_FloatRegister($dst_src$$reg), __ D, + as_PRegister($pg$$reg), + as_FloatRegister($dst_src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// popcount vector + +instruct vpopcountI(vReg dst, vReg src) %{ + predicate(UseSVE > 0); + match(Set dst (PopCountVI src)); + format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %} + ins_encode %{ + __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector mask compare + +instruct vmaskcmp(pRegGov dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %{ + predicate(UseSVE > 0); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + effect(KILL cr); + ins_cost(SVE_COST); + format %{ "sve_cmp $dst, $src1, $src2\t# vector mask cmp (sve)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ sve_compare(as_PRegister($dst$$reg), bt, ptrue, as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg), (int)$cond$$constant); + %} + ins_pipe(pipe_slow); +%} + +instruct vmaskcmp_masked(pRegGov dst, vReg src1, vReg src2, immI cond, pRegGov pg, rFlagsReg cr) %{ + predicate(UseSVE > 0); + match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond pg))); + effect(KILL cr); + ins_cost(SVE_COST); + format %{ "sve_cmp $dst, $pg, $src1, $src2\t# vector mask cmp (sve)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ sve_compare(as_PRegister($dst$$reg), bt, as_PRegister($pg$$reg), as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg), (int)$cond$$constant); + %} + ins_pipe(pipe_slow); +%} + +// vector blend + +instruct vblend(vReg dst, vReg src1, vReg src2, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst (VectorBlend (Binary src1 src2) pg)); + ins_cost(SVE_COST); + format %{ "sve_sel $dst, $pg, $src2, $src1\t# vector blend (sve)" %} + ins_encode %{ + Assembler::SIMD_RegVariant size = + __ elemType_to_regVariant(Matcher::vector_element_basic_type(this)); + __ sve_sel(as_FloatRegister($dst$$reg), size, as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector load mask + +instruct vloadmaskB(pRegGov dst, vReg src, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorLoadMask src)); + effect(KILL cr); + ins_cost(SVE_COST); + format %{ "vloadmaskB $dst, $src\t# vector load mask 
(sve) (B)" %} + ins_encode %{ + __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), __ B, + ptrue, as_FloatRegister($src$$reg), 0); + %} + ins_pipe(pipe_slow); +%} + +instruct vloadmask_extend(pRegGov dst, vReg src, vReg tmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() != T_BYTE); + match(Set dst (VectorLoadMask src)); + effect(TEMP tmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "vloadmask $dst, $src\t# vector load mask (sve) (H/S/D)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_vector_extend(as_FloatRegister($tmp$$reg), size, as_FloatRegister($src$$reg), __ B); + __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), size, ptrue, as_FloatRegister($tmp$$reg), 0); + %} + ins_pipe(pipe_slow); +%} + +// vector store mask + +instruct vstoremaskB(vReg dst, pRegGov src, immI_1 size) %{ + predicate(UseSVE > 0); + match(Set dst (VectorStoreMask src size)); + ins_cost(SVE_COST); + format %{ "vstoremask $dst, $src\t# vector store mask (sve) (B)" %} + ins_encode %{ + __ sve_cpy(as_FloatRegister($dst$$reg), __ B, as_PRegister($src$$reg), 1, false); + %} + ins_pipe(pipe_slow); +%} + +instruct vstoremask_narrow(vReg dst, pRegGov src, vReg tmp, immI_gt_1 size) %{ + predicate(UseSVE > 0); + match(Set dst (VectorStoreMask src size)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(3 * SVE_COST); + format %{ "vstoremask $dst, $src\t# vector store mask (sve) (H/S/D)" %} + ins_encode %{ + Assembler::SIMD_RegVariant size = __ elemBytes_to_regVariant((int)$size$$constant); + __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($src$$reg), 1, false); + __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ B, + as_FloatRegister($dst$$reg), size, as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// Combine LoadVector+VectorLoadMask when the vector element type is not T_BYTE + +instruct vloadmask_loadV(pRegGov dst, indirect mem, vReg tmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_Vector()->length_in_bytes() == MaxVectorSize && + type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) > 1); + match(Set dst (VectorLoadMask (LoadVector mem))); + effect(TEMP tmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "sve_ld1b $tmp, $mem\n\t" + "sve_cmpne $dst, $tmp, 0\t# load vector mask (sve) (H/S/D)" %} + ins_encode %{ + // Load mask values which are boolean type, and extend them to the + // expected vector element type. Convert the vector to predicate. 
BasicType to_vect_bt = Matcher::vector_element_basic_type(this); - Assembler::SIMD_RegVariant to_vect_variant = __ elemType_to_regVariant(to_vect_bt); - loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue, - T_BOOLEAN, to_vect_bt, $mem->opcode(), + loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, as_FloatRegister($tmp$$reg), + ptrue, T_BOOLEAN, to_vect_bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), __ elemType_to_regVariant(to_vect_bt), + ptrue, as_FloatRegister($tmp$$reg), 0); + %} + ins_pipe(pipe_slow); +%} + +instruct vloadmask_loadV_partial(pRegGov dst, indirect mem, vReg vtmp, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_Vector()->length_in_bytes() > 16 && + n->as_Vector()->length_in_bytes() < MaxVectorSize && + type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) > 1); + match(Set dst (VectorLoadMask (LoadVector mem))); + effect(TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(6 * SVE_COST); + format %{ "vloadmask_loadV $dst, $mem\t# load vector mask partial (sve) (H/S/D)" %} + ins_encode %{ + // Load valid mask values which are boolean type, and extend them to the + // expected vector element type. Convert the vector to predicate. + BasicType to_vect_bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(to_vect_bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size, Matcher::vector_length(this)); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, as_FloatRegister($vtmp$$reg), + as_PRegister($ptmp$$reg), T_BOOLEAN, to_vect_bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), size, ptrue, as_FloatRegister($vtmp$$reg), 0); + %} + ins_pipe(pipe_slow); +%} + +// Combine VectorStoreMask+StoreVector when the vector element type is not T_BYTE + +instruct storeV_vstoremask(indirect mem, pRegGov src, vReg tmp, immI_gt_1 esize) %{ + predicate(UseSVE > 0 && + Matcher::vector_length_in_bytes(n->as_StoreVector()->in(MemNode::ValueIn)->in(1)) == MaxVectorSize); + match(Set mem (StoreVector mem (VectorStoreMask src esize))); + effect(TEMP tmp); + ins_cost(3 * SVE_COST); + format %{ "sve_cpy $tmp, $src, 1\n\t" + "sve_st1b $tmp, $mem\t# store vector mask (sve) (H/S/D)" %} + ins_encode %{ + BasicType from_vect_bt = Matcher::vector_element_basic_type(this, $src); + assert(type2aelembytes(from_vect_bt) == (int)$esize$$constant, "unsupported type."); + Assembler::SIMD_RegVariant size = __ elemBytes_to_regVariant($esize$$constant); + __ sve_cpy(as_FloatRegister($tmp$$reg), size, as_PRegister($src$$reg), 1, false); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($tmp$$reg), + ptrue, T_BOOLEAN, from_vect_bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} + +instruct storeV_vstoremask_partial(indirect mem, pRegGov src, vReg vtmp, + immI_gt_1 esize, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_StoreVector()->memory_size() > 16 && + type2aelembytes(n->as_StoreVector()->vect_type()->element_basic_type()) > 1 && + Matcher::vector_length_in_bytes(n->as_StoreVector()->in(MemNode::ValueIn)->in(1)) < MaxVectorSize); + match(Set mem (StoreVector mem (VectorStoreMask src esize))); + effect(TEMP vtmp, TEMP ptmp, KILL cr); + format %{ "storeV_vstoremask $src, $mem\t# store vector mask partial (sve) (H/S/D)" %} + 
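
The mask load/store rules bridge boolean arrays in memory and predicate registers: VectorMask.fromArray yields the VectorLoadMask (LoadVector mem) shape matched either stepwise or by the combined rules above, and intoArray yields the StoreVector (VectorStoreMask ...) shape handled here. A sketch under the same module assumptions:

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorSpecies;

    class SveMaskMemory {
        static final VectorSpecies<Integer> SP = IntVector.SPECIES_PREFERRED;

        // Load: ld1b + cmpne #0 turns the booleans into a predicate.
        // Store: cpy #1 under the predicate + st1b writes them back.
        static void roundTrip(boolean[] in, boolean[] out) {
            VectorMask<Integer> m = VectorMask.fromArray(SP, in, 0);
            m.intoArray(out, 0);
        }
    }
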
ins_cost(6 * SVE_COST); + ins_encode %{ + // Convert the valid src predicate to vector, and store the vector + // elements as boolean values. + BasicType from_vect_bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(from_vect_bt); + __ sve_cpy(as_FloatRegister($vtmp$$reg), size, as_PRegister($src$$reg), 1, false); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size, Matcher::vector_length(this, $src)); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($vtmp$$reg), + as_PRegister($ptmp$$reg), T_BOOLEAN, from_vect_bt, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); - __ sve_neg(dst_reg, to_vect_variant, ptrue, dst_reg); %} ins_pipe(pipe_slow); %} -instruct vloadmask_loadV_non_byte(vReg dst, indirect mem) %{ - predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() == MaxVectorSize && - type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) > 1); - match(Set dst (VectorLoadMask (LoadVector mem))); - ins_cost(5 * SVE_COST); - format %{ "sve_ld1b $dst, $mem\n\t" - "sve_neg $dst, $dst\t# load vector mask (sve)" %} +// vector add reduction + +instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (AddReductionVI src1 src2)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(SVE_COST); + format %{ "sve_reduce_addI $dst, $src1, $src2\t# addI reduction (sve) (may extend)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + ptrue, as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (AddReductionVL src1 src2)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(SVE_COST); + format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction (sve)" %} + ins_encode %{ + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + ptrue, as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addF(vRegF src1_dst, vReg src2) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set src1_dst (AddReductionVF src1_dst src2)); + ins_cost(SVE_COST); + format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (S)" %} + ins_encode %{ + __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S, + ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addD(vRegD src1_dst, vReg src2) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set src1_dst (AddReductionVD src1_dst src2)); + ins_cost(SVE_COST); + format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (D)" %} + ins_encode %{ + __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D, + ptrue, as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (AddReductionVI src1 src2)); + 
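
The add-reduction rules split by vector length: full-length vectors use ptrue as the governing predicate, while the *_partial variants first build one with sve_whilelo. Integral types funnel through the sve_reduce_integral helper (which also covers the byte/short extension noted in the formats); floats use sve_fadda, the strictly ordered accumulate that the Vector API's sequential floating-point add semantics require. Illustrative sketch:

    import jdk.incubator.vector.FloatVector;
    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    class SveReductions {
        static final VectorSpecies<Integer> ISP = IntVector.SPECIES_PREFERRED;

        // AddReductionVI: reduce_addI, or reduce_addI_partial for
        // species shorter than the hardware vector length.
        static int sum(int[] a) {
            int s = 0;
            int i = 0;
            for (; i < ISP.loopBound(a.length); i += ISP.length()) {
                s += IntVector.fromArray(ISP, a, i).reduceLanes(VectorOperators.ADD);
            }
            for (; i < a.length; i++) {
                s += a[i];
            }
            return s;
        }

        // AddReductionVF: reduce_addF, an ordered sve_fadda accumulation.
        static float addAcross(FloatVector v, float acc) {
            return acc + v.reduceLanes(VectorOperators.ADD);
        }
    }
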
effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_reduce_addI $dst, $src1, $src2\t# addI reduction partial (sve) (may extend)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, + Matcher::vector_length(this, $src2)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (AddReductionVL src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction partial (sve)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src2)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addF_partial(vRegF src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set src1_dst (AddReductionVF src1_dst src2)); + ins_cost(SVE_COST); + effect(TEMP ptmp, KILL cr); + format %{ "sve_reduce_addF $src1_dst, $src1_dst, $src2\t# addF reduction partial (sve) (S)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, + Matcher::vector_length(this, $src2)); + __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addD_partial(vRegD src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set src1_dst (AddReductionVD src1_dst src2)); + ins_cost(SVE_COST); + effect(TEMP ptmp, KILL cr); + format %{ "sve_reduce_addD $src1_dst, $src1_dst, $src2\t# addD reduction partial (sve) (D)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src2)); + __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector add reduction - predicated + +instruct reduce_addI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (AddReductionVI (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(SVE_COST); + format %{ "sve_reduce_addI $dst, $src1, $pg, $src2\t# addI reduction predicated (sve) (may extend)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, pRegGov pg) %{ + predicate(UseSVE > 0 
&& + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (AddReductionVL (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(SVE_COST); + format %{ "sve_reduce_addL $dst, $src1, $pg, $src2\t# addL reduction predicated (sve)" %} + ins_encode %{ + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addF_masked(vRegF src1_dst, vReg src2, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set src1_dst (AddReductionVF (Binary src1_dst src2) pg)); + ins_cost(SVE_COST); + format %{ "sve_reduce_addF $src1_dst, $pg, $src2\t# addF reduction predicated (sve)" %} + ins_encode %{ + __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S, + as_PRegister($pg$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addD_masked(vRegD src1_dst, vReg src2, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set src1_dst (AddReductionVD (Binary src1_dst src2) pg)); + ins_cost(SVE_COST); + format %{ "sve_reduce_addD $src1_dst, $pg, $src2\t# addD reduction predicated (sve)" %} + ins_encode %{ + __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D, + as_PRegister($pg$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addI_masked_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (AddReductionVI (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "sve_reduce_addI $dst, $src1, $pg, $src2\t# addI reduction predicated partial (sve) (may extend)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, + Matcher::vector_length(this, $src2)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addL_masked_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, + pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (AddReductionVL (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "sve_reduce_addL $dst, $src1, $pg, $src2\t# addL reduction predicated partial (sve)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src2)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct 
reduce_addF_masked_partial(vRegF src1_dst, vReg src2, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set src1_dst (AddReductionVF (Binary src1_dst src2) pg)); + effect(TEMP ptmp, KILL cr); + ins_cost(SVE_COST); + format %{ "sve_reduce_addF $src1_dst, $pg, $src2\t# addF reduction predicated partial (sve)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, + Matcher::vector_length(this, $src2)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addD_masked_partial(vRegD src1_dst, vReg src2, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set src1_dst (AddReductionVD (Binary src1_dst src2) pg)); + effect(TEMP ptmp, KILL cr); + ins_cost(SVE_COST); + format %{ "sve_reduce_addD $src1_dst, $pg, $src2\t# addD reduction predicated partial (sve)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src2)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector and reduction + +instruct reduce_andI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (AndReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(SVE_COST); + format %{ "sve_reduce_andI $dst, $src1, $src2\t# andI reduction (sve) (may extend)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + ptrue, as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_andL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (AndReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(SVE_COST); + format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction (sve)" %} ins_encode %{ - FloatRegister dst_reg = as_FloatRegister($dst$$reg); - BasicType to_vect_bt = Matcher::vector_element_basic_type(this); - Assembler::SIMD_RegVariant to_vect_variant = __ elemType_to_regVariant(to_vect_bt); - loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue, - T_BOOLEAN, to_vect_bt, $mem->opcode(), - as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); - __ sve_neg(dst_reg, to_vect_variant, ptrue, dst_reg); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + ptrue, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct storeV_vstoremask_byte(vmemA mem, vReg src, vReg tmp, immI_1 esize) %{ - predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() * - 
n->as_StoreVector()->in(MemNode::ValueIn)->in(2)->get_int() == MaxVectorSize); - match(Set mem (StoreVector mem (VectorStoreMask src esize))); - effect(TEMP tmp); - ins_cost(5 * SVE_COST); - format %{ "sve_neg $tmp, $src\n\t" - "sve_st1b $tmp, $mem\t# store vector mask (sve)" %} +instruct reduce_andI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (AndReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_reduce_andI $dst, $src1, $src2\t# andI reduction partial (sve) (may extend)" %} ins_encode %{ - BasicType from_vect_bt = Matcher::vector_element_basic_type(this, $src); - assert(type2aelembytes(from_vect_bt) == (int)$esize$$constant, "unsupported type."); - Assembler::SIMD_RegVariant from_vect_variant = __ elemBytes_to_regVariant($esize$$constant); - __ sve_neg(as_FloatRegister($tmp$$reg), from_vect_variant, ptrue, - as_FloatRegister($src$$reg)); - loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($tmp$$reg), - ptrue, T_BOOLEAN, from_vect_bt, $mem->opcode(), - as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, + Matcher::vector_length(this, $src2)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct storeV_vstoremask_non_byte(indirect mem, vReg src, vReg tmp, immI_gt_1 esize) %{ - predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() * - n->as_StoreVector()->in(MemNode::ValueIn)->in(2)->get_int() == MaxVectorSize); - match(Set mem (StoreVector mem (VectorStoreMask src esize))); - effect(TEMP tmp); - ins_cost(5 * SVE_COST); - format %{ "sve_neg $tmp, $src\n\t" - "sve_st1b $tmp, $mem\t# store vector mask (sve)" %} +instruct reduce_andL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (AndReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction partial (sve)" %} ins_encode %{ - BasicType from_vect_bt = Matcher::vector_element_basic_type(this, $src); - assert(type2aelembytes(from_vect_bt) == (int)$esize$$constant, "unsupported type."); - Assembler::SIMD_RegVariant from_vect_variant = __ elemBytes_to_regVariant($esize$$constant); - __ sve_neg(as_FloatRegister($tmp$$reg), from_vect_variant, ptrue, - as_FloatRegister($src$$reg)); - loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($tmp$$reg), - ptrue, T_BOOLEAN, from_vect_bt, $mem->opcode(), - as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src2)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), 
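The partial rules exist because an SVE hardware register (MaxVectorSize bytes) can be wider than the Java vector being reduced; sve_whilelo_zr_imm builds a predicate covering only the first vector_length lanes. A scalar model of that mask, again with a bool array as a stand-in for the predicate register:

#include <cstddef>

// Model of sve_whilelo_zr_imm(ptmp, size, vlen): lane i is active
// iff i < vlen, so the reduction ignores lanes beyond the Java
// vector's actual length.
void whilelo(bool* ptmp, size_t max_lanes, size_t vlen) {
  for (size_t i = 0; i < max_lanes; i++) {
    ptmp[i] = (i < vlen);
  }
}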
as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %} -// vector add reduction +// vector and reduction - predicated -instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (AddReductionVI src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); +instruct reduce_andI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (AndReductionV (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP tmp); ins_cost(SVE_COST); - format %{ "sve_reduce_addI $dst, $src1, $src2\t# addB/S/I reduction (sve) (may extend)" %} + format %{ "sve_reduce_andI $dst, $src1, $pg, $src2\t# andI reduction predicated (sve) (may extend)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); - __ sve_uaddv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ addw($dst$$Register, $dst$$Register, $src1$$Register); - if (bt == T_BYTE) { - __ sxtb($dst$$Register, $dst$$Register); - } else if (bt == T_SHORT) { - __ sxth($dst$$Register, $dst$$Register); - } else { - assert(bt == T_INT, "unsupported type"); - } + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_addI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, - pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (AddReductionVI src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); +instruct reduce_andL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (AndReductionV (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP tmp); ins_cost(SVE_COST); - format %{ "sve_reduce_addI $dst, $src1, $src2\t# addI reduction partial (sve) (may extend)" %} + format %{ "sve_reduce_andL $dst, $src1, $pg, $src2\t# andL reduction predicated (sve)" %} ins_encode %{ - BasicType bt = Matcher::vector_element_basic_type(this, $src2); - Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); - __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, - Matcher::vector_length(this, $src2)); - __ sve_uaddv(as_FloatRegister($vtmp$$reg), variant, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ addw($dst$$Register, $dst$$Register, $src1$$Register); - if (bt == T_BYTE) { - __ sxtb($dst$$Register, $dst$$Register); - } else if (bt == T_SHORT) { - __ sxth($dst$$Register, $dst$$Register); - } else { - assert(bt == T_INT, "unsupported type"); - } + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_addL(iRegLNoSp 
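For the full-length masked rules the governing predicate pg is handed straight to the reduction with no extra mask set-up; SVE's bitwise reductions consume only the active lanes, which behaves as if each inactive lane contributed the operation's identity (all ones for AND). Assuming that behavior, a scalar model of the predicated andL rule:

#include <cstdint>
#include <cstddef>

// Scalar model of reduce_andL_masked: reduce the lanes pg marks
// active (inactive lanes act as the AND identity, all ones), then
// fold in the scalar input src1.
int64_t and_reduction_masked(int64_t src1, const int64_t* src2,
                             const bool* pg, size_t lanes) {
  int64_t acc = -1;                 // identity element for bitwise AND
  for (size_t i = 0; i < lanes; i++) {
    if (pg[i]) acc &= src2[i];
  }
  return acc & src1;
}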
dst, iRegL src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (AddReductionVL src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); - ins_cost(SVE_COST); - format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction (sve)" %} +instruct reduce_andI_masked_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (AndReductionV (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "sve_reduce_andI $dst, $src1, $pg, $src2\t# andI reduction predicated partial (sve) (may extend)" %} ins_encode %{ - __ sve_uaddv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ add($dst$$Register, $dst$$Register, $src1$$Register); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, + Matcher::vector_length(this, $src2)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_addL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, - pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (AddReductionVL src1 src2)); +instruct reduce_andL_masked_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, + pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (AndReductionV (Binary src1 src2) pg)); effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); - ins_cost(SVE_COST); - format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction partial (sve)" %} + ins_cost(3 * SVE_COST); + format %{ "sve_reduce_andL $dst, $src1, $pg, $src2\t# andL reduction predicated partial (sve)" %} ins_encode %{ __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src2)); - __ sve_uaddv(as_FloatRegister($vtmp$$reg), __ D, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ add($dst$$Register, $dst$$Register, $src1$$Register); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %} +// vector or reduction -instruct reduce_addF(vRegF src1_dst, vReg src2) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set src1_dst (AddReductionVF src1_dst src2)); +instruct reduce_orI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD 
tmp) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (OrReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP tmp); ins_cost(SVE_COST); - format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (S)" %} + format %{ "sve_reduce_orI $dst, $src1, $src2\t# orI reduction (sve) (may extend)" %} ins_encode %{ - __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S, - ptrue, as_FloatRegister($src2$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + ptrue, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_addF_partial(vRegF src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set src1_dst (AddReductionVF src1_dst src2)); +instruct reduce_orL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (OrReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP tmp); ins_cost(SVE_COST); - effect(TEMP ptmp, KILL cr); - format %{ "sve_reduce_addF $src1_dst, $src1_dst, $src2\t# addF reduction partial (sve) (S)" %} + format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction (sve)" %} ins_encode %{ - __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, - Matcher::vector_length(this, $src2)); - __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + ptrue, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_addD(vRegD src1_dst, vReg src2) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set src1_dst (AddReductionVD src1_dst src2)); - ins_cost(SVE_COST); - format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (D)" %} +instruct reduce_orI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (OrReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_reduce_orI $dst, $src1, $src2\t# orI reduction partial (sve) (may extend)" %} ins_encode %{ - __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D, - ptrue, as_FloatRegister($src2$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, + Matcher::vector_length(this, $src2)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_addD_partial(vRegD src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set src1_dst 
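All of the integral families now funnel into a single C2_MacroAssembler helper, sve_reduce_integral, which dispatches on the node's ideal opcode instead of each rule open-coding its own andv/orv/eorv sequence. A hedged sketch of the contract that helper is assumed to provide for the bitwise cases (a scalar stand-in, not the real emitter; the real helper also covers add/min/max and the sign-extension fix-up discussed further down):

#include <cstdint>
#include <cstddef>

// Sketch of the dispatch shared by the and/or/xor rules above.
enum ReduceOp { AndV, OrV, XorV };

int64_t reduce_integral(ReduceOp op, int64_t src1, const int64_t* src2,
                        const bool* pg, size_t lanes) {
  int64_t acc = (op == AndV) ? -1 : 0;  // identity per operation
  for (size_t i = 0; i < lanes; i++) {
    if (!pg[i]) continue;
    switch (op) {
      case AndV: acc &= src2[i]; break;
      case OrV:  acc |= src2[i]; break;
      case XorV: acc ^= src2[i]; break;
    }
  }
  switch (op) {
    case AndV: return acc & src1;
    case OrV:  return acc | src1;
    case XorV: return acc ^ src1;
  }
  return acc;  // not reached
}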
(AddReductionVD src1_dst src2)); - ins_cost(SVE_COST); - effect(TEMP ptmp, KILL cr); - format %{ "sve_reduce_addD $src1_dst, $src1_dst, $src2\t# addD reduction partial (sve) (D)" %} +instruct reduce_orL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (OrReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction partial (sve)" %} ins_encode %{ __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src2)); - __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %} -// vector and reduction +// vector or reduction - predicated -instruct reduce_andI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (AndReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); +instruct reduce_orI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (OrReductionV (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP tmp); ins_cost(SVE_COST); - format %{ "sve_reduce_andI $dst, $src1, $src2\t# andB/S/I reduction (sve) (may extend)" %} + format %{ "sve_reduce_orI $dst, $src1, $pg, $src2\t# orI reduction predicated (sve) (may extend)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); - __ sve_andv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ andw($dst$$Register, $dst$$Register, $src1$$Register); - if (bt == T_BYTE) { - __ sxtb($dst$$Register, $dst$$Register); - } else if (bt == T_SHORT) { - __ sxth($dst$$Register, $dst$$Register); - } else { - assert(bt == T_INT, "unsupported type"); - } + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_andI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, - pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (AndReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); +instruct reduce_orL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (OrReductionV (Binary src1 
src2) pg)); + effect(TEMP_DEF dst, TEMP tmp); ins_cost(SVE_COST); - format %{ "sve_reduce_andI $dst, $src1, $src2\t# andI reduction partial (sve) (may extend)" %} + format %{ "sve_reduce_orL $dst, $src1, $pg, $src2\t# orL reduction predicated (sve)" %} ins_encode %{ - BasicType bt = Matcher::vector_element_basic_type(this, $src2); - Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); - __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, - Matcher::vector_length(this, $src2)); - __ sve_andv(as_FloatRegister($vtmp$$reg), variant, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ andw($dst$$Register, $dst$$Register, $src1$$Register); - if (bt == T_BYTE) { - __ sxtb($dst$$Register, $dst$$Register); - } else if (bt == T_SHORT) { - __ sxth($dst$$Register, $dst$$Register); - } else { - assert(bt == T_INT, "unsupported type"); - } + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_andL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (AndReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); - ins_cost(SVE_COST); - format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction (sve)" %} +instruct reduce_orI_masked_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (OrReductionV (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "sve_reduce_orI $dst, $src1, $pg, $src2\t# orI reduction predicated partial (sve) (may extend)" %} ins_encode %{ - __ sve_andv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ andr($dst$$Register, $dst$$Register, $src1$$Register); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, + Matcher::vector_length(this, $src2)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_andL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, - pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (AndReductionV src1 src2)); +instruct reduce_orL_masked_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, + pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < 
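Note the predicate navigation in the masked rules: the match tree is (OrReductionV (Binary src1 src2) pg), so the vector operand is reached as n->in(1)->in(2), whereas the unpredicated rules use n->in(2) directly. Sketched as comments:

// Ideal subtree matched by the predicated (masked) rules:
//
//   OrReductionV
//     in(1): Binary
//       in(1): src1  (scalar accumulator input)
//       in(2): src2  (vector)   <-- n->in(1)->in(2) in the predicates
//     in(2): pg      (governing mask)
//
// Unpredicated rules match (OrReductionV src1 src2), where the vector
// input is simply n->in(2).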
MaxVectorSize); + match(Set dst (OrReductionV (Binary src1 src2) pg)); effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); - ins_cost(SVE_COST); - format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction partial (sve)" %} + ins_cost(3 * SVE_COST); + format %{ "sve_reduce_orL $dst, $src1, $pg, $src2\t# orL reduction predicated partial (sve)" %} ins_encode %{ __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src2)); - __ sve_andv(as_FloatRegister($vtmp$$reg), __ D, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ andr($dst$$Register, $dst$$Register, $src1$$Register); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %} -// vector or reduction +// vector xor reduction -instruct reduce_orI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && +instruct reduce_eorI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (OrReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); + match(Set dst (XorReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP tmp); ins_cost(SVE_COST); - format %{ "sve_reduce_orI $dst, $src1, $src2\t# orB/S/I reduction (sve) (may extend)" %} + format %{ "sve_reduce_eorI $dst, $src1, $src2\t# eorI reduction (sve) (may extend)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); - __ sve_orv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ orrw($dst$$Register, $dst$$Register, $src1$$Register); - if (bt == T_BYTE) { - __ sxtb($dst$$Register, $dst$$Register); - } else if (bt == T_SHORT) { - __ sxth($dst$$Register, $dst$$Register); - } else { - assert(bt == T_INT, "unsupported type"); - } + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + ptrue, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_orI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, +instruct reduce_eorL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (XorReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(SVE_COST); + format %{ "sve_reduce_eorL $dst, $src1, $src2\t# eorL reduction (sve)" %} + ins_encode %{ + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + ptrue, as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_eorI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + predicate(UseSVE > 0 && + 
n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (OrReductionV src1 src2)); + match(Set dst (XorReductionV src1 src2)); effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); - ins_cost(SVE_COST); - format %{ "sve_reduce_orI $dst, $src1, $src2\t# orI reduction partial (sve) (may extend)" %} + ins_cost(2 * SVE_COST); + format %{ "sve_reduce_eorI $dst, $src1, $src2\t# eorI reduction partial (sve) (may extend)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, Matcher::vector_length(this, $src2)); - __ sve_orv(as_FloatRegister($vtmp$$reg), variant, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ orrw($dst$$Register, $dst$$Register, $src1$$Register); - if (bt == T_BYTE) { - __ sxtb($dst$$Register, $dst$$Register); - } else if (bt == T_SHORT) { - __ sxth($dst$$Register, $dst$$Register); - } else { - assert(bt == T_INT, "unsupported type"); - } - %} - ins_pipe(pipe_slow); -%} - -instruct reduce_orL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (OrReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); - ins_cost(SVE_COST); - format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction (sve)" %} - ins_encode %{ - __ sve_orv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ orr($dst$$Register, $dst$$Register, $src1$$Register); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_orL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, +instruct reduce_eorL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (OrReductionV src1 src2)); + match(Set dst (XorReductionV src1 src2)); effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); - ins_cost(SVE_COST); - format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction partial (sve)" %} + ins_cost(2 * SVE_COST); + format %{ "sve_reduce_eorL $dst, $src1, $src2\t# eorL reduction partial (sve)" %} ins_encode %{ __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src2)); - __ sve_orv(as_FloatRegister($vtmp$$reg), __ D, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ orr($dst$$Register, $dst$$Register, $src1$$Register); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %} -// vector xor reduction +// vector xor reduction - predicated + +instruct reduce_eorI_masked(iRegINoSp dst, 
iRegIorL2I src1, vReg src2, vRegD tmp, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (XorReductionV (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(SVE_COST); + format %{ "sve_reduce_eorI $dst, $src1, $pg, $src2\t# eorI reduction predicated (sve) (may extend)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} -instruct reduce_eorI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (XorReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); +instruct reduce_eorL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (XorReductionV (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP tmp); ins_cost(SVE_COST); - format %{ "sve_reduce_eorI $dst, $src1, $src2\t# xorB/H/I reduction (sve) (may extend)" %} + format %{ "sve_reduce_eorL $dst, $src1, $pg, $src2\t# eorL reduction predicated (sve)" %} ins_encode %{ - BasicType bt = Matcher::vector_element_basic_type(this, $src2); - Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); - __ sve_eorv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ eorw($dst$$Register, $dst$$Register, $src1$$Register); - if (bt == T_BYTE) { - __ sxtb($dst$$Register, $dst$$Register); - } else if (bt == T_SHORT) { - __ sxth($dst$$Register, $dst$$Register); - } else { - assert(bt == T_INT, "unsupported type"); - } + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_eorI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, - pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (XorReductionV src1 src2)); +instruct reduce_eorI_masked_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (XorReductionV (Binary src1 src2) pg)); effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); - ins_cost(SVE_COST); - format %{ "sve_reduce_eorI $dst, $src1, $src2\t# xorI reduction partial (sve) (may extend)" %} + ins_cost(3 * SVE_COST); + format %{ "sve_reduce_eorI $dst, $src1, $pg, $src2\t# eorI reduction predicated partial (sve) (may extend)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); Assembler::SIMD_RegVariant variant = 
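The "(may extend)" tag on the int flavors records that byte/short reductions are computed in a 32-bit register and must be sign-extended back to the element width; the deleted rules did this explicitly with sxtb/sxth, and sve_reduce_integral is assumed to fold the same fix-up in. A scalar model:

#include <cstdint>

// Model of the "(may extend)" fix-up: narrow element types need a
// sign extension after the reduction lands in a 32-bit register
// (sxtb for byte, sxth for short in the deleted code above).
int32_t extend_result(int32_t raw, int elem_bytes) {
  switch (elem_bytes) {
    case 1:  return (int8_t)raw;   // sxtb
    case 2:  return (int16_t)raw;  // sxth
    default: return raw;           // int needs no extension
  }
}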
__ elemType_to_regVariant(bt); __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, Matcher::vector_length(this, $src2)); - __ sve_eorv(as_FloatRegister($vtmp$$reg), variant, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ eorw($dst$$Register, $dst$$Register, $src1$$Register); - if (bt == T_BYTE) { - __ sxtb($dst$$Register, $dst$$Register); - } else if (bt == T_SHORT) { - __ sxth($dst$$Register, $dst$$Register); - } else { - assert(bt == T_INT, "unsupported type"); - } - %} - ins_pipe(pipe_slow); -%} - -instruct reduce_eorL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (XorReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); - ins_cost(SVE_COST); - format %{ "sve_reduce_eorL $dst, $src1, $src2\t# xorL reduction (sve)" %} - ins_encode %{ - __ sve_eorv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ eor($dst$$Register, $dst$$Register, $src1$$Register); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_eorL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, - pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (XorReductionV src1 src2)); +instruct reduce_eorL_masked_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, + pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (XorReductionV (Binary src1 src2) pg)); effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); - ins_cost(SVE_COST); - format %{ "sve_reduce_eorL $dst, $src1, $src2\t# xorL reduction partial (sve)" %} + ins_cost(3 * SVE_COST); + format %{ "sve_reduce_eorL $dst, $src1, $pg, $src2\t# eorL reduction predicated partial (sve)" %} ins_encode %{ __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src2)); - __ sve_eorv(as_FloatRegister($vtmp$$reg), __ D, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ eor($dst$$Register, $dst$$Register, $src1$$Register); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %} - // vector max reduction -instruct reduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && - (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE || - n->in(2)->bottom_type()->is_vect()->element_basic_type() 
== T_SHORT || - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT)); +instruct reduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + is_integral_type(n->in(2)->bottom_type()->is_vect()->element_basic_type())); match(Set dst (MaxReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_reduce_maxI $dst, $src1, $src2\t# reduce maxB/S/I (sve)" %} + format %{ "sve_reduce_maxI $dst, $src1, $src2\t# maxI reduction (sve)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); - __ sve_smaxv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ cmpw($dst$$Register, $src1$$Register); - __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::GT); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + ptrue, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_maxI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, - pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && - (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE || - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT || - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT)); +instruct reduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MaxReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_reduce_maxI $dst, $src1, $src2\t# reduce maxI partial (sve)" %} + format %{ "sve_reduce_maxL $dst, $src1, $src2\t# maxL reduction (sve)" %} ins_encode %{ - BasicType bt = Matcher::vector_element_basic_type(this, $src2); - Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); - __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, - Matcher::vector_length(this, $src2)); - __ sve_smaxv(as_FloatRegister($vtmp$$reg), variant, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ cmpw($dst$$Register, $src1$$Register); - __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::GT); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + ptrue, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); +instruct reduce_maxI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + 
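The integer max rules previously open-coded smaxv plus a compare-and-select against the scalar input; routing them through sve_reduce_integral keeps that compare-and-select, which is why these variants now KILL cr even in the full-length case. A scalar model, with the identity chosen so an all-false mask simply yields src1:

#include <cstdint>
#include <algorithm>
#include <cstddef>

// Scalar model of reduce_maxI: reduce the active lanes, then fold in
// the scalar input with a compare-and-select (cmpw/cselw GT in the
// deleted code), which clobbers the flags -- hence KILL cr.
int32_t max_reduction(int32_t src1, const int32_t* src2,
                      const bool* pg, size_t lanes) {
  int32_t acc = INT32_MIN;            // identity for signed max
  for (size_t i = 0; i < lanes; i++) {
    if (pg[i]) acc = std::max(acc, src2[i]);
  }
  return std::max(acc, src1);
}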
n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + is_integral_type(n->in(2)->bottom_type()->is_vect()->element_basic_type())); match(Set dst (MaxReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); - ins_cost(SVE_COST); - format %{ "sve_reduce_maxL $dst, $src1, $src2\t# reduce maxL partial (sve)" %} + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_reduce_maxI $dst, $src1, $src2\t# maxI reduction partial (sve)" %} ins_encode %{ - __ sve_smaxv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ cmp($dst$$Register, $src1$$Register); - __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::GT); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, + Matcher::vector_length(this, $src2)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %} instruct reduce_maxL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MaxReductionV src1 src2)); effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); - ins_cost(SVE_COST); - format %{ "sve_reduce_maxL $dst, $src1, $src2\t# reduce maxL partial (sve)" %} + ins_cost(2 * SVE_COST); + format %{ "sve_reduce_maxL $dst, $src1, $src2\t# maxL reduction partial (sve)" %} ins_encode %{ __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src2)); - __ sve_smaxv(as_FloatRegister($vtmp$$reg), __ D, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ cmp($dst$$Register, $src1$$Register); - __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::GT); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %} instruct reduce_maxF(vRegF dst, vRegF src1, vReg src2) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT && + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); match(Set dst (MaxReductionV src1 src2)); ins_cost(INSN_COST); effect(TEMP_DEF dst); - format %{ "sve_fmaxv $dst, $src2 # vector (sve) (S)\n\t" - "fmaxs $dst, $dst, $src1\t# max reduction F" %} + format %{ "sve_reduce_maxF $dst, $src1, $src2\t# maxF reduction (sve)" %} ins_encode %{ - __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S, - ptrue, as_FloatRegister($src2$$reg)); + __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src2$$reg)); __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); %} ins_pipe(pipe_slow); @@ 
-1842,33 +2946,32 @@ instruct reduce_maxF(vRegF dst, vRegF src1, vReg src2) %{ instruct reduce_maxF_partial(vRegF dst, vRegF src1, vReg src2, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT && + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); match(Set dst (MaxReductionV src1 src2)); ins_cost(INSN_COST); effect(TEMP_DEF dst, TEMP ptmp, KILL cr); - format %{ "sve_reduce_maxF $dst, $src1, $src2\t# reduce max S partial (sve)" %} + format %{ "sve_reduce_maxF $dst, $src1, $src2\t# maxF reduction partial (sve)" %} ins_encode %{ __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, Matcher::vector_length(this, $src2)); - __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S, as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); %} ins_pipe(pipe_slow); %} instruct reduce_maxD(vRegD dst, vRegD src1, vReg src2) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE && + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); match(Set dst (MaxReductionV src1 src2)); ins_cost(INSN_COST); effect(TEMP_DEF dst); - format %{ "sve_fmaxv $dst, $src2 # vector (sve) (D)\n\t" - "fmaxs $dst, $dst, $src1\t# max reduction D" %} + format %{ "sve_reduce_maxD $dst, $src1, $src2\t# maxD reduction (sve)" %} ins_encode %{ - __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D, - ptrue, as_FloatRegister($src2$$reg)); + __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); %} ins_pipe(pipe_slow); @@ -1876,115 +2979,262 @@ instruct reduce_maxD(vRegD dst, vRegD src1, vReg src2) %{ instruct reduce_maxD_partial(vRegD dst, vRegD src1, vReg src2, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE && + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); match(Set dst (MaxReductionV src1 src2)); ins_cost(INSN_COST); effect(TEMP_DEF dst, TEMP ptmp, KILL cr); - format %{ "sve_reduce_maxD $dst, $src1, $src2\t# reduce max D partial (sve)" %} + format %{ "sve_reduce_maxD $dst, $src1, $src2\t# maxD reduction partial (sve)" %} ins_encode %{ __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src2)); - __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D, as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); %} ins_pipe(pipe_slow); %} -// vector min reduction +// vector max reduction - predicated -instruct reduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && - (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE || 
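The float/double max rules keep the original two-instruction shape, an SVE fmaxv reduction followed by a scalar fmaxs/fmaxd against src1; only the format strings were normalized. A scalar model (assumes vlen >= 1 and glosses over NaN ordering, where the hardware fmax rules differ from a plain compare):

#include <algorithm>
#include <cstddef>

// Scalar model of the fmaxv + fmaxs pair (NaN handling omitted).
float max_reduction_f(float src1, const float* src2, size_t vlen) {
  float acc = src2[0];
  for (size_t i = 1; i < vlen; i++) {
    acc = std::max(acc, src2[i]);     // fmaxv over the vector lanes
  }
  return std::max(acc, src1);         // fmaxs against the scalar input
}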
- n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT || - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT)); - match(Set dst (MinReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); +instruct reduce_maxI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, + pRegGov pg, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + is_integral_type(n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type())); + match(Set dst (MaxReductionV (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_reduce_minI $dst, $src1, $src2\t# reduce minB/S/I (sve)" %} + format %{ "sve_reduce_maxI $dst, $src1, $pg, $src2\t# maxI reduction predicated (sve)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); - __ sve_sminv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ cmpw($dst$$Register, $src1$$Register); - __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::LT); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_minI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, - pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && - (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE || - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT || - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT)); - match(Set dst (MinReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); +instruct reduce_maxL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, + pRegGov pg, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MaxReductionV (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_reduce_minI $dst, $src1, $src2\t# reduce minI partial (sve)" %} + format %{ "sve_reduce_maxL $dst, $src1, $pg, $src2\t# maxL reduction predicated (sve)" %} + ins_encode %{ + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_maxI_masked_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + is_integral_type(n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type())); + match(Set dst (MaxReductionV (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "sve_reduce_maxI $dst, $src1, $pg, $src2\t# maxI reduction predicated partial (sve)" %} 
ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, Matcher::vector_length(this, $src2)); - __ sve_sminv(as_FloatRegister($vtmp$$reg), variant, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ cmpw($dst$$Register, $src1$$Register); - __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::LT); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_maxL_masked_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, + pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MaxReductionV (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "sve_reduce_maxL $dst, $src1, $pg, $src2\t# maxL reduction predicated partial (sve)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src2)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_maxF_masked(vRegF dst, vRegF src1, vReg src2, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (MaxReductionV (Binary src1 src2) pg)); + ins_cost(SVE_COST); + format %{ "sve_reduce_maxF $dst, $src1, $pg, $src2\t# maxF reduction predicated (sve)" %} + ins_encode %{ + __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S, as_PRegister($pg$$reg), as_FloatRegister($src2$$reg)); + __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_maxD_masked(vRegD dst, vRegD src1, vReg src2, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (MaxReductionV (Binary src1 src2) pg)); + ins_cost(SVE_COST); + format %{ "sve_reduce_maxD $dst, $src1, $pg, $src2\t# maxD reduction predicated (sve)" %} + ins_encode %{ + __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D, as_PRegister($pg$$reg), as_FloatRegister($src2$$reg)); + __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_maxF_masked_partial(vRegF dst, vRegF src1, vReg src2, pRegGov pg, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT && + 
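The ins_cost bumps throughout this change appear to track the number of SVE instructions each variant actually emits, which the old uniform SVE_COST did not. Assuming SVE_COST approximates one SVE instruction, the accounting reads as:

// Rough cost accounting behind the updated ins_cost values:
//   full-length:    reduce                  -> SVE_COST
//   partial:        whilelo + reduce        -> 2 * SVE_COST
//   masked partial: whilelo + and + reduce  -> 3 * SVE_COST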
n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (MaxReductionV (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "sve_reduce_maxF $dst, $src1, $pg, $src2\t# maxF reduction predicated partial (sve)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, + Matcher::vector_length(this, $src2)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_maxD_masked_partial(vRegD dst, vRegD src1, vReg src2, pRegGov pg, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (MaxReductionV (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "sve_reduce_maxD $dst, $src1, $pg, $src2\t# maxD reduction predicated partial (sve)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src2)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector min reduction + +instruct reduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + is_integral_type(n->in(2)->bottom_type()->is_vect()->element_basic_type())); + match(Set dst (MinReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + ins_cost(SVE_COST); + format %{ "sve_reduce_minI $dst, $src1, $src2\t# minI reduction (sve)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + ptrue, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct reduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && +instruct reduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MinReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_reduce_minL $dst, $src1, $src2\t# reduce minL partial (sve)" %} + format %{ "sve_reduce_minL $dst, $src1, $src2\t# minL reduction (sve)" %} + ins_encode %{ + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + ptrue, as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct 
reduce_minI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + is_integral_type(n->in(2)->bottom_type()->is_vect()->element_basic_type())); + match(Set dst (MinReductionV src1 src2)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_reduce_minI $dst, $src1, $src2\t# minI reduction partial (sve)" %} ins_encode %{ - __ sve_sminv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ cmp($dst$$Register, $src1$$Register); - __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::LT); + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, + Matcher::vector_length(this, $src2)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %} instruct reduce_minL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (MinReductionV src1 src2)); effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); - ins_cost(SVE_COST); - format %{ "sve_reduce_minL $dst, $src1, $src2\t# reduce minL partial (sve)" %} + ins_cost(2 * SVE_COST); + format %{ "sve_reduce_minL $dst, $src1, $src2\t# minL reduction partial (sve)" %} ins_encode %{ __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src2)); - __ sve_sminv(as_FloatRegister($vtmp$$reg), __ D, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ cmp($dst$$Register, $src1$$Register); - __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::LT); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %} instruct reduce_minF(vRegF dst, vRegF src1, vReg src2) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT && + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); match(Set dst (MinReductionV src1 src2)); ins_cost(INSN_COST); effect(TEMP_DEF dst); - format %{ "sve_fminv $dst, $src2 # vector (sve) (S)\n\t" - "fmins $dst, $dst, $src1\t# min reduction F" %} + format %{ "sve_reduce_minF $dst, $src1, $src2\t# minF reduction (sve)" %} ins_encode %{ - __ sve_fminv(as_FloatRegister($dst$$reg), __ S, - ptrue, as_FloatRegister($src2$$reg)); + __ sve_fminv(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src2$$reg)); __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); %} ins_pipe(pipe_slow); @@ 
@@ -1992,51 +3242,203 @@ instruct reduce_minF(vRegF dst, vRegF src1, vReg src2) %{
 instruct reduce_minF_partial(vRegF dst, vRegF src1, vReg src2,
                              pRegGov ptmp, rFlagsReg cr) %{
-  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
+  predicate(UseSVE > 0 &&
+            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
   match(Set dst (MinReductionV src1 src2));
   ins_cost(INSN_COST);
   effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
-  format %{ "sve_reduce_minF $dst, $src1, $src2\t# reduce min S partial (sve)" %}
+  format %{ "sve_reduce_minF $dst, $src1, $src2\t# minF reduction partial (sve)" %}
   ins_encode %{
     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,
                           Matcher::vector_length(this, $src2));
-    __ sve_fminv(as_FloatRegister($dst$$reg), __ S,
-                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+    __ sve_fminv(as_FloatRegister($dst$$reg), __ S, as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
     __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
   %}
   ins_pipe(pipe_slow);
 %}
 
 instruct reduce_minD(vRegD dst, vRegD src1, vReg src2) %{
-  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
+  predicate(UseSVE > 0 &&
+            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
   match(Set dst (MinReductionV src1 src2));
   ins_cost(INSN_COST);
   effect(TEMP_DEF dst);
-  format %{ "sve_fminv $dst, $src2 # vector (sve) (D)\n\t"
-            "fmins $dst, $dst, $src1\t# min reduction D" %}
+  format %{ "sve_reduce_minD $dst, $src1, $src2\t# minD reduction (sve)" %}
   ins_encode %{
-    __ sve_fminv(as_FloatRegister($dst$$reg), __ D,
-                 ptrue, as_FloatRegister($src2$$reg));
+    __ sve_fminv(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
+    __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_minD_partial(vRegD dst, vRegD src1, vReg src2,
+                             pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 &&
+            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
+            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+  match(Set dst (MinReductionV src1 src2));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
+  format %{ "sve_reduce_minD $dst, $src1, $src2\t# minD reduction partial (sve)" %}
+  ins_encode %{
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
+                          Matcher::vector_length(this, $src2));
+    __ sve_fminv(as_FloatRegister($dst$$reg), __ D, as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+    __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector min reduction - predicated
+
+instruct reduce_minI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp,
+                            pRegGov pg, rFlagsReg cr) %{
+  predicate(UseSVE > 0 &&
+            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
+            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+            is_integral_type(n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type()));
+  match(Set dst (MinReductionV (Binary src1 src2) pg));
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_minI $dst, $src1, $pg, $src2\t# minI reduction predicated (sve)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+                           $src1$$Register, as_FloatRegister($src2$$reg),
+                           as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_minL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp,
+                            pRegGov pg, rFlagsReg cr) %{
+  predicate(UseSVE > 0 &&
+            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
+            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+  match(Set dst (MinReductionV (Binary src1 src2) pg));
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_minL $dst, $src1, $pg, $src2\t# minL reduction predicated (sve)" %}
+  ins_encode %{
+    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+                           $src1$$Register, as_FloatRegister($src2$$reg),
+                           as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_minI_masked_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
+                                    pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 &&
+            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
+            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+            is_integral_type(n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type()));
+  match(Set dst (MinReductionV (Binary src1 src2) pg));
+  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+  ins_cost(3 * SVE_COST);
+  format %{ "sve_reduce_minI $dst, $src1, $pg, $src2\t# minI reduction predicated partial (sve)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
+                          Matcher::vector_length(this, $src2));
+    __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+               as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+                           $src1$$Register, as_FloatRegister($src2$$reg),
+                           as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_minL_masked_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
+                                    pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 &&
+            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
+            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+  match(Set dst (MinReductionV (Binary src1 src2) pg));
+  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+  ins_cost(3 * SVE_COST);
+  format %{ "sve_reduce_minL $dst, $src1, $pg, $src2\t# minL reduction predicated partial (sve)" %}
+  ins_encode %{
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
+                          Matcher::vector_length(this, $src2));
+    __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+               as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+                           $src1$$Register, as_FloatRegister($src2$$reg),
+                           as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
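+// In the predicated-partial rules above, the governing predicate pg may have
+// bits set beyond the node's vector length, so the generated code first
+// builds a length mask with whilelo and then ANDs it into the temporary
+// predicate before the reduction consumes it. A minimal sketch of the
+// effective lane mask, assuming n active-length lanes:
+//
+//   ptmp[i] = (i < n) && pg[i];
+//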
+instruct reduce_minF_masked(vRegF dst, vRegF src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0 &&
+            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
+            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+  match(Set dst (MinReductionV (Binary src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_minF $dst, $src1, $pg, $src2\t# minF reduction predicated (sve)" %}
+  ins_encode %{
+    __ sve_fminv(as_FloatRegister($dst$$reg), __ S, as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
+    __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_minD_masked(vRegD dst, vRegD src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0 &&
+            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
+            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+  match(Set dst (MinReductionV (Binary src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_reduce_minD $dst, $src1, $pg, $src2\t# minD reduction predicated (sve)" %}
+  ins_encode %{
+    __ sve_fminv(as_FloatRegister($dst$$reg), __ D, as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
     __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct reduce_minD_partial(vRegD dst, vRegD src1, vReg src2,
-                             pRegGov ptmp, rFlagsReg cr) %{
-  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
-            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
-  match(Set dst (MinReductionV src1 src2));
-  ins_cost(INSN_COST);
+instruct reduce_minF_masked_partial(vRegF dst, vRegF src1, vReg src2, pRegGov pg,
+                                    pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 &&
+            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
+            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+  match(Set dst (MinReductionV (Binary src1 src2) pg));
+  effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
+  ins_cost(3 * SVE_COST);
+  format %{ "sve_reduce_minF $dst, $src1, $pg, $src2\t# minF reduction predicated partial (sve)" %}
+  ins_encode %{
+    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,
+                          Matcher::vector_length(this, $src2));
+    __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+               as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+    __ sve_fminv(as_FloatRegister($dst$$reg), __ S,
+                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+    __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_minD_masked_partial(vRegD dst, vRegD src1, vReg src2, pRegGov pg,
+                                    pRegGov ptmp, rFlagsReg cr) %{
+  predicate(UseSVE > 0 &&
+            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
+            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+  match(Set dst (MinReductionV (Binary src1 src2) pg));
   effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
-  format %{ "sve_reduce_minD $dst, $src1, $src2\t# reduce min D partial (sve)" %}
+  ins_cost(3 * SVE_COST);
+  format %{ "sve_reduce_minD $dst, $src1, $pg, $src2\t# minD reduction predicated partial (sve)" %}
   ins_encode %{
     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                           Matcher::vector_length(this, $src2));
+    __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+               as_PRegister($pg$$reg), as_PRegister($pg$$reg));
     __ sve_fminv(as_FloatRegister($dst$$reg), __ D,
-                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
     __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
   %}
   ins_pipe(pipe_slow);
@@ -2448,149 +3850,463 @@ instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
   ins_pipe(pipe_slow);
 %}
 
-instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{
+instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0);
+  match(Set dst (URShiftVI src (RShiftCntV shift)));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (S)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    if (con == 0) {
+      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+                 as_FloatRegister($src$$reg));
+      return;
+    }
+    __ sve_lsr(as_FloatRegister($dst$$reg), __ S,
+               as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0);
+  match(Set dst (URShiftVL src (RShiftCntV shift)));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    if (con == 0) {
+      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+                 as_FloatRegister($src$$reg));
+      return;
+    }
+    __ sve_lsr(as_FloatRegister($dst$$reg), __ D,
+               as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0);
+  match(Set dst (LShiftVB src (LShiftCntV shift)));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    if (con >= 8) {
+      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+                 as_FloatRegister($src$$reg));
+      return;
+    }
+    __ sve_lsl(as_FloatRegister($dst$$reg), __ B,
+               as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0);
+  match(Set dst (LShiftVS src (LShiftCntV shift)));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    if (con >= 16) {
+      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+                 as_FloatRegister($src$$reg));
+      return;
+    }
+    __ sve_lsl(as_FloatRegister($dst$$reg), __ H,
+               as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0);
+  match(Set dst (LShiftVI src (LShiftCntV shift)));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    __ sve_lsl(as_FloatRegister($dst$$reg), __ S,
+               as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslL_imm(vReg dst, vReg src, immI shift) %{
+  predicate(UseSVE > 0);
+  match(Set dst (LShiftVL src (LShiftCntV shift)));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    __ sve_lsl(as_FloatRegister($dst$$reg), __ D,
+               as_FloatRegister($src$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{
+  predicate(UseSVE > 0 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE));
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (B)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($cnt$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
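+// The unpredicated immediate-shift rules above special-case counts that the
+// SVE encodings cannot express directly: a logical right shift by 0
+// degenerates to a register copy (sve_orr of src with itself), and a left
+// shift by at least the lane width produces all-zero lanes (sve_eor of src
+// with itself). A minimal sketch of the intended lane semantics for the
+// byte left shift:
+//
+//   dst[i] = (shift >= 8) ? 0 : (byte)(src[i] << shift);
+//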
+instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{
+  predicate(UseSVE > 0 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+            (n->bottom_type()->is_vect()->element_basic_type() == T_CHAR)));
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (H)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($cnt$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{
+  predicate(UseSVE > 0 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_INT));
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (S)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($cnt$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{
+  predicate(UseSVE > 0 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG));
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (D)" %}
+  ins_encode %{
+    __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($cnt$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector shift - predicated
+
+instruct vasrB_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (RShiftVB (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (B)" %}
+  ins_encode %{
+    __ sve_asr(as_FloatRegister($dst_src1$$reg), __ B,
+               as_PRegister($pg$$reg),
+               as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vasrS_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (RShiftVS (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_asr(as_FloatRegister($dst_src1$$reg), __ H,
+               as_PRegister($pg$$reg),
+               as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vasrI_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (RShiftVI (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_asr(as_FloatRegister($dst_src1$$reg), __ S,
+               as_PRegister($pg$$reg),
+               as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vasrL_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (RShiftVL (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_asr(as_FloatRegister($dst_src1$$reg), __ D,
+               as_PRegister($pg$$reg),
+               as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslB_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (LShiftVB (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (B)" %}
+  ins_encode %{
+    __ sve_lsl(as_FloatRegister($dst_src1$$reg), __ B,
+               as_PRegister($pg$$reg),
+               as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
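+// The masked shift rules here are destructive, matching the SVE predicated
+// shift forms: dst_src1 is both the first operand and the destination, and
+// inactive lanes keep their previous value (merging predication). A minimal
+// sketch of the per-lane behaviour for an arithmetic right shift:
+//
+//   dst_src1[i] = pg[i] ? (dst_src1[i] >> src2[i]) : dst_src1[i];
+//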
+instruct vlslS_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (LShiftVS (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_lsl(as_FloatRegister($dst_src1$$reg), __ H,
+               as_PRegister($pg$$reg),
+               as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslI_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (LShiftVI (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_lsl(as_FloatRegister($dst_src1$$reg), __ S,
+               as_PRegister($pg$$reg),
+               as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslL_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (LShiftVL (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_lsl(as_FloatRegister($dst_src1$$reg), __ D,
+               as_PRegister($pg$$reg),
+               as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrB_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (URShiftVB (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (B)" %}
+  ins_encode %{
+    __ sve_lsr(as_FloatRegister($dst_src1$$reg), __ B,
+               as_PRegister($pg$$reg),
+               as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrS_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (URShiftVS (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_lsr(as_FloatRegister($dst_src1$$reg), __ H,
+               as_PRegister($pg$$reg),
+               as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrI_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (URShiftVI (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_lsr(as_FloatRegister($dst_src1$$reg), __ S,
+               as_PRegister($pg$$reg),
+               as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrL_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (URShiftVL (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsr $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_lsr(as_FloatRegister($dst_src1$$reg), __ D,
+               as_PRegister($pg$$reg),
+               as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vasrB_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src (RShiftVB (Binary dst_src (RShiftCntV shift)) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst_src, $pg, $dst_src, $shift\t# vector (sve) (B)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    assert(con > 0 && con < 8, "invalid shift immediate");
+    __ sve_asr(as_FloatRegister($dst_src$$reg), __ B, as_PRegister($pg$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vasrS_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src (RShiftVS (Binary dst_src (RShiftCntV shift)) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_asr $dst_src, $pg, $dst_src, $shift\t# vector (sve) (H)" %}
+  ins_encode %{
+    int con = (int)$shift$$constant;
+    assert(con > 0 && con < 16, "invalid shift immediate");
+    __ sve_asr(as_FloatRegister($dst_src$$reg), __ H, as_PRegister($pg$$reg), con);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
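+// Unlike the unpredicated immediate shifts, the predicated variants assert
+// that the count is already in range instead of special-casing it; they
+// appear to rely on the shift constant having been normalized before
+// matching. The guarded ranges, for lane width w in bits:
+//
+//   asr/lsr: 0 < con < w     (a zero shift is not expected here)
+//   lsl:     0 <= con < w    (a zero left shift is still accepted)
+//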
+instruct vasrI_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst (URShiftVI src (RShiftCntV shift)));
+  match(Set dst_src (RShiftVI (Binary dst_src (RShiftCntV shift)) pg));
   ins_cost(SVE_COST);
-  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (S)" %}
+  format %{ "sve_asr $dst_src, $pg, $dst_src, $shift\t# vector (sve) (S)" %}
   ins_encode %{
     int con = (int)$shift$$constant;
-    if (con == 0) {
-      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
-                 as_FloatRegister($src$$reg));
-      return;
-    }
-    __ sve_lsr(as_FloatRegister($dst$$reg), __ S,
-               as_FloatRegister($src$$reg), con);
+    assert(con > 0 && con < 32, "invalid shift immediate");
+    __ sve_asr(as_FloatRegister($dst_src$$reg), __ S, as_PRegister($pg$$reg), con);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{
+instruct vasrL_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst (URShiftVL src (RShiftCntV shift)));
+  match(Set dst_src (RShiftVL (Binary dst_src (RShiftCntV shift)) pg));
   ins_cost(SVE_COST);
-  format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %}
+  format %{ "sve_asr $dst_src, $pg, $dst_src, $shift\t# vector (sve) (D)" %}
   ins_encode %{
     int con = (int)$shift$$constant;
-    if (con == 0) {
-      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
-                 as_FloatRegister($src$$reg));
-      return;
-    }
-    __ sve_lsr(as_FloatRegister($dst$$reg), __ D,
-               as_FloatRegister($src$$reg), con);
+    assert(con > 0 && con < 64, "invalid shift immediate");
+    __ sve_asr(as_FloatRegister($dst_src$$reg), __ D, as_PRegister($pg$$reg), con);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
+instruct vlsrB_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst (LShiftVB src (LShiftCntV shift)));
+  match(Set dst_src (URShiftVB (Binary dst_src (RShiftCntV shift)) pg));
   ins_cost(SVE_COST);
-  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %}
+  format %{ "sve_lsr $dst_src, $pg, $dst_src, $shift\t# vector (sve) (B)" %}
   ins_encode %{
     int con = (int)$shift$$constant;
-    if (con >= 8) {
-      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
-                 as_FloatRegister($src$$reg));
-      return;
-    }
-    __ sve_lsl(as_FloatRegister($dst$$reg), __ B,
-               as_FloatRegister($src$$reg), con);
+    assert(con > 0 && con < 8, "invalid shift immediate");
+    __ sve_lsr(as_FloatRegister($dst_src$$reg), __ B, as_PRegister($pg$$reg), con);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
+instruct vlsrS_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst (LShiftVS src (LShiftCntV shift)));
+  match(Set dst_src (URShiftVS (Binary dst_src (RShiftCntV shift)) pg));
   ins_cost(SVE_COST);
-  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %}
+  format %{ "sve_lsr $dst_src, $pg, $dst_src, $shift\t# vector (sve) (H)" %}
   ins_encode %{
     int con = (int)$shift$$constant;
-    if (con >= 16) {
-      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
-                 as_FloatRegister($src$$reg));
-      return;
-    }
-    __ sve_lsl(as_FloatRegister($dst$$reg), __ H,
-               as_FloatRegister($src$$reg), con);
+    assert(con > 0 && con < 16, "invalid shift immediate");
+    __ sve_lsr(as_FloatRegister($dst_src$$reg), __ H, as_PRegister($pg$$reg), con);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
+instruct vlsrI_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst (LShiftVI src (LShiftCntV shift)));
+  match(Set dst_src (URShiftVI (Binary dst_src (RShiftCntV shift)) pg));
   ins_cost(SVE_COST);
-  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %}
+  format %{ "sve_lsr $dst_src, $pg, $dst_src, $shift\t# vector (sve) (S)" %}
   ins_encode %{
     int con = (int)$shift$$constant;
-    __ sve_lsl(as_FloatRegister($dst$$reg), __ S,
-               as_FloatRegister($src$$reg), con);
+    assert(con > 0 && con < 32, "invalid shift immediate");
+    __ sve_lsr(as_FloatRegister($dst_src$$reg), __ S, as_PRegister($pg$$reg), con);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vlslL_imm(vReg dst, vReg src, immI shift) %{
+instruct vlsrL_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst (LShiftVL src (LShiftCntV shift)));
+  match(Set dst_src (URShiftVL (Binary dst_src (RShiftCntV shift)) pg));
   ins_cost(SVE_COST);
-  format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %}
+  format %{ "sve_lsr $dst_src, $pg, $dst_src, $shift\t# vector (sve) (D)" %}
   ins_encode %{
     int con = (int)$shift$$constant;
-    __ sve_lsl(as_FloatRegister($dst$$reg), __ D,
-               as_FloatRegister($src$$reg), con);
+    assert(con > 0 && con < 64, "invalid shift immediate");
+    __ sve_lsr(as_FloatRegister($dst_src$$reg), __ D, as_PRegister($pg$$reg), con);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{
-  predicate(UseSVE > 0 &&
-            (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE));
-  match(Set dst (LShiftCntV cnt));
-  match(Set dst (RShiftCntV cnt));
-  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (B)" %}
+instruct vlslB_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src (LShiftVB (Binary dst_src (LShiftCntV shift)) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst_src, $pg, $dst_src, $shift\t# vector (sve) (B)" %}
   ins_encode %{
-    __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($cnt$$reg));
+    int con = (int)$shift$$constant;
+    assert(con >= 0 && con < 8, "invalid shift immediate");
+    __ sve_lsl(as_FloatRegister($dst_src$$reg), __ B, as_PRegister($pg$$reg), con);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{
-  predicate(UseSVE > 0 &&
-            (n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
-            (n->bottom_type()->is_vect()->element_basic_type() == T_CHAR)));
-  match(Set dst (LShiftCntV cnt));
-  match(Set dst (RShiftCntV cnt));
-  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (H)" %}
+instruct vlslS_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src (LShiftVS (Binary dst_src (LShiftCntV shift)) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst_src, $pg, $dst_src, $shift\t# vector (sve) (H)" %}
   ins_encode %{
-    __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($cnt$$reg));
+    int con = (int)$shift$$constant;
+    assert(con >= 0 && con < 16, "invalid shift immediate");
+    __ sve_lsl(as_FloatRegister($dst_src$$reg), __ H, as_PRegister($pg$$reg), con);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{
-  predicate(UseSVE > 0 &&
-            (n->bottom_type()->is_vect()->element_basic_type() == T_INT));
-  match(Set dst (LShiftCntV cnt));
-  match(Set dst (RShiftCntV cnt));
-  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (S)" %}
+instruct vlslI_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src (LShiftVI (Binary dst_src (LShiftCntV shift)) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst_src, $pg, $dst_src, $shift\t# vector (sve) (S)" %}
   ins_encode %{
-    __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($cnt$$reg));
+    int con = (int)$shift$$constant;
+    assert(con >= 0 && con < 32, "invalid shift immediate");
+    __ sve_lsl(as_FloatRegister($dst_src$$reg), __ S, as_PRegister($pg$$reg), con);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{
-  predicate(UseSVE > 0 &&
-            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG));
-  match(Set dst (LShiftCntV cnt));
-  match(Set dst (RShiftCntV cnt));
-  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (D)" %}
+instruct vlslL_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src (LShiftVL (Binary dst_src (LShiftCntV shift)) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_lsl $dst_src, $pg, $dst_src, $shift\t# vector (sve) (D)" %}
   ins_encode %{
-    __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($cnt$$reg));
+    int con = (int)$shift$$constant;
+    assert(con >= 0 && con < 64, "invalid shift immediate");
+    __ sve_lsl(as_FloatRegister($dst_src$$reg), __ D, as_PRegister($pg$$reg), con);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -2598,7 +4314,8 @@ instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{
 // vector sqrt
 
 instruct vsqrtF(vReg dst, vReg src) %{
-  predicate(UseSVE > 0);
+  predicate(UseSVE > 0 &&
+            !n->as_Vector()->is_predicated_vector());
   match(Set dst (SqrtVF src));
   ins_cost(SVE_COST);
   format %{ "sve_fsqrt $dst, $src\t# vector (sve) (S)" %}
@@ -2610,7 +4327,8 @@ instruct vsqrtF(vReg dst, vReg src) %{
 %}
 
 instruct vsqrtD(vReg dst, vReg src) %{
-  predicate(UseSVE > 0);
+  predicate(UseSVE > 0 &&
+            !n->as_Vector()->is_predicated_vector());
   match(Set dst (SqrtVD src));
   ins_cost(SVE_COST);
   format %{ "sve_fsqrt $dst, $src\t# vector (sve) (D)" %}
@@ -2621,6 +4339,34 @@ instruct vsqrtD(vReg dst, vReg src) %{
   ins_pipe(pipe_slow);
 %}
 
+// vector sqrt - predicated
+
+instruct vsqrtF_masked(vReg dst_src, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src (SqrtVF dst_src pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_fsqrt $dst_src, $pg, $dst_src\t# vector (sve) (S)" %}
+  ins_encode %{
+    __ sve_fsqrt(as_FloatRegister($dst_src$$reg), __ S,
+                 as_PRegister($pg$$reg),
+                 as_FloatRegister($dst_src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsqrtD_masked(vReg dst_src, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src (SqrtVD dst_src pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_fsqrt $dst_src, $pg, $dst_src\t# vector (sve) (D)" %}
+  ins_encode %{
+    __ sve_fsqrt(as_FloatRegister($dst_src$$reg), __ D,
+                 as_PRegister($pg$$reg),
+                 as_FloatRegister($dst_src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // vector sub
 
 instruct vsubB(vReg dst, vReg src1, vReg src2) %{
@@ -2701,175 +4447,181 @@ instruct vsubD(vReg dst, vReg src1, vReg src2) %{
   ins_pipe(pipe_slow);
 %}
 
-// vector mask cast
+// vector sub - predicated
 
-instruct vmaskcast(vReg dst) %{
-  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length() &&
-            n->bottom_type()->is_vect()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes());
-  match(Set dst (VectorMaskCast dst));
-  ins_cost(0);
-  format %{ "vmaskcast $dst\t# empty (sve)" %}
+instruct vsubB_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (SubVB (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_sub $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (B)" %}
   ins_encode %{
-    // empty
+    __ sve_sub(as_FloatRegister($dst_src1$$reg), __ B,
+               as_PRegister($pg$$reg),
+               as_FloatRegister($src2$$reg));
   %}
-  ins_pipe(pipe_class_empty);
+  ins_pipe(pipe_slow);
 %}
 
-// ------------------------------ Vector cast -------------------------------
+instruct vsubS_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (SubVS (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_sub $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (H)" %}
+  ins_encode %{
+    __ sve_sub(as_FloatRegister($dst_src1$$reg), __ H,
+               as_PRegister($pg$$reg),
+               as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
 
-instruct vcvtBtoS(vReg dst, vReg src)
-%{
-  predicate(UseSVE > 0 &&
-            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
-  match(Set dst (VectorCastB2X src));
+instruct vsubI_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (SubVI (Binary dst_src1 src2) pg));
   ins_cost(SVE_COST);
-  format %{ "sve_sunpklo $dst, H, $src\t# convert B to S vector" %}
+  format %{ "sve_sub $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %}
   ins_encode %{
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
+    __ sve_sub(as_FloatRegister($dst_src1$$reg), __ S,
+               as_PRegister($pg$$reg),
+               as_FloatRegister($src2$$reg));
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vcvtBtoI(vReg dst, vReg src)
-%{
-  predicate(UseSVE > 0 &&
-            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
-  match(Set dst (VectorCastB2X src));
-  ins_cost(2 * SVE_COST);
-  format %{ "sve_sunpklo $dst, H, $src\n\t"
-            "sve_sunpklo $dst, S, $dst\t# convert B to I vector" %}
+instruct vsubL_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (SubVL (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_sub $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %}
   ins_encode %{
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
+    __ sve_sub(as_FloatRegister($dst_src1$$reg), __ D,
+               as_PRegister($pg$$reg),
+               as_FloatRegister($src2$$reg));
  %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vcvtBtoL(vReg dst, vReg src)
-%{
-  predicate(UseSVE > 0 &&
-            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
-  match(Set dst (VectorCastB2X src));
-  ins_cost(3 * SVE_COST);
-  format %{ "sve_sunpklo $dst, H, $src\n\t"
-            "sve_sunpklo $dst, S, $dst\n\t"
-            "sve_sunpklo $dst, D, $dst\t# convert B to L vector" %}
+instruct vsubF_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (SubVF (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_fsub $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %}
   ins_encode %{
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
+    __ sve_fsub(as_FloatRegister($dst_src1$$reg), __ S,
+                as_PRegister($pg$$reg),
+                as_FloatRegister($src2$$reg));
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vcvtBtoF(vReg dst, vReg src)
-%{
-  predicate(UseSVE > 0 &&
-            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
-  match(Set dst (VectorCastB2X src));
-  ins_cost(3 * SVE_COST);
-  format %{ "sve_sunpklo $dst, H, $src\n\t"
-            "sve_sunpklo $dst, S, $dst\n\t"
-            "sve_scvtf $dst, S, $dst, S\t# convert B to F vector" %}
+instruct vsubD_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (SubVD (Binary dst_src1 src2) pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_fsub $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %}
   ins_encode %{
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
-    __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg), __ S);
+    __ sve_fsub(as_FloatRegister($dst_src1$$reg), __ D,
+                as_PRegister($pg$$reg),
+                as_FloatRegister($src2$$reg));
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vcvtBtoD(vReg dst, vReg src)
-%{
+// ------------------------------ Vector mask cast --------------------------
+
+instruct vmaskcast(pRegGov dst_src) %{
   predicate(UseSVE > 0 &&
-            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
-  match(Set dst (VectorCastB2X src));
-  ins_cost(4 * SVE_COST);
-  format %{ "sve_sunpklo $dst, H, $src\n\t"
-            "sve_sunpklo $dst, S, $dst\n\t"
-            "sve_sunpklo $dst, D, $dst\n\t"
-            "sve_scvtf $dst, D, $dst, D\t# convert B to D vector" %}
-  ins_encode %{
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
-    __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D);
+            n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length() &&
+            n->bottom_type()->is_vect()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes());
+  match(Set dst_src (VectorMaskCast dst_src));
+  ins_cost(0);
+  format %{ "vmaskcast $dst_src\t# empty (sve)" %}
+  ins_encode %{
+    // empty
   %}
-  ins_pipe(pipe_slow);
+  ins_pipe(pipe_class_empty);
 %}
 
-instruct vcvtStoB(vReg dst, vReg src, vReg tmp)
+instruct vmaskcast_extend(pRegGov dst, pReg src)
 %{
   predicate(UseSVE > 0 &&
-            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
-  match(Set dst (VectorCastS2X src));
-  effect(TEMP tmp);
-  ins_cost(2 * SVE_COST);
-  format %{ "sve_dup $tmp, B, 0\n\t"
-            "sve_uzp1 $dst, B, $src, tmp\t# convert S to B vector" %}
+            (Matcher::vector_length_in_bytes(n) == 2 * Matcher::vector_length_in_bytes(n->in(1)) ||
+             Matcher::vector_length_in_bytes(n) == 4 * Matcher::vector_length_in_bytes(n->in(1)) ||
+             Matcher::vector_length_in_bytes(n) == 8 * Matcher::vector_length_in_bytes(n->in(1))));
+  match(Set dst (VectorMaskCast src));
+  ins_cost(SVE_COST * 3);
+  format %{ "sve_vmaskcast_extend $dst, $src\t# extend predicate $src" %}
   ins_encode %{
-    __ sve_dup(as_FloatRegister($tmp$$reg), __ B, 0);
-    __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_vmaskcast_extend(as_PRegister($dst$$reg), as_PRegister($src$$reg),
+                            Matcher::vector_length_in_bytes(this), Matcher::vector_length_in_bytes(this, $src));
   %}
   ins_pipe(pipe_slow);
 %}
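+// SVE predicates hold one bit per byte of vector data, so casting a mask
+// between element sizes has to reposition the per-lane bits. A minimal
+// sketch of the invariant the extend/narrow helpers preserve, assuming lane
+// i of the result governs the same logical element as lane i of the source:
+//
+//   dst_mask[i] = src_mask[i], for i = 0 .. vector_length - 1
+//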
-instruct vcvtStoI(vReg dst, vReg src)
+instruct vmaskcast_narrow(pRegGov dst, pReg src)
 %{
   predicate(UseSVE > 0 &&
-            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
-  match(Set dst (VectorCastS2X src));
-  ins_cost(SVE_COST);
-  format %{ "sve_sunpklo $dst, S, $src\t# convert S to I vector" %}
+            (Matcher::vector_length_in_bytes(n) * 2 == Matcher::vector_length_in_bytes(n->in(1)) ||
+             Matcher::vector_length_in_bytes(n) * 4 == Matcher::vector_length_in_bytes(n->in(1)) ||
+             Matcher::vector_length_in_bytes(n) * 8 == Matcher::vector_length_in_bytes(n->in(1))));
+  match(Set dst (VectorMaskCast src));
+  ins_cost(SVE_COST * 3);
+  format %{ "sve_vmaskcast_narrow $dst, $src\t# narrow predicate $src" %}
   ins_encode %{
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
+    __ sve_vmaskcast_narrow(as_PRegister($dst$$reg), as_PRegister($src$$reg),
+                            Matcher::vector_length_in_bytes(this), Matcher::vector_length_in_bytes(this, $src));
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vcvtStoL(vReg dst, vReg src)
+// ------------------------------ Vector cast -------------------------------
+
+instruct vcvtBtoX_extend(vReg dst, vReg src)
 %{
-  predicate(UseSVE > 0 &&
-            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
-  match(Set dst (VectorCastS2X src));
+  predicate(UseSVE > 0);
+  match(Set dst (VectorCastB2X src));
   ins_cost(2 * SVE_COST);
-  format %{ "sve_sunpklo $dst, S, $src\n\t"
-            "sve_sunpklo $dst, D, $dst\t# convert S to L vector" %}
+  format %{ "sve_vectorcast_b2x $dst, $src\t# convert B to X vector (extend)" %}
   ins_encode %{
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
+    BasicType to_bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt);
+    __ sve_vector_extend(as_FloatRegister($dst$$reg), to_size, as_FloatRegister($src$$reg), __ B);
+    if (to_bt == T_FLOAT || to_bt == T_DOUBLE) {
+      __ sve_scvtf(as_FloatRegister($dst$$reg), to_size, ptrue, as_FloatRegister($dst$$reg), to_size);
+    }
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vcvtStoF(vReg dst, vReg src)
+instruct vcvtStoB(vReg dst, vReg src, vReg tmp)
 %{
   predicate(UseSVE > 0 &&
-            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
   match(Set dst (VectorCastS2X src));
+  effect(TEMP tmp);
   ins_cost(2 * SVE_COST);
-  format %{ "sve_sunpklo $dst, S, $src\n\t"
-            "sve_scvtf $dst, S, $dst, S\t# convert S to F vector" %}
+  format %{ "sve_vectorcast_s2b $dst, $src\t# convert H to B vector" %}
   ins_encode %{
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
-    __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg), __ S);
+    __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ B,
+                         as_FloatRegister($src$$reg), __ H, as_FloatRegister($tmp$$reg));
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vcvtStoD(vReg dst, vReg src)
+instruct vcvtStoX_extend(vReg dst, vReg src)
 %{
   predicate(UseSVE > 0 &&
-            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+            type2aelembytes(Matcher::vector_element_basic_type(n)) > 2);
   match(Set dst (VectorCastS2X src));
-  ins_cost(3 * SVE_COST);
-  format %{ "sve_sunpklo $dst, S, $src\n\t"
-            "sve_sunpklo $dst, D, $dst\n\t"
-            "sve_scvtf $dst, D, $dst, D\t# convert S to D vector" %}
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_vectorcast_s2x $dst, $src\t# convert H to X vector (extend)" %}
   ins_encode %{
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
-    __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D);
+    BasicType to_bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt);
+    __ sve_vector_extend(as_FloatRegister($dst$$reg), to_size, as_FloatRegister($src$$reg), __ H);
+    if (to_bt == T_FLOAT || to_bt == T_DOUBLE) {
+      __ sve_scvtf(as_FloatRegister($dst$$reg), to_size, ptrue, as_FloatRegister($dst$$reg), to_size);
+    }
   %}
   ins_pipe(pipe_slow);
 %}
@@ -2881,13 +4633,10 @@ instruct vcvtItoB(vReg dst, vReg src, vReg tmp)
   match(Set dst (VectorCastI2X src));
   effect(TEMP_DEF dst, TEMP tmp);
   ins_cost(3 * SVE_COST);
-  format %{ "sve_dup $tmp, H, 0\n\t"
-            "sve_uzp1 $dst, H, $src, tmp\n\t"
-            "sve_uzp1 $dst, B, $dst, tmp\n\t# convert I to B vector" %}
+  format %{ "sve_vectorcast_i2b $dst, $src\t# convert I to B vector" %}
   ins_encode %{
-    __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
-    __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
-    __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ B,
+                         as_FloatRegister($src$$reg), __ S, as_FloatRegister($tmp$$reg));
   %}
   ins_pipe(pipe_slow);
 %}
@@ -2899,11 +4648,10 @@ instruct vcvtItoS(vReg dst, vReg src, vReg tmp)
   match(Set dst (VectorCastI2X src));
   effect(TEMP tmp);
   ins_cost(2 * SVE_COST);
-  format %{ "sve_dup $tmp, H, 0\n\t"
-            "sve_uzp1 $dst, H, $src, tmp\t# convert I to S vector" %}
+  format %{ "sve_vectorcast_i2s $dst, $src\t# convert I to H vector" %}
   ins_encode %{
-    __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
-    __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ H,
+                         as_FloatRegister($src$$reg), __ S, as_FloatRegister($tmp$$reg));
   %}
   ins_pipe(pipe_slow);
 %}
@@ -2914,9 +4662,9 @@ instruct vcvtItoL(vReg dst, vReg src)
             n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
   match(Set dst (VectorCastI2X src));
   ins_cost(SVE_COST);
-  format %{ "sve_sunpklo $dst, D, $src\t# convert I to L vector" %}
+  format %{ "sve_vectorcast_i2l $dst, $src\t# convert I to L vector" %}
   ins_encode %{
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg));
+    __ sve_vector_extend(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg), __ S);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -2927,7 +4675,7 @@ instruct vcvtItoF(vReg dst, vReg src)
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
   match(Set dst (VectorCastI2X src));
   ins_cost(SVE_COST);
-  format %{ "sve_scvtf $dst, S, $src, S\t# convert I to F vector" %}
+  format %{ "sve_vectorcast_i2f $dst, $src\t# convert I to F vector" %}
   ins_encode %{
     __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
   %}
@@ -2940,8 +4688,7 @@ instruct vcvtItoD(vReg dst, vReg src)
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
   match(Set dst (VectorCastI2X src));
   ins_cost(2 * SVE_COST);
-  format %{ "sve_sunpklo $dst, D, $src\n\t"
-            "sve_scvtf $dst, D, $dst, D\t# convert I to D vector" %}
+  format %{ "sve_vectorcast_i2d $dst, $src\t# convert I to D vector" %}
   ins_encode %{
     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg));
     __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D);
@@ -2949,56 +4696,18 @@ instruct vcvtItoD(vReg dst, vReg src)
   ins_pipe(pipe_slow);
 %}
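+// The rewritten widening casts above compose two steps: an integral extend
+// into the destination lane size, then, when the destination type is
+// floating point, an in-place scvtf at that lane size. A minimal sketch for
+// B -> F, assuming a 32-bit intermediate lane:
+//
+//   int widened = (int)(byte)src[i];   // sve_vector_extend, B to S
+//   dst[i] = (float)widened;           // sve_scvtf, S to S
+//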
$dst, H, $dst, tmp\n\t" - "sve_uzp1 $dst, B, $dst, tmp\n\t# convert L to B vector" %} - ins_encode %{ - __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - %} - ins_pipe(pipe_slow); -%} - -instruct vcvtLtoS(vReg dst, vReg src, vReg tmp) +instruct vcvtLtoX_narrow(vReg dst, vReg src, vReg tmp) %{ - predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + predicate(UseSVE > 0 && is_integral_type(Matcher::vector_element_basic_type(n))); match(Set dst (VectorCastL2X src)); effect(TEMP_DEF dst, TEMP tmp); - ins_cost(3 * SVE_COST); - format %{ "sve_dup $tmp, S, 0\n\t" - "sve_uzp1 $dst, S, $src, tmp\n\t" - "sve_uzp1 $dst, H, $dst, tmp\n\t# convert L to S vector" %} - ins_encode %{ - __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - %} - ins_pipe(pipe_slow); -%} - -instruct vcvtLtoI(vReg dst, vReg src, vReg tmp) -%{ - predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_INT); - match(Set dst (VectorCastL2X src)); - effect(TEMP tmp); ins_cost(2 * SVE_COST); - format %{ "sve_dup $tmp, S, 0\n\t" - "sve_uzp1 $dst, S, $src, tmp\t# convert L to I vector" %} + format %{ "sve_vectorcast_l2x $dst, $src\t# convert L to B/H/S vector (narrow)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); + BasicType to_bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt); + __ sve_vector_narrow(as_FloatRegister($dst$$reg), to_size, + as_FloatRegister($src$$reg), __ D, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %} @@ -3010,13 +4719,12 @@ instruct vcvtLtoF(vReg dst, vReg src, vReg tmp) match(Set dst (VectorCastL2X src)); effect(TEMP_DEF dst, TEMP tmp); ins_cost(3 * SVE_COST); - format %{ "sve_scvtf $dst, S, $src, D\n\t" - "sve_dup $tmp, S, 0\n\t" - "sve_uzp1 $dst, S, $dst, $tmp\t# convert L to F vector" %} + format %{ "sve_vectorcast_l2f $dst, $src\t# convert L to F vector" %} ins_encode %{ __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D); - __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($dst$$reg), __ D, as_FloatRegister($tmp$$reg)); + %} ins_pipe(pipe_slow); %} @@ -3027,47 +4735,28 @@ instruct vcvtLtoD(vReg dst, vReg src) n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorCastL2X src)); ins_cost(SVE_COST); - format %{ "sve_scvtf $dst, D, $src, D\t# convert L to D vector" %} + format %{ "sve_vectorcast_l2d $dst, $src\t# convert L to D vector" %} ins_encode %{ __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D); %} ins_pipe(pipe_slow); %} -instruct vcvtFtoB(vReg dst, vReg src, vReg tmp) -%{ - predicate(UseSVE > 0 && - 
-            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
-  match(Set dst (VectorCastF2X src));
-  effect(TEMP_DEF dst, TEMP tmp);
-  ins_cost(4 * SVE_COST);
-  format %{ "sve_fcvtzs $dst, S, $src, S\n\t"
-            "sve_dup $tmp, H, 0\n\t"
-            "sve_uzp1 $dst, H, $dst, tmp\n\t"
-            "sve_uzp1 $dst, B, $dst, tmp\n\t# convert F to B vector" %}
-  ins_encode %{
-    __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
-    __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
-    __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
-    __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct vcvtFtoS(vReg dst, vReg src, vReg tmp)
+instruct vcvtFtoX_narrow(vReg dst, vReg src, vReg tmp)
 %{
   predicate(UseSVE > 0 &&
-            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+            (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
+             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT));
   match(Set dst (VectorCastF2X src));
   effect(TEMP_DEF dst, TEMP tmp);
   ins_cost(3 * SVE_COST);
-  format %{ "sve_fcvtzs $dst, S, $src, S\n\t"
-            "sve_dup $tmp, H, 0\n\t"
-            "sve_uzp1 $dst, H, $dst, tmp\t# convert F to S vector" %}
+  format %{ "sve_vectorcast_f2x $dst, $src\t# convert F to B/H vector" %}
   ins_encode %{
+    BasicType to_bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt);
     __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
-    __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
-    __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_vector_narrow(as_FloatRegister($dst$$reg), to_size,
+                         as_FloatRegister($dst$$reg), __ S, as_FloatRegister($tmp$$reg));
   %}
   ins_pipe(pipe_slow);
 %}
@@ -3075,10 +4764,10 @@ instruct vcvtFtoS(vReg dst, vReg src, vReg tmp)
 instruct vcvtFtoI(vReg dst, vReg src)
 %{
   predicate(UseSVE > 0 &&
-            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+            (n->bottom_type()->is_vect()->element_basic_type() == T_INT));
   match(Set dst (VectorCastF2X src));
   ins_cost(SVE_COST);
-  format %{ "sve_fcvtzs $dst, S, $src, S\t# convert F to I vector" %}
+  format %{ "sve_vectorcast_f2x $dst, $src\t# convert F to I vector" %}
   ins_encode %{
     __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
   %}
@@ -3088,89 +4777,47 @@ instruct vcvtFtoL(vReg dst, vReg src)
 %{
   predicate(UseSVE > 0 &&
-            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
-  match(Set dst (VectorCastF2X src));
-  ins_cost(2 * SVE_COST);
-  format %{ "sve_fcvtzs $dst, S, $src, S\n\t"
-            "sve_sunpklo $dst, D, $dst\t# convert F to L vector" %}
-  ins_encode %{
-    __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
-    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct vcvtFtoD(vReg dst, vReg src)
-%{
-  predicate(UseSVE > 0 &&
as_FloatRegister($dst$$reg), __ S); - %} - ins_pipe(pipe_slow); -%} - -instruct vcvtDtoB(vReg dst, vReg src, vReg tmp) -%{ - predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); - match(Set dst (VectorCastD2X src)); - effect(TEMP_DEF dst, TEMP tmp); - ins_cost(5 * SVE_COST); - format %{ "sve_fcvtzs $dst, D, $src, D\n\t" - "sve_dup $tmp, S, 0\n\t" - "sve_uzp1 $dst, S, $dst, tmp\n\t" - "sve_uzp1 $dst, H, $dst, tmp\n\t" - "sve_uzp1 $dst, B, $dst, tmp\n\t# convert D to B vector" %} + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG)); + match(Set dst (VectorCastF2X src)); + ins_cost(SVE_COST * 2); + format %{ "sve_vectorcast_f2x $dst, $src\t# convert F to L vector" %} ins_encode %{ - __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D); - __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg)); + __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ S); %} ins_pipe(pipe_slow); %} -instruct vcvtDtoS(vReg dst, vReg src, vReg tmp) +instruct vcvtFtoD(vReg dst, vReg src) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); - match(Set dst (VectorCastD2X src)); - effect(TEMP_DEF dst, TEMP tmp); - ins_cost(4 * SVE_COST); - format %{ "sve_fcvtzs $dst, D, $src, D\n\t" - "sve_dup $tmp, S, 0\n\t" - "sve_uzp1 $dst, S, $dst, tmp\n\t" - "sve_uzp1 $dst, H, $dst, tmp\n\t# convert D to S vector" %} + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorCastF2X src)); + ins_cost(2 * SVE_COST); + format %{ "sve_vectorcast_f2d $dst, $dst\t# convert F to D vector" %} ins_encode %{ - __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D); - __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_vector_extend(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg), __ S); + __ sve_fcvt(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ S); %} ins_pipe(pipe_slow); %} -instruct vcvtDtoI(vReg dst, vReg src, vReg tmp) +instruct vcvtDtoX_narrow(vReg dst, vReg src, vReg tmp) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_INT); + (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE || + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || + n->bottom_type()->is_vect()->element_basic_type() == T_INT)); match(Set dst (VectorCastD2X src)); effect(TEMP_DEF dst, TEMP tmp); ins_cost(3 * SVE_COST); - format %{ "sve_fcvtzs $dst, D, $src, D\n\t" - "sve_dup $tmp, S, 0\n\t" - "sve_uzp1 $dst, S, $dst, tmp\t# convert D to I vector" %} + format %{ "sve_vectorcast_d2x $dst, $src\t# convert D to X vector (narrow)" %} ins_encode %{ - __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D); - __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, 
-    __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+    BasicType to_bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt);
+    __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D);
+    __ sve_vector_narrow(as_FloatRegister($dst$$reg), to_size,
+                         as_FloatRegister($dst$$reg), __ D, as_FloatRegister($tmp$$reg));
   %}
   ins_pipe(pipe_slow);
 %}
@@ -3181,7 +4828,7 @@ instruct vcvtDtoL(vReg dst, vReg src)
             n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
   match(Set dst (VectorCastD2X src));
   ins_cost(SVE_COST);
-  format %{ "sve_fcvtzs $dst, D, $src, D\t# convert D to L vector" %}
+  format %{ "sve_vectorcast_d2l $dst, $src\t# convert D to L vector" %}
   ins_encode %{
    __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D);
   %}
@@ -3195,44 +4842,43 @@ instruct vcvtDtoF(vReg dst, vReg src, vReg tmp)
   match(Set dst (VectorCastD2X src));
   effect(TEMP_DEF dst, TEMP tmp);
   ins_cost(3 * SVE_COST);
-  format %{ "sve_fcvt $dst, S, $src, D\n\t"
-            "sve_dup $tmp, S, 0\n\t"
-            "sve_uzp1 $dst, S, $dst, $tmp\t# convert D to F vector" %}
+  format %{ "sve_vectorcast_d2f $dst, S, $dst\t# convert D to F vector" %}
   ins_encode %{
     __ sve_fcvt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D);
-    __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
-    __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
+    __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ S,
+                         as_FloatRegister($dst$$reg), __ D, as_FloatRegister($tmp$$reg));
   %}
   ins_pipe(pipe_slow);
 %}
+
 // ------------------------------ Vector extract ---------------------------------
 
-instruct extractB(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
+instruct extractB(iRegINoSp dst, vReg src, immI idx, pRegGov pgtmp, rFlagsReg cr)
 %{
   predicate(UseSVE > 0);
   match(Set dst (ExtractB src idx));
-  effect(TEMP pTmp, KILL cr);
+  effect(TEMP pgtmp, KILL cr);
   ins_cost(2 * SVE_COST);
-  format %{ "sve_extract $dst, B, $pTmp, $src, $idx\n\t"
+  format %{ "sve_extract $dst, B, $pgtmp, $src, $idx\n\t"
            "sbfmw $dst, $dst, 0U, 7U\t# extract from vector(B)" %}
   ins_encode %{
-    __ sve_extract(as_Register($dst$$reg), __ B, as_PRegister($pTmp$$reg),
+    __ sve_extract(as_Register($dst$$reg), __ B, as_PRegister($pgtmp$$reg),
                    as_FloatRegister($src$$reg), (int)($idx$$constant));
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 7U);
  %}
   ins_pipe(pipe_slow);
 %}
 
-instruct extractS(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
+instruct extractS(iRegINoSp dst, vReg src, immI idx, pRegGov pgtmp, rFlagsReg cr)
 %{
   predicate(UseSVE > 0);
   match(Set dst (ExtractS src idx));
-  effect(TEMP pTmp, KILL cr);
+  effect(TEMP pgtmp, KILL cr);
   ins_cost(2 * SVE_COST);
-  format %{ "sve_extract $dst, H, $pTmp, $src, $idx\n\t"
+  format %{ "sve_extract $dst, H, $pgtmp, $src, $idx\n\t"
            "sbfmw $dst, $dst, 0U, 15U\t# extract from vector(S)" %}
   ins_encode %{
-    __ sve_extract(as_Register($dst$$reg), __ H, as_PRegister($pTmp$$reg),
+    __ sve_extract(as_Register($dst$$reg), __ H, as_PRegister($pgtmp$$reg),
                    as_FloatRegister($src$$reg), (int)($idx$$constant));
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
   %}
@@ -3240,57 +4886,57 @@ instruct extractS(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
 %}
 
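+// A minimal sketch of what the extract rules compute, assuming idx is a
+// compile-time constant within the vector length (pgtmp is only a scratch
+// predicate, and cr is clobbered by the predicate-generating instruction
+// inside the sve_extract helper):
+//
+//   dst = src[idx];
+//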
pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0); match(Set dst (ExtractI src idx)); - effect(TEMP pTmp, KILL cr); + effect(TEMP pgtmp, KILL cr); ins_cost(2 * SVE_COST); - format %{ "sve_extract $dst, S, $pTmp, $src, $idx\t# extract from vector(I)" %} + format %{ "sve_extract $dst, S, $pgtmp, $src, $idx\t# extract from vector(I)" %} ins_encode %{ - __ sve_extract(as_Register($dst$$reg), __ S, as_PRegister($pTmp$$reg), + __ sve_extract(as_Register($dst$$reg), __ S, as_PRegister($pgtmp$$reg), as_FloatRegister($src$$reg), (int)($idx$$constant)); %} ins_pipe(pipe_slow); %} -instruct extractL(iRegLNoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr) +instruct extractL(iRegLNoSp dst, vReg src, immI idx, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0); match(Set dst (ExtractL src idx)); - effect(TEMP pTmp, KILL cr); + effect(TEMP pgtmp, KILL cr); ins_cost(2 * SVE_COST); - format %{ "sve_extract $dst, D, $pTmp, $src, $idx\t# extract from vector(L)" %} + format %{ "sve_extract $dst, D, $pgtmp, $src, $idx\t# extract from vector(L)" %} ins_encode %{ - __ sve_extract(as_Register($dst$$reg), __ D, as_PRegister($pTmp$$reg), + __ sve_extract(as_Register($dst$$reg), __ D, as_PRegister($pgtmp$$reg), as_FloatRegister($src$$reg), (int)($idx$$constant)); %} ins_pipe(pipe_slow); %} -instruct extractF(vRegF dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr) +instruct extractF(vRegF dst, vReg src, immI idx, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0); match(Set dst (ExtractF src idx)); - effect(TEMP pTmp, KILL cr); + effect(TEMP pgtmp, KILL cr); ins_cost(2 * SVE_COST); - format %{ "sve_extract $dst, S, $pTmp, $src, $idx\t# extract from vector(F)" %} + format %{ "sve_extract $dst, S, $pgtmp, $src, $idx\t# extract from vector(F)" %} ins_encode %{ - __ sve_extract(as_FloatRegister($dst$$reg), __ S, as_PRegister($pTmp$$reg), + __ sve_extract(as_FloatRegister($dst$$reg), __ S, as_PRegister($pgtmp$$reg), as_FloatRegister($src$$reg), (int)($idx$$constant)); %} ins_pipe(pipe_slow); %} -instruct extractD(vRegD dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr) +instruct extractD(vRegD dst, vReg src, immI idx, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0); match(Set dst (ExtractD src idx)); - effect(TEMP pTmp, KILL cr); + effect(TEMP pgtmp, KILL cr); ins_cost(2 * SVE_COST); - format %{ "sve_extract $dst, D, $pTmp, $src, $idx\t# extract from vector(D)" %} + format %{ "sve_extract $dst, D, $pgtmp, $src, $idx\t# extract from vector(D)" %} ins_encode %{ - __ sve_extract(as_FloatRegister($dst$$reg), __ D, as_PRegister($pTmp$$reg), + __ sve_extract(as_FloatRegister($dst$$reg), __ D, as_PRegister($pgtmp$$reg), as_FloatRegister($src$$reg), (int)($idx$$constant)); %} ins_pipe(pipe_slow); @@ -3298,83 +4944,79 @@ instruct extractD(vRegD dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr) // ------------------------------- VectorTest ---------------------------------- -instruct vtest_alltrue(iRegINoSp dst, vReg src1, vReg src2, pReg pTmp, rFlagsReg cr) +instruct vtest_alltrue(iRegINoSp dst, pRegGov src1, pRegGov src2, pReg ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + predicate(UseSVE > 0 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); match(Set dst (VectorTest src1 src2)); - effect(TEMP pTmp, KILL cr); + effect(TEMP ptmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_cmpeq $pTmp, $src1, 0\n\t" + format %{ "sve_eors $ptmp, $src1, $src2\t#
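The extract rules above (note the pTmp to pgtmp rename) back Vector API lane reads with a compile-time-constant index; sub-int results are then sign-extended by the sbfmw. A hedged sketch, under the same jdk.incubator.vector assumptions as the earlier example:

    import jdk.incubator.vector.*;

    static int laneRead(int[] a) {
        IntVector v = IntVector.fromArray(IntVector.SPECIES_PREFERRED, a, 0);
        // A constant lane index lets C2 match ExtractI, i.e. one sve_extract.
        return v.lane(0);
    }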
$src2 is all true mask\n" "csetw $dst, EQ\t# VectorTest (sve) - alltrue" %} ins_encode %{ - // "src2" is not used for sve. - BasicType bt = Matcher::vector_element_basic_type(this, $src1); - Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, - ptrue, as_FloatRegister($src1$$reg), 0); + __ sve_eors(as_PRegister($ptmp$$reg), ptrue, + as_PRegister($src1$$reg), as_PRegister($src2$$reg)); __ csetw(as_Register($dst$$reg), Assembler::EQ); %} ins_pipe(pipe_slow); %} -instruct vtest_anytrue(iRegINoSp dst, vReg src1, vReg src2, pReg pTmp, rFlagsReg cr) +instruct vtest_anytrue(iRegINoSp dst, pRegGov src1, pRegGov src2, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + predicate(UseSVE > 0 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); match(Set dst (VectorTest src1 src2)); - effect(TEMP pTmp, KILL cr); + effect(KILL cr); ins_cost(SVE_COST); - format %{ "sve_cmpeq $pTmp, $src1, -1\n\t" + format %{ "sve_ptest $src1\n\t" "csetw $dst, NE\t# VectorTest (sve) - anytrue" %} ins_encode %{ // "src2" is not used for sve. - BasicType bt = Matcher::vector_element_basic_type(this, $src1); - Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, - ptrue, as_FloatRegister($src1$$reg), -1); + __ sve_ptest(ptrue, as_PRegister($src1$$reg)); __ csetw(as_Register($dst$$reg), Assembler::NE); %} ins_pipe(pipe_slow); %} -instruct vtest_alltrue_partial(iRegINoSp dst, vReg src1, vReg src2, pRegGov pTmp, rFlagsReg cr) +instruct vtest_alltrue_partial(iRegINoSp dst, pRegGov src1, pRegGov src2, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + predicate(UseSVE > 0 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); match(Set dst (VectorTest src1 src2)); - effect(TEMP pTmp, KILL cr); + effect(TEMP ptmp, KILL cr); ins_cost(SVE_COST); format %{ "vtest_alltrue_partial $dst, $src1, $src2\t# VectorTest partial (sve) - alltrue" %} ins_encode %{ - // "src2" is not used for sve.
BasicType bt = Matcher::vector_element_basic_type(this, $src1); Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); - __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), size, + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size, Matcher::vector_length(this, $src1)); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, - as_PRegister($pTmp$$reg), as_FloatRegister($src1$$reg), 0); + __ sve_eors(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($src1$$reg), as_PRegister($src2$$reg)); __ csetw(as_Register($dst$$reg), Assembler::EQ); %} ins_pipe(pipe_slow); %} -instruct vtest_anytrue_partial(iRegINoSp dst, vReg src1, vReg src2, pRegGov pTmp, rFlagsReg cr) +instruct vtest_anytrue_partial(iRegINoSp dst, pRegGov src1, pRegGov src2, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + predicate(UseSVE > 0 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); match(Set dst (VectorTest src1 src2)); - effect(TEMP pTmp, KILL cr); + effect(TEMP ptmp, KILL cr); ins_cost(SVE_COST); format %{ "vtest_anytrue_partial $dst, $src1, $src2\t# VectorTest partial (sve) - anytrue" %} ins_encode %{ - // "src2" is not used for sve. BasicType bt = Matcher::vector_element_basic_type(this, $src1); Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); - __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), size, + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size, Matcher::vector_length(this, $src1)); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, - as_PRegister($pTmp$$reg), as_FloatRegister($src1$$reg), -1); + __ sve_ands(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($src1$$reg), as_PRegister($src2$$reg)); __ csetw(as_Register($dst$$reg), Assembler::NE); %} ins_pipe(pipe_slow); @@ -3382,211 +5024,169 @@ instruct vtest_anytrue_partial(iRegINoSp dst, vReg src1, vReg src2, pRegGov pTmp // ------------------------------ Vector insert --------------------------------- -instruct insertI_small(vReg dst, vReg src, iRegIorL2I val, immI idx, pRegGov pTmp, rFlagsReg cr) +instruct insertI_small(vReg dst, vReg src, iRegIorL2I val, immI idx, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 && (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE || n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || n->bottom_type()->is_vect()->element_basic_type() == T_INT)); match(Set dst (VectorInsert (Binary src val) idx)); - effect(TEMP_DEF dst, TEMP pTmp, KILL cr); + effect(TEMP_DEF dst, TEMP pgtmp, KILL cr); ins_cost(4 * SVE_COST); - format %{ "sve_index $dst, -16, 1\t# (B/S/I)\n\t" - "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" + format %{ "sve_index $dst, -16, 1\t# (B/H/S)\n\t" + "sve_cmpeq $pgtmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" "sve_orr $dst, $src, $src\n\t" - "sve_cpy $dst, $pTmp, $val\t# insert into vector (B/S/I)" %} + "sve_cpy $dst, $pgtmp, $val\t# insert into vector (B/H/S)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src); Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); __ sve_index(as_FloatRegister($dst$$reg), size, -16, 1); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, ptrue, + __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), size, ptrue, as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16); __
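With masks now held in predicate registers, alltrue reduces to an EORS against the all-true mask and anytrue to a PTEST; the _partial variants first build a whilelo predicate for the actual vector length. At the Java level these are the mask queries, e.g.:

    import jdk.incubator.vector.*;

    static boolean anyNegative(int[] a) {
        IntVector v = IntVector.fromArray(IntVector.SPECIES_PREFERRED, a, 0);
        VectorMask<Integer> m = v.lt(0);
        // m.allTrue() reaches the BoolTest::overflow rules above,
        // m.anyTrue() the BoolTest::ne rules.
        return m.anyTrue();
    }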
sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); - __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), as_Register($val$$reg)); + __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pgtmp$$reg), as_Register($val$$reg)); %} ins_pipe(pipe_slow); %} -instruct insertF_small(vReg dst, vReg src, vRegF val, immI idx, pRegGov pTmp, rFlagsReg cr) +instruct insertF_small(vReg dst, vReg src, vRegF val, immI idx, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorInsert (Binary src val) idx)); - effect(TEMP_DEF dst, TEMP pTmp, KILL cr); + effect(TEMP_DEF dst, TEMP pgtmp, KILL cr); ins_cost(4 * SVE_COST); format %{ "sve_index $dst, S, -16, 1\n\t" - "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" + "sve_cmpeq $pgtmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" "sve_orr $dst, $src, $src\n\t" - "sve_cpy $dst, $pTmp, $val\t# insert into vector (F)" %} + "sve_cpy $dst, $pgtmp, $val\t# insert into vector (F)" %} ins_encode %{ __ sve_index(as_FloatRegister($dst$$reg), __ S, -16, 1); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ S, ptrue, + __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), __ S, ptrue, as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16); __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); - __ sve_cpy(as_FloatRegister($dst$$reg), __ S, as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg)); + __ sve_cpy(as_FloatRegister($dst$$reg), __ S, as_PRegister($pgtmp$$reg), as_FloatRegister($val$$reg)); %} ins_pipe(pipe_slow); %} -instruct insertI(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg tmp1, pRegGov pTmp, rFlagsReg cr) +instruct insertI(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg tmp1, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_Vector()->length() > 32 && (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE || n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || n->bottom_type()->is_vect()->element_basic_type() == T_INT)); match(Set dst (VectorInsert (Binary src val) idx)); - effect(TEMP_DEF dst, TEMP tmp1, TEMP pTmp, KILL cr); + effect(TEMP_DEF dst, TEMP tmp1, TEMP pgtmp, KILL cr); ins_cost(5 * SVE_COST); - format %{ "sve_index $tmp1, 0, 1\t# (B/S/I)\n\t" - "sve_dup $dst, $idx\t# (B/S/I)\n\t" - "sve_cmpeq $pTmp, $tmp1, $dst\n\t" + format %{ "sve_index $tmp1, 0, 1\t# (B/H/S)\n\t" + "sve_dup $dst, $idx\t# (B/H/S)\n\t" + "sve_cmpeq $pgtmp, $tmp1, $dst\n\t" "sve_orr $dst, $src, $src\n\t" - "sve_cpy $dst, $pTmp, $val\t# insert into vector (B/S/I)" %} + "sve_cpy $dst, $pgtmp, $val\t# insert into vector (B/H/S)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src); Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); __ sve_index(as_FloatRegister($tmp1$$reg), size, 0, 1); __ sve_dup(as_FloatRegister($dst$$reg), size, (int)($idx$$constant)); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, ptrue, + __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), size, ptrue, as_FloatRegister($tmp1$$reg), as_FloatRegister($dst$$reg)); __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); - __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), as_Register($val$$reg)); + __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pgtmp$$reg), 
as_Register($val$$reg)); %} ins_pipe(pipe_slow); %} -instruct insertL(vReg dst, vReg src, iRegL val, immI idx, pRegGov pTmp, rFlagsReg cr) +instruct insertL(vReg dst, vReg src, iRegL val, immI idx, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (VectorInsert (Binary src val) idx)); - effect(TEMP_DEF dst, TEMP pTmp, KILL cr); + effect(TEMP_DEF dst, TEMP pgtmp, KILL cr); ins_cost(4 * SVE_COST); format %{ "sve_index $dst, D, -16, 1\n\t" - "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" + "sve_cmpeq $pgtmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" "sve_orr $dst, $src, $src\n\t" - "sve_cpy $dst, $pTmp, $val\t# insert into vector (L)" %} + "sve_cpy $dst, $pgtmp, $val\t# insert into vector (L)" %} ins_encode %{ __ sve_index(as_FloatRegister($dst$$reg), __ D, -16, 1); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ D, ptrue, + __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16); __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); - __ sve_cpy(as_FloatRegister($dst$$reg), __ D, as_PRegister($pTmp$$reg), as_Register($val$$reg)); + __ sve_cpy(as_FloatRegister($dst$$reg), __ D, as_PRegister($pgtmp$$reg), as_Register($val$$reg)); %} ins_pipe(pipe_slow); %} -instruct insertD(vReg dst, vReg src, vRegD val, immI idx, pRegGov pTmp, rFlagsReg cr) +instruct insertD(vReg dst, vReg src, vRegD val, immI idx, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); match(Set dst (VectorInsert (Binary src val) idx)); - effect(TEMP_DEF dst, TEMP pTmp, KILL cr); + effect(TEMP_DEF dst, TEMP pgtmp, KILL cr); ins_cost(4 * SVE_COST); format %{ "sve_index $dst, D, -16, 1\n\t" - "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" + "sve_cmpeq $pgtmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" "sve_orr $dst, $src, $src\n\t" - "sve_cpy $dst, $pTmp, $val\t# insert into vector (D)" %} + "sve_cpy $dst, $pgtmp, $val\t# insert into vector (D)" %} ins_encode %{ __ sve_index(as_FloatRegister($dst$$reg), __ D, -16, 1); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ D, ptrue, + __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16); __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); - __ sve_cpy(as_FloatRegister($dst$$reg), __ D, as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg)); + __ sve_cpy(as_FloatRegister($dst$$reg), __ D, as_PRegister($pgtmp$$reg), as_FloatRegister($val$$reg)); %} ins_pipe(pipe_slow); %} -instruct insertF(vReg dst, vReg src, vRegF val, immI idx, vReg tmp1, pRegGov pTmp, rFlagsReg cr) +instruct insertF(vReg dst, vReg src, vRegF val, immI idx, vReg tmp1, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_Vector()->length() > 32 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorInsert (Binary src val) idx)); - effect(TEMP_DEF dst, TEMP tmp1, TEMP pTmp, KILL cr); + effect(TEMP_DEF dst, TEMP tmp1, TEMP pgtmp, KILL cr); ins_cost(5 * SVE_COST); format %{ "sve_index $tmp1, S, 0, 1\n\t" "sve_dup $dst, S, $idx\n\t" - "sve_cmpeq $pTmp, $tmp1, $dst\n\t" + "sve_cmpeq $pgtmp, $tmp1, $dst\n\t" "sve_orr $dst, $src, $src\n\t" - "sve_cpy $dst, $pTmp, $val\t# insert into vector (F)" %} + "sve_cpy $dst, 
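The VectorInsert rules build a one-lane governing predicate (sve_index plus sve_cmpeq against the target index) and then merge the scalar with sve_cpy. The Java-level operation is withLane; a sketch assuming at least two long lanes per vector:

    import jdk.incubator.vector.*;

    static long[] setLane(long[] a, long val) {
        LongVector v = LongVector.fromArray(LongVector.SPECIES_PREFERRED, a, 0);
        long[] out = new long[a.length];
        // A constant index keeps this on the insertL path above.
        v.withLane(1, val).intoArray(out, 0);
        return out;
    }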
$pgtmp, $val\t# insert into vector (F)" %} ins_encode %{ __ sve_index(as_FloatRegister($tmp1$$reg), __ S, 0, 1); __ sve_dup(as_FloatRegister($dst$$reg), __ S, (int)($idx$$constant)); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ S, ptrue, + __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), __ S, ptrue, as_FloatRegister($tmp1$$reg), as_FloatRegister($dst$$reg)); __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); __ sve_cpy(as_FloatRegister($dst$$reg), __ S, - as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg)); + as_PRegister($pgtmp$$reg), as_FloatRegister($val$$reg)); %} ins_pipe(pipe_slow); %} // ------------------------------ Vector shuffle ------------------------------- -instruct loadshuffleB(vReg dst, vReg src) -%{ - predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); +instruct loadshuffle(vReg dst, vReg src) %{ + predicate(UseSVE > 0); match(Set dst (VectorLoadShuffle src)); ins_cost(SVE_COST); - format %{ "sve_orr $dst, $src, $src\t# vector load shuffle (B)" %} + format %{ "sve_loadshuffle $dst, $src\t# vector load shuffle (B/H/S/D)" %} ins_encode %{ - if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) { - __ sve_orr(as_FloatRegister($dst$$reg), - as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this); + if (bt == T_BYTE) { + if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } + } else { + __ sve_vector_extend(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt), + as_FloatRegister($src$$reg), __ B); } %} ins_pipe(pipe_slow); %} -instruct loadshuffleS(vReg dst, vReg src) -%{ - predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); - match(Set dst (VectorLoadShuffle src)); - ins_cost(SVE_COST); - format %{ "sve_uunpklo $dst, $src\t# vector load shuffle (B to H)" %} - ins_encode %{ - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); - %} - ins_pipe(pipe_slow); -%} - -instruct loadshuffleI(vReg dst, vReg src) -%{ - predicate(UseSVE > 0 && - (n->bottom_type()->is_vect()->element_basic_type() == T_INT || - n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); - match(Set dst (VectorLoadShuffle src)); - ins_cost(2 * SVE_COST); - format %{ "sve_uunpklo $dst, H, $src\n\t" - "sve_uunpklo $dst, S, $dst\t# vector load shuffle (B to S)" %} - ins_encode %{ - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg)); - %} - ins_pipe(pipe_slow); -%} - -instruct loadshuffleL(vReg dst, vReg src) -%{ - predicate(UseSVE > 0 && - (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || - n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); - match(Set dst (VectorLoadShuffle src)); - ins_cost(3 * SVE_COST); - format %{ "sve_uunpklo $dst, H, $src\n\t" - "sve_uunpklo $dst, S, $dst\n\t" - "sve_uunpklo $dst, D, $dst\t# vector load shuffle (B to D)" %} - ins_encode %{ - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg)); - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg)); - %} - ins_pipe(pipe_slow); -%} - // ------------------------------ Vector rearrange 
------------------------------- instruct rearrange(vReg dst, vReg src, vReg shuffle) @@ -3613,7 +5213,7 @@ instruct gatherI(vReg dst, indirect mem, vReg idx) %{ n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); match(Set dst (LoadVectorGather mem idx)); ins_cost(SVE_COST); - format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (I/F)" %} + format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (S)" %} ins_encode %{ __ sve_ld1w_gather(as_FloatRegister($dst$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($idx$$reg)); @@ -3628,52 +5228,123 @@ instruct gatherL(vReg dst, indirect mem, vReg idx) %{ n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); match(Set dst (LoadVectorGather mem idx)); ins_cost(2 * SVE_COST); - format %{ "sve_uunpklo $idx, $idx\n\t" - "load_vector_gather $dst, $mem, $idx\t# vector load gather (L/D)" %} + format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (D)" %} ins_encode %{ __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); - __ sve_ld1d_gather(as_FloatRegister($dst$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($idx$$reg)); + __ sve_ld1d_gather(as_FloatRegister($dst$$reg), ptrue, as_Register($mem$$base), + as_FloatRegister($idx$$reg)); %} ins_pipe(pipe_slow); %} // ------------------------------ Vector Load Gather Partial------------------------------- -instruct gatherI_partial(vReg dst, indirect mem, vReg idx, pRegGov pTmp, rFlagsReg cr) %{ +instruct gatherI_partial(vReg dst, indirect mem, vReg idx, pRegGov ptmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_LoadVectorGather()->memory_size() < MaxVectorSize && (n->bottom_type()->is_vect()->element_basic_type() == T_INT || n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); match(Set dst (LoadVectorGather mem idx)); - effect(TEMP pTmp, KILL cr); + effect(TEMP ptmp, KILL cr); ins_cost(2 * SVE_COST + INSN_COST); - format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" - "load_vector_gather $dst, $pTmp, $mem, $idx\t# vector load gather partial (I/F)" %} + format %{ "load_vector_gather $dst, $ptmp, $mem, $idx\t# vector load gather partial (S)" %} ins_encode %{ - __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ S, - Matcher::vector_length(this)); - __ sve_ld1w_gather(as_FloatRegister($dst$$reg), as_PRegister($pTmp$$reg), + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, Matcher::vector_length(this)); + __ sve_ld1w_gather(as_FloatRegister($dst$$reg), as_PRegister($ptmp$$reg), as_Register($mem$$base), as_FloatRegister($idx$$reg)); %} ins_pipe(pipe_slow); %} -instruct gatherL_partial(vReg dst, indirect mem, vReg idx, pRegGov pTmp, rFlagsReg cr) %{ +instruct gatherL_partial(vReg dst, indirect mem, vReg idx, pRegGov ptmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_LoadVectorGather()->memory_size() < MaxVectorSize && (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); match(Set dst (LoadVectorGather mem idx)); - effect(TEMP pTmp, KILL cr); + effect(TEMP ptmp, KILL cr); ins_cost(3 * SVE_COST + INSN_COST); - format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" - "sve_uunpklo $idx, $idx\n\t" - "load_vector_gather $dst, $pTmp, $mem, $idx\t# vector load gather partial (L/D)" %} + format %{ "load_vector_gather $dst, $ptmp, $mem, $idx\t# vector load gather partial (D)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this)); + __ 
sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); + __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($ptmp$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector Load Gather Predicated ------------------------------- + +instruct gatherI_masked(vReg dst, indirect mem, vReg idx, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->as_LoadVector()->memory_size() == MaxVectorSize && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (LoadVectorGatherMasked mem (Binary idx pg))); + ins_cost(SVE_COST); + format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated (S)" %} + ins_encode %{ + __ sve_ld1w_gather(as_FloatRegister($dst$$reg), as_PRegister($pg$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct gatherL_masked(vReg dst, indirect mem, vReg idx, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->as_LoadVector()->memory_size() == MaxVectorSize && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set dst (LoadVectorGatherMasked mem (Binary idx pg))); + ins_cost(2 * SVE_COST); + format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated (D)" %} ins_encode %{ - __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ D, + __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); + __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($pg$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector Load Gather Predicated Partial ------------------------------- + +instruct gatherI_masked_partial(vReg dst, indirect mem, vReg idx, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_LoadVector()->memory_size() < MaxVectorSize && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (LoadVectorGatherMasked mem (Binary idx pg))); + effect(TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated partial (S)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, Matcher::vector_length(this)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_ld1w_gather(as_FloatRegister($dst$$reg), as_PRegister($ptmp$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct gatherL_masked_partial(vReg dst, indirect mem, vReg idx, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_LoadVector()->memory_size() < MaxVectorSize && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set dst (LoadVectorGatherMasked mem (Binary idx pg))); + effect(TEMP ptmp, KILL cr); + ins_cost(4 * SVE_COST); + format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated partial (D)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + 
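Gathers read their offsets from an index vector; the D-sized forms first widen the 32-bit indices with sve_uunpklo, and the masked forms feed the Java mask straight in as the governing predicate. Sketch (index map and flags illustrative; unset lanes load as zero):

    import jdk.incubator.vector.*;

    static int[] gather(int[] table, int[] indexMap, boolean[] flags) {
        VectorSpecies<Integer> IS = IntVector.SPECIES_PREFERRED;
        VectorMask<Integer> m = VectorMask.fromArray(IS, flags, 0);
        // LoadVectorGatherMasked -> gatherI_masked / gatherI_masked_partial.
        IntVector v = IntVector.fromArray(IS, table, 0, indexMap, 0, m);
        int[] out = new int[IS.length()];
        v.intoArray(out, 0);
        return out;
    }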
as_PRegister($pg$$reg), as_PRegister($pg$$reg)); __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); - __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($pTmp$$reg), + __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($ptmp$$reg), as_Register($mem$$base), as_FloatRegister($idx$$reg)); %} ins_pipe(pipe_slow); @@ -3688,7 +5359,7 @@ instruct scatterI(indirect mem, vReg src, vReg idx) %{ n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); match(Set mem (StoreVectorScatter mem (Binary src idx))); ins_cost(SVE_COST); - format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (I/F)" %} + format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (S)" %} ins_encode %{ __ sve_st1w_scatter(as_FloatRegister($src$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($idx$$reg)); @@ -3703,59 +5374,129 @@ instruct scatterL(indirect mem, vReg src, vReg idx) %{ n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); match(Set mem (StoreVectorScatter mem (Binary src idx))); ins_cost(2 * SVE_COST); - format %{ "sve_uunpklo $idx, $idx\n\t" - "store_vector_scatter $mem, $idx, $src\t# vector store scatter (L/D)" %} + format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (D)" %} ins_encode %{ - __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, - as_FloatRegister($idx$$reg)); + __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); __ sve_st1d_scatter(as_FloatRegister($src$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($idx$$reg)); %} ins_pipe(pipe_slow); %} -// ------------------------------ Vector Store Scatter Partial------------------------------- +// ------------------------------ Vector Store Scatter Partial ------------------------------- -instruct scatterI_partial(indirect mem, vReg src, vReg idx, pRegGov pTmp, rFlagsReg cr) %{ +instruct scatterI_partial(indirect mem, vReg src, vReg idx, pRegGov ptmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_StoreVectorScatter()->memory_size() < MaxVectorSize && (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT || n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); match(Set mem (StoreVectorScatter mem (Binary src idx))); - effect(TEMP pTmp, KILL cr); + effect(TEMP ptmp, KILL cr); ins_cost(2 * SVE_COST + INSN_COST); - format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" - "store_vector_scatter $mem, $pTmp, $idx, $src\t# vector store scatter partial (I/F)" %} + format %{ "store_vector_scatter $mem, $ptmp, $idx, $src\t# vector store scatter partial (S)" %} ins_encode %{ - __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ S, + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, Matcher::vector_length(this, $src)); - __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($pTmp$$reg), + __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($ptmp$$reg), as_Register($mem$$base), as_FloatRegister($idx$$reg)); %} ins_pipe(pipe_slow); %} -instruct scatterL_partial(indirect mem, vReg src, vReg idx, pRegGov pTmp, rFlagsReg cr) %{ +instruct scatterL_partial(indirect mem, vReg src, vReg idx, pRegGov ptmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_StoreVectorScatter()->memory_size() < MaxVectorSize && (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG || n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); match(Set mem (StoreVectorScatter mem (Binary src idx))); - 
effect(TEMP pTmp, KILL cr); + effect(TEMP ptmp, KILL cr); ins_cost(3 * SVE_COST + INSN_COST); - format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" - "sve_uunpklo $idx, $idx\n\t" - "store_vector_scatter $mem, $pTmp, $idx, $src\t# vector store scatter partial (L/D)" %} + format %{ "store_vector_scatter $mem, $ptmp, $idx, $src\t# vector store scatter partial (D)" %} ins_encode %{ - __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ D, + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src)); __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); - __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($pTmp$$reg), + __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($ptmp$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector Store Scatter Predicated ------------------------------- + +instruct scatterI_masked(indirect mem, vReg src, vReg idx, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->as_StoreVector()->memory_size() == MaxVectorSize && + (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg)))); + ins_cost(SVE_COST); + format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated (S)" %} + ins_encode %{ + __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($pg$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct scatterL_masked(indirect mem, vReg src, vReg idx, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->as_StoreVector()->memory_size() == MaxVectorSize && + (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg)))); + ins_cost(2 * SVE_COST); + format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated (D)" %} + ins_encode %{ + __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); + __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($pg$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector Store Scatter Predicated Partial ------------------------------- + +instruct scatterI_masked_partial(indirect mem, vReg src, vReg idx, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_StoreVector()->memory_size() < MaxVectorSize && + (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg)))); + effect(TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated partial (S)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, + Matcher::vector_length(this, $src)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($ptmp$$reg), as_Register($mem$$base), as_FloatRegister($idx$$reg)); %} ins_pipe(pipe_slow); %} +instruct
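Scatters mirror the gathers, and the predicated-partial forms AND the caller's mask with a whilelo predicate covering the actual vector length before storing. The Java side:

    import jdk.incubator.vector.*;

    static void scatter(int[] table, int[] indexMap, boolean[] flags, int[] src) {
        VectorSpecies<Integer> IS = IntVector.SPECIES_PREFERRED;
        VectorMask<Integer> m = VectorMask.fromArray(IS, flags, 0);
        IntVector v = IntVector.fromArray(IS, src, 0);
        // StoreVectorScatterMasked: only lanes where m is set are written.
        v.intoArray(table, 0, indexMap, 0, m);
    }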
scatterL_masked_partial(indirect mem, vReg src, vReg idx, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_StoreVector()->memory_size() < MaxVectorSize && + (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg)))); + effect(TEMP ptmp, KILL cr); + ins_cost(4 * SVE_COST); + format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated partial (D)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); + __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($ptmp$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} // ------------------------------ Vector Load Const ------------------------------- @@ -3811,203 +5552,97 @@ instruct stringU_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, %} // ---------------------------- Vector mask reductions --------------------------- - -instruct vmask_truecount(iRegINoSp dst, vReg src, pReg ptmp, rFlagsReg cr) %{ +instruct vmask_truecount(iRegINoSp dst, pReg src) %{ predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); match(Set dst (VectorMaskTrueCount src)); - effect(TEMP ptmp, KILL cr); - ins_cost(2 * SVE_COST); + ins_cost(SVE_COST); format %{ "vmask_truecount $dst, $src\t# vector mask truecount (sve)" %} ins_encode %{ - __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, - as_FloatRegister($src$$reg), ptrue, as_PRegister($ptmp$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_cntp($dst$$Register, size, ptrue, as_PRegister($src$$reg)); %} ins_pipe(pipe_slow); %} -instruct vmask_firsttrue(iRegINoSp dst, vReg src, pReg ptmp, rFlagsReg cr) %{ +instruct vmask_firsttrue(iRegINoSp dst, pReg src, pReg ptmp) %{ predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); match(Set dst (VectorMaskFirstTrue src)); - effect(TEMP ptmp, KILL cr); - ins_cost(3 * SVE_COST); + effect(TEMP ptmp); + ins_cost(2 * SVE_COST); format %{ "vmask_firsttrue $dst, $src\t# vector mask firsttrue (sve)" %} ins_encode %{ - __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, - as_FloatRegister($src$$reg), ptrue, as_PRegister($ptmp$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_brkb(as_PRegister($ptmp$$reg), ptrue, as_PRegister($src$$reg), false); + __ sve_cntp($dst$$Register, size, ptrue, as_PRegister($ptmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct vmask_lasttrue(iRegINoSp dst, vReg src, pReg ptmp, rFlagsReg cr) %{ +instruct vmask_lasttrue(iRegINoSp dst, pReg src, pReg ptmp) %{ predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); match(Set dst (VectorMaskLastTrue src)); - effect(TEMP ptmp, KILL cr); - ins_cost(4 * SVE_COST); + effect(TEMP ptmp); + ins_cost(3 * SVE_COST); format %{ "vmask_lasttrue $dst, $src\t# vector mask lasttrue (sve)" %} ins_encode %{ - __ 
sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, - as_FloatRegister($src$$reg), ptrue, as_PRegister($ptmp$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src); + __ sve_vmask_lasttrue($dst$$Register, bt, as_PRegister($src$$reg), as_PRegister($ptmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct vmask_truecount_partial(iRegINoSp dst, vReg src, pRegGov ptmp, rFlagsReg cr) %{ +instruct vmask_truecount_partial(iRegINoSp dst, pReg src, pReg ptmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); match(Set dst (VectorMaskTrueCount src)); effect(TEMP ptmp, KILL cr); - ins_cost(3 * SVE_COST); - format %{ "vmask_truecount $dst, $src\t# vector mask truecount partial (sve)" %} + ins_cost(2 * SVE_COST); + format %{ "vmask_truecount_partial $dst, $src\t# vector mask truecount partial (sve)" %} ins_encode %{ - __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ B, - Matcher::vector_length(this, $src)); - __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, as_FloatRegister($src$$reg), - as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size, Matcher::vector_length(this, $src)); + __ sve_cntp($dst$$Register, size, as_PRegister($ptmp$$reg), as_PRegister($src$$reg)); %} ins_pipe(pipe_slow); %} -instruct vmask_firsttrue_partial(iRegINoSp dst, vReg src, pRegGov pgtmp, pReg ptmp, rFlagsReg cr) %{ +instruct vmask_firsttrue_partial(iRegINoSp dst, pReg src, pReg ptmp1, pReg ptmp2, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); match(Set dst (VectorMaskFirstTrue src)); - effect(TEMP pgtmp, TEMP ptmp, KILL cr); - ins_cost(4 * SVE_COST); - format %{ "vmask_firsttrue $dst, $src\t# vector mask firsttrue partial (sve)" %} + effect(TEMP ptmp1, TEMP ptmp2, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "vmask_firsttrue_partial $dst, $src\t# vector mask firsttrue partial (sve)" %} ins_encode %{ - __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ B, + BasicType bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp1$$reg), size, Matcher::vector_length(this, $src)); - __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, as_FloatRegister($src$$reg), - as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg)); + __ sve_brkb(as_PRegister($ptmp2$$reg), as_PRegister($ptmp1$$reg), as_PRegister($src$$reg), false); + __ sve_cntp($dst$$Register, size, as_PRegister($ptmp1$$reg), as_PRegister($ptmp2$$reg)); %} ins_pipe(pipe_slow); %} -instruct vmask_lasttrue_partial(iRegINoSp dst, vReg src, pRegGov ptmp, rFlagsReg cr) %{ +instruct vmask_lasttrue_partial(iRegINoSp dst, pReg src, pReg ptmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); match(Set dst (VectorMaskLastTrue src)); effect(TEMP ptmp, KILL cr); ins_cost(5 * SVE_COST); - format %{ "vmask_lasttrue $dst, $src\t# vector mask lasttrue partial (sve)" %} - ins_encode %{ - __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ B, - Matcher::vector_length(this, $src)); - __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, as_FloatRegister($src$$reg), - as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg)); - %} - ins_pipe(pipe_slow); -%} - 
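Keeping masks in predicate registers turns the mask reductions into direct predicate instructions: trueCount is a single sve_cntp, firsttrue a brkb-then-cntp pair, lasttrue a small helper. The matching Java queries, under the same assumptions as the earlier sketches:

    import jdk.incubator.vector.*;

    static void maskStats(int[] a) {
        IntVector v = IntVector.fromArray(IntVector.SPECIES_PREFERRED, a, 0);
        VectorMask<Integer> m = v.lt(0);
        int count = m.trueCount();  // vmask_truecount: sve_cntp
        int first = m.firstTrue();  // vmask_firsttrue: sve_brkb + sve_cntp
        int last  = m.lastTrue();   // vmask_lasttrue
        System.out.println(count + " " + first + " " + last);
    }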
-// ----------------- Vector mask reductions combined with VectorMaskStore --------------- - -instruct vstoremask_truecount(iRegINoSp dst, vReg src, immI esize, pReg ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && - n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (VectorMaskTrueCount (VectorStoreMask src esize))); - effect(TEMP ptmp, KILL cr); - ins_cost(2 * SVE_COST); - format %{ "vstoremask_truecount $dst, $src\t# vector mask truecount (sve)" %} - ins_encode %{ - unsigned size = $esize$$constant; - assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size"); - Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size); - __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg), - ptrue, as_PRegister($ptmp$$reg), Matcher::vector_length(this, $src)); - %} - ins_pipe(pipe_slow); -%} - -instruct vstoremask_firsttrue(iRegINoSp dst, vReg src, immI esize, pReg ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && - n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (VectorMaskFirstTrue (VectorStoreMask src esize))); - effect(TEMP ptmp, KILL cr); - ins_cost(3 * SVE_COST); - format %{ "vstoremask_firsttrue $dst, $src\t# vector mask firsttrue (sve)" %} - ins_encode %{ - unsigned size = $esize$$constant; - assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size"); - Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size); - __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg), - ptrue, as_PRegister($ptmp$$reg), Matcher::vector_length(this, $src)); - %} - ins_pipe(pipe_slow); -%} - -instruct vstoremask_lasttrue(iRegINoSp dst, vReg src, immI esize, pReg ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && - n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (VectorMaskLastTrue (VectorStoreMask src esize))); - effect(TEMP ptmp, KILL cr); - ins_cost(4 * SVE_COST); - format %{ "vstoremask_lasttrue $dst, $src\t# vector mask lasttrue (sve)" %} - ins_encode %{ - unsigned size = $esize$$constant; - assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size"); - Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size); - __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg), - ptrue, as_PRegister($ptmp$$reg), Matcher::vector_length(this, $src)); - %} - ins_pipe(pipe_slow); -%} - -instruct vstoremask_truecount_partial(iRegINoSp dst, vReg src, immI esize, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && - n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (VectorMaskTrueCount (VectorStoreMask src esize))); - effect(TEMP ptmp, KILL cr); - ins_cost(3 * SVE_COST); - format %{ "vstoremask_truecount $dst, $src\t# vector mask truecount partial (sve)" %} - ins_encode %{ - unsigned size = $esize$$constant; - assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size"); - Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size); - __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, - Matcher::vector_length(this, $src)); - __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg), - as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), MaxVectorSize / size); - %} - ins_pipe(pipe_slow); -%} - -instruct 
vstoremask_firsttrue_partial(iRegINoSp dst, vReg src, immI esize, pRegGov pgtmp, pReg ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && - n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (VectorMaskFirstTrue (VectorStoreMask src esize))); - effect(TEMP pgtmp, TEMP ptmp, KILL cr); - ins_cost(4 * SVE_COST); - format %{ "vstoremask_firsttrue $dst, $src\t# vector mask firsttrue partial (sve)" %} - ins_encode %{ - unsigned size = $esize$$constant; - assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size"); - Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size); - __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), variant, - Matcher::vector_length(this, $src)); - __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg), - as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg), MaxVectorSize / size); - %} - ins_pipe(pipe_slow); -%} - -instruct vstoremask_lasttrue_partial(iRegINoSp dst, vReg src, immI esize, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && - n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (VectorMaskLastTrue (VectorStoreMask src esize))); - effect(TEMP ptmp, KILL cr); - ins_cost(5 * SVE_COST); - format %{ "vstoremask_lasttrue $dst, $src\t# vector mask lasttrue partial (sve)" %} + format %{ "vmask_lasttrue_partial $dst, $src\t# vector mask lasttrue partial (sve)" %} ins_encode %{ - unsigned size = $esize$$constant; - assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size"); - Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size); - __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, - Matcher::vector_length(this, $src)); - __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg), - as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), MaxVectorSize / size); + BasicType bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size, Matcher::vector_length(this, $src)); + __ sve_and(as_PRegister($ptmp$$reg), ptrue, as_PRegister($ptmp$$reg), as_PRegister($src$$reg)); + __ sve_vmask_lasttrue($dst$$Register, bt, as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg)); %} ins_pipe(pipe_slow); -%} +%} \ No newline at end of file diff --git a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 index dfdc6a2fda9e4420f0f4a60fea24037fba9fa429..7589735365eea58a51163ac3bd8759b5f5582c91 100644 --- a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 @@ -83,6 +83,7 @@ opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4); source_hpp %{ bool op_sve_supported(int opcode, int vlen, BasicType bt); + bool masked_op_sve_supported(int opcode, int vlen, BasicType bt); %} source %{ @@ -139,11 +140,7 @@ source %{ // Vector API specific case Op_VectorLoadShuffle: case Op_VectorRearrange: - if (vlen < 4 || length_in_bytes > MaxVectorSize) { - return false; - } else { - return true; - } + return vlen >= 4 && length_in_bytes <= MaxVectorSize; case Op_LoadVector: case Op_StoreVector: return Matcher::vector_size_supported(bt, vlen); @@ -153,6 +150,14 @@ source %{ // By default, we only support vector operations with no less than 8 bytes and 2 elements. 
return 8 <= length_in_bytes && length_in_bytes <= MaxVectorSize && vlen >= 2; } + + bool masked_op_sve_supported(int opcode, int vlen, BasicType bt) { + if (opcode == Op_VectorRearrange) { + return false; + } + return op_sve_supported(opcode, vlen, bt); + } + %} definitions %{ @@ -231,46 +236,205 @@ VLoadStore(strq, Q, store, 16, X, 128, src) // Only load/store values in the range of the memory_size. This is needed // when the memory_size is lower than the hardware supported max vector size. // And this might happen for Vector API mask vector load/store. -instruct loadV_partial(vReg dst, vmemA mem, pRegGov pTmp, rFlagsReg cr) %{ +instruct loadV_partial(vReg dst, vmemA mem, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() > 16 && n->as_LoadVector()->memory_size() < MaxVectorSize); match(Set dst (LoadVector mem)); - effect(TEMP pTmp, KILL cr); + effect(TEMP pgtmp, KILL cr); ins_cost(6 * SVE_COST); - format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" - "sve_ldr $dst, $pTmp, $mem\t# load vector predicated" %} + format %{ "sve_whilelo_zr_imm $pgtmp, vector_length\n\t" + "sve_ldr $dst, $pgtmp, $mem\t# load vector partial" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); - __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ elemType_to_regVariant(bt), + __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt), Matcher::vector_length(this)); FloatRegister dst_reg = as_FloatRegister($dst$$reg); loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, - as_PRegister($pTmp$$reg), bt, bt, $mem->opcode(), + as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} ins_pipe(pipe_slow); %} -instruct storeV_partial(vReg src, vmemA mem, pRegGov pTmp, rFlagsReg cr) %{ +instruct storeV_partial(vReg src, vmemA mem, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() > 16 && n->as_StoreVector()->memory_size() < MaxVectorSize); match(Set mem (StoreVector mem src)); - effect(TEMP pTmp, KILL cr); + effect(TEMP pgtmp, KILL cr); ins_cost(5 * SVE_COST); - format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" - "sve_str $src, $pTmp, $mem\t# store vector predicated" %} + format %{ "sve_whilelo_zr_imm $pgtmp, vector_length\n\t" + "sve_str $src, $pgtmp, $mem\t# store vector partial" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src); - __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ elemType_to_regVariant(bt), + __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt), Matcher::vector_length(this, $src)); FloatRegister src_reg = as_FloatRegister($src$$reg); loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, src_reg, - as_PRegister($pTmp$$reg), bt, bt, $mem->opcode(), + as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} ins_pipe(pipe_slow); -%}dnl +%} + +// vector load/store - predicated + +instruct loadV_masked(vReg dst, vmemA mem, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->as_LoadVector()->memory_size() == MaxVectorSize); + match(Set dst (LoadVectorMasked mem pg)); + ins_cost(4 * SVE_COST); + format %{ "sve_ldr $dst, $pg, $mem\t# load vector predicated (sve)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, as_FloatRegister($dst$$reg), + as_PRegister($pg$$reg), bt, bt, $mem->opcode(), + as_Register($mem$$base), 
$mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} + +instruct loadV_masked_partial(vReg dst, vmemA mem, pRegGov pg, pRegGov pgtmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_LoadVector()->memory_size() < MaxVectorSize); + match(Set dst (LoadVectorMasked mem pg)); + effect(TEMP pgtmp, KILL cr); + ins_cost(6 * SVE_COST); + format %{ "sve_ldr $dst, $pg, $mem\t# load vector predicated partial (sve)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt), + Matcher::vector_length(this)); + __ sve_and(as_PRegister($pgtmp$$reg), as_PRegister($pgtmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, as_FloatRegister($dst$$reg), + as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} + +instruct storeV_masked(vReg src, vmemA mem, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->as_StoreVector()->memory_size() == MaxVectorSize); + match(Set mem (StoreVectorMasked mem (Binary src pg))); + ins_cost(4 * SVE_COST); + format %{ "sve_str $mem, $pg, $src\t# store vector predicated (sve)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($src$$reg), + as_PRegister($pg$$reg), bt, bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} + +instruct storeV_masked_partial(vReg src, vmemA mem, pRegGov pg, pRegGov pgtmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_StoreVector()->memory_size() < MaxVectorSize); + match(Set mem (StoreVectorMasked mem (Binary src pg))); + effect(TEMP pgtmp, KILL cr); + ins_cost(6 * SVE_COST); + format %{ "sve_str $mem, $pg, $src\t# store vector predicated partial (sve)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src); + __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt), + Matcher::vector_length(this, $src)); + __ sve_and(as_PRegister($pgtmp$$reg), as_PRegister($pgtmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($src$$reg), + as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} + +dnl +dnl MASKALL_IMM($1, $2 ) +dnl MASKALL_IMM(type, size) +define(`MASKALL_IMM', ` +instruct vmaskAll_imm$1(pRegGov dst, imm$1 src) %{ + predicate(UseSVE > 0); + match(Set dst (MaskAll src)); + ins_cost(SVE_COST); + format %{ "sve_ptrue/sve_pfalse $dst\t# mask all (sve) ($2)" %} + ins_encode %{ + ifelse($1, `I', int, long) con = (ifelse($1, `I', int, long))$src$$constant; + if (con == 0) { + __ sve_pfalse(as_PRegister($dst$$reg)); + } else { + assert(con == -1, "invalid constant value for mask"); + BasicType bt = Matcher::vector_element_basic_type(this); + __ sve_ptrue(as_PRegister($dst$$reg), __ elemType_to_regVariant(bt)); + } + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +dnl MASKALL($1, $2 ) +dnl MASKALL(type, size) +define(`MASKALL', ` +instruct vmaskAll$1(pRegGov dst, ifelse($1, `I', iRegIorL2I, iRegL) src, vReg tmp, rFlagsReg cr) %{ + predicate(UseSVE > 0); + match(Set dst (MaskAll src)); + effect(TEMP tmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "sve_dup $tmp, $src\n\t" + "sve_cmpne $dst, $tmp, 
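The loadV_masked/storeV_masked rules pass a Java-level mask directly as the ld1/st1 governing predicate; the _partial variants AND it with a whilelo mask when the vector is narrower than the hardware register. Sketch:

    import jdk.incubator.vector.*;

    static void maskedCopy(int[] src, int[] dst, boolean[] flags) {
        VectorSpecies<Integer> IS = IntVector.SPECIES_PREFERRED;
        VectorMask<Integer> m = VectorMask.fromArray(IS, flags, 0);
        // LoadVectorMasked / StoreVectorMasked: unset lanes load as zero
        // and are skipped on store.
        IntVector.fromArray(IS, src, 0, m).intoArray(dst, 0, m);
    }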
0\t# mask all (sve) ($2)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_dup(as_FloatRegister($tmp$$reg), size, as_Register($src$$reg)); + __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), size, ptrue, as_FloatRegister($tmp$$reg), 0); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +// maskAll +MASKALL_IMM(I, B/H/S) +MASKALL(I, B/H/S) +MASKALL_IMM(L, D) +MASKALL(L, D) + +dnl +dnl MASK_LOGICAL_OP($1, $2, $3 ) +dnl MASK_LOGICAL_OP(insn_name, op_name, insn) +define(`MASK_LOGICAL_OP', ` +instruct vmask_$1(pRegGov pd, pRegGov pn, pRegGov pm) %{ + predicate(UseSVE > 0); + match(Set pd ($2 pn pm)); + ins_cost(SVE_COST); + format %{ "$3 $pd, $pn, $pm\t# predicate (sve)" %} + ins_encode %{ + __ $3(as_PRegister($pd$$reg), ptrue, + as_PRegister($pn$$reg), as_PRegister($pm$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +// mask logical and/or/xor +MASK_LOGICAL_OP(and, AndVMask, sve_and) +MASK_LOGICAL_OP(or, OrVMask, sve_orr) +MASK_LOGICAL_OP(xor, XorVMask, sve_eor) +dnl +dnl MASK_LOGICAL_AND_NOT($1, $2 ) +dnl MASK_LOGICAL_AND_NOT(type, size) +define(`MASK_LOGICAL_AND_NOT', ` +instruct vmask_and_not$1(pRegGov pd, pRegGov pn, pRegGov pm, imm$1_M1 m1) %{ + predicate(UseSVE > 0); + match(Set pd (AndVMask pn (XorVMask pm (MaskAll m1)))); + ins_cost(SVE_COST); + format %{ "sve_bic $pd, $pn, $pm\t# predicate (sve) ($2)" %} + ins_encode %{ + __ sve_bic(as_PRegister($pd$$reg), ptrue, + as_PRegister($pn$$reg), as_PRegister($pm$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +// mask logical and_not +MASK_LOGICAL_AND_NOT(I, B/H/S) +MASK_LOGICAL_AND_NOT(L, D) // vector reinterpret @@ -286,11 +450,11 @@ instruct reinterpret(vReg dst) %{ ins_pipe(pipe_class_empty); %} -instruct reinterpretResize(vReg dst, vReg src, pRegGov pTmp, rFlagsReg cr) %{ +instruct reinterpretResize(vReg dst, vReg src, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() != n->in(1)->bottom_type()->is_vect()->length_in_bytes()); // src != dst match(Set dst (VectorReinterpret src)); - effect(TEMP_DEF dst, TEMP pTmp, KILL cr); + effect(TEMP_DEF dst, TEMP pgtmp, KILL cr); ins_cost(3 * SVE_COST); format %{ "reinterpretResize $dst, $src\t# vector (sve)" %} ins_encode %{ @@ -300,25 +464,59 @@ instruct reinterpretResize(vReg dst, vReg src, pRegGov pTmp, rFlagsReg cr) %{ length_in_bytes_src : length_in_bytes_dst; assert(length_in_bytes_src <= MaxVectorSize && length_in_bytes_dst <= MaxVectorSize, "invalid vector length"); - __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ B, length_in_bytes_resize); + __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ B, length_in_bytes_resize); __ sve_dup(as_FloatRegister($dst$$reg), __ B, 0); - __ sve_sel(as_FloatRegister($dst$$reg), __ B, as_PRegister($pTmp$$reg), + __ sve_sel(as_FloatRegister($dst$$reg), __ B, as_PRegister($pgtmp$$reg), as_FloatRegister($src$$reg), as_FloatRegister($dst$$reg)); %} ins_pipe(pipe_slow); %} + +// vector mask reinterpret + +instruct vmask_reinterpret_same_esize(pRegGov dst_src) %{ + predicate(UseSVE > 0 && + n->as_Vector()->length() == n->in(1)->bottom_type()->is_vect()->length() && + n->as_Vector()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes()); + match(Set dst_src (VectorReinterpret dst_src)); + ins_cost(0); + format %{ "# vmask_reinterpret $dst_src\t# do nothing" %} + ins_encode %{ + // empty + %} + ins_pipe(pipe_class_empty); +%} + +instruct vmask_reinterpret_diff_esize(pRegGov dst, pRegGov src, vReg tmp,
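MaskAll maps to sve_ptrue/sve_pfalse (or a dup plus cmpne for a non-constant source), and the mask logical nodes map to predicate AND/ORR/EOR; the AndVMask(pn, XorVMask(pm, MaskAll(-1))) shape that andNot produces is folded into a single sve_bic. A contrived but hedged Java example:

    import jdk.incubator.vector.*;

    static VectorMask<Integer> maskOps(VectorSpecies<Integer> IS,
                                       VectorMask<Integer> p,
                                       VectorMask<Integer> q) {
        VectorMask<Integer> all = IS.maskAll(true); // MaskAll(-1) -> sve_ptrue
        // andNot expands to AND with the XOR-against-all-true of q, which is
        // exactly the MASK_LOGICAL_AND_NOT pattern above (one sve_bic).
        return p.andNot(q).or(q.and(all));          // OrVMask / AndVMask
    }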
rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_Vector()->length() != n->in(1)->bottom_type()->is_vect()->length() && + n->as_Vector()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes()); + match(Set dst (VectorReinterpret src)); + effect(TEMP tmp, KILL cr); + ins_cost(2 * SVE_COST); + format %{ "# vmask_reinterpret $dst, $src\t# vector (sve)" %} + ins_encode %{ + BasicType from_bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SIMD_RegVariant from_size = __ elemType_to_regVariant(from_bt); + BasicType to_bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt); + __ sve_cpy(as_FloatRegister($tmp$$reg), from_size, as_PRegister($src$$reg), -1, false); + __ sve_cmp(Assembler::EQ, as_PRegister($dst$$reg), to_size, ptrue, as_FloatRegister($tmp$$reg), -1); + %} + ins_pipe(pipe_slow); +%} dnl -dnl UNARY_OP_TRUE_PREDICATE_ETYPE($1, $2, $3, $4, $5, %6 ) -dnl UNARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn) -define(`UNARY_OP_TRUE_PREDICATE_ETYPE', ` +dnl UNARY_OP_TRUE_PREDICATE($1, $2, $3, $4 ) +dnl UNARY_OP_TRUE_PREDICATE(insn_name, op_name, size, insn) +define(`UNARY_OP_TRUE_PREDICATE', ` instruct $1(vReg dst, vReg src) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == $3); + !n->as_Vector()->is_predicated_vector()); match(Set dst ($2 src)); ins_cost(SVE_COST); - format %{ "$6 $dst, $src\t# vector (sve) ($4)" %} + format %{ "$4 $dst, $src\t# vector (sve) ($3)" %} ins_encode %{ - __ $6(as_FloatRegister($dst$$reg), __ $4, + __ $4(as_FloatRegister($dst$$reg), __ $3, ptrue, as_FloatRegister($src$$reg)); %} ins_pipe(pipe_slow); @@ -326,16 +524,40 @@ instruct $1(vReg dst, vReg src) %{ dnl // vector abs -UNARY_OP_TRUE_PREDICATE_ETYPE(vabsB, AbsVB, T_BYTE, B, 16, sve_abs) -UNARY_OP_TRUE_PREDICATE_ETYPE(vabsS, AbsVS, T_SHORT, H, 8, sve_abs) -UNARY_OP_TRUE_PREDICATE_ETYPE(vabsI, AbsVI, T_INT, S, 4, sve_abs) -UNARY_OP_TRUE_PREDICATE_ETYPE(vabsL, AbsVL, T_LONG, D, 2, sve_abs) -UNARY_OP_TRUE_PREDICATE_ETYPE(vabsF, AbsVF, T_FLOAT, S, 4, sve_fabs) -UNARY_OP_TRUE_PREDICATE_ETYPE(vabsD, AbsVD, T_DOUBLE, D, 2, sve_fabs) -dnl -dnl BINARY_OP_UNPREDICATED($1, $2 $3, $4 $5 ) -dnl BINARY_OP_UNPREDICATED(insn_name, op_name, size, min_vec_len, insn) -define(`BINARY_OP_UNPREDICATED', ` +UNARY_OP_TRUE_PREDICATE(vabsB, AbsVB, B, sve_abs) +UNARY_OP_TRUE_PREDICATE(vabsS, AbsVS, H, sve_abs) +UNARY_OP_TRUE_PREDICATE(vabsI, AbsVI, S, sve_abs) +UNARY_OP_TRUE_PREDICATE(vabsL, AbsVL, D, sve_abs) +UNARY_OP_TRUE_PREDICATE(vabsF, AbsVF, S, sve_fabs) +UNARY_OP_TRUE_PREDICATE(vabsD, AbsVD, D, sve_fabs) + +dnl UNARY_OP_PREDICATE($1, $2, $3, $4 ) +dnl UNARY_OP_PREDICATE(insn_name, op_name, size, insn) +define(`UNARY_OP_PREDICATE', ` +instruct $1_masked(vReg dst_src, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src ($2 dst_src pg)); + ins_cost(SVE_COST); + format %{ "$4 $dst_src, $pg, $dst_src\t# vector (sve) ($3)" %} + ins_encode %{ + __ $4(as_FloatRegister($dst_src$$reg), __ $3, + as_PRegister($pg$$reg), + as_FloatRegister($dst_src$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +// vector abs - predicated +UNARY_OP_PREDICATE(vabsB, AbsVB, B, sve_abs) +UNARY_OP_PREDICATE(vabsS, AbsVS, H, sve_abs) +UNARY_OP_PREDICATE(vabsI, AbsVI, S, sve_abs) +UNARY_OP_PREDICATE(vabsL, AbsVL, D, sve_abs) +UNARY_OP_PREDICATE(vabsF, AbsVF, S, sve_fabs) +UNARY_OP_PREDICATE(vabsD, AbsVD, D, sve_fabs) + +dnl +dnl BINARY_OP_UNPREDICATE($1, $2 $3, $4 $5 ) +dnl 
BINARY_OP_UNPREDICATE(insn_name, op_name, size, min_vec_len, insn) +define(`BINARY_OP_UNPREDICATE', ` instruct $1(vReg dst, vReg src1, vReg src2) %{ predicate(UseSVE > 0); match(Set dst ($2 src1 src2)); @@ -348,39 +570,146 @@ instruct $1(vReg dst, vReg src1, vReg src2) %{ %} ins_pipe(pipe_slow); %}')dnl - +dnl +dnl +dnl BINARY_OP_PREDICATE($1, $2, $3, $4 ) +dnl BINARY_OP_PREDICATE(insn_name, op_name, size, insn) +define(`BINARY_OP_PREDICATE', ` +instruct $1_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src1 ($2 (Binary dst_src1 src2) pg)); + ins_cost(SVE_COST); + format %{ "$4 $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) ($3)" %} + ins_encode %{ + __ $4(as_FloatRegister($dst_src1$$reg), __ $3, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl // vector add -BINARY_OP_UNPREDICATED(vaddB, AddVB, B, 16, sve_add) -BINARY_OP_UNPREDICATED(vaddS, AddVS, H, 8, sve_add) -BINARY_OP_UNPREDICATED(vaddI, AddVI, S, 4, sve_add) -BINARY_OP_UNPREDICATED(vaddL, AddVL, D, 2, sve_add) -BINARY_OP_UNPREDICATED(vaddF, AddVF, S, 4, sve_fadd) -BINARY_OP_UNPREDICATED(vaddD, AddVD, D, 2, sve_fadd) -dnl -dnl BINARY_OP_UNSIZED($1, $2, $3, $4 ) -dnl BINARY_OP_UNSIZED(insn_name, op_name, min_vec_len, insn) +BINARY_OP_UNPREDICATE(vaddB, AddVB, B, 16, sve_add) +BINARY_OP_UNPREDICATE(vaddS, AddVS, H, 8, sve_add) +BINARY_OP_UNPREDICATE(vaddI, AddVI, S, 4, sve_add) +BINARY_OP_UNPREDICATE(vaddL, AddVL, D, 2, sve_add) +BINARY_OP_UNPREDICATE(vaddF, AddVF, S, 4, sve_fadd) +BINARY_OP_UNPREDICATE(vaddD, AddVD, D, 2, sve_fadd) + +// vector add - predicated +BINARY_OP_PREDICATE(vaddB, AddVB, B, sve_add) +BINARY_OP_PREDICATE(vaddS, AddVS, H, sve_add) +BINARY_OP_PREDICATE(vaddI, AddVI, S, sve_add) +BINARY_OP_PREDICATE(vaddL, AddVL, D, sve_add) +BINARY_OP_PREDICATE(vaddF, AddVF, S, sve_fadd) +BINARY_OP_PREDICATE(vaddD, AddVD, D, sve_fadd) +dnl +dnl ADD_IMM($1, $2, $3 ) +dnl ADD_IMM(name_suffix, size, imm_type) +define(`ADD_IMM', ` +instruct vaddImm$1(vReg dst_src, $3 con) %{ + predicate(UseSVE > 0); + match(Set dst_src (AddV$1 dst_src (Replicate$1 con))); + ins_cost(SVE_COST); + format %{ "sve_add $dst_src, $dst_src, $con\t # vector (sve) ($2)" %} + ins_encode %{ + int32_t val = $con$$constant; + if (val > 0) { + __ sve_add(as_FloatRegister($dst_src$$reg), __ $2, val); + } else if (val < 0) { + __ sve_sub(as_FloatRegister($dst_src$$reg), __ $2, -val); + } + %} + ins_pipe(pipe_slow); +%}')dnl + +// vector add reg imm (unpredicated) +ADD_IMM(B, B, immBAddSubV) +ADD_IMM(S, H, immIAddSubV) +ADD_IMM(I, S, immIAddSubV) +ADD_IMM(L, D, immLAddSubV) +dnl +dnl BITWISE_OP_IMM($1, $2 $3, $4 $5 ) +dnl BITWISE_OP_IMM(insn_name, op_name1, size, type, op_name2) +define(`BITWISE_OP_IMM', ` +instruct $1(vReg dst_src, imm$4Log con) %{ + predicate(UseSVE > 0); + match(Set dst_src ($2 dst_src (Replicate$4 con))); + ins_cost(SVE_COST); + format %{ "$5 $dst_src, $dst_src, $con\t # vector (sve) ($3)" %} + ins_encode %{ + __ $5(as_FloatRegister($dst_src$$reg), __ $3, + (uint64_t)($con$$constant)); + %} + ins_pipe(pipe_slow); +%}')dnl + +// vector binary op reg imm (unpredicated) +BITWISE_OP_IMM(vandB, AndV, B, B, sve_and) +BITWISE_OP_IMM(vandH, AndV, H, S, sve_and) +BITWISE_OP_IMM(vandS, AndV, S, I, sve_and) +BITWISE_OP_IMM(vandD, AndV, D, L, sve_and) +BITWISE_OP_IMM(vorB, OrV, B, B, sve_orr) +BITWISE_OP_IMM(vorH, OrV, H, S, sve_orr) +BITWISE_OP_IMM(vorS, OrV, S, I, sve_orr) +BITWISE_OP_IMM(vorD, OrV, D, L, sve_orr) +BITWISE_OP_IMM(vxorB, XorV, B, B, sve_eor) 
+BITWISE_OP_IMM(vxorH, XorV, H, S, sve_eor) +BITWISE_OP_IMM(vxorS, XorV, S, I, sve_eor) +BITWISE_OP_IMM(vxorD, XorV, D, L, sve_eor) +dnl +dnl +dnl BINARY_OP_UNSIZED($1, $2, $3 ) +dnl BINARY_OP_UNSIZED(insn_name, op_name, insn) define(`BINARY_OP_UNSIZED', ` instruct $1(vReg dst, vReg src1, vReg src2) %{ predicate(UseSVE > 0); match(Set dst ($2 src1 src2)); ins_cost(SVE_COST); - format %{ "$4 $dst, $src1, $src2\t# vector (sve)" %} + format %{ "$3 $dst, $src1, $src2\t# vector (sve)" %} ins_encode %{ - __ $4(as_FloatRegister($dst$$reg), + __ $3(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); %}')dnl - +dnl // vector and -BINARY_OP_UNSIZED(vand, AndV, 16, sve_and) +BINARY_OP_UNSIZED(vand, AndV, sve_and) // vector or -BINARY_OP_UNSIZED(vor, OrV, 16, sve_orr) +BINARY_OP_UNSIZED(vor, OrV, sve_orr) // vector xor -BINARY_OP_UNSIZED(vxor, XorV, 16, sve_eor) +BINARY_OP_UNSIZED(vxor, XorV, sve_eor) + +dnl BINARY_LOGIC_OP_PREDICATE($1, $2, $3 ) +dnl BINARY_LOGIC_OP_PREDICATE(insn_name, op_name, insn) +define(`BINARY_LOGIC_OP_PREDICATE', ` +instruct $1_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src1 ($2 (Binary dst_src1 src2) pg)); + ins_cost(SVE_COST); + format %{ "$3 $dst_src1, $pg, $dst_src1, $src2\t # vector (sve)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ $3(as_FloatRegister($dst_src1$$reg), size, + as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +// vector and - predicated +BINARY_LOGIC_OP_PREDICATE(vand, AndV, sve_and) + +// vector or - predicated +BINARY_LOGIC_OP_PREDICATE(vor, OrV, sve_orr) + +// vector xor - predicated +BINARY_LOGIC_OP_PREDICATE(vxor, XorV, sve_eor) // vector not dnl @@ -406,7 +735,7 @@ dnl $1,$2 VECTOR_NOT(I, B/H/S) VECTOR_NOT(L, D) undefine(MATCH_RULE) - +dnl // vector and_not dnl define(`MATCH_RULE', `ifelse($1, I, @@ -447,52 +776,70 @@ instruct vdiv$1(vReg dst_src1, vReg src2) %{ %} ins_pipe(pipe_slow); %}')dnl - +dnl // vector float div VDIVF(F, S, 4) VDIVF(D, D, 2) -// vector min/max +// vector float div - predicated +BINARY_OP_PREDICATE(vfdivF, DivVF, S, sve_fdiv) +BINARY_OP_PREDICATE(vfdivD, DivVD, D, sve_fdiv) -instruct vmin(vReg dst_src1, vReg src2) %{ +dnl +dnl VMINMAX($1 , $2, $3 , $4 ) +dnl VMINMAX(op_name, op, finsn, insn) +define(`VMINMAX', ` +instruct v$1(vReg dst_src1, vReg src2) %{ predicate(UseSVE > 0); - match(Set dst_src1 (MinV dst_src1 src2)); + match(Set dst_src1 ($2 dst_src1 src2)); ins_cost(SVE_COST); - format %{ "sve_min $dst_src1, $dst_src1, $src2\t # vector (sve)" %} + format %{ "sve_$1 $dst_src1, $dst_src1, $src2\t # vector (sve)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); if (is_floating_point_type(bt)) { - __ sve_fmin(as_FloatRegister($dst_src1$$reg), size, + __ $3(as_FloatRegister($dst_src1$$reg), size, ptrue, as_FloatRegister($src2$$reg)); } else { - assert(is_integral_type(bt), "Unsupported type"); - __ sve_smin(as_FloatRegister($dst_src1$$reg), size, + assert(is_integral_type(bt), "unsupported type"); + __ $4(as_FloatRegister($dst_src1$$reg), size, ptrue, as_FloatRegister($src2$$reg)); } %} ins_pipe(pipe_slow); -%} +%}')dnl +dnl +// vector min/max +VMINMAX(min, MinV, sve_fmin, sve_smin) +VMINMAX(max, MaxV, sve_fmax, sve_smax) -instruct vmax(vReg dst_src1, vReg src2) %{ +dnl +dnl 
VMINMAX_PREDICATE($1 , $2, $3 , $4 ) +dnl VMINMAX_PREDICATE(op_name, op, finsn, insn) +define(`VMINMAX_PREDICATE', ` +instruct v$1_masked(vReg dst_src1, vReg src2, pRegGov pg) %{ predicate(UseSVE > 0); - match(Set dst_src1 (MaxV dst_src1 src2)); + match(Set dst_src1 ($2 (Binary dst_src1 src2) pg)); ins_cost(SVE_COST); - format %{ "sve_max $dst_src1, $dst_src1, $src2\t # vector (sve)" %} + format %{ "sve_$1 $dst_src1, $pg, $dst_src1, $src2\t# vector (sve)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); if (is_floating_point_type(bt)) { - __ sve_fmax(as_FloatRegister($dst_src1$$reg), size, - ptrue, as_FloatRegister($src2$$reg)); + __ $3(as_FloatRegister($dst_src1$$reg), size, + as_PRegister($pg$$reg), as_FloatRegister($src2$$reg)); } else { - assert(is_integral_type(bt), "Unsupported type"); - __ sve_smax(as_FloatRegister($dst_src1$$reg), size, - ptrue, as_FloatRegister($src2$$reg)); + assert(is_integral_type(bt), "unsupported type"); + __ $4(as_FloatRegister($dst_src1$$reg), size, + as_PRegister($pg$$reg), as_FloatRegister($src2$$reg)); } %} ins_pipe(pipe_slow); -%} +%}')dnl +dnl +// vector min/max - predicated +VMINMAX_PREDICATE(min, MinV, sve_fmin, sve_smin) +VMINMAX_PREDICATE(max, MaxV, sve_fmax, sve_smax) dnl dnl VFMLA($1 $2 $3 ) @@ -515,6 +862,27 @@ dnl VFMLA(F, S, 4) VFMLA(D, D, 2) +dnl +dnl VFMLA_PREDICATE($1, $2 ) +dnl VFMLA_PREDICATE(type, size) +define(`VFMLA_PREDICATE', ` +// dst_src1 = dst_src1 * src2 + src3 +instruct vfmla$1_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ + predicate(UseFMA && UseSVE > 0); + match(Set dst_src1 (FmaV$1 (Binary dst_src1 src2) (Binary src3 pg))); + ins_cost(SVE_COST); + format %{ "sve_fmad $dst_src1, $pg, $src2, $src3\t# vector (sve) ($2)" %} + ins_encode %{ + __ sve_fmad(as_FloatRegister($dst_src1$$reg), __ $2, as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +// vector fmla - predicated +VFMLA_PREDICATE(F, S) +VFMLA_PREDICATE(D, D) + dnl dnl VFMLS($1 $2 $3 ) dnl VFMLS(name_suffix, size, min_vec_len) @@ -645,34 +1013,30 @@ instruct $1(vReg dst_src1, vReg src2) %{ %} ins_pipe(pipe_slow); %}')dnl - +dnl // vector mul BINARY_OP_TRUE_PREDICATE(vmulB, MulVB, B, 16, sve_mul) BINARY_OP_TRUE_PREDICATE(vmulS, MulVS, H, 8, sve_mul) BINARY_OP_TRUE_PREDICATE(vmulI, MulVI, S, 4, sve_mul) BINARY_OP_TRUE_PREDICATE(vmulL, MulVL, D, 2, sve_mul) -BINARY_OP_UNPREDICATED(vmulF, MulVF, S, 4, sve_fmul) -BINARY_OP_UNPREDICATED(vmulD, MulVD, D, 2, sve_fmul) +BINARY_OP_UNPREDICATE(vmulF, MulVF, S, 4, sve_fmul) +BINARY_OP_UNPREDICATE(vmulD, MulVD, D, 2, sve_fmul) + +// vector mul - predicated +BINARY_OP_PREDICATE(vmulB, MulVB, B, sve_mul) +BINARY_OP_PREDICATE(vmulS, MulVS, H, sve_mul) +BINARY_OP_PREDICATE(vmulI, MulVI, S, sve_mul) +BINARY_OP_PREDICATE(vmulL, MulVL, D, sve_mul) +BINARY_OP_PREDICATE(vmulF, MulVF, S, sve_fmul) +BINARY_OP_PREDICATE(vmulD, MulVD, D, sve_fmul) -dnl -dnl UNARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 ) -dnl UNARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_bytes, insn) -define(`UNARY_OP_TRUE_PREDICATE', ` -instruct $1(vReg dst, vReg src) %{ - predicate(UseSVE > 0); - match(Set dst ($2 src)); - ins_cost(SVE_COST); - format %{ "$5 $dst, $src\t# vector (sve) ($3)" %} - ins_encode %{ - __ $5(as_FloatRegister($dst$$reg), __ $3, - ptrue, as_FloatRegister($src$$reg)); - %} - ins_pipe(pipe_slow); -%}')dnl -dnl // vector fneg -UNARY_OP_TRUE_PREDICATE(vnegF, NegVF, S, 16, 
sve_fneg) -UNARY_OP_TRUE_PREDICATE(vnegD, NegVD, D, 16, sve_fneg) +UNARY_OP_TRUE_PREDICATE(vnegF, NegVF, S, sve_fneg) +UNARY_OP_TRUE_PREDICATE(vnegD, NegVD, D, sve_fneg) + +// vector fneg - predicated +UNARY_OP_PREDICATE(vnegF, NegVF, S, sve_fneg) +UNARY_OP_PREDICATE(vnegD, NegVD, D, sve_fneg) // popcount vector @@ -688,354 +1052,307 @@ instruct vpopcountI(vReg dst, vReg src) %{ // vector mask compare -instruct vmaskcmp(vReg dst, vReg src1, vReg src2, immI cond, pRegGov pTmp, rFlagsReg cr) %{ +instruct vmaskcmp(pRegGov dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %{ predicate(UseSVE > 0); match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); - effect(TEMP pTmp, KILL cr); - ins_cost(2 * SVE_COST); - format %{ "sve_cmp $pTmp, $src1, $src2\n\t" - "sve_cpy $dst, $pTmp, -1\t# vector mask cmp (sve)" %} + effect(KILL cr); + ins_cost(SVE_COST); + format %{ "sve_cmp $dst, $src1, $src2\t# vector mask cmp (sve)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); - __ sve_compare(as_PRegister($pTmp$$reg), bt, ptrue, as_FloatRegister($src1$$reg), + __ sve_compare(as_PRegister($dst$$reg), bt, ptrue, as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), (int)$cond$$constant); - __ sve_cpy(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt), - as_PRegister($pTmp$$reg), -1, false); %} ins_pipe(pipe_slow); %} -// vector blend - -instruct vblend(vReg dst, vReg src1, vReg src2, vReg src3, pRegGov pTmp, rFlagsReg cr) %{ +instruct vmaskcmp_masked(pRegGov dst, vReg src1, vReg src2, immI cond, pRegGov pg, rFlagsReg cr) %{ predicate(UseSVE > 0); - match(Set dst (VectorBlend (Binary src1 src2) src3)); - effect(TEMP pTmp, KILL cr); - ins_cost(2 * SVE_COST); - format %{ "sve_cmpeq $pTmp, $src3, -1\n\t" - "sve_sel $dst, $pTmp, $src2, $src1\t# vector blend (sve)" %} + match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond pg))); + effect(KILL cr); + ins_cost(SVE_COST); + format %{ "sve_cmp $dst, $pg, $src1, $src2\t# vector mask cmp (sve)" %} ins_encode %{ - Assembler::SIMD_RegVariant size = - __ elemType_to_regVariant(Matcher::vector_element_basic_type(this)); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, - ptrue, as_FloatRegister($src3$$reg), -1); - __ sve_sel(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), - as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this); + __ sve_compare(as_PRegister($dst$$reg), bt, as_PRegister($pg$$reg), as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg), (int)$cond$$constant); %} ins_pipe(pipe_slow); %} -// vector blend with compare +// vector blend -instruct vblend_maskcmp(vReg dst, vReg src1, vReg src2, vReg src3, - vReg src4, pRegGov pTmp, immI cond, rFlagsReg cr) %{ +instruct vblend(vReg dst, vReg src1, vReg src2, pRegGov pg) %{ predicate(UseSVE > 0); - match(Set dst (VectorBlend (Binary src1 src2) (VectorMaskCmp (Binary src3 src4) cond))); - effect(TEMP pTmp, KILL cr); - ins_cost(2 * SVE_COST); - format %{ "sve_cmp $pTmp, $src3, $src4\t# vector cmp (sve)\n\t" - "sve_sel $dst, $pTmp, $src2, $src1\t# vector blend (sve)" %} + match(Set dst (VectorBlend (Binary src1 src2) pg)); + ins_cost(SVE_COST); + format %{ "sve_sel $dst, $pg, $src2, $src1\t# vector blend (sve)" %} ins_encode %{ - BasicType bt = Matcher::vector_element_basic_type(this); - __ sve_compare(as_PRegister($pTmp$$reg), bt, ptrue, as_FloatRegister($src3$$reg), - as_FloatRegister($src4$$reg), (int)$cond$$constant); - __ sve_sel(as_FloatRegister($dst$$reg), __ 
elemType_to_regVariant(bt), - as_PRegister($pTmp$$reg), as_FloatRegister($src2$$reg), - as_FloatRegister($src1$$reg)); + Assembler::SIMD_RegVariant size = + __ elemType_to_regVariant(Matcher::vector_element_basic_type(this)); + __ sve_sel(as_FloatRegister($dst$$reg), size, as_PRegister($pg$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); %} ins_pipe(pipe_slow); %} // vector load mask -instruct vloadmaskB(vReg dst, vReg src) %{ +instruct vloadmaskB(pRegGov dst, vReg src, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorLoadMask src)); + effect(KILL cr); ins_cost(SVE_COST); - format %{ "sve_neg $dst, $src\t# vector load mask (B)" %} - ins_encode %{ - __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, as_FloatRegister($src$$reg)); - %} - ins_pipe(pipe_slow); -%} - -instruct vloadmaskS(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); - match(Set dst (VectorLoadMask src)); - ins_cost(2 * SVE_COST); - format %{ "sve_uunpklo $dst, H, $src\n\t" - "sve_neg $dst, $dst\t# vector load mask (B to H)" %} + format %{ "vloadmaskB $dst, $src\t# vector load mask (sve) (B)" %} ins_encode %{ - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); - __ sve_neg(as_FloatRegister($dst$$reg), __ H, ptrue, as_FloatRegister($dst$$reg)); + __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), __ B, + ptrue, as_FloatRegister($src$$reg), 0); %} ins_pipe(pipe_slow); %} -instruct vloadmaskI(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && - (n->bottom_type()->is_vect()->element_basic_type() == T_INT || - n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); +instruct vloadmask_extend(pRegGov dst, vReg src, vReg tmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() != T_BYTE); match(Set dst (VectorLoadMask src)); + effect(TEMP tmp, KILL cr); ins_cost(3 * SVE_COST); - format %{ "sve_uunpklo $dst, H, $src\n\t" - "sve_uunpklo $dst, S, $dst\n\t" - "sve_neg $dst, $dst\t# vector load mask (B to S)" %} - ins_encode %{ - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg)); - __ sve_neg(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg)); - %} - ins_pipe(pipe_slow); -%} - -instruct vloadmaskL(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && - (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || - n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); - match(Set dst (VectorLoadMask src)); - ins_cost(4 * SVE_COST); - format %{ "sve_uunpklo $dst, H, $src\n\t" - "sve_uunpklo $dst, S, $dst\n\t" - "sve_uunpklo $dst, D, $dst\n\t" - "sve_neg $dst, $dst\t# vector load mask (B to D)" %} + format %{ "vloadmask $dst, $src\t# vector load mask (sve) (H/S/D)" %} ins_encode %{ - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg)); - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg)); - __ sve_neg(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_vector_extend(as_FloatRegister($tmp$$reg), size, as_FloatRegister($src$$reg), __ B); + __ sve_cmp(Assembler::NE, 
as_PRegister($dst$$reg), size, ptrue, as_FloatRegister($tmp$$reg), 0); %} ins_pipe(pipe_slow); %} // vector store mask -instruct vstoremaskB(vReg dst, vReg src, immI_1 size) %{ +instruct vstoremaskB(vReg dst, pRegGov src, immI_1 size) %{ predicate(UseSVE > 0); match(Set dst (VectorStoreMask src size)); ins_cost(SVE_COST); - format %{ "sve_neg $dst, $src\t# vector store mask (B)" %} + format %{ "vstoremask $dst, $src\t# vector store mask (sve) (B)" %} ins_encode %{ - __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, - as_FloatRegister($src$$reg)); + __ sve_cpy(as_FloatRegister($dst$$reg), __ B, as_PRegister($src$$reg), 1, false); %} ins_pipe(pipe_slow); %} -instruct vstoremaskS(vReg dst, vReg src, vReg tmp, immI_2 size) %{ +instruct vstoremask_narrow(vReg dst, pRegGov src, vReg tmp, immI_gt_1 size) %{ predicate(UseSVE > 0); match(Set dst (VectorStoreMask src size)); effect(TEMP_DEF dst, TEMP tmp); ins_cost(3 * SVE_COST); - format %{ "sve_dup $tmp, H, 0\n\t" - "sve_uzp1 $dst, B, $src, $tmp\n\t" - "sve_neg $dst, B, $dst\t# vector store mask (sve) (H to B)" %} + format %{ "vstoremask $dst, $src\t# vector store mask (sve) (H/S/D)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, - as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); - __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, - as_FloatRegister($dst$$reg)); - + Assembler::SIMD_RegVariant size = __ elemBytes_to_regVariant((int)$size$$constant); + __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($src$$reg), 1, false); + __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ B, + as_FloatRegister($dst$$reg), size, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %} -instruct vstoremaskI(vReg dst, vReg src, vReg tmp, immI_4 size) %{ - predicate(UseSVE > 0); - match(Set dst (VectorStoreMask src size)); - effect(TEMP_DEF dst, TEMP tmp); - ins_cost(4 * SVE_COST); - format %{ "sve_dup $tmp, S, 0\n\t" - "sve_uzp1 $dst, H, $src, $tmp\n\t" - "sve_uzp1 $dst, B, $dst, $tmp\n\t" - "sve_neg $dst, B, $dst\t# vector store mask (sve) (S to B)" %} +// Combine LoadVector+VectorLoadMask when the vector element type is not T_BYTE + +instruct vloadmask_loadV(pRegGov dst, indirect mem, vReg tmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_Vector()->length_in_bytes() == MaxVectorSize && + type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) > 1); + match(Set dst (VectorLoadMask (LoadVector mem))); + effect(TEMP tmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "sve_ld1b $tmp, $mem\n\t" + "sve_cmpne $dst, $tmp, 0\t# load vector mask (sve) (H/S/D)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, - as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, - as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, - as_FloatRegister($dst$$reg)); + // Load mask values which are boolean type, and extend them to the + // expected vector element type. Convert the vector to predicate. 
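+ // The boolean bytes are widened to the H/S/D lane size by the extending
+ // sve_ld1b; sve_cmp(NE, ..., 0) then sets an active predicate lane
+ // wherever the loaded value is non-zero.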
+ BasicType to_vect_bt = Matcher::vector_element_basic_type(this); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, as_FloatRegister($tmp$$reg), + ptrue, T_BOOLEAN, to_vect_bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), __ elemType_to_regVariant(to_vect_bt), + ptrue, as_FloatRegister($tmp$$reg), 0); %} ins_pipe(pipe_slow); %} -instruct vstoremaskL(vReg dst, vReg src, vReg tmp, immI_8 size) %{ - predicate(UseSVE > 0); - match(Set dst (VectorStoreMask src size)); - effect(TEMP_DEF dst, TEMP tmp); - ins_cost(5 * SVE_COST); - format %{ "sve_dup $tmp, D, 0\n\t" - "sve_uzp1 $dst, S, $src, $tmp\n\t" - "sve_uzp1 $dst, H, $dst, $tmp\n\t" - "sve_uzp1 $dst, B, $dst, $tmp\n\t" - "sve_neg $dst, B, $dst\t# vector store mask (sve) (D to B)" %} +instruct vloadmask_loadV_partial(pRegGov dst, indirect mem, vReg vtmp, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_Vector()->length_in_bytes() > 16 && + n->as_Vector()->length_in_bytes() < MaxVectorSize && + type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) > 1); + match(Set dst (VectorLoadMask (LoadVector mem))); + effect(TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(6 * SVE_COST); + format %{ "vloadmask_loadV $dst, $mem\t# load vector mask partial (sve) (H/S/D)" %} ins_encode %{ - __ sve_dup(as_FloatRegister($tmp$$reg), __ D, 0); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, - as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, - as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, - as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, - as_FloatRegister($dst$$reg)); + // Load valid mask values which are boolean type, and extend them to the + // expected vector element type. Convert the vector to predicate. 
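+ // sve_whilelo below makes $ptmp govern only the first vector_length
+ // lanes; the predicated load zeroes the remaining lanes, so they stay
+ // inactive in $dst after the compare with zero.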
+ BasicType to_vect_bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(to_vect_bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size, Matcher::vector_length(this)); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, as_FloatRegister($vtmp$$reg), + as_PRegister($ptmp$$reg), T_BOOLEAN, to_vect_bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), size, ptrue, as_FloatRegister($vtmp$$reg), 0); %} ins_pipe(pipe_slow); %} -dnl -dnl -dnl VLOADMASK_LOADV($1, $2 ) -dnl VLOADMASK_LOADV(esize, cond) -define(`VLOADMASK_LOADV', ` -instruct vloadmask_loadV_$1(vReg dst, ifelse($1, `byte', vmemA, indirect) mem) %{ - predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() == MaxVectorSize && - type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) $2); - match(Set dst (VectorLoadMask (LoadVector mem))); - ins_cost(5 * SVE_COST); - format %{ "sve_ld1b $dst, $mem\n\t" - "sve_neg $dst, $dst\t# load vector mask (sve)" %} - ins_encode %{ - FloatRegister dst_reg = as_FloatRegister($dst$$reg); - BasicType to_vect_bt = Matcher::vector_element_basic_type(this); - Assembler::SIMD_RegVariant to_vect_variant = __ elemType_to_regVariant(to_vect_bt); - loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue, - T_BOOLEAN, to_vect_bt, $mem->opcode(), - as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); - __ sve_neg(dst_reg, to_vect_variant, ptrue, dst_reg); - %} - ins_pipe(pipe_slow); -%}')dnl -dnl -define(`ARGLIST', -`ifelse($1, `byte', vmemA, indirect) mem, vReg src, vReg tmp, ifelse($1, `byte', immI_1, immI_gt_1) esize') -dnl -dnl STOREV_VSTOREMASK($1, ) -dnl STOREV_VSTOREMASK(esize) -define(`STOREV_VSTOREMASK', ` -instruct storeV_vstoremask_$1(ARGLIST($1)) %{ - predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() * - n->as_StoreVector()->in(MemNode::ValueIn)->in(2)->get_int() == MaxVectorSize); + +// Combine VectorStoreMask+StoreVector when the vector element type is not T_BYTE + +instruct storeV_vstoremask(indirect mem, pRegGov src, vReg tmp, immI_gt_1 esize) %{ + predicate(UseSVE > 0 && + Matcher::vector_length_in_bytes(n->as_StoreVector()->in(MemNode::ValueIn)->in(1)) == MaxVectorSize); match(Set mem (StoreVector mem (VectorStoreMask src esize))); effect(TEMP tmp); - ins_cost(5 * SVE_COST); - format %{ "sve_neg $tmp, $src\n\t" - "sve_st1b $tmp, $mem\t# store vector mask (sve)" %} + ins_cost(3 * SVE_COST); + format %{ "sve_cpy $tmp, $src, 1\n\t" + "sve_st1b $tmp, $mem\t# store vector mask (sve) (H/S/D)" %} ins_encode %{ BasicType from_vect_bt = Matcher::vector_element_basic_type(this, $src); assert(type2aelembytes(from_vect_bt) == (int)$esize$$constant, "unsupported type."); - Assembler::SIMD_RegVariant from_vect_variant = __ elemBytes_to_regVariant($esize$$constant); - __ sve_neg(as_FloatRegister($tmp$$reg), from_vect_variant, ptrue, - as_FloatRegister($src$$reg)); + Assembler::SIMD_RegVariant size = __ elemBytes_to_regVariant($esize$$constant); + __ sve_cpy(as_FloatRegister($tmp$$reg), size, as_PRegister($src$$reg), 1, false); loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($tmp$$reg), ptrue, T_BOOLEAN, from_vect_bt, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} ins_pipe(pipe_slow); -%}')dnl -undefine(ARGLIST)dnl -dnl -// load/store mask vector -VLOADMASK_LOADV(byte, == 1) -VLOADMASK_LOADV(non_byte, > 1) -STOREV_VSTOREMASK(byte) 
-STOREV_VSTOREMASK(non_byte) - -// vector add reduction +%} -instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (AddReductionVI src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); +instruct storeV_vstoremask_partial(indirect mem, pRegGov src, vReg vtmp, + immI_gt_1 esize, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_StoreVector()->memory_size() > 16 && + type2aelembytes(n->as_StoreVector()->vect_type()->element_basic_type()) > 1 && + Matcher::vector_length_in_bytes(n->as_StoreVector()->in(MemNode::ValueIn)->in(1)) < MaxVectorSize); + match(Set mem (StoreVector mem (VectorStoreMask src esize))); + effect(TEMP vtmp, TEMP ptmp, KILL cr); + format %{ "storeV_vstoremask $src, $mem\t# store vector mask partial (sve) (H/S/D)" %} + ins_cost(6 * SVE_COST); + ins_encode %{ + // Convert the valid src predicate to vector, and store the vector + // elements as boolean values. + BasicType from_vect_bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(from_vect_bt); + __ sve_cpy(as_FloatRegister($vtmp$$reg), size, as_PRegister($src$$reg), 1, false); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size, Matcher::vector_length(this, $src)); + loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($vtmp$$reg), + as_PRegister($ptmp$$reg), T_BOOLEAN, from_vect_bt, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} +dnl +dnl REDUCE_I($1, $2 ) +dnl REDUCE_I(insn_name, op_name) +define(`REDUCE_I', ` +instruct reduce_$1I(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{ + ifelse($2, AddReductionVI, + `predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);', + `predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);') + match(Set dst ($2 src1 src2)); + effect(TEMP_DEF dst, TEMP tmp); ins_cost(SVE_COST); - format %{ "sve_reduce_addI $dst, $src1, $src2\t# addB/S/I reduction (sve) (may extend)" %} + format %{ "sve_reduce_$1I $dst, $src1, $src2\t# $1I reduction (sve) (may extend)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); - __ sve_uaddv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ addw($dst$$Register, $dst$$Register, $src1$$Register); - if (bt == T_BYTE) { - __ sxtb($dst$$Register, $dst$$Register); - } else if (bt == T_SHORT) { - __ sxth($dst$$Register, $dst$$Register); - } else { - assert(bt == T_INT, "unsupported type"); - } + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + ptrue, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); -%} - -instruct reduce_addI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, +%}')dnl +dnl +dnl +dnl REDUCE_L($1, $2 ) +dnl REDUCE_L(insn_name, op_name) +define(`REDUCE_L', ` +instruct reduce_$1L(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{ + ifelse($2, AddReductionVL, + `predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);', + `predicate(UseSVE > 0 && + 
n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);') + match(Set dst ($2 src1 src2)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(SVE_COST); + format %{ "sve_reduce_$1L $dst, $src1, $src2\t# $1L reduction (sve)" %} + ins_encode %{ + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + ptrue, as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +dnl REDUCE_I_PARTIAL($1, $2 ) +dnl REDUCE_I_PARTIAL(insn_name, op_name) +define(`REDUCE_I_PARTIAL', ` +instruct reduce_$1I_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (AddReductionVI src1 src2)); + ifelse($2, AddReductionVI, + `predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);', + `predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);') + match(Set dst ($2 src1 src2)); effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); - ins_cost(SVE_COST); - format %{ "sve_reduce_addI $dst, $src1, $src2\t# addI reduction partial (sve) (may extend)" %} + ins_cost(2 * SVE_COST); + format %{ "sve_reduce_$1I $dst, $src1, $src2\t# $1I reduction partial (sve) (may extend)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, Matcher::vector_length(this, $src2)); - __ sve_uaddv(as_FloatRegister($vtmp$$reg), variant, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ addw($dst$$Register, $dst$$Register, $src1$$Register); - if (bt == T_BYTE) { - __ sxtb($dst$$Register, $dst$$Register); - } else if (bt == T_SHORT) { - __ sxth($dst$$Register, $dst$$Register); - } else { - assert(bt == T_INT, "unsupported type"); - } - %} - ins_pipe(pipe_slow); -%} - -instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (AddReductionVL src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); - ins_cost(SVE_COST); - format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction (sve)" %} - ins_encode %{ - __ sve_uaddv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ add($dst$$Register, $dst$$Register, $src1$$Register); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); -%} - -instruct reduce_addL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, +%}')dnl +dnl +dnl REDUCE_L_PARTIAL($1, $2 ) +dnl REDUCE_L_PARTIAL(insn_name, op_name) +define(`REDUCE_L_PARTIAL', ` +instruct reduce_$1L_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (AddReductionVL src1 src2)); + ifelse($2, AddReductionVL, + `predicate(UseSVE > 0 && + 
n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);', + `predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);') + match(Set dst ($2 src1 src2)); effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); - ins_cost(SVE_COST); - format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction partial (sve)" %} + ins_cost(2 * SVE_COST); + format %{ "sve_reduce_$1L $dst, $src1, $src2\t# $1L reduction partial (sve)" %} ins_encode %{ __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src2)); - __ sve_uaddv(as_FloatRegister($vtmp$$reg), __ D, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ add($dst$$Register, $dst$$Register, $src1$$Register); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); -%} - +%}')dnl dnl dnl REDUCE_ADDF($1, $2, $3, $4 ) dnl REDUCE_ADDF(insn_name, op_name, reg_dst, size) define(`REDUCE_ADDF', ` -instruct $1($3 src1_dst, vReg src2) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set src1_dst (AddReductionV$2 src1_dst src2)); +instruct reduce_$1($3 src1_dst, vReg src2) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set src1_dst ($2 src1_dst src2)); ins_cost(SVE_COST); format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) ($4)" %} ins_encode %{ @@ -1049,12 +1366,13 @@ dnl dnl REDUCE_ADDF_PARTIAL($1, $2, $3, $4 ) dnl REDUCE_ADDF_PARTIAL(insn_name, suffix, reg_dst, size) define(`REDUCE_ADDF_PARTIAL', ` -instruct $1($3 src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set src1_dst (AddReductionV$2 src1_dst src2)); +instruct reduce_$1_partial($3 src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set src1_dst ($2 src1_dst src2)); ins_cost(SVE_COST); effect(TEMP ptmp, KILL cr); - format %{ "sve_reduce_add$2 $src1_dst, $src1_dst, $src2\t# add$2 reduction partial (sve) ($4)" %} + format %{ "sve_reduce_$1 $src1_dst, $src1_dst, $src2\t# $1 reduction partial (sve) ($4)" %} ins_encode %{ __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ $4, Matcher::vector_length(this, $src2)); @@ -1064,364 +1382,379 @@ instruct $1($3 src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{ ins_pipe(pipe_slow); %}')dnl dnl -REDUCE_ADDF(reduce_addF, F, vRegF, S) -REDUCE_ADDF_PARTIAL(reduce_addF_partial, F, vRegF, S) -REDUCE_ADDF(reduce_addD, D, vRegD, D) -REDUCE_ADDF_PARTIAL(reduce_addD_partial, D, vRegD, D) - -// vector and reduction - -instruct reduce_andI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (AndReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); +dnl +dnl REDUCE_I_PREDICATE($1, $2 ) +dnl REDUCE_I_PREDICATE(insn_name, op_name) +define(`REDUCE_I_PREDICATE', ` +instruct reduce_$1I_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, pRegGov pg) %{ + 
ifelse($2, AddReductionVI, + `predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);', + `predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);') + match(Set dst ($2 (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP tmp); ins_cost(SVE_COST); - format %{ "sve_reduce_andI $dst, $src1, $src2\t# andB/S/I reduction (sve) (may extend)" %} + format %{ "sve_reduce_$1I $dst, $src1, $pg, $src2\t# $1I reduction predicated (sve) (may extend)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); - __ sve_andv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ andw($dst$$Register, $dst$$Register, $src1$$Register); - if (bt == T_BYTE) { - __ sxtb($dst$$Register, $dst$$Register); - } else if (bt == T_SHORT) { - __ sxth($dst$$Register, $dst$$Register); - } else { - assert(bt == T_INT, "unsupported type"); - } + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); -%} - -instruct reduce_andI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, - pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (AndReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); +%}')dnl +dnl +dnl REDUCE_L_PREDICATE($1, $2 ) +dnl REDUCE_L_PREDICATE(insn_name, op_name) +define(`REDUCE_L_PREDICATE', ` +instruct reduce_$1L_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, pRegGov pg) %{ + ifelse($2, AddReductionVL, + `predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);', + `predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);') + match(Set dst ($2 (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP tmp); ins_cost(SVE_COST); - format %{ "sve_reduce_andI $dst, $src1, $src2\t# andI reduction partial (sve) (may extend)" %} + format %{ "sve_reduce_$1L $dst, $src1, $pg, $src2\t# $1L reduction predicated (sve)" %} + ins_encode %{ + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +dnl REDUCE_I_PREDICATE_PARTIAL($1, $2 ) +dnl REDUCE_I_PREDICATE_PARTIAL(insn_name, op_name) +define(`REDUCE_I_PREDICATE_PARTIAL', ` +instruct reduce_$1I_masked_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + ifelse($2, AddReductionVI, + `predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);', + `predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);') + match(Set dst ($2 (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); + ins_cost(3 * 
SVE_COST); + format %{ "sve_reduce_$1I $dst, $src1, $pg, $src2\t# $1I reduction predicated partial (sve) (may extend)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, Matcher::vector_length(this, $src2)); - __ sve_andv(as_FloatRegister($vtmp$$reg), variant, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ andw($dst$$Register, $dst$$Register, $src1$$Register); - if (bt == T_BYTE) { - __ sxtb($dst$$Register, $dst$$Register); - } else if (bt == T_SHORT) { - __ sxth($dst$$Register, $dst$$Register); - } else { - assert(bt == T_INT, "unsupported type"); - } - %} - ins_pipe(pipe_slow); -%} - -instruct reduce_andL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (AndReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); - ins_cost(SVE_COST); - format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction (sve)" %} - ins_encode %{ - __ sve_andv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ andr($dst$$Register, $dst$$Register, $src1$$Register); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); -%} - -instruct reduce_andL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, - pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (AndReductionV src1 src2)); +%}')dnl +dnl +dnl REDUCE_L_PREDICATE_PARTIAL($1, $2 ) +dnl REDUCE_L_PREDICATE_PARTIAL(insn_name, op_name) +define(`REDUCE_L_PREDICATE_PARTIAL', ` +instruct reduce_$1L_masked_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, + pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + ifelse($2, AddReductionVL, + `predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);', + `predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);') + match(Set dst ($2 (Binary src1 src2) pg)); effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); - ins_cost(SVE_COST); - format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction partial (sve)" %} + ins_cost(3 * SVE_COST); + format %{ "sve_reduce_$1L $dst, $src1, $pg, $src2\t# $1L reduction predicated partial (sve)" %} ins_encode %{ __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src2)); - __ sve_andv(as_FloatRegister($vtmp$$reg), __ D, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ andr($dst$$Register, $dst$$Register, $src1$$Register); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ 
sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); -%} - -// vector or reduction - -instruct reduce_orI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (OrReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); +%}')dnl +dnl +dnl REDUCE_ADDF_PREDICATE($1, $2, $3, $4 ) +dnl REDUCE_ADDF_PREDICATE(insn_name, op_name, reg_dst, size) +define(`REDUCE_ADDF_PREDICATE', ` +instruct reduce_$1_masked($3 src1_dst, vReg src2, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set src1_dst ($2 (Binary src1_dst src2) pg)); ins_cost(SVE_COST); - format %{ "sve_reduce_orI $dst, $src1, $src2\t# orB/S/I reduction (sve) (may extend)" %} + format %{ "sve_reduce_$1 $src1_dst, $pg, $src2\t# $1 reduction predicated (sve)" %} ins_encode %{ - BasicType bt = Matcher::vector_element_basic_type(this, $src2); - Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); - __ sve_orv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ orrw($dst$$Register, $dst$$Register, $src1$$Register); - if (bt == T_BYTE) { - __ sxtb($dst$$Register, $dst$$Register); - } else if (bt == T_SHORT) { - __ sxth($dst$$Register, $dst$$Register); - } else { - assert(bt == T_INT, "unsupported type"); - } + __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ $4, + as_PRegister($pg$$reg), as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); -%} - -instruct reduce_orI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, - pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (OrReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); +%}')dnl +dnl +dnl REDUCE_ADDF_PREDICATE_PARTIAL($1, $2, $3, $4 ) +dnl REDUCE_ADDF_PREDICATE_PARTIAL(insn_name, op_name, reg_dst, size) +define(`REDUCE_ADDF_PREDICATE_PARTIAL', ` +instruct reduce_$1_masked_partial($3 src1_dst, vReg src2, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set src1_dst ($2 (Binary src1_dst src2) pg)); + effect(TEMP ptmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_reduce_orI $dst, $src1, $src2\t# orI reduction partial (sve) (may extend)" %} + format %{ "sve_reduce_$1 $src1_dst, $pg, $src2\t# $1 reduction predicated partial (sve)" %} ins_encode %{ - BasicType bt = Matcher::vector_element_basic_type(this, $src2); - Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); - __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ $4, Matcher::vector_length(this, $src2)); - __ sve_orv(as_FloatRegister($vtmp$$reg), variant, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ orrw($dst$$Register, $dst$$Register, $src1$$Register); - if (bt == T_BYTE) { - __ sxtb($dst$$Register, $dst$$Register); - } else if (bt == T_SHORT) { - 
__ sxth($dst$$Register, $dst$$Register); - } else { - assert(bt == T_INT, "unsupported type"); - } + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ $4, + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_slow); -%} +%}')dnl +dnl -instruct reduce_orL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (OrReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); - ins_cost(SVE_COST); - format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction (sve)" %} - ins_encode %{ - __ sve_orv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ orr($dst$$Register, $dst$$Register, $src1$$Register); - %} - ins_pipe(pipe_slow); -%} +// vector add reduction +REDUCE_I(add, AddReductionVI) +REDUCE_L(add, AddReductionVL) +REDUCE_ADDF(addF, AddReductionVF, vRegF, S) +REDUCE_ADDF(addD, AddReductionVD, vRegD, D) +REDUCE_I_PARTIAL(add, AddReductionVI) +REDUCE_L_PARTIAL(add, AddReductionVL) +REDUCE_ADDF_PARTIAL(addF, AddReductionVF, vRegF, S) +REDUCE_ADDF_PARTIAL(addD, AddReductionVD, vRegD, D) + +// vector add reduction - predicated +REDUCE_I_PREDICATE(add, AddReductionVI) +REDUCE_L_PREDICATE(add, AddReductionVL) +REDUCE_ADDF_PREDICATE(addF, AddReductionVF, vRegF, S) +REDUCE_ADDF_PREDICATE(addD, AddReductionVD, vRegD, D) +REDUCE_I_PREDICATE_PARTIAL(add, AddReductionVI) +REDUCE_L_PREDICATE_PARTIAL(add, AddReductionVL) +REDUCE_ADDF_PREDICATE_PARTIAL(addF, AddReductionVF, vRegF, S) +REDUCE_ADDF_PREDICATE_PARTIAL(addD, AddReductionVD, vRegD, D) -instruct reduce_orL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, - pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (OrReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); - ins_cost(SVE_COST); - format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction partial (sve)" %} - ins_encode %{ - __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, - Matcher::vector_length(this, $src2)); - __ sve_orv(as_FloatRegister($vtmp$$reg), __ D, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ orr($dst$$Register, $dst$$Register, $src1$$Register); - %} - ins_pipe(pipe_slow); -%} +// vector and reduction +REDUCE_I(and, AndReductionV) +REDUCE_L(and, AndReductionV) +REDUCE_I_PARTIAL(and, AndReductionV) +REDUCE_L_PARTIAL(and, AndReductionV) + +// vector and reduction - predicated +REDUCE_I_PREDICATE(and, AndReductionV) +REDUCE_L_PREDICATE(and, AndReductionV) +REDUCE_I_PREDICATE_PARTIAL(and, AndReductionV) +REDUCE_L_PREDICATE_PARTIAL(and, AndReductionV) + +// vector or reduction +REDUCE_I(or, OrReductionV) +REDUCE_L(or, OrReductionV) +REDUCE_I_PARTIAL(or, OrReductionV) +REDUCE_L_PARTIAL(or, OrReductionV) + +// vector or reduction - predicated +REDUCE_I_PREDICATE(or, OrReductionV) +REDUCE_L_PREDICATE(or, OrReductionV) +REDUCE_I_PREDICATE_PARTIAL(or, OrReductionV) +REDUCE_L_PREDICATE_PARTIAL(or, OrReductionV) // vector xor reduction +REDUCE_I(eor, XorReductionV) +REDUCE_L(eor, XorReductionV) 
+REDUCE_I_PARTIAL(eor, XorReductionV) +REDUCE_L_PARTIAL(eor, XorReductionV) -instruct reduce_eorI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (XorReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); +// vector xor reduction - predicated +REDUCE_I_PREDICATE(eor, XorReductionV) +REDUCE_L_PREDICATE(eor, XorReductionV) +REDUCE_I_PREDICATE_PARTIAL(eor, XorReductionV) +REDUCE_L_PREDICATE_PARTIAL(eor, XorReductionV) + +dnl +dnl REDUCE_MAXMIN_I($1, $2 ) +dnl REDUCE_MAXMIN_I(insn_name, op_name) +define(`REDUCE_MAXMIN_I', ` +instruct reduce_$1I(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + is_integral_type(n->in(2)->bottom_type()->is_vect()->element_basic_type())); + match(Set dst ($2 src1 src2)); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_reduce_eorI $dst, $src1, $src2\t# xorB/H/I reduction (sve) (may extend)" %} + format %{ "sve_reduce_$1I $dst, $src1, $src2\t# $1I reduction (sve)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); - __ sve_eorv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ eorw($dst$$Register, $dst$$Register, $src1$$Register); - if (bt == T_BYTE) { - __ sxtb($dst$$Register, $dst$$Register); - } else if (bt == T_SHORT) { - __ sxth($dst$$Register, $dst$$Register); - } else { - assert(bt == T_INT, "unsupported type"); - } + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + ptrue, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); -%} - -instruct reduce_eorI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, +%}')dnl +dnl +dnl REDUCE_MAXMIN_L($1, $2 ) +dnl REDUCE_MAXMIN_L(insn_name, op_name) +define(`REDUCE_MAXMIN_L', ` +instruct reduce_$1L(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst ($2 src1 src2)); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + ins_cost(SVE_COST); + format %{ "sve_reduce_$1L $dst, $src1, $src2\t# $1L reduction (sve)" %} + ins_encode %{ + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + ptrue, as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +dnl REDUCE_MAXMIN_I_PARTIAL($1 , $2 ) +dnl REDUCE_MAXMIN_I_PARTIAL(min_max, op_name) +define(`REDUCE_MAXMIN_I_PARTIAL', ` +instruct reduce_$1I_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (XorReductionV src1 src2)); + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG 
&& + is_integral_type(n->in(2)->bottom_type()->is_vect()->element_basic_type())); + match(Set dst ($2 src1 src2)); effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); - ins_cost(SVE_COST); - format %{ "sve_reduce_eorI $dst, $src1, $src2\t# xorI reduction partial (sve) (may extend)" %} + ins_cost(2 * SVE_COST); + format %{ "sve_reduce_$1I $dst, $src1, $src2\t# $1I reduction partial (sve)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, Matcher::vector_length(this, $src2)); - __ sve_eorv(as_FloatRegister($vtmp$$reg), variant, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ eorw($dst$$Register, $dst$$Register, $src1$$Register); - if (bt == T_BYTE) { - __ sxtb($dst$$Register, $dst$$Register); - } else if (bt == T_SHORT) { - __ sxth($dst$$Register, $dst$$Register); - } else { - assert(bt == T_INT, "unsupported type"); - } - %} - ins_pipe(pipe_slow); -%} - -instruct reduce_eorL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst (XorReductionV src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); - ins_cost(SVE_COST); - format %{ "sve_reduce_eorL $dst, $src1, $src2\t# xorL reduction (sve)" %} - ins_encode %{ - __ sve_eorv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ eor($dst$$Register, $dst$$Register, $src1$$Register); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); -%} - -instruct reduce_eorL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, +%}')dnl +dnl +dnl REDUCE_MAXMIN_L_PARTIAL($1 , $2 ) +dnl REDUCE_MAXMIN_L_PARTIAL(min_max, op_name) +define(`REDUCE_MAXMIN_L_PARTIAL', ` +instruct reduce_$1L_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG && - n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst (XorReductionV src1 src2)); + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst ($2 src1 src2)); effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); - ins_cost(SVE_COST); - format %{ "sve_reduce_eorL $dst, $src1, $src2\t# xorL reduction partial (sve)" %} + ins_cost(2 * SVE_COST); + format %{ "sve_reduce_$1L $dst, $src1, $src2\t# $1L reduction partial (sve)" %} ins_encode %{ __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src2)); - __ sve_eorv(as_FloatRegister($vtmp$$reg), __ D, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ eor($dst$$Register, $dst$$Register, $src1$$Register); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); -%} - +%}')dnl dnl -dnl 
REDUCE_MAXMIN_I($1, $2, $3 ) -dnl REDUCE_MAXMIN_I(min_max, op_mame, cmp) -define(`REDUCE_MAXMIN_I', ` -instruct reduce_$1I(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && - (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE || - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT || - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT)); - match(Set dst ($2 src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); +dnl REDUCE_MAXMIN_I_PREDICATE($1 , $2 ) +dnl REDUCE_MAXMIN_I_PREDICATE(min_max, op_name) +define(`REDUCE_MAXMIN_I_PREDICATE', ` +instruct reduce_$1I_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, + pRegGov pg, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + is_integral_type(n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type())); + match(Set dst ($2 (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_reduce_$1I $dst, $src1, $src2\t# reduce $1B/S/I (sve)" %} + format %{ "sve_reduce_$1I $dst, $src1, $pg, $src2\t# $1I reduction predicated (sve)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); - __ sve_s$1v(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ cmpw($dst$$Register, $src1$$Register); - __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::$3); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %}')dnl dnl -dnl REDUCE_MAXMIN_L($1, $2, $3 ) -dnl REDUCE_MAXMIN_L(min_max, op_name, cmp) -define(`REDUCE_MAXMIN_L', ` -instruct reduce_$1L(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); - match(Set dst ($2 src1 src2)); - effect(TEMP_DEF dst, TEMP vtmp); +dnl REDUCE_MAXMIN_L_PREDICATE($1 , $2 ) +dnl REDUCE_MAXMIN_L_PREDICATE(min_max, op_name) +define(`REDUCE_MAXMIN_L_PREDICATE', ` +instruct reduce_$1L_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, + pRegGov pg, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst ($2 (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); ins_cost(SVE_COST); - format %{ "sve_reduce_$1L $dst, $src1, $src2\t# reduce $1L partial (sve)" %} + format %{ "sve_reduce_$1L $dst, $src1, $pg, $src2\t# $1L reduction predicated (sve)" %} ins_encode %{ - __ sve_s$1v(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ cmp($dst$$Register, $src1$$Register); - __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::$3); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, 
as_FloatRegister($src2$$reg), + as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); %}')dnl dnl -dnl REDUCE_MAXMIN_I_PARTIAL($1, $2, $3 ) -dnl REDUCE_MAXMIN_I_PARTIAL(min_max, op_mame, cmp) -define(`REDUCE_MAXMIN_I_PARTIAL', ` -instruct reduce_$1I_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, - pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && - (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE || - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT || - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT)); - match(Set dst ($2 src1 src2)); +dnl REDUCE_MAXMIN_I_PREDICATE_PARTIAL($1 , $2 ) +dnl REDUCE_MAXMIN_I_PREDICATE_PARTIAL(min_max, op_name) +define(`REDUCE_MAXMIN_I_PREDICATE_PARTIAL', ` +instruct reduce_$1I_masked_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp, + pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG && + is_integral_type(n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type())); + match(Set dst ($2 (Binary src1 src2) pg)); effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); - ins_cost(SVE_COST); - format %{ "sve_reduce_$1I $dst, $src1, $src2\t# reduce $1I partial (sve)" %} + ins_cost(3 * SVE_COST); + format %{ "sve_reduce_$1I $dst, $src1, $pg, $src2\t# $1I reduction predicated partial (sve)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt); __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant, Matcher::vector_length(this, $src2)); - __ sve_s$1v(as_FloatRegister($vtmp$$reg), variant, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0); - __ cmpw($dst$$Register, $src1$$Register); - __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::$3); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %}')dnl dnl -dnl REDUCE_MAXMIN_L_PARTIAL($1, $2, $3 ) -dnl REDUCE_MAXMIN_L_PARTIAL(min_max, op_name, cmp) -define(`REDUCE_MAXMIN_L_PARTIAL', ` -instruct reduce_$1L_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, - pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && - n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); - match(Set dst ($2 src1 src2)); +dnl REDUCE_MAXMIN_L_PREDICATE_PARTIAL($1 , $2 ) +dnl REDUCE_MAXMIN_L_PREDICATE_PARTIAL(min_max, op_name) +define(`REDUCE_MAXMIN_L_PREDICATE_PARTIAL', ` +instruct reduce_$1L_masked_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp, + pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst ($2 (Binary src1 src2) pg)); effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr); - ins_cost(SVE_COST); - format %{ 
"sve_reduce_$1L $dst, $src1, $src2\t# reduce $1L partial (sve)" %} + ins_cost(3 * SVE_COST); + format %{ "sve_reduce_$1L $dst, $src1, $pg, $src2\t# $1L reduction predicated partial (sve)" %} ins_encode %{ __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src2)); - __ sve_s$1v(as_FloatRegister($vtmp$$reg), __ D, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); - __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0); - __ cmp($dst$$Register, $src1$$Register); - __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::$3); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG, + $src1$$Register, as_FloatRegister($src2$$reg), + as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg)); %} ins_pipe(pipe_slow); %}')dnl @@ -1430,63 +1763,121 @@ dnl REDUCE_FMINMAX($1, $2, $3, $4, $5 ) dnl REDUCE_FMINMAX(min_max, name_suffix, element_type, size, reg_src_dst) define(`REDUCE_FMINMAX', ` instruct reduce_$1$2($5 dst, $5 src1, vReg src2) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 && + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); match(Set dst (translit($1, `m', `M')ReductionV src1 src2)); ins_cost(INSN_COST); effect(TEMP_DEF dst); - format %{ "sve_f$1v $dst, $src2 # vector (sve) ($4)\n\t" - "f$1s $dst, $dst, $src1\t# $1 reduction $2" %} + format %{ "sve_reduce_$1$2 $dst, $src1, $src2\t# $1$2 reduction (sve)" %} ins_encode %{ - __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4, - ptrue, as_FloatRegister($src2$$reg)); + __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src2$$reg)); __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); %} ins_pipe(pipe_slow); %}')dnl dnl -dnl dnl REDUCE_FMINMAX_PARTIAL($1, $2, $3, $4, $5 ) dnl REDUCE_FMINMAX_PARTIAL(min_max, name_suffix, element_type, size, reg_src_dst) define(`REDUCE_FMINMAX_PARTIAL', ` instruct reduce_$1$2_partial($5 dst, $5 src1, vReg src2, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 && + predicate(UseSVE > 0 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); match(Set dst (translit($1, `m', `M')ReductionV src1 src2)); ins_cost(INSN_COST); effect(TEMP_DEF dst, TEMP ptmp, KILL cr); - format %{ "sve_reduce_$1$2 $dst, $src1, $src2\t# reduce $1 $4 partial (sve)" %} + format %{ "sve_reduce_$1$2 $dst, $src1, $src2\t# $1$2 reduction partial (sve)" %} ins_encode %{ __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ $4, Matcher::vector_length(this, $src2)); + __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4, as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +dnl REDUCE_FMINMAX_PREDICATE($1, $2, $3, $4, $5 ) +dnl REDUCE_FMINMAX_PREDICATE(min_max, name_suffix, element_type, size, reg_src_dst) +define(`REDUCE_FMINMAX_PREDICATE', ` +instruct reduce_$1$2_masked($5 dst, $5 src1, vReg src2, pRegGov pg) %{ + predicate(UseSVE > 0 && + 
n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (translit($1, `m', `M')ReductionV (Binary src1 src2) pg)); + ins_cost(SVE_COST); + format %{ "sve_reduce_$1$2 $dst, $src1, $pg, $src2\t# $1$2 reduction predicated (sve)" %} + ins_encode %{ + __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4, as_PRegister($pg$$reg), as_FloatRegister($src2$$reg)); + __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +dnl REDUCE_FMINMAX_PREDICATE_PARTIAL($1, $2, $3, $4, $5 ) +dnl REDUCE_FMINMAX_PREDICATE_PARTIAL(min_max, name_suffix, element_type, size, reg_src_dst) +define(`REDUCE_FMINMAX_PREDICATE_PARTIAL', ` +instruct reduce_$1$2_masked_partial($5 dst, $5 src1, vReg src2, pRegGov pg, + pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 && + n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (translit($1, `m', `M')ReductionV (Binary src1 src2) pg)); + effect(TEMP_DEF dst, TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "sve_reduce_$1$2 $dst, $src1, $pg, $src2\t# $1$2 reduction predicated partial (sve)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ $4, + Matcher::vector_length(this, $src2)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4, - as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); + as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg)); __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); %} ins_pipe(pipe_slow); %}')dnl - // vector max reduction -REDUCE_MAXMIN_I(max, MaxReductionV, GT) -REDUCE_MAXMIN_I_PARTIAL(max, MaxReductionV, GT) -REDUCE_MAXMIN_L(max, MaxReductionV, GT) -REDUCE_MAXMIN_L_PARTIAL(max, MaxReductionV, GT) +REDUCE_MAXMIN_I(max, MaxReductionV) +REDUCE_MAXMIN_L(max, MaxReductionV) +REDUCE_MAXMIN_I_PARTIAL(max, MaxReductionV) +REDUCE_MAXMIN_L_PARTIAL(max, MaxReductionV) REDUCE_FMINMAX(max, F, T_FLOAT, S, vRegF) REDUCE_FMINMAX_PARTIAL(max, F, T_FLOAT, S, vRegF) REDUCE_FMINMAX(max, D, T_DOUBLE, D, vRegD) REDUCE_FMINMAX_PARTIAL(max, D, T_DOUBLE, D, vRegD) +// vector max reduction - predicated +REDUCE_MAXMIN_I_PREDICATE(max, MaxReductionV) +REDUCE_MAXMIN_L_PREDICATE(max, MaxReductionV) +REDUCE_MAXMIN_I_PREDICATE_PARTIAL(max, MaxReductionV) +REDUCE_MAXMIN_L_PREDICATE_PARTIAL(max, MaxReductionV) +REDUCE_FMINMAX_PREDICATE(max, F, T_FLOAT, S, vRegF) +REDUCE_FMINMAX_PREDICATE(max, D, T_DOUBLE, D, vRegD) +REDUCE_FMINMAX_PREDICATE_PARTIAL(max, F, T_FLOAT, S, vRegF) +REDUCE_FMINMAX_PREDICATE_PARTIAL(max, D, T_DOUBLE, D, vRegD) + // vector min reduction -REDUCE_MAXMIN_I(min, MinReductionV, LT) -REDUCE_MAXMIN_I_PARTIAL(min, MinReductionV, LT) -REDUCE_MAXMIN_L(min, MinReductionV, LT) -REDUCE_MAXMIN_L_PARTIAL(min, MinReductionV, LT) +REDUCE_MAXMIN_I(min, MinReductionV) +REDUCE_MAXMIN_L(min, MinReductionV) +REDUCE_MAXMIN_I_PARTIAL(min, MinReductionV) +REDUCE_MAXMIN_L_PARTIAL(min, MinReductionV) REDUCE_FMINMAX(min, F, T_FLOAT, S, vRegF) REDUCE_FMINMAX_PARTIAL(min, F, T_FLOAT, S, vRegF) REDUCE_FMINMAX(min, D, T_DOUBLE, D, vRegD) REDUCE_FMINMAX_PARTIAL(min, D, T_DOUBLE, D, vRegD) +// vector min reduction - predicated 
+REDUCE_MAXMIN_I_PREDICATE(min, MinReductionV) +REDUCE_MAXMIN_L_PREDICATE(min, MinReductionV) +REDUCE_MAXMIN_I_PREDICATE_PARTIAL(min, MinReductionV) +REDUCE_MAXMIN_L_PREDICATE_PARTIAL(min, MinReductionV) +REDUCE_FMINMAX_PREDICATE(min, F, T_FLOAT, S, vRegF) +REDUCE_FMINMAX_PREDICATE(min, D, T_DOUBLE, D, vRegD) +REDUCE_FMINMAX_PREDICATE_PARTIAL(min, F, T_FLOAT, S, vRegF) +REDUCE_FMINMAX_PREDICATE_PARTIAL(min, D, T_DOUBLE, D, vRegD) + // vector Math.rint, floor, ceil instruct vroundD(vReg dst, vReg src, immI rmode) %{ @@ -1664,369 +2055,400 @@ VSHIFT_COUNT(vshiftcntS, H, 8, T_SHORT) VSHIFT_COUNT(vshiftcntI, S, 4, T_INT) VSHIFT_COUNT(vshiftcntL, D, 2, T_LONG) -// vector sqrt -UNARY_OP_TRUE_PREDICATE(vsqrtF, SqrtVF, S, 16, sve_fsqrt) -UNARY_OP_TRUE_PREDICATE(vsqrtD, SqrtVD, D, 16, sve_fsqrt) +// vector shift - predicated +BINARY_OP_PREDICATE(vasrB, RShiftVB, B, sve_asr) +BINARY_OP_PREDICATE(vasrS, RShiftVS, H, sve_asr) +BINARY_OP_PREDICATE(vasrI, RShiftVI, S, sve_asr) +BINARY_OP_PREDICATE(vasrL, RShiftVL, D, sve_asr) +BINARY_OP_PREDICATE(vlslB, LShiftVB, B, sve_lsl) +BINARY_OP_PREDICATE(vlslS, LShiftVS, H, sve_lsl) +BINARY_OP_PREDICATE(vlslI, LShiftVI, S, sve_lsl) +BINARY_OP_PREDICATE(vlslL, LShiftVL, D, sve_lsl) +BINARY_OP_PREDICATE(vlsrB, URShiftVB, B, sve_lsr) +BINARY_OP_PREDICATE(vlsrS, URShiftVS, H, sve_lsr) +BINARY_OP_PREDICATE(vlsrI, URShiftVI, S, sve_lsr) +BINARY_OP_PREDICATE(vlsrL, URShiftVL, D, sve_lsr) +dnl +dnl VSHIFT_IMM_PREDICATED($1, $2, $3, $4, $5, $6 ) +dnl VSHIFT_IMM_PREDICATED(insn_name, op_name, op_name2, type, size, insn) +define(`VSHIFT_IMM_PREDICATED', ` +instruct $1_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{ + predicate(UseSVE > 0); + match(Set dst_src ($2 (Binary dst_src ($3 shift)) pg)); + ins_cost(SVE_COST); + format %{ "$6 $dst_src, $pg, $dst_src, $shift\t# vector (sve) ($4)" %} + ins_encode %{ + int con = (int)$shift$$constant; + assert(con ifelse(index(`$1', `vlsl'), 0, `>=', `>') 0 && con < $5, "invalid shift immediate"); + __ $6(as_FloatRegister($dst_src$$reg), __ $4, as_PRegister($pg$$reg), con); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl +VSHIFT_IMM_PREDICATED(vasrB, RShiftVB, RShiftCntV, B, 8, sve_asr) +VSHIFT_IMM_PREDICATED(vasrS, RShiftVS, RShiftCntV, H, 16, sve_asr) +VSHIFT_IMM_PREDICATED(vasrI, RShiftVI, RShiftCntV, S, 32, sve_asr) +VSHIFT_IMM_PREDICATED(vasrL, RShiftVL, RShiftCntV, D, 64, sve_asr) +VSHIFT_IMM_PREDICATED(vlsrB, URShiftVB, RShiftCntV, B, 8, sve_lsr) +VSHIFT_IMM_PREDICATED(vlsrS, URShiftVS, RShiftCntV, H, 16, sve_lsr) +VSHIFT_IMM_PREDICATED(vlsrI, URShiftVI, RShiftCntV, S, 32, sve_lsr) +VSHIFT_IMM_PREDICATED(vlsrL, URShiftVL, RShiftCntV, D, 64, sve_lsr) +VSHIFT_IMM_PREDICATED(vlslB, LShiftVB, LShiftCntV, B, 8, sve_lsl) +VSHIFT_IMM_PREDICATED(vlslS, LShiftVS, LShiftCntV, H, 16, sve_lsl) +VSHIFT_IMM_PREDICATED(vlslI, LShiftVI, LShiftCntV, S, 32, sve_lsl) +VSHIFT_IMM_PREDICATED(vlslL, LShiftVL, LShiftCntV, D, 64, sve_lsl) -// vector sub -BINARY_OP_UNPREDICATED(vsubB, SubVB, B, 16, sve_sub) -BINARY_OP_UNPREDICATED(vsubS, SubVS, H, 8, sve_sub) -BINARY_OP_UNPREDICATED(vsubI, SubVI, S, 4, sve_sub) -BINARY_OP_UNPREDICATED(vsubL, SubVL, D, 2, sve_sub) -BINARY_OP_UNPREDICATED(vsubF, SubVF, S, 4, sve_fsub) -BINARY_OP_UNPREDICATED(vsubD, SubVD, D, 2, sve_fsub) +// vector sqrt +UNARY_OP_TRUE_PREDICATE(vsqrtF, SqrtVF, S, sve_fsqrt) +UNARY_OP_TRUE_PREDICATE(vsqrtD, SqrtVD, D, sve_fsqrt) -// vector mask cast +// vector sqrt - predicated +UNARY_OP_PREDICATE(vsqrtF, SqrtVF, S, sve_fsqrt) +UNARY_OP_PREDICATE(vsqrtD, SqrtVD, D, sve_fsqrt) -instruct 
vmaskcast(vReg dst) %{ - predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length() && +// vector sub +BINARY_OP_UNPREDICATE(vsubB, SubVB, B, 16, sve_sub) +BINARY_OP_UNPREDICATE(vsubS, SubVS, H, 8, sve_sub) +BINARY_OP_UNPREDICATE(vsubI, SubVI, S, 4, sve_sub) +BINARY_OP_UNPREDICATE(vsubL, SubVL, D, 2, sve_sub) +BINARY_OP_UNPREDICATE(vsubF, SubVF, S, 4, sve_fsub) +BINARY_OP_UNPREDICATE(vsubD, SubVD, D, 2, sve_fsub) + +// vector sub - predicated +BINARY_OP_PREDICATE(vsubB, SubVB, B, sve_sub) +BINARY_OP_PREDICATE(vsubS, SubVS, H, sve_sub) +BINARY_OP_PREDICATE(vsubI, SubVI, S, sve_sub) +BINARY_OP_PREDICATE(vsubL, SubVL, D, sve_sub) +BINARY_OP_PREDICATE(vsubF, SubVF, S, sve_fsub) +BINARY_OP_PREDICATE(vsubD, SubVD, D, sve_fsub) + +// ------------------------------ Vector mask cast -------------------------- + +instruct vmaskcast(pRegGov dst_src) %{ + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length() && n->bottom_type()->is_vect()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes()); - match(Set dst (VectorMaskCast dst)); + match(Set dst_src (VectorMaskCast dst_src)); ins_cost(0); - format %{ "vmaskcast $dst\t# empty (sve)" %} + format %{ "vmaskcast $dst_src\t# empty (sve)" %} ins_encode %{ // empty %} ins_pipe(pipe_class_empty); %} +instruct vmaskcast_extend(pRegGov dst, pReg src) +%{ + predicate(UseSVE > 0 && + (Matcher::vector_length_in_bytes(n) == 2 * Matcher::vector_length_in_bytes(n->in(1)) || + Matcher::vector_length_in_bytes(n) == 4 * Matcher::vector_length_in_bytes(n->in(1)) || + Matcher::vector_length_in_bytes(n) == 8 * Matcher::vector_length_in_bytes(n->in(1)))); + match(Set dst (VectorMaskCast src)); + ins_cost(SVE_COST * 3); + format %{ "sve_vmaskcast_extend $dst, $src\t# extend predicate $src" %} + ins_encode %{ + __ sve_vmaskcast_extend(as_PRegister($dst$$reg), as_PRegister($src$$reg), + Matcher::vector_length_in_bytes(this), Matcher::vector_length_in_bytes(this, $src)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmaskcast_narrow(pRegGov dst, pReg src) +%{ + predicate(UseSVE > 0 && + (Matcher::vector_length_in_bytes(n) * 2 == Matcher::vector_length_in_bytes(n->in(1)) || + Matcher::vector_length_in_bytes(n) * 4 == Matcher::vector_length_in_bytes(n->in(1)) || + Matcher::vector_length_in_bytes(n) * 8 == Matcher::vector_length_in_bytes(n->in(1)))); + match(Set dst (VectorMaskCast src)); + ins_cost(SVE_COST * 3); + format %{ "sve_vmaskcast_narrow $dst, $src\t# narrow predicate $src" %} + ins_encode %{ + __ sve_vmaskcast_narrow(as_PRegister($dst$$reg), as_PRegister($src$$reg), + Matcher::vector_length_in_bytes(this), Matcher::vector_length_in_bytes(this, $src)); + %} + ins_pipe(pipe_slow); +%} +dnl + // ------------------------------ Vector cast ------------------------------- dnl dnl -define(`VECTOR_CAST_EXTEND1', ` +define(`VECTOR_CAST_X2X', ` instruct vcvt$1to$2`'(vReg dst, vReg src) %{ predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); match(Set dst (VectorCast$1`'2X src)); ins_cost(SVE_COST); - format %{ "sve_$3 $dst, $4, $src\t# convert $1 to $2 vector" %} + format %{ "sve_vectorcast_$5 $dst, $src\t# convert $1 to $2 vector" %} ins_encode %{ - __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg)); + __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4); %} ins_pipe(pipe_slow); %}')dnl + dnl +dnl Start of vector cast rules dnl 
-define(`VECTOR_CAST_EXTEND2', ` -instruct vcvt$1to$2`'(vReg dst, vReg src) +instruct vcvtBtoX_extend(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); - match(Set dst (VectorCast$1`'2X src)); + predicate(UseSVE > 0); + match(Set dst (VectorCastB2X src)); ins_cost(2 * SVE_COST); - format %{ "sve_$3 $dst, $4, $src\n\t" - "sve_$3 $dst, $5, $dst\t# convert $1 to $2 vector" %} + format %{ "sve_vectorcast_b2x $dst, $src\t# convert B to X vector (extend)" %} ins_encode %{ - __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg)); - __ sve_$3(as_FloatRegister($dst$$reg), __ $5, as_FloatRegister($dst$$reg)); + BasicType to_bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt); + __ sve_vector_extend(as_FloatRegister($dst$$reg), to_size, as_FloatRegister($src$$reg), __ B); + if (to_bt == T_FLOAT || to_bt == T_DOUBLE) { + __ sve_scvtf(as_FloatRegister($dst$$reg), to_size, ptrue, as_FloatRegister($dst$$reg), to_size); + } %} ins_pipe(pipe_slow); -%}')dnl -dnl -dnl -define(`VECTOR_CAST_EXTEND3', ` -instruct vcvt$1to$2`'(vReg dst, vReg src) +%} + +instruct vcvtStoB(vReg dst, vReg src, vReg tmp) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); - match(Set dst (VectorCast$1`'2X src)); - ins_cost(3 * SVE_COST); - format %{ "sve_$3 $dst, $4, $src\n\t" - "sve_$3 $dst, $5, $dst\n\t" - "sve_$3 $dst, $6, $dst\t# convert $1 to $2 vector" %} + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorCastS2X src)); + effect(TEMP tmp); + ins_cost(2 * SVE_COST); + format %{ "sve_vectorcast_s2b $dst, $src\t# convert H to B vector" %} ins_encode %{ - __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg)); - __ sve_$3(as_FloatRegister($dst$$reg), __ $5, as_FloatRegister($dst$$reg)); - __ sve_$3(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg)); + __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ B, + as_FloatRegister($src$$reg), __ H, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); -%}')dnl -dnl -dnl -define(`VECTOR_CAST_NARROW1', ` -instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp) +%} + +instruct vcvtStoX_extend(vReg dst, vReg src) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); - match(Set dst (VectorCast$1`'2X src)); - effect(TEMP tmp); + type2aelembytes(Matcher::vector_element_basic_type(n)) > 2); + match(Set dst (VectorCastS2X src)); ins_cost(2 * SVE_COST); - format %{ "sve_$3 $tmp, $4, 0\n\t" - "sve_$5 $dst, $4, $src, tmp\t# convert $1 to $2 vector" %} + format %{ "sve_vectorcast_s2x $dst, $src\t# convert H to X vector (extend)" %} ins_encode %{ - __ sve_$3(as_FloatRegister($tmp$$reg), __ $4, 0); - __ sve_$5(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); + BasicType to_bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt); + __ sve_vector_extend(as_FloatRegister($dst$$reg), to_size, as_FloatRegister($src$$reg), __ H); + if (to_bt == T_FLOAT || to_bt == T_DOUBLE) { + __ sve_scvtf(as_FloatRegister($dst$$reg), to_size, ptrue, as_FloatRegister($dst$$reg), to_size); + } %} ins_pipe(pipe_slow); -%}')dnl -dnl -dnl -define(`VECTOR_CAST_NARROW2', ` -instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp) +%} + +instruct vcvtItoB(vReg dst, vReg src, vReg tmp) %{ 
predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); - match(Set dst (VectorCast$1`'2X src)); + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorCastI2X src)); effect(TEMP_DEF dst, TEMP tmp); ins_cost(3 * SVE_COST); - format %{ "sve_$3 $tmp, $4, 0\n\t" - "sve_$5 $dst, $4, $src, tmp\n\t" - "sve_$5 $dst, $6, $dst, tmp\n\t# convert $1 to $2 vector" %} + format %{ "sve_vectorcast_i2b $dst, $src\t# convert I to B vector" %} ins_encode %{ - __ sve_$3(as_FloatRegister($tmp$$reg), __ $4, 0); - __ sve_$5(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); - __ sve_$5(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ B, + as_FloatRegister($src$$reg), __ S, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); -%}')dnl -dnl -dnl -define(`VECTOR_CAST_NARROW3', ` -instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp) +%} + +instruct vcvtItoS(vReg dst, vReg src, vReg tmp) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); - match(Set dst (VectorCast$1`'2X src)); - effect(TEMP_DEF dst, TEMP tmp); - ins_cost(4 * SVE_COST); - format %{ "sve_$3 $tmp, $4, 0\n\t" - "sve_$5 $dst, $4, $src, tmp\n\t" - "sve_$5 $dst, $6, $dst, tmp\n\t" - "sve_$5 $dst, $7, $dst, tmp\n\t# convert $1 to $2 vector" %} + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorCastI2X src)); + effect(TEMP tmp); + ins_cost(2 * SVE_COST); + format %{ "sve_vectorcast_i2s $dst, $src\t# convert I to H vector" %} ins_encode %{ - __ sve_$3(as_FloatRegister($tmp$$reg), __ $4, 0); - __ sve_$5(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg)); - __ sve_$5(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - __ sve_$5(as_FloatRegister($dst$$reg), __ $7, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ H, + as_FloatRegister($src$$reg), __ S, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); -%}')dnl -dnl -dnl -define(`VECTOR_CAST_I2F_EXTEND2', ` -instruct vcvt$1to$2`'(vReg dst, vReg src) +%} + +instruct vcvtItoL(vReg dst, vReg src) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); - match(Set dst (VectorCast$1`'2X src)); - ins_cost(3 * SVE_COST); - format %{ "sve_$3 $dst, $4, $src\n\t" - "sve_$3 $dst, $5, $dst\n\t" - "sve_$6 $dst, $5, $dst, $5\t# convert $1 to $2 vector" %} + n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (VectorCastI2X src)); + ins_cost(SVE_COST); + format %{ "sve_vectorcast_i2l $dst, $src\t# convert I to L vector" %} ins_encode %{ - __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg)); - __ sve_$3(as_FloatRegister($dst$$reg), __ $5, as_FloatRegister($dst$$reg)); - __ sve_$6(as_FloatRegister($dst$$reg), __ $5, ptrue, as_FloatRegister($dst$$reg), __ $5); + __ sve_vector_extend(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg), __ S); %} ins_pipe(pipe_slow); -%}')dnl -dnl +%} dnl -define(`VECTOR_CAST_I2F_EXTEND3', ` -instruct vcvt$1to$2`'(vReg dst, vReg src) +dnl vcvtItoF +VECTOR_CAST_X2X(I, F, scvtf, S, i2f) + +instruct vcvtItoD(vReg dst, vReg src) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == 
T_`'TYPE2DATATYPE($2)); - match(Set dst (VectorCast$1`'2X src)); - ins_cost(4 * SVE_COST); - format %{ "sve_$3 $dst, $4, $src\n\t" - "sve_$3 $dst, $5, $dst\n\t" - "sve_$3 $dst, $6, $dst\n\t" - "sve_$7 $dst, $6, $dst, $6\t# convert $1 to $2 vector" %} + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorCastI2X src)); + ins_cost(2 * SVE_COST); + format %{ "sve_vectorcast_i2d $dst, $src\t# convert I to D vector" %} ins_encode %{ - __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg)); - __ sve_$3(as_FloatRegister($dst$$reg), __ $5, as_FloatRegister($dst$$reg)); - __ sve_$3(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg)); - __ sve_$7(as_FloatRegister($dst$$reg), __ $6, ptrue, as_FloatRegister($dst$$reg), __ $6); + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg)); + __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D); %} ins_pipe(pipe_slow); -%}')dnl -dnl -dnl -define(`VECTOR_CAST_X2F_NARROW1', ` -instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp) +%} + +instruct vcvtLtoX_narrow(vReg dst, vReg src, vReg tmp) +%{ + predicate(UseSVE > 0 && is_integral_type(Matcher::vector_element_basic_type(n))); + match(Set dst (VectorCastL2X src)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(2 * SVE_COST); + format %{ "sve_vectorcast_l2x $dst, $src\t# convert L to B/H/S vector (narrow)" %} + ins_encode %{ + BasicType to_bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt); + __ sve_vector_narrow(as_FloatRegister($dst$$reg), to_size, + as_FloatRegister($src$$reg), __ D, as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvtLtoF(vReg dst, vReg src, vReg tmp) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); - match(Set dst (VectorCast$1`'2X src)); + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorCastL2X src)); effect(TEMP_DEF dst, TEMP tmp); ins_cost(3 * SVE_COST); - format %{ "sve_$3 $dst, $4, $src, $5\n\t" - "sve_$6 $tmp, $7, 0\n\t" - "sve_$8 $dst, $7, $dst, $tmp\t# convert $1 to $2 vector" %} + format %{ "sve_vectorcast_l2f $dst, $src\t# convert L to F vector" %} ins_encode %{ - __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $5); - __ sve_$6(as_FloatRegister($tmp$$reg), __ $7, 0); - __ sve_$8(as_FloatRegister($dst$$reg), __ $7, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D); + __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($dst$$reg), __ D, as_FloatRegister($tmp$$reg)); + %} ins_pipe(pipe_slow); -%}')dnl -dnl +%} dnl -define(`VECTOR_CAST_X2X', ` -instruct vcvt$1to$2`'(vReg dst, vReg src) +dnl vcvtLtoD +VECTOR_CAST_X2X(L, D, scvtf, D, l2d) + +instruct vcvtFtoX_narrow(vReg dst, vReg src, vReg tmp) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); - match(Set dst (VectorCast$1`'2X src)); - ins_cost(SVE_COST); - format %{ "sve_$3 $dst, $4, $src, $4\t# convert $1 to $2 vector" %} + (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE || + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT)); + match(Set dst (VectorCastF2X src)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(3 * SVE_COST); + format %{ "sve_vectorcast_f2x $dst, $src\t# convert F to B/H 
vector" %} ins_encode %{ - __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4); + BasicType to_bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt); + __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S); + __ sve_vector_narrow(as_FloatRegister($dst$$reg), to_size, + as_FloatRegister($dst$$reg), __ S, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); -%}')dnl -dnl -dnl -define(`VECTOR_CAST_X2F_EXTEND1', ` -instruct vcvt$1to$2`'(vReg dst, vReg src) +%} + +instruct vcvtFtoI(vReg dst, vReg src) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); - match(Set dst (VectorCast$1`'2X src)); - ins_cost(2 * SVE_COST); - format %{ "sve_$3 $dst, $4, $src\n\t" - "sve_$5 $dst, $4, $dst, $6\t# convert $1 to $2 vector" %} + (n->bottom_type()->is_vect()->element_basic_type() == T_INT)); + match(Set dst (VectorCastF2X src)); + ins_cost(SVE_COST); + format %{ "sve_vectorcast_f2x $dst, $src\t# convert F to I vector" %} ins_encode %{ - __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg)); - __ sve_$5(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($dst$$reg), __ $6); + __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S); %} ins_pipe(pipe_slow); -%}')dnl -dnl -dnl -define(`VECTOR_CAST_F2X_NARROW1', ` -instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp) +%} + +instruct vcvtFtoL(vReg dst, vReg src) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); - match(Set dst (VectorCast$1`'2X src)); - effect(TEMP_DEF dst, TEMP tmp); - ins_cost(3 * SVE_COST); - format %{ "sve_$3 $dst, $4, $src, $4\n\t" - "sve_$5 $tmp, $6, 0\n\t" - "sve_$7 $dst, $6, $dst, tmp\t# convert $1 to $2 vector" %} + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG)); + match(Set dst (VectorCastF2X src)); + ins_cost(SVE_COST * 2); + format %{ "sve_vectorcast_f2x $dst, $src\t# convert F to L vector" %} ins_encode %{ - __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4); - __ sve_$5(as_FloatRegister($tmp$$reg), __ $6, 0); - __ sve_$7(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg)); + __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ S); %} ins_pipe(pipe_slow); -%}')dnl -dnl -dnl -define(`VECTOR_CAST_F2X_NARROW2', ` -instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp) +%} + +instruct vcvtFtoD(vReg dst, vReg src) %{ - predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); - match(Set dst (VectorCast$1`'2X src)); - effect(TEMP_DEF dst, TEMP tmp); - ins_cost(4 * SVE_COST); - format %{ "sve_$3 $dst, $4, $src, $4\n\t" - "sve_$5 $tmp, $6, 0\n\t" - "sve_$7 $dst, $6, $dst, tmp\n\t" - "sve_$7 $dst, $8, $dst, tmp\n\t# convert $1 to $2 vector" %} + predicate(UseSVE > 0 && + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorCastF2X src)); + ins_cost(2 * SVE_COST); + format %{ "sve_vectorcast_f2d $dst, $dst\t# convert F to D vector" %} ins_encode %{ - __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4); - __ sve_$5(as_FloatRegister($tmp$$reg), __ $6, 0); - __ sve_$7(as_FloatRegister($dst$$reg), __ $6, 
as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - __ sve_$7(as_FloatRegister($dst$$reg), __ $8, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_vector_extend(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg), __ S); + __ sve_fcvt(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ S); %} ins_pipe(pipe_slow); -%}')dnl -dnl -dnl -define(`VECTOR_CAST_F2X_EXTEND1', ` -instruct vcvt$1to$2`'(vReg dst, vReg src) +%} + +instruct vcvtDtoX_narrow(vReg dst, vReg src, vReg tmp) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); - match(Set dst (VectorCast$1`'2X src)); - ins_cost(2 * SVE_COST); - format %{ "sve_$3 $dst, $4, $src, $4\n\t" - "sve_$5 $dst, $6, $dst\t# convert $1 to $2 vector" %} + (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE || + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || + n->bottom_type()->is_vect()->element_basic_type() == T_INT)); + match(Set dst (VectorCastD2X src)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(3 * SVE_COST); + format %{ "sve_vectorcast_d2x $dst, $src\t# convert D to X vector (narrow)" %} ins_encode %{ - __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4); - __ sve_$5(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg)); + BasicType to_bt = Matcher::vector_element_basic_type(this); + Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt); + __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D); + __ sve_vector_narrow(as_FloatRegister($dst$$reg), to_size, + as_FloatRegister($dst$$reg), __ D, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); -%}')dnl -dnl +%} dnl -define(`VECTOR_CAST_F2X_NARROW3', ` -instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp) +dnl vcvtDtoL +VECTOR_CAST_X2X(D, L, fcvtzs, D, d2l) + +instruct vcvtDtoF(vReg dst, vReg src, vReg tmp) %{ predicate(UseSVE > 0 && - n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); - match(Set dst (VectorCast$1`'2X src)); + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorCastD2X src)); effect(TEMP_DEF dst, TEMP tmp); - ins_cost(5 * SVE_COST); - format %{ "sve_$3 $dst, $4, $src, $4\n\t" - "sve_$5 $tmp, $6, 0\n\t" - "sve_$7 $dst, $6, $dst, tmp\n\t" - "sve_$7 $dst, $8, $dst, tmp\n\t" - "sve_$7 $dst, $9, $dst, tmp\n\t# convert $1 to $2 vector" %} + ins_cost(3 * SVE_COST); + format %{ "sve_vectorcast_d2f $dst, S, $dst\t# convert D to F vector" %} ins_encode %{ - __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4); - __ sve_$5(as_FloatRegister($tmp$$reg), __ $6, 0); - __ sve_$7(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - __ sve_$7(as_FloatRegister($dst$$reg), __ $8, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - __ sve_$7(as_FloatRegister($dst$$reg), __ $9, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ sve_fcvt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D); + __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($dst$$reg), __ D, as_FloatRegister($tmp$$reg)); %} ins_pipe(pipe_slow); -%}')dnl -dnl -VECTOR_CAST_EXTEND1(B, S, sunpklo, H) -VECTOR_CAST_EXTEND2(B, I, sunpklo, H, S) -VECTOR_CAST_EXTEND3(B, L, sunpklo, H, S, D) -VECTOR_CAST_I2F_EXTEND2(B, F, sunpklo, H, S, scvtf) -VECTOR_CAST_I2F_EXTEND3(B, D, sunpklo, H, S, D, scvtf) -dnl 
-VECTOR_CAST_NARROW1(S, B, dup, B, uzp1) -VECTOR_CAST_EXTEND1(S, I, sunpklo, S) -VECTOR_CAST_EXTEND2(S, L, sunpklo, S, D) -VECTOR_CAST_X2F_EXTEND1(S, F, sunpklo, S, scvtf, S) -VECTOR_CAST_I2F_EXTEND2(S, D, sunpklo, S, D, scvtf) -dnl -VECTOR_CAST_NARROW2(I, B, dup, H, uzp1, B) -VECTOR_CAST_NARROW1(I, S, dup, H, uzp1) -VECTOR_CAST_EXTEND1(I, L, sunpklo, D) -VECTOR_CAST_X2X(I, F, scvtf, S) -VECTOR_CAST_X2F_EXTEND1(I, D, sunpklo, D, scvtf, D) -dnl -VECTOR_CAST_NARROW3(L, B, dup, S, uzp1, H, B) -VECTOR_CAST_NARROW2(L, S, dup, S, uzp1, H) -VECTOR_CAST_NARROW1(L, I, dup, S, uzp1) -VECTOR_CAST_X2F_NARROW1(L, F, scvtf, S, D, dup, S, uzp1) -VECTOR_CAST_X2X(L, D, scvtf, D) -dnl -VECTOR_CAST_F2X_NARROW2(F, B, fcvtzs, S, dup, H, uzp1, B) -VECTOR_CAST_F2X_NARROW1(F, S, fcvtzs, S, dup, H, uzp1) -VECTOR_CAST_X2X(F, I, fcvtzs, S) -VECTOR_CAST_F2X_EXTEND1(F, L, fcvtzs, S, sunpklo, D) -VECTOR_CAST_X2F_EXTEND1(F, D, sunpklo, D, fcvt, S) -dnl -VECTOR_CAST_F2X_NARROW3(D, B, fcvtzs, D, dup, S, uzp1, H, B) -VECTOR_CAST_F2X_NARROW2(D, S, fcvtzs, D, dup, S, uzp1, H) -VECTOR_CAST_F2X_NARROW1(D, I, fcvtzs, D, dup, S, uzp1) -VECTOR_CAST_X2X(D, L, fcvtzs, D) -VECTOR_CAST_X2F_NARROW1(D, F, fcvt, S, D, dup, S, uzp1) +%} + dnl dnl // ------------------------------ Vector extract --------------------------------- define(`VECTOR_EXTRACT_SXT', ` -instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr) +instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0); match(Set dst (Extract$1 src idx)); - effect(TEMP pTmp, KILL cr); + effect(TEMP pgtmp, KILL cr); ins_cost(2 * SVE_COST); - format %{ "sve_extract $dst, $3, $pTmp, $src, $idx\n\t" + format %{ "sve_extract $dst, $3, $pgtmp, $src, $idx\n\t" "sbfmw $dst, $dst, 0U, $5\t# extract from vector($1)" %} ins_encode %{ - __ sve_extract(as_$4($dst$$reg), __ $3, as_PRegister($pTmp$$reg), + __ sve_extract(as_$4($dst$$reg), __ $3, as_PRegister($pgtmp$$reg), as_FloatRegister($src$$reg), (int)($idx$$constant)); __ sbfmw(as_$4($dst$$reg), as_$4($dst$$reg), 0U, $5); %} @@ -2038,15 +2460,15 @@ VECTOR_EXTRACT_SXT(S, iRegINoSp, H, Register, 15U) dnl define(`VECTOR_EXTRACT', ` -instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr) +instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0); match(Set dst (Extract$1 src idx)); - effect(TEMP pTmp, KILL cr); + effect(TEMP pgtmp, KILL cr); ins_cost(2 * SVE_COST); - format %{ "sve_extract $dst, $3, $pTmp, $src, $idx\t# extract from vector($1)" %} + format %{ "sve_extract $dst, $3, $pgtmp, $src, $idx\t# extract from vector($1)" %} ins_encode %{ - __ sve_extract(as_$4($dst$$reg), __ $3, as_PRegister($pTmp$$reg), + __ sve_extract(as_$4($dst$$reg), __ $3, as_PRegister($pgtmp$$reg), as_FloatRegister($src$$reg), (int)($idx$$constant)); %} ins_pipe(pipe_slow); @@ -2058,155 +2480,165 @@ VECTOR_EXTRACT(F, vRegF, S, FloatRegister) VECTOR_EXTRACT(D, vRegD, D, FloatRegister) // ------------------------------- VectorTest ---------------------------------- -dnl -dnl VTEST($1, $2, $3, $4 ) -dnl VTEST(op_name, pred, imm, cond) -define(`VTEST', ` -instruct vtest_$1`'(iRegINoSp dst, vReg src1, vReg src2, pReg pTmp, rFlagsReg cr) + +instruct vtest_alltrue(iRegINoSp dst, pRegGov src1, pRegGov src2, pReg ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && - static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::$2); + predicate(UseSVE > 0 && 
n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); + match(Set dst (VectorTest src1 src2)); + effect(TEMP ptmp, KILL cr); + ins_cost(SVE_COST); + format %{ "sve_eors $ptmp, $src1, $src2\t# $src2 is all true mask\n" + "csetw $dst, EQ\t# VectorTest (sve) - alltrue" %} + ins_encode %{ + __ sve_eors(as_PRegister($ptmp$$reg), ptrue, + as_PRegister($src1$$reg), as_PRegister($src2$$reg)); + __ csetw(as_Register($dst$$reg), Assembler::EQ); + %} + ins_pipe(pipe_slow); +%} + +instruct vtest_anytrue(iRegINoSp dst, pRegGov src1, pRegGov src2, rFlagsReg cr) +%{ + predicate(UseSVE > 0 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize && + static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); match(Set dst (VectorTest src1 src2)); - effect(TEMP pTmp, KILL cr); + effect(KILL cr); ins_cost(SVE_COST); - format %{ "sve_cmpeq $pTmp, $src1, $3\n\t" - "csetw $dst, $4\t# VectorTest (sve) - $1" %} + format %{ "sve_ptest $src1\n\t" + "csetw $dst, NE\t# VectorTest (sve) - anytrue" %} ins_encode %{ // "src2" is not used for sve. - BasicType bt = Matcher::vector_element_basic_type(this, $src1); - Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, - ptrue, as_FloatRegister($src1$$reg), $3); - __ csetw(as_Register($dst$$reg), Assembler::$4); + __ sve_ptest(ptrue, as_PRegister($src1$$reg)); + __ csetw(as_Register($dst$$reg), Assembler::NE); %} ins_pipe(pipe_slow); -%}')dnl -dnl -VTEST(alltrue, overflow, 0, EQ) -VTEST(anytrue, ne, -1, NE) +%} dnl dnl -dnl VTEST_PARTIAL($1, $2, $3, $4 ) -dnl VTEST_PARTIAL(op_name, pred, imm, cond) +dnl VTEST_PARTIAL($1, $2, $3, $4 ) +dnl VTEST_PARTIAL(op_name, pred, inst, cond) define(`VTEST_PARTIAL', ` -instruct vtest_$1_partial`'(iRegINoSp dst, vReg src1, vReg src2, pRegGov pTmp, rFlagsReg cr) +instruct vtest_$1_partial`'(iRegINoSp dst, pRegGov src1, pRegGov src2, pRegGov ptmp, rFlagsReg cr) %{ - predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && + predicate(UseSVE > 0 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize && static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::$2); match(Set dst (VectorTest src1 src2)); - effect(TEMP pTmp, KILL cr); + effect(TEMP ptmp, KILL cr); ins_cost(SVE_COST); format %{ "vtest_$1_partial $dst, $src1, $src2\t# VectorTest partial (sve) - $1" %} ins_encode %{ - // "src2" is not used for sve. 
BasicType bt = Matcher::vector_element_basic_type(this, $src1); Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); - __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), size, + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size, Matcher::vector_length(this, $src1)); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, - as_PRegister($pTmp$$reg), as_FloatRegister($src1$$reg), $3); + __ $3(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($src1$$reg), as_PRegister($src2$$reg)); __ csetw(as_Register($dst$$reg), Assembler::$4); %} ins_pipe(pipe_slow); %}')dnl dnl -VTEST_PARTIAL(alltrue, overflow, 0, EQ) -VTEST_PARTIAL(anytrue, ne, -1, NE) +VTEST_PARTIAL(alltrue, overflow, sve_eors, EQ) +VTEST_PARTIAL(anytrue, ne, sve_ands, NE) // ------------------------------ Vector insert --------------------------------- -instruct insertI_small(vReg dst, vReg src, iRegIorL2I val, immI idx, pRegGov pTmp, rFlagsReg cr) +instruct insertI_small(vReg dst, vReg src, iRegIorL2I val, immI idx, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 && (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE || n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || n->bottom_type()->is_vect()->element_basic_type() == T_INT)); match(Set dst (VectorInsert (Binary src val) idx)); - effect(TEMP_DEF dst, TEMP pTmp, KILL cr); + effect(TEMP_DEF dst, TEMP pgtmp, KILL cr); ins_cost(4 * SVE_COST); - format %{ "sve_index $dst, -16, 1\t# (B/S/I)\n\t" - "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" + format %{ "sve_index $dst, -16, 1\t# (B/H/S)\n\t" + "sve_cmpeq $pgtmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" "sve_orr $dst, $src, $src\n\t" - "sve_cpy $dst, $pTmp, $val\t# insert into vector (B/S/I)" %} + "sve_cpy $dst, $pgtmp, $val\t# insert into vector (B/H/S)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src); Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); __ sve_index(as_FloatRegister($dst$$reg), size, -16, 1); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, ptrue, + __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), size, ptrue, as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16); __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); - __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), as_Register($val$$reg)); + __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pgtmp$$reg), as_Register($val$$reg)); %} ins_pipe(pipe_slow); %} -instruct insertF_small(vReg dst, vReg src, vRegF val, immI idx, pRegGov pTmp, rFlagsReg cr) +instruct insertF_small(vReg dst, vReg src, vRegF val, immI idx, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorInsert (Binary src val) idx)); - effect(TEMP_DEF dst, TEMP pTmp, KILL cr); + effect(TEMP_DEF dst, TEMP pgtmp, KILL cr); ins_cost(4 * SVE_COST); format %{ "sve_index $dst, S, -16, 1\n\t" - "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" + "sve_cmpeq $pgtmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" "sve_orr $dst, $src, $src\n\t" - "sve_cpy $dst, $pTmp, $val\t# insert into vector (F)" %} + "sve_cpy $dst, $pgtmp, $val\t# insert into vector (F)" %} ins_encode %{ __ sve_index(as_FloatRegister($dst$$reg), __ S, -16, 1); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ S, ptrue, 
+ __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), __ S, ptrue, as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16); __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); - __ sve_cpy(as_FloatRegister($dst$$reg), __ S, as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg)); + __ sve_cpy(as_FloatRegister($dst$$reg), __ S, as_PRegister($pgtmp$$reg), as_FloatRegister($val$$reg)); %} ins_pipe(pipe_slow); %} -instruct insertI(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg tmp1, pRegGov pTmp, rFlagsReg cr) +instruct insertI(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg tmp1, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_Vector()->length() > 32 && (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE || n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || n->bottom_type()->is_vect()->element_basic_type() == T_INT)); match(Set dst (VectorInsert (Binary src val) idx)); - effect(TEMP_DEF dst, TEMP tmp1, TEMP pTmp, KILL cr); + effect(TEMP_DEF dst, TEMP tmp1, TEMP pgtmp, KILL cr); ins_cost(5 * SVE_COST); - format %{ "sve_index $tmp1, 0, 1\t# (B/S/I)\n\t" - "sve_dup $dst, $idx\t# (B/S/I)\n\t" - "sve_cmpeq $pTmp, $tmp1, $dst\n\t" + format %{ "sve_index $tmp1, 0, 1\t# (B/H/S)\n\t" + "sve_dup $dst, $idx\t# (B/H/S)\n\t" + "sve_cmpeq $pgtmp, $tmp1, $dst\n\t" "sve_orr $dst, $src, $src\n\t" - "sve_cpy $dst, $pTmp, $val\t# insert into vector (B/S/I)" %} + "sve_cpy $dst, $pgtmp, $val\t# insert into vector (B/H/S)" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src); Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); __ sve_index(as_FloatRegister($tmp1$$reg), size, 0, 1); __ sve_dup(as_FloatRegister($dst$$reg), size, (int)($idx$$constant)); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, ptrue, + __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), size, ptrue, as_FloatRegister($tmp1$$reg), as_FloatRegister($dst$$reg)); __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); - __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), as_Register($val$$reg)); + __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pgtmp$$reg), as_Register($val$$reg)); %} ins_pipe(pipe_slow); %} dnl dnl define(`VECTOR_INSERT_D', ` -instruct insert$1`'(vReg dst, vReg src, $2 val, immI idx, pRegGov pTmp, rFlagsReg cr) +instruct insert$1`'(vReg dst, vReg src, $2 val, immI idx, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($1)); match(Set dst (VectorInsert (Binary src val) idx)); - effect(TEMP_DEF dst, TEMP pTmp, KILL cr); + effect(TEMP_DEF dst, TEMP pgtmp, KILL cr); ins_cost(4 * SVE_COST); format %{ "sve_index $dst, $3, -16, 1\n\t" - "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" + "sve_cmpeq $pgtmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t" "sve_orr $dst, $src, $src\n\t" - "sve_cpy $dst, $pTmp, $val\t# insert into vector ($1)" %} + "sve_cpy $dst, $pgtmp, $val\t# insert into vector ($1)" %} ins_encode %{ __ sve_index(as_FloatRegister($dst$$reg), __ $3, -16, 1); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ $3, ptrue, + __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), __ $3, ptrue, as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16); __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); - __ sve_cpy(as_FloatRegister($dst$$reg), __ $3, 
as_PRegister($pTmp$$reg), as_$4($val$$reg)); + __ sve_cpy(as_FloatRegister($dst$$reg), __ $3, as_PRegister($pgtmp$$reg), as_$4($val$$reg)); %} ins_pipe(pipe_slow); %}')dnl @@ -2214,96 +2646,54 @@ dnl $1 $2 $3 $4 VECTOR_INSERT_D(L, iRegL, D, Register) VECTOR_INSERT_D(D, vRegD, D, FloatRegister) -instruct insertF(vReg dst, vReg src, vRegF val, immI idx, vReg tmp1, pRegGov pTmp, rFlagsReg cr) +instruct insertF(vReg dst, vReg src, vRegF val, immI idx, vReg tmp1, pRegGov pgtmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_Vector()->length() > 32 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); match(Set dst (VectorInsert (Binary src val) idx)); - effect(TEMP_DEF dst, TEMP tmp1, TEMP pTmp, KILL cr); + effect(TEMP_DEF dst, TEMP tmp1, TEMP pgtmp, KILL cr); ins_cost(5 * SVE_COST); format %{ "sve_index $tmp1, S, 0, 1\n\t" "sve_dup $dst, S, $idx\n\t" - "sve_cmpeq $pTmp, $tmp1, $dst\n\t" + "sve_cmpeq $pgtmp, $tmp1, $dst\n\t" "sve_orr $dst, $src, $src\n\t" - "sve_cpy $dst, $pTmp, $val\t# insert into vector (F)" %} + "sve_cpy $dst, $pgtmp, $val\t# insert into vector (F)" %} ins_encode %{ __ sve_index(as_FloatRegister($tmp1$$reg), __ S, 0, 1); __ sve_dup(as_FloatRegister($dst$$reg), __ S, (int)($idx$$constant)); - __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ S, ptrue, + __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), __ S, ptrue, as_FloatRegister($tmp1$$reg), as_FloatRegister($dst$$reg)); __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); __ sve_cpy(as_FloatRegister($dst$$reg), __ S, - as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg)); + as_PRegister($pgtmp$$reg), as_FloatRegister($val$$reg)); %} ins_pipe(pipe_slow); %} // ------------------------------ Vector shuffle ------------------------------- -instruct loadshuffleB(vReg dst, vReg src) -%{ - predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); +instruct loadshuffle(vReg dst, vReg src) %{ + predicate(UseSVE > 0); match(Set dst (VectorLoadShuffle src)); ins_cost(SVE_COST); - format %{ "sve_orr $dst, $src, $src\t# vector load shuffle (B)" %} + format %{ "sve_loadshuffle $dst, $src\t# vector load shuffle (B/H/S/D)" %} ins_encode %{ - if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) { - __ sve_orr(as_FloatRegister($dst$$reg), - as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this); + if (bt == T_BYTE) { + if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) { + __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } + } else { + __ sve_vector_extend(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt), + as_FloatRegister($src$$reg), __ B); } %} ins_pipe(pipe_slow); %} -instruct loadshuffleS(vReg dst, vReg src) -%{ - predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); - match(Set dst (VectorLoadShuffle src)); - ins_cost(SVE_COST); - format %{ "sve_uunpklo $dst, $src\t# vector load shuffle (B to H)" %} - ins_encode %{ - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); - %} - ins_pipe(pipe_slow); -%} - -instruct loadshuffleI(vReg dst, vReg src) -%{ - predicate(UseSVE > 0 && - (n->bottom_type()->is_vect()->element_basic_type() == T_INT || - n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); - match(Set dst (VectorLoadShuffle src)); - ins_cost(2 * SVE_COST); - format %{ "sve_uunpklo $dst, H, $src\n\t" - "sve_uunpklo 
$dst, S, $dst\t# vector load shuffle (B to S)" %} - ins_encode %{ - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg)); - %} - ins_pipe(pipe_slow); -%} - -instruct loadshuffleL(vReg dst, vReg src) -%{ - predicate(UseSVE > 0 && - (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || - n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); - match(Set dst (VectorLoadShuffle src)); - ins_cost(3 * SVE_COST); - format %{ "sve_uunpklo $dst, H, $src\n\t" - "sve_uunpklo $dst, S, $dst\n\t" - "sve_uunpklo $dst, D, $dst\t# vector load shuffle (B to D)" %} - ins_encode %{ - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg)); - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg)); - __ sve_uunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg)); - %} - ins_pipe(pipe_slow); -%} - // ------------------------------ Vector rearrange ------------------------------- instruct rearrange(vReg dst, vReg src, vReg shuffle) @@ -2330,7 +2720,7 @@ instruct gatherI(vReg dst, indirect mem, vReg idx) %{ n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); match(Set dst (LoadVectorGather mem idx)); ins_cost(SVE_COST); - format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (I/F)" %} + format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (S)" %} ins_encode %{ __ sve_ld1w_gather(as_FloatRegister($dst$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($idx$$reg)); @@ -2345,52 +2735,123 @@ instruct gatherL(vReg dst, indirect mem, vReg idx) %{ n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); match(Set dst (LoadVectorGather mem idx)); ins_cost(2 * SVE_COST); - format %{ "sve_uunpklo $idx, $idx\n\t" - "load_vector_gather $dst, $mem, $idx\t# vector load gather (L/D)" %} + format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (D)" %} ins_encode %{ __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); - __ sve_ld1d_gather(as_FloatRegister($dst$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($idx$$reg)); + __ sve_ld1d_gather(as_FloatRegister($dst$$reg), ptrue, as_Register($mem$$base), + as_FloatRegister($idx$$reg)); %} ins_pipe(pipe_slow); %} // ------------------------------ Vector Load Gather Partial------------------------------- -instruct gatherI_partial(vReg dst, indirect mem, vReg idx, pRegGov pTmp, rFlagsReg cr) %{ +instruct gatherI_partial(vReg dst, indirect mem, vReg idx, pRegGov ptmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_LoadVectorGather()->memory_size() < MaxVectorSize && (n->bottom_type()->is_vect()->element_basic_type() == T_INT || n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); match(Set dst (LoadVectorGather mem idx)); - effect(TEMP pTmp, KILL cr); + effect(TEMP ptmp, KILL cr); ins_cost(2 * SVE_COST + INSN_COST); - format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" - "load_vector_gather $dst, $pTmp, $mem, $idx\t# vector load gather partial (I/F)" %} + format %{ "load_vector_gather $dst, $ptmp, $mem, $idx\t# vector load gather partial (S)" %} ins_encode %{ - __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ S, - Matcher::vector_length(this)); - __ sve_ld1w_gather(as_FloatRegister($dst$$reg), as_PRegister($pTmp$$reg), + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, Matcher::vector_length(this)); + __ sve_ld1w_gather(as_FloatRegister($dst$$reg), as_PRegister($ptmp$$reg), 
as_Register($mem$$base), as_FloatRegister($idx$$reg)); %} ins_pipe(pipe_slow); %} -instruct gatherL_partial(vReg dst, indirect mem, vReg idx, pRegGov pTmp, rFlagsReg cr) %{ +instruct gatherL_partial(vReg dst, indirect mem, vReg idx, pRegGov ptmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_LoadVectorGather()->memory_size() < MaxVectorSize && (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); match(Set dst (LoadVectorGather mem idx)); - effect(TEMP pTmp, KILL cr); + effect(TEMP ptmp, KILL cr); ins_cost(3 * SVE_COST + INSN_COST); - format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" - "sve_uunpklo $idx, $idx\n\t" - "load_vector_gather $dst, $pTmp, $mem, $idx\t# vector load gather partial (L/D)" %} + format %{ "load_vector_gather $dst, $ptmp, $mem, $idx\t# vector load gather partial (D)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this)); + __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); + __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($ptmp$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector Load Gather Predicated ------------------------------- + +instruct gatherI_masked(vReg dst, indirect mem, vReg idx, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->as_LoadVector()->memory_size() == MaxVectorSize && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (LoadVectorGatherMasked mem (Binary idx pg))); + ins_cost(SVE_COST); + format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated (S)" %} + ins_encode %{ + __ sve_ld1w_gather(as_FloatRegister($dst$$reg), as_PRegister($pg$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct gatherL_masked(vReg dst, indirect mem, vReg idx, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->as_LoadVector()->memory_size() == MaxVectorSize && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set dst (LoadVectorGatherMasked mem (Binary idx pg))); + ins_cost(2 * SVE_COST); + format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated (D)" %} + ins_encode %{ + __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); + __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($pg$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector Load Gather Predicated Partial ------------------------------- + +instruct gatherI_masked_partial(vReg dst, indirect mem, vReg idx, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_LoadVector()->memory_size() < MaxVectorSize && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (LoadVectorGatherMasked mem (Binary idx pg))); + effect(TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated partial (S)" %} ins_encode %{ - __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ D, + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, Matcher::vector_length(this)); + __ 
sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_ld1w_gather(as_FloatRegister($dst$$reg), as_PRegister($ptmp$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct gatherL_masked_partial(vReg dst, indirect mem, vReg idx, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_LoadVector()->memory_size() < MaxVectorSize && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set dst (LoadVectorGatherMasked mem (Binary idx pg))); + effect(TEMP ptmp, KILL cr); + ins_cost(4 * SVE_COST); + format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated partial (D)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); - __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($pTmp$$reg), + __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($ptmp$$reg), as_Register($mem$$base), as_FloatRegister($idx$$reg)); %} ins_pipe(pipe_slow); @@ -2405,7 +2866,7 @@ instruct scatterI(indirect mem, vReg src, vReg idx) %{ n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); match(Set mem (StoreVectorScatter mem (Binary src idx))); ins_cost(SVE_COST); - format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (I/F)" %} + format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (S)" %} ins_encode %{ __ sve_st1w_scatter(as_FloatRegister($src$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($idx$$reg)); @@ -2420,59 +2881,129 @@ instruct scatterL(indirect mem, vReg src, vReg idx) %{ n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); match(Set mem (StoreVectorScatter mem (Binary src idx))); ins_cost(2 * SVE_COST); - format %{ "sve_uunpklo $idx, $idx\n\t" - "store_vector_scatter $mem, $idx, $src\t# vector store scatter (L/D)" %} + format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (D)" %} ins_encode %{ - __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, - as_FloatRegister($idx$$reg)); + __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); __ sve_st1d_scatter(as_FloatRegister($src$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($idx$$reg)); %} ins_pipe(pipe_slow); %} -// ------------------------------ Vector Store Scatter Partial------------------------------- +// ------------------------------ Vector Store Scatter Partial ------------------------------- -instruct scatterI_partial(indirect mem, vReg src, vReg idx, pRegGov pTmp, rFlagsReg cr) %{ +instruct scatterI_partial(indirect mem, vReg src, vReg idx, pRegGov ptmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_StoreVectorScatter()->memory_size() < MaxVectorSize && (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT || n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); match(Set mem (StoreVectorScatter mem (Binary src idx))); - effect(TEMP pTmp, KILL cr); + effect(TEMP ptmp, KILL cr); ins_cost(2 * SVE_COST + INSN_COST); - format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" - "store_vector_scatter $mem, $pTmp, $idx, $src\t# vector store scatter partial (I/F)" %} 
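The predicated-partial gather rules above build their governing predicate in two steps: whilelo yields a predicate covering the in-range lanes, and sve_and intersects it with the user-supplied mask pg. A scalar model of the resulting semantics (a sketch with invented names, relying on the fact that SVE gather loads zero inactive lanes):

```cpp
#include <cstdint>

// Scalar model of a predicated-partial gather load: a lane is active only
// if it is below vector_length (the whilelo predicate) AND set in pg.
// Inactive lanes are zeroed, as SVE LD1W gather does.
void gather_w_masked_partial(int32_t* dst, const int32_t* base,
                             const uint32_t* idx, const bool* pg,
                             int vector_length, int max_lanes) {
  for (int i = 0; i < max_lanes; i++) {
    bool active = (i < vector_length) && pg[i];  // whilelo & pg
    dst[i] = active ? base[idx[i]] : 0;
  }
}
```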
+ format %{ "store_vector_scatter $mem, $ptmp, $idx, $src\t# vector store scatter partial (S)" %} ins_encode %{ - __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ S, + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, Matcher::vector_length(this, $src)); - __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($pTmp$$reg), + __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($ptmp$$reg), as_Register($mem$$base), as_FloatRegister($idx$$reg)); %} ins_pipe(pipe_slow); %} -instruct scatterL_partial(indirect mem, vReg src, vReg idx, pRegGov pTmp, rFlagsReg cr) %{ +instruct scatterL_partial(indirect mem, vReg src, vReg idx, pRegGov ptmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && n->as_StoreVectorScatter()->memory_size() < MaxVectorSize && (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG || n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); match(Set mem (StoreVectorScatter mem (Binary src idx))); - effect(TEMP pTmp, KILL cr); + effect(TEMP ptmp, KILL cr); ins_cost(3 * SVE_COST + INSN_COST); - format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t" - "sve_uunpklo $idx, $idx\n\t" - "store_vector_scatter $mem, $pTmp, $idx, $src\t# vector store scatter partial (L/D)" %} + format %{ "store_vector_scatter $mem, $ptmp, $idx, $src\t# vector store scatter partial (D)" %} ins_encode %{ - __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ D, + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src)); __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); - __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($pTmp$$reg), + __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($ptmp$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector Store Scatter Predicated ------------------------------- + +instruct scatterI_masked(indirect mem, vReg src, vReg idx, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->as_StoreVector()->memory_size() == MaxVectorSize && + (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg)))); + ins_cost(SVE_COST); + format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicate (S)" %} + ins_encode %{ + __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($pg$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct scatterL_masked(indirect mem, vReg src, vReg idx, pRegGov pg) %{ + predicate(UseSVE > 0 && + n->as_StoreVector()->memory_size() == MaxVectorSize && + (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg)))); + ins_cost(2 * SVE_COST); + format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated (D)" %} + ins_encode %{ + __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); + __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($pg$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector Store Scatter Predicated Partial ------------------------------- + +instruct 
scatterI_masked_partial(indirect mem, vReg src, vReg idx, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_StoreVector()->memory_size() < MaxVectorSize && + (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg)))); + effect(TEMP ptmp, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated partial (S)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, + Matcher::vector_length(this, $src)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($ptmp$$reg), as_Register($mem$$base), as_FloatRegister($idx$$reg)); %} ins_pipe(pipe_slow); %} +instruct scatterL_masked_partial(indirect mem, vReg src, vReg idx, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->as_StoreVector()->memory_size() < MaxVectorSize && + (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg)))); + effect(TEMP ptmp, KILL cr); + ins_cost(4 * SVE_COST); + format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated partial (D)" %} + ins_encode %{ + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, + Matcher::vector_length(this, $src)); + __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), + as_PRegister($pg$$reg), as_PRegister($pg$$reg)); + __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg)); + __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($ptmp$$reg), + as_Register($mem$$base), as_FloatRegister($idx$$reg)); + %} + ins_pipe(pipe_slow); +%} // ------------------------------ Vector Load Const ------------------------------- @@ -2513,100 +3044,98 @@ dnl $1 $2 $3 STRING_INDEXOF_CHAR(L, Latin1, true) STRING_INDEXOF_CHAR(U, UTF16, false) -dnl -dnl VMASK_REDUCTION($1, $2, $3 ) -dnl VMASK_REDUCTION(suffix, op_name, cost) -define(`VMASK_REDUCTION', ` -instruct vmask_$1(iRegINoSp dst, vReg src, pReg ptmp, rFlagsReg cr) %{ +// ---------------------------- Vector mask reductions --------------------------- +instruct vmask_truecount(iRegINoSp dst, pReg src) %{ predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst ($2 src)); - effect(TEMP ptmp, KILL cr); - ins_cost($3 * SVE_COST); - format %{ "vmask_$1 $dst, $src\t# vector mask $1 (sve)" %} + match(Set dst (VectorMaskTrueCount src)); + ins_cost(SVE_COST); + format %{ "vmask_truecount $dst, $src\t# vector mask truecount (sve)" %} ins_encode %{ - __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, - as_FloatRegister($src$$reg), ptrue, as_PRegister($ptmp$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_cntp($dst$$Register, size, ptrue, as_PRegister($src$$reg)); %} ins_pipe(pipe_slow); -%}')dnl -dnl -// ---------------------------- Vector mask reductions --------------------------- -VMASK_REDUCTION(truecount, VectorMaskTrueCount, 2) -VMASK_REDUCTION(firsttrue, VectorMaskFirstTrue, 3) -VMASK_REDUCTION(lasttrue, 
VectorMaskLastTrue, 4) -dnl -dnl VMASK_REDUCTION_PARTIAL($1, $2, $3 ) -dnl VMASK_REDUCTION_PARTIAL(suffix, op_name, cost) -define(`VMASK_REDUCTION_PARTIAL', ` -instruct vmask_$1_partial(iRegINoSp dst, vReg src, pRegGov ifelse($1, `firsttrue', `pgtmp, pReg ptmp', `ptmp'), rFlagsReg cr) %{ +%} + +instruct vmask_firsttrue(iRegINoSp dst, pReg src, pReg ptmp) %{ predicate(UseSVE > 0 && - n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst ($2 src)); - effect(TEMP ifelse($1, `firsttrue', `pgtmp, TEMP ptmp', `ptmp'), KILL cr); - ins_cost($3 * SVE_COST); - format %{ "vmask_$1 $dst, $src\t# vector mask $1 partial (sve)" %} + n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (VectorMaskFirstTrue src)); + effect(TEMP ptmp); + ins_cost(2 * SVE_COST); + format %{ "vmask_firsttrue $dst, $src\t# vector mask firsttrue (sve)" %} ins_encode %{ - __ sve_whilelo_zr_imm(as_PRegister(ifelse($1, `firsttrue', `$pgtmp', `$ptmp')$$reg), __ B, - Matcher::vector_length(this, $src)); - __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, as_FloatRegister($src$$reg), - as_PRegister(ifelse($1, `firsttrue', `$pgtmp', `$ptmp')$$reg), as_PRegister($ptmp$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_brkb(as_PRegister($ptmp$$reg), ptrue, as_PRegister($src$$reg), false); + __ sve_cntp($dst$$Register, size, ptrue, as_PRegister($ptmp$$reg)); %} ins_pipe(pipe_slow); -%}')dnl -dnl -VMASK_REDUCTION_PARTIAL(truecount, VectorMaskTrueCount, 3) -VMASK_REDUCTION_PARTIAL(firsttrue, VectorMaskFirstTrue, 4) -VMASK_REDUCTION_PARTIAL(lasttrue, VectorMaskLastTrue, 5) +%} -dnl -dnl VSTOREMASK_REDUCTION($1, $2, $3 ) -dnl VSTOREMASK_REDUCTION(suffix, op_name, cost) -define(`VSTOREMASK_REDUCTION', ` -instruct vstoremask_$1(iRegINoSp dst, vReg src, immI esize, pReg ptmp, rFlagsReg cr) %{ +instruct vmask_lasttrue(iRegINoSp dst, pReg src, pReg ptmp) %{ + predicate(UseSVE > 0 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); + match(Set dst (VectorMaskLastTrue src)); + effect(TEMP ptmp); + ins_cost(3 * SVE_COST); + format %{ "vmask_lasttrue $dst, $src\t# vector mask lasttrue (sve)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src); + __ sve_vmask_lasttrue($dst$$Register, bt, as_PRegister($src$$reg), as_PRegister($ptmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_truecount_partial(iRegINoSp dst, pReg src, pReg ptmp, rFlagsReg cr) %{ predicate(UseSVE > 0 && - n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize); - match(Set dst ($2 (VectorStoreMask src esize))); + n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (VectorMaskTrueCount src)); effect(TEMP ptmp, KILL cr); - ins_cost($3 * SVE_COST); - format %{ "vstoremask_$1 $dst, $src\t# vector mask $1 (sve)" %} + ins_cost(2 * SVE_COST); + format %{ "vmask_truecount_partial $dst, $src\t# vector mask truecount partial (sve)" %} ins_encode %{ - unsigned size = $esize$$constant; - assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size"); - Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size); - __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg), - ptrue, as_PRegister($ptmp$$reg), Matcher::vector_length(this, $src)); + BasicType bt = Matcher::vector_element_basic_type(this, $src); + 
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size, Matcher::vector_length(this, $src)); + __ sve_cntp($dst$$Register, size, as_PRegister($ptmp$$reg), as_PRegister($src$$reg)); %} ins_pipe(pipe_slow); -%}')dnl -dnl -// ----------------- Vector mask reductions combined with VectorMaskStore --------------- -VSTOREMASK_REDUCTION(truecount, VectorMaskTrueCount, 2) -VSTOREMASK_REDUCTION(firsttrue, VectorMaskFirstTrue, 3) -VSTOREMASK_REDUCTION(lasttrue, VectorMaskLastTrue, 4) -dnl -dnl VSTOREMASK_REDUCTION_PARTIAL($1, $2, $3 ) -dnl VSTOREMASK_REDUCTION_PARTIAL(suffix, op_name, cost) -define(`VSTOREMASK_REDUCTION_PARTIAL', ` -instruct vstoremask_$1_partial(iRegINoSp dst, vReg src, immI esize, pRegGov ifelse($1, `firsttrue', `pgtmp, pReg ptmp', `ptmp'), rFlagsReg cr) %{ +%} + +instruct vmask_firsttrue_partial(iRegINoSp dst, pReg src, pReg ptmp1, pReg ptmp2, rFlagsReg cr) %{ predicate(UseSVE > 0 && - n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); - match(Set dst ($2 (VectorStoreMask src esize))); - effect(TEMP ifelse($1, `firsttrue', `pgtmp, TEMP ptmp', `ptmp'), KILL cr); - ins_cost($3 * SVE_COST); - format %{ "vstoremask_$1 $dst, $src\t# vector mask $1 partial (sve)" %} + n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (VectorMaskFirstTrue src)); + effect(TEMP ptmp1, TEMP ptmp2, KILL cr); + ins_cost(3 * SVE_COST); + format %{ "vmask_firsttrue_partial $dst, $src\t# vector mask firsttrue partial (sve)" %} ins_encode %{ - unsigned size = $esize$$constant; - assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size"); - Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size); - __ sve_whilelo_zr_imm(as_PRegister(ifelse($1, `firsttrue', `$pgtmp', `$ptmp')$$reg), variant, + BasicType bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp1$$reg), size, Matcher::vector_length(this, $src)); - __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg), - as_PRegister(ifelse($1, `firsttrue', `$pgtmp', `$ptmp')$$reg), as_PRegister($ptmp$$reg), MaxVectorSize / size); + __ sve_brkb(as_PRegister($ptmp2$$reg), as_PRegister($ptmp1$$reg), as_PRegister($src$$reg), false); + __ sve_cntp($dst$$Register, size, as_PRegister($ptmp1$$reg), as_PRegister($ptmp2$$reg)); %} ins_pipe(pipe_slow); -%}')dnl -dnl -VSTOREMASK_REDUCTION_PARTIAL(truecount, VectorMaskTrueCount, 3) -VSTOREMASK_REDUCTION_PARTIAL(firsttrue, VectorMaskFirstTrue, 4) -VSTOREMASK_REDUCTION_PARTIAL(lasttrue, VectorMaskLastTrue, 5) +%} + +instruct vmask_lasttrue_partial(iRegINoSp dst, pReg src, pReg ptmp, rFlagsReg cr) %{ + predicate(UseSVE > 0 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize); + match(Set dst (VectorMaskLastTrue src)); + effect(TEMP ptmp, KILL cr); + ins_cost(5 * SVE_COST); + format %{ "vmask_lasttrue_partial $dst, $src\t# vector mask lasttrue partial (sve)" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); + __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size, Matcher::vector_length(this, $src)); + __ sve_and(as_PRegister($ptmp$$reg), ptrue, as_PRegister($ptmp$$reg), as_PRegister($src$$reg)); + __ sve_vmask_lasttrue($dst$$Register, bt, as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg)); + %} + 
ins_pipe(pipe_slow); +%}dnl diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp index 4e883838a6610ba34d7f737526f7e39b2aead592..943ca002c7ad998f1d7d1f406a832cd194a9dd5c 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp @@ -82,6 +82,11 @@ Assembler::SIMD_RegVariant Assembler::elemType_to_regVariant(BasicType bt) { return elemBytes_to_regVariant(type2aelembytes(bt)); } +unsigned Assembler::regVariant_to_elemBits(Assembler::SIMD_RegVariant T){ + guarantee(T != Q, "Invalid register variant"); + return 1 << (T + 3); +} + void Assembler::emit_data64(jlong data, relocInfo::relocType rtype, int format) { @@ -339,21 +344,21 @@ void Assembler::wrap_label(Label &L, prfop op, prefetch_insn insn) { } bool Assembler::operand_valid_for_add_sub_immediate(int64_t imm) { - bool shift = false; - uint64_t uimm = (uint64_t)uabs((jlong)imm); - if (uimm < (1 << 12)) - return true; - if (uimm < (1 << 24) - && ((uimm >> 12) << 12 == uimm)) { - return true; - } - return false; + return operand_valid_for_immediate_bits(imm, 12); +} + +bool Assembler::operand_valid_for_sve_add_sub_immediate(int64_t imm) { + return operand_valid_for_immediate_bits(imm, 8); } bool Assembler::operand_valid_for_logical_immediate(bool is32, uint64_t imm) { return encode_logical_immediate(is32, imm) != 0xffffffff; } +bool Assembler::operand_valid_for_sve_logical_immediate(unsigned elembits, uint64_t imm) { + return encode_sve_logical_immediate(elembits, imm) != 0xffffffff; +} + static uint64_t doubleTo64Bits(jdouble d) { union { jdouble double_value; @@ -383,6 +388,17 @@ int AbstractAssembler::code_fill_byte() { // n.b. this is implemented in subclass MacroAssembler void Assembler::bang_stack_with_offset(int offset) { Unimplemented(); } +bool asm_util::operand_valid_for_immediate_bits(int64_t imm, unsigned nbits) { + guarantee(nbits == 8 || nbits == 12, "invalid nbits value"); + uint64_t uimm = (uint64_t)uabs((jlong)imm); + if (uimm < (UCONST64(1) << nbits)) + return true; + if (uimm < (UCONST64(1) << (2 * nbits)) + && ((uimm >> nbits) << nbits == uimm)) { + return true; + } + return false; +} // and now the routines called by the assembler which encapsulate the // above encode and decode functions @@ -403,6 +419,25 @@ asm_util::encode_logical_immediate(bool is32, uint64_t imm) return encoding_for_logical_immediate(imm); } +uint32_t +asm_util::encode_sve_logical_immediate(unsigned elembits, uint64_t imm) { + guarantee(elembits == 8 || elembits == 16 || + elembits == 32 || elembits == 64, "unsupported element size"); + uint64_t upper = UCONST64(-1) << (elembits/2) << (elembits/2); + /* Allow all zeros or all ones in top bits, so that + * constant expressions like ~1 are permitted. */ + if ((imm & ~upper) != imm && (imm | upper) != imm) + return 0xffffffff; + + // Replicate the immediate in different element sizes to 64 bits. 
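As a reading aid for the replication step just below, here is the same logic as a standalone sketch (a hypothetical helper, not part of the patch):

```cpp
#include <cstdint>

// Broadcast an elembits-wide pattern across all 64 bits so that the shared
// encoding_for_logical_immediate() routine can be reused. elembits is
// assumed to be 8, 16, 32 or 64, as the guarantee above enforces.
uint64_t replicate_to_64bits(uint64_t imm, unsigned elembits) {
  // Two half-width shifts avoid undefined behaviour when elembits == 64,
  // mirroring the 'upper' computation in the patch.
  uint64_t upper = ~uint64_t(0) << (elembits / 2) << (elembits / 2);
  imm &= ~upper;                  // keep only the low element
  for (unsigned i = elembits; i < 64; i *= 2) {
    imm |= (imm << i);            // double the replicated width each pass
  }
  return imm;
}
```

For example, replicate_to_64bits(0x00ff, 16) yields 0x00ff00ff00ff00ff.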
+ imm &= ~upper; + for (unsigned i = elembits; i < 64; i *= 2) { + imm |= (imm << i); + } + + return encoding_for_logical_immediate(imm); +} + unsigned Assembler::pack(double value) { float val = (float)value; unsigned result = encoding_for_fp_immediate(val); diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp index c5f65f2bc5a92ac0a705a35840dc095324530f37..9eee231ec0fea3046f9c600e90d8673bc6b99c9a 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp @@ -159,6 +159,8 @@ REGISTER_DECLARATION(PRegister, ptrue, p7); namespace asm_util { uint32_t encode_logical_immediate(bool is32, uint64_t imm); + uint32_t encode_sve_logical_immediate(unsigned elembits, uint64_t imm); + bool operand_valid_for_immediate_bits(int64_t imm, unsigned nbits); }; using namespace asm_util; @@ -583,7 +585,7 @@ class Address { static bool offset_ok_for_immed(int64_t offset, uint shift); - static bool offset_ok_for_sve_immed(long offset, int shift, int vl /* sve vector length */) { + static bool offset_ok_for_sve_immed(int64_t offset, int shift, int vl /* sve vector length */) { if (offset % vl == 0) { // Convert address offset into sve imm offset (MUL VL). int sve_offset = offset / vl; @@ -1516,6 +1518,8 @@ public: static SIMD_Arrangement esize2arrangement(unsigned esize, bool isQ); static SIMD_RegVariant elemType_to_regVariant(BasicType bt); static SIMD_RegVariant elemBytes_to_regVariant(unsigned esize); + // Return the corresponding bits for different SIMD_RegVariant value. + static unsigned regVariant_to_elemBits(SIMD_RegVariant T); enum shift_kind { LSL, LSR, ASR, ROR }; @@ -2953,6 +2957,32 @@ public: INSN(sve_sub, 0b001); #undef INSN +// SVE integer add/subtract immediate (unpredicated) +#define INSN(NAME, op) \ + void NAME(FloatRegister Zd, SIMD_RegVariant T, unsigned imm8) { \ + starti; \ + /* The immediate is an unsigned value in the range 0 to 255, and \ + * for element width of 16 bits or higher it may also be a \ + * positive multiple of 256 in the range 256 to 65280. 
\ + */ \ + assert(T != Q, "invalid size"); \ + int sh = 0; \ + if (imm8 <= 0xff) { \ + sh = 0; \ + } else if (T != B && imm8 <= 0xff00 && (imm8 & 0xff) == 0) { \ + sh = 1; \ + imm8 = (imm8 >> 8); \ + } else { \ + guarantee(false, "invalid immediate"); \ + } \ + f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000, 21, 17); \ + f(op, 16, 14), f(sh, 13), f(imm8, 12, 5), rf(Zd, 0); \ + } + + INSN(sve_add, 0b011); + INSN(sve_sub, 0b111); +#undef INSN + // SVE floating-point arithmetic - unpredicated #define INSN(NAME, opcode) \ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \ @@ -2976,6 +3006,32 @@ private: pgrf(Pg, 10), rf(Zn_or_Vn, 5), rf(Zd_or_Vd, 0); } + void sve_shift_imm_encoding(SIMD_RegVariant T, int shift, bool isSHR, + int& tszh, int& tszl_imm) { + /* The encodings for the tszh:tszl:imm3 fields + * for shift right is calculated as: + * 0001 xxx B, shift = 16 - UInt(tszh:tszl:imm3) + * 001x xxx H, shift = 32 - UInt(tszh:tszl:imm3) + * 01xx xxx S, shift = 64 - UInt(tszh:tszl:imm3) + * 1xxx xxx D, shift = 128 - UInt(tszh:tszl:imm3) + * for shift left is calculated as: + * 0001 xxx B, shift = UInt(tszh:tszl:imm3) - 8 + * 001x xxx H, shift = UInt(tszh:tszl:imm3) - 16 + * 01xx xxx S, shift = UInt(tszh:tszl:imm3) - 32 + * 1xxx xxx D, shift = UInt(tszh:tszl:imm3) - 64 + */ + assert(T != Q, "Invalid register variant"); + if (isSHR) { + assert(((1 << (T + 3)) >= shift) && (shift > 0) , "Invalid shift value"); + } else { + assert(((1 << (T + 3)) > shift) && (shift >= 0) , "Invalid shift value"); + } + int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0))); + int encodedShift = isSHR ? cVal - shift : cVal + shift; + tszh = encodedShift >> 5; + tszl_imm = encodedShift & 0x1f; + } + public: // SVE integer arithmetic - predicate @@ -2987,16 +3043,19 @@ public: INSN(sve_abs, 0b00000100, 0b010110101); // vector abs, unary INSN(sve_add, 0b00000100, 0b000000000); // vector add + INSN(sve_and, 0b00000100, 0b011010000); // vector and INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar INSN(sve_asr, 0b00000100, 0b010000100); // vector arithmetic shift right - INSN(sve_cnt, 0b00000100, 0b011010101) // count non-zero bits + INSN(sve_cnt, 0b00000100, 0b011010101); // count non-zero bits INSN(sve_cpy, 0b00000101, 0b100000100); // copy scalar to each active vector element + INSN(sve_eor, 0b00000100, 0b011001000); // vector eor INSN(sve_eorv, 0b00000100, 0b011001001); // bitwise xor reduction to scalar INSN(sve_lsl, 0b00000100, 0b010011100); // vector logical shift left INSN(sve_lsr, 0b00000100, 0b010001100); // vector logical shift right INSN(sve_mul, 0b00000100, 0b010000000); // vector mul INSN(sve_neg, 0b00000100, 0b010111101); // vector neg, unary INSN(sve_not, 0b00000100, 0b011110101); // bitwise invert vector, unary + INSN(sve_orr, 0b00000100, 0b011000000); // vector or INSN(sve_orv, 0b00000100, 0b011000001); // bitwise or reduction to scalar INSN(sve_smax, 0b00000100, 0b001000000); // signed maximum vectors INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar @@ -3039,10 +3098,11 @@ public: f(op2, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zda, 0); \ } - INSN(sve_fmla, 0b01100101, 1, 0b000); // floating-point fused multiply-add: Zda = Zda + Zn * Zm + INSN(sve_fmla, 0b01100101, 1, 0b000); // floating-point fused multiply-add, writing addend: Zda = Zda + Zn * Zm INSN(sve_fmls, 0b01100101, 1, 0b001); // floating-point fused multiply-subtract: Zda = Zda + -Zn * Zm INSN(sve_fnmla, 0b01100101, 1, 0b010); // floating-point negated fused 
multiply-add: Zda = -Zda + -Zn * Zm INSN(sve_fnmls, 0b01100101, 1, 0b011); // floating-point negated fused multiply-subtract: Zda = -Zda + Zn * Zm + INSN(sve_fmad, 0b01100101, 1, 0b100); // floating-point fused multiply-add, writing multiplicand: Zda = Zm + Zda * Zn INSN(sve_mla, 0b00000100, 0, 0b010); // multiply-add: Zda = Zda + Zn*Zm INSN(sve_mls, 0b00000100, 0, 0b011); // multiply-subtract: Zda = Zda + -Zn*Zm #undef INSN @@ -3060,32 +3120,26 @@ public: INSN(sve_bic, 0b11); #undef INSN +// SVE bitwise logical with immediate (unpredicated) +#define INSN(NAME, opc) \ + void NAME(FloatRegister Zd, SIMD_RegVariant T, uint64_t imm) { \ + starti; \ + unsigned elembits = regVariant_to_elemBits(T); \ + uint32_t val = encode_sve_logical_immediate(elembits, imm); \ + f(0b00000101, 31, 24), f(opc, 23, 22), f(0b0000, 21, 18); \ + f(val, 17, 5), rf(Zd, 0); \ + } + INSN(sve_and, 0b10); + INSN(sve_eor, 0b01); + INSN(sve_orr, 0b00); +#undef INSN + // SVE shift immediate - unpredicated #define INSN(NAME, opc, isSHR) \ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, int shift) { \ starti; \ - /* The encodings for the tszh:tszl:imm3 fields (bits 23:22 20:19 18:16) \ - * for shift right is calculated as: \ - * 0001 xxx B, shift = 16 - UInt(tszh:tszl:imm3) \ - * 001x xxx H, shift = 32 - UInt(tszh:tszl:imm3) \ - * 01xx xxx S, shift = 64 - UInt(tszh:tszl:imm3) \ - * 1xxx xxx D, shift = 128 - UInt(tszh:tszl:imm3) \ - * for shift left is calculated as: \ - * 0001 xxx B, shift = UInt(tszh:tszl:imm3) - 8 \ - * 001x xxx H, shift = UInt(tszh:tszl:imm3) - 16 \ - * 01xx xxx S, shift = UInt(tszh:tszl:imm3) - 32 \ - * 1xxx xxx D, shift = UInt(tszh:tszl:imm3) - 64 \ - */ \ - assert(T != Q, "Invalid register variant"); \ - if (isSHR) { \ - assert(((1 << (T + 3)) >= shift) && (shift > 0) , "Invalid shift value"); \ - } else { \ - assert(((1 << (T + 3)) > shift) && (shift >= 0) , "Invalid shift value"); \ - } \ - int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0))); \ - int encodedShift = isSHR ? 
cVal - shift : cVal + shift; \ - int tszh = encodedShift >> 5; \ - int tszl_imm = encodedShift & 0x1f; \ + int tszh, tszl_imm; \ + sve_shift_imm_encoding(T, shift, isSHR, tszh, tszl_imm); \ f(0b00000100, 31, 24); \ f(tszh, 23, 22), f(1,21), f(tszl_imm, 20, 16); \ f(0b100, 15, 13), f(opc, 12, 10), rf(Zn, 5), rf(Zd, 0); \ @@ -3096,6 +3150,21 @@ public: INSN(sve_lsr, 0b101, /* isSHR = */ true); #undef INSN +// SVE bitwise shift by immediate (predicated) +#define INSN(NAME, opc, isSHR) \ + void NAME(FloatRegister Zdn, SIMD_RegVariant T, PRegister Pg, int shift) { \ + starti; \ + int tszh, tszl_imm; \ + sve_shift_imm_encoding(T, shift, isSHR, tszh, tszl_imm); \ + f(0b00000100, 31, 24), f(tszh, 23, 22), f(0b00, 21, 20), f(opc, 19, 16); \ + f(0b100, 15, 13), pgrf(Pg, 10), f(tszl_imm, 9, 5), rf(Zdn, 0); \ + } + + INSN(sve_asr, 0b0000, /* isSHR = */ true); + INSN(sve_lsl, 0b0011, /* isSHR = */ false); + INSN(sve_lsr, 0b0001, /* isSHR = */ true); +#undef INSN + private: // Scalar base + immediate index @@ -3207,6 +3276,24 @@ public: INSN(sve_dec, 1); #undef INSN +// SVE predicate logical operations +#define INSN(NAME, op1, op2, op3) \ + void NAME(PRegister Pd, PRegister Pg, PRegister Pn, PRegister Pm) { \ + starti; \ + f(0b00100101, 31, 24), f(op1, 23, 22), f(0b00, 21, 20); \ + prf(Pm, 16), f(0b01, 15, 14), prf(Pg, 10), f(op2, 9); \ + prf(Pn, 5), f(op3, 4), prf(Pd, 0); \ + } + + INSN(sve_and, 0b00, 0b0, 0b0); + INSN(sve_ands, 0b01, 0b0, 0b0); + INSN(sve_eor, 0b00, 0b1, 0b0); + INSN(sve_eors, 0b01, 0b1, 0b0); + INSN(sve_orr, 0b10, 0b0, 0b0); + INSN(sve_orrs, 0b11, 0b0, 0b0); + INSN(sve_bic, 0b00, 0b0, 0b1); +#undef INSN + // SVE increment register by predicate count void sve_incp(const Register rd, SIMD_RegVariant T, PRegister pg) { starti; @@ -3240,12 +3327,47 @@ public: f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0); } + // SVE predicate test + void sve_ptest(PRegister Pg, PRegister Pn) { + starti; + f(0b001001010101000011, 31, 14), prf(Pg, 10), f(0, 9), prf(Pn, 5), f(0, 4, 0); + } + + // SVE predicate initialize void sve_ptrue(PRegister pd, SIMD_RegVariant esize, int pattern = 0b11111) { starti; f(0b00100101, 31, 24), f(esize, 23, 22), f(0b011000111000, 21, 10); f(pattern, 9, 5), f(0b0, 4), prf(pd, 0); } + // SVE predicate zero + void sve_pfalse(PRegister pd) { + starti; + f(0b00100101, 31, 24), f(0b00, 23, 22), f(0b011000111001, 21, 10); + f(0b000000, 9, 4), prf(pd, 0); + } + +// SVE load/store predicate register +#define INSN(NAME, op1) \ + void NAME(PRegister Pt, const Address &a) { \ + starti; \ + assert(a.index() == noreg, "invalid address variant"); \ + f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16), \ + f(0b000, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5), \ + f(0, 4), prf(Pt, 0); \ + } + + INSN(sve_ldr, 0b100); // LDR (predicate) + INSN(sve_str, 0b111); // STR (predicate) +#undef INSN + + // SVE move predicate register + void sve_mov(PRegister Pd, PRegister Pn) { + starti; + f(0b001001011000, 31, 20), prf(Pn, 16), f(0b01, 15, 14), prf(Pn, 10); + f(0, 9), prf(Pn, 5), f(0, 4), prf(Pd, 0); + } + // SVE copy general-purpose register to vector elements (predicated) void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, Register Rn) { starti; @@ -3348,6 +3470,18 @@ void sve_cmp(Condition cond, PRegister Pd, SIMD_RegVariant T, INSN(sve_sunpklo, 0b00); // Unsigned unpack and extend half of vector - low half #undef INSN +// SVE unpack predicate elements +#define INSN(NAME, op) \ + void NAME(PRegister Pd, PRegister Pn) { \ + starti; \ + f(0b000001010011000, 31, 17), 
f(op, 16), f(0b0100000, 15, 9); \ + prf(Pn, 5), f(0b0, 4), prf(Pd, 0); \ + } + + INSN(sve_punpkhi, 0b1); // Unpack and widen high half of predicate + INSN(sve_punpklo, 0b0); // Unpack and widen low half of predicate +#undef INSN + // SVE permute vector elements #define INSN(NAME, op) \ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \ @@ -3361,6 +3495,19 @@ void sve_cmp(Condition cond, PRegister Pd, SIMD_RegVariant T, INSN(sve_uzp2, 0b1); // Concatenate odd elements from two vectors #undef INSN +// SVE permute predicate elements +#define INSN(NAME, op) \ + void NAME(PRegister Pd, SIMD_RegVariant T, PRegister Pn, PRegister Pm) { \ + starti; \ + assert(T != Q, "invalid size"); \ + f(0b00000101, 31, 24), f(T, 23, 22), f(0b10, 21, 20), prf(Pm, 16); \ + f(0b01001, 15, 11), f(op, 10), f(0b0, 9), prf(Pn, 5), f(0b0, 4), prf(Pd, 0); \ + } + + INSN(sve_uzp1, 0b0); // Concatenate even elements from two predicates + INSN(sve_uzp2, 0b1); // Concatenate odd elements from two predicates +#undef INSN + // Predicate counted loop (SVE) (32-bit variants are not included) #define INSN(NAME, decode) \ void NAME(PRegister Pd, SIMD_RegVariant T, Register Rn, Register Rm) { \ @@ -3536,7 +3683,9 @@ void sve_cmp(Condition cond, PRegister Pd, SIMD_RegVariant T, virtual void bang_stack_with_offset(int offset); static bool operand_valid_for_logical_immediate(bool is32, uint64_t imm); + static bool operand_valid_for_sve_logical_immediate(unsigned elembits, uint64_t imm); static bool operand_valid_for_add_sub_immediate(int64_t imm); + static bool operand_valid_for_sve_add_sub_immediate(int64_t imm); static bool operand_valid_for_float_immediate(double imm); void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0); diff --git a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp index 826d11b2aaefec630d9eb477cb9e1a2a0c8ee056..23a59f6d83c4e5ea80fd231c7f787f689279fd24 100644 --- a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp @@ -70,7 +70,7 @@ RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) } RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) - : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { + : _index(index), _array(), _throw_index_out_of_bounds_exception(true) { assert(info != NULL, "must have info"); _info = new CodeEmitInfo(info); } @@ -95,7 +95,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { if (_throw_index_out_of_bounds_exception) { stub_id = Runtime1::throw_index_exception_id; } else { - assert(_array != NULL, "sanity"); + assert(_array != LIR_Opr::nullOpr(), "sanity"); __ mov(rscratch2, _array->as_pointer_register()); stub_id = Runtime1::throw_range_check_failed_id; } diff --git a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp index 0da37ae08a6434bd9ee31d14cf1e94def0bf14ca..c0953912c3f8672a20fc5819138dfd666ab04492 100644 --- a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp @@ -154,8 +154,8 @@ LIR_Opr FrameMap::long1_opr; LIR_Opr FrameMap::fpu0_float_opr; LIR_Opr FrameMap::fpu0_double_opr; -LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; -LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; +LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; +LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; //-------------------------------------------------------- // FrameMap diff --git 
a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index a3a3778783eecc743b808038da2efe5c7e5b6f24..1cae3a3f3b64d998640c8f3a94d12be42127df8f 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -1564,7 +1564,7 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { assert(op->addr()->is_address(), "what else?"); LIR_Address* addr_ptr = op->addr()->as_address_ptr(); assert(addr_ptr->disp() == 0, "need 0 disp"); - assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); + assert(addr_ptr->index() == LIR_Opr::illegalOpr(), "need 0 index"); addr = as_reg(addr_ptr->base()); } Register newval = as_reg(op->new_value()); @@ -2984,7 +2984,7 @@ void LIR_Assembler::membar_loadstore() { __ membar(MacroAssembler::LoadStore); } void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); } void LIR_Assembler::on_spin_wait() { - Unimplemented(); + __ spin_wait(); } void LIR_Assembler::get_thread(LIR_Opr result_reg) { diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp index 59adb698d5e5a85aff9b3c9a5f75958677865bb9..0a97872900e8940e33d80f7ce23fffcb0e8ee0f1 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp @@ -245,7 +245,6 @@ LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { } } else { ShouldNotReachHere(); - r = NULL; // unreachable } return r; } @@ -261,7 +260,7 @@ void LIRGenerator::increment_counter(address counter, BasicType type, int step) void LIRGenerator::increment_counter(LIR_Address* addr, int step) { - LIR_Opr imm = NULL; + LIR_Opr imm; switch(addr->type()) { case T_INT: imm = LIR_OprFact::intConst(step); diff --git a/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp index 58e1cf5ae63a70b0bc1f09acf1d3e9f0ebf053ca..0dd1a2156e89e5d3fadb1f8faf162bc6bae89fa0 100644 --- a/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp @@ -26,22 +26,22 @@ #include "asm/register.hpp" #include "c1/c1_LIR.hpp" -FloatRegister LIR_OprDesc::as_float_reg() const { +FloatRegister LIR_Opr::as_float_reg() const { return as_FloatRegister(fpu_regnr()); } -FloatRegister LIR_OprDesc::as_double_reg() const { +FloatRegister LIR_Opr::as_double_reg() const { return as_FloatRegister(fpu_regnrLo()); } // Reg2 unused. 
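The LIR_OprDesc-to-LIR_Opr changes in this and the neighbouring C1 files follow from turning the old pointer typedef into a value class, which is why NULL initializers become default construction and comparisons go through LIR_Opr::illegalOpr()/nullOpr(). A minimal sketch of the shape of that change (the representation shown is an assumption, not HotSpot's actual definition):

```cpp
#include <cstdint>

// Before: class LIR_OprDesc { ... }; typedef LIR_OprDesc* LIR_Opr;
// so NULL was a valid "no operand" value.
// After (sketched): a value class wrapping the encoded operand bits.
class LIR_Opr {
  uintptr_t _value;                          // assumed representation
 public:
  LIR_Opr() : _value(0) {}                   // default == illegal operand
  static LIR_Opr illegalOpr() { return LIR_Opr(); }
  bool operator==(const LIR_Opr& o) const { return _value == o._value; }
  bool operator!=(const LIR_Opr& o) const { return _value != o._value; }
};
```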
LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); - return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | - (reg1 << LIR_OprDesc::reg2_shift) | - LIR_OprDesc::double_type | - LIR_OprDesc::fpu_register | - LIR_OprDesc::double_size); + return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | + (reg1 << LIR_Opr::reg2_shift) | + LIR_Opr::double_type | + LIR_Opr::fpu_register | + LIR_Opr::double_size); } #ifndef PRODUCT diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index 2713576bf4ce8310b901f1d27bd9782b8c2b59de..5ba1026415f617ec50ee70e438c2310884ec38c6 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -972,28 +972,219 @@ void C2_MacroAssembler::sve_compare(PRegister pd, BasicType bt, PRegister pg, } } -void C2_MacroAssembler::sve_vmask_reduction(int opc, Register dst, SIMD_RegVariant size, FloatRegister src, - PRegister pg, PRegister pn, int length) { +// Get index of the last mask lane that is set +void C2_MacroAssembler::sve_vmask_lasttrue(Register dst, BasicType bt, PRegister src, PRegister ptmp) { + SIMD_RegVariant size = elemType_to_regVariant(bt); + sve_rev(ptmp, size, src); + sve_brkb(ptmp, ptrue, ptmp, false); + sve_cntp(dst, size, ptrue, ptmp); + movw(rscratch1, MaxVectorSize / type2aelembytes(bt) - 1); + subw(dst, rscratch1, dst); +} + +void C2_MacroAssembler::sve_vector_extend(FloatRegister dst, SIMD_RegVariant dst_size, + FloatRegister src, SIMD_RegVariant src_size) { + assert(dst_size > src_size && dst_size <= D && src_size <= S, "invalid element size"); + if (src_size == B) { + switch (dst_size) { + case H: + sve_sunpklo(dst, H, src); + break; + case S: + sve_sunpklo(dst, H, src); + sve_sunpklo(dst, S, dst); + break; + case D: + sve_sunpklo(dst, H, src); + sve_sunpklo(dst, S, dst); + sve_sunpklo(dst, D, dst); + break; + default: + ShouldNotReachHere(); + } + } else if (src_size == H) { + if (dst_size == S) { + sve_sunpklo(dst, S, src); + } else { // D + sve_sunpklo(dst, S, src); + sve_sunpklo(dst, D, dst); + } + } else if (src_size == S) { + sve_sunpklo(dst, D, src); + } +} + +// Vector narrow from src to dst with specified element sizes. +// High part of dst vector will be filled with zero. +void C2_MacroAssembler::sve_vector_narrow(FloatRegister dst, SIMD_RegVariant dst_size, + FloatRegister src, SIMD_RegVariant src_size, + FloatRegister tmp) { + assert(dst_size < src_size && dst_size <= S && src_size <= D, "invalid element size"); + sve_dup(tmp, src_size, 0); + if (src_size == D) { + switch (dst_size) { + case S: + sve_uzp1(dst, S, src, tmp); + break; + case H: + sve_uzp1(dst, S, src, tmp); + sve_uzp1(dst, H, dst, tmp); + break; + case B: + sve_uzp1(dst, S, src, tmp); + sve_uzp1(dst, H, dst, tmp); + sve_uzp1(dst, B, dst, tmp); + break; + default: + ShouldNotReachHere(); + } + } else if (src_size == S) { + if (dst_size == H) { + sve_uzp1(dst, H, src, tmp); + } else { // B + sve_uzp1(dst, H, src, tmp); + sve_uzp1(dst, B, dst, tmp); + } + } else if (src_size == H) { + sve_uzp1(dst, B, src, tmp); + } +} + +// Extend src predicate to dst predicate with the same lane count but larger +// element size, e.g. 
64Byte -> 512Long +void C2_MacroAssembler::sve_vmaskcast_extend(PRegister dst, PRegister src, + uint dst_element_length_in_bytes, + uint src_element_length_in_bytes) { + if (dst_element_length_in_bytes == 2 * src_element_length_in_bytes) { + sve_punpklo(dst, src); + } else if (dst_element_length_in_bytes == 4 * src_element_length_in_bytes) { + sve_punpklo(dst, src); + sve_punpklo(dst, dst); + } else if (dst_element_length_in_bytes == 8 * src_element_length_in_bytes) { + sve_punpklo(dst, src); + sve_punpklo(dst, dst); + sve_punpklo(dst, dst); + } else { + assert(false, "unsupported"); + ShouldNotReachHere(); + } +} + +// Narrow src predicate to dst predicate with the same lane count but +// smaller element size, e.g. 512Long -> 64Byte +void C2_MacroAssembler::sve_vmaskcast_narrow(PRegister dst, PRegister src, + uint dst_element_length_in_bytes, uint src_element_length_in_bytes) { + // The insignificant bits in src predicate are expected to be zero. + if (dst_element_length_in_bytes * 2 == src_element_length_in_bytes) { + sve_uzp1(dst, B, src, src); + } else if (dst_element_length_in_bytes * 4 == src_element_length_in_bytes) { + sve_uzp1(dst, H, src, src); + sve_uzp1(dst, B, dst, dst); + } else if (dst_element_length_in_bytes * 8 == src_element_length_in_bytes) { + sve_uzp1(dst, S, src, src); + sve_uzp1(dst, H, dst, dst); + sve_uzp1(dst, B, dst, dst); + } else { + assert(false, "unsupported"); + ShouldNotReachHere(); + } +} + +void C2_MacroAssembler::sve_reduce_integral(int opc, Register dst, BasicType bt, Register src1, + FloatRegister src2, PRegister pg, FloatRegister tmp) { + assert(bt == T_BYTE || bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported element type"); assert(pg->is_governing(), "This register has to be a governing predicate register"); - // The conditional flags will be clobbered by this function - sve_cmp(Assembler::NE, pn, size, pg, src, 0); + assert_different_registers(src1, dst); + // Register "dst" and "tmp" are to be clobbered, and "src1" and "src2" should be preserved. 
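Each arm of the switch that follows first reduces the active lanes of src2 into a single element, then folds the scalar src1 into that result. A scalar model of the Op_MaxReductionV arm for int lanes (illustrative only; the generated code uses SMAXV plus CMPW/CSELW):

```cpp
#include <algorithm>
#include <climits>

// Scalar model of sve_reduce_integral() for Op_MaxReductionV on T_INT:
// reduce the lanes src2[i] where pg[i] is set, then fold in src1.
int max_reduce_int(int src1, const int* src2, const bool* pg, int lanes) {
  int acc = INT_MIN;                     // SMAXV's identity for inactive lanes
  for (int i = 0; i < lanes; i++) {
    if (pg[i]) acc = std::max(acc, src2[i]);
  }
  return std::max(acc, src1);            // the CMPW/CSELW step
}
```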
+ Assembler::SIMD_RegVariant size = elemType_to_regVariant(bt); switch (opc) { - case Op_VectorMaskTrueCount: - sve_cntp(dst, size, ptrue, pn); + case Op_AddReductionVI: { + sve_uaddv(tmp, size, pg, src2); + smov(dst, tmp, size, 0); + if (bt == T_BYTE) { + addw(dst, src1, dst, ext::sxtb); + } else if (bt == T_SHORT) { + addw(dst, src1, dst, ext::sxth); + } else { + addw(dst, dst, src1); + } + break; + } + case Op_AddReductionVL: { + sve_uaddv(tmp, size, pg, src2); + umov(dst, tmp, size, 0); + add(dst, dst, src1); + break; + } + case Op_AndReductionV: { + sve_andv(tmp, size, pg, src2); + if (bt == T_LONG) { + umov(dst, tmp, size, 0); + andr(dst, dst, src1); + } else { + smov(dst, tmp, size, 0); + andw(dst, dst, src1); + } + break; + } + case Op_OrReductionV: { + sve_orv(tmp, size, pg, src2); + if (bt == T_LONG) { + umov(dst, tmp, size, 0); + orr(dst, dst, src1); + } else { + smov(dst, tmp, size, 0); + orrw(dst, dst, src1); + } break; - case Op_VectorMaskFirstTrue: - sve_brkb(pn, pg, pn, false); - sve_cntp(dst, size, ptrue, pn); + } + case Op_XorReductionV: { + sve_eorv(tmp, size, pg, src2); + if (bt == T_LONG) { + umov(dst, tmp, size, 0); + eor(dst, dst, src1); + } else { + smov(dst, tmp, size, 0); + eorw(dst, dst, src1); + } + break; + } + case Op_MaxReductionV: { + sve_smaxv(tmp, size, pg, src2); + if (bt == T_LONG) { + umov(dst, tmp, size, 0); + cmp(dst, src1); + csel(dst, dst, src1, Assembler::GT); + } else { + smov(dst, tmp, size, 0); + cmpw(dst, src1); + cselw(dst, dst, src1, Assembler::GT); + } break; - case Op_VectorMaskLastTrue: - sve_rev(pn, size, pn); - sve_brkb(pn, ptrue, pn, false); - sve_cntp(dst, size, ptrue, pn); - movw(rscratch1, length - 1); - subw(dst, rscratch1, dst); + } + case Op_MinReductionV: { + sve_sminv(tmp, size, pg, src2); + if (bt == T_LONG) { + umov(dst, tmp, size, 0); + cmp(dst, src1); + csel(dst, dst, src1, Assembler::LT); + } else { + smov(dst, tmp, size, 0); + cmpw(dst, src1); + cselw(dst, dst, src1, Assembler::LT); + } break; + } default: assert(false, "unsupported"); ShouldNotReachHere(); } + + if (opc == Op_AndReductionV || opc == Op_OrReductionV || opc == Op_XorReductionV) { + if (bt == T_BYTE) { + sxtb(dst, dst); + } else if (bt == T_SHORT) { + sxth(dst, dst); + } + } } diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp index fb0fbabea9ed70dcf5cfc538ae6b15cc43bac85a..81d9799f5adbc06f85424339cc4e84d13e87700b 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp @@ -61,8 +61,22 @@ void sve_compare(PRegister pd, BasicType bt, PRegister pg, FloatRegister zn, FloatRegister zm, int cond); - void sve_vmask_reduction(int opc, Register dst, SIMD_RegVariant size, FloatRegister src, - PRegister pg, PRegister pn, int length = MaxVectorSize); + void sve_vmask_lasttrue(Register dst, BasicType bt, PRegister src, PRegister ptmp); + + void sve_vector_extend(FloatRegister dst, SIMD_RegVariant dst_size, + FloatRegister src, SIMD_RegVariant src_size); + + void sve_vector_narrow(FloatRegister dst, SIMD_RegVariant dst_size, + FloatRegister src, SIMD_RegVariant src_size, FloatRegister tmp); + + void sve_vmaskcast_extend(PRegister dst, PRegister src, + uint dst_element_length_in_bytes, uint src_element_length_in_bytes); + + void sve_vmaskcast_narrow(PRegister dst, PRegister src, + uint dst_element_length_in_bytes, uint src_element_length_in_bytes); + + void sve_reduce_integral(int opc, Register dst, BasicType bt, Register src1, +
FloatRegister src2, PRegister pg, FloatRegister tmp); // Generate predicate through whilelo, by comparing ZR with an unsigned // immediate. rscratch1 will be clobbered. diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp index 6b42982ed905bbabc21eec628dcee214b152f40d..10b1cf20ef910240b4988db7d611a1b778d71ca4 100644 --- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp @@ -315,6 +315,7 @@ private: MacroAssembler* const _masm; RegSet _gp_regs; FloatRegSet _fp_regs; + PRegSet _p_regs; public: void initialize(ZLoadBarrierStubC2* stub) { @@ -328,6 +329,8 @@ public: _gp_regs += RegSet::of(vm_reg->as_Register()); } else if (vm_reg->is_FloatRegister()) { _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); + } else if (vm_reg->is_PRegister()) { + _p_regs += PRegSet::of(vm_reg->as_PRegister()); } else { fatal("Unknown register type"); } @@ -341,7 +344,8 @@ public: ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : _masm(masm), _gp_regs(), - _fp_regs() { + _fp_regs(), + _p_regs() { // Figure out what registers to save/restore initialize(stub); @@ -349,10 +353,12 @@ public: // Save registers __ push(_gp_regs, sp); __ push_fp(_fp_regs, sp); + __ push_p(_p_regs, sp); } ~ZSaveLiveRegisters() { // Restore registers + __ pop_p(_p_regs, sp); __ pop_fp(_fp_regs, sp); // External runtime call may clobber ptrue reg diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp index cca87382560142a7a8b144f0052bd17a084e5fa2..f3d29b44a15b825fe27724a76149dfeff9961521 100644 --- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp @@ -32,8 +32,7 @@ #ifdef COMPILER1 class LIR_Assembler; -class LIR_OprDesc; -typedef LIR_OprDesc* LIR_Opr; +class LIR_Opr; class StubAssembler; class ZLoadBarrierStubC1; #endif // COMPILER1 diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp index aa6c3ad95f53409028067fd3d90a1e5180883636..82760cc3bcf066becfedc9c5fb279c2a14cd1c89 100644 --- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp @@ -110,7 +110,15 @@ define_pd_global(intx, InlineSmallCode, 1000); product(int, SoftwarePrefetchHintDistance, -1, \ "Use prfm hint with specified distance in compiled code." \ "Value -1 means off.") \ - range(-1, 4096) + range(-1, 4096) \ + product(ccstr, OnSpinWaitInst, "none", DIAGNOSTIC, \ + "The instruction to use to implement " \ + "java.lang.Thread.onSpinWait()." \ + "Options: none, nop, isb, yield.") \ + product(uint, OnSpinWaitInstCount, 1, DIAGNOSTIC, \ + "The number of OnSpinWaitInst instructions to generate." 
\ + "It cannot be used with OnSpinWaitInst=none.") \ + range(1, 99) // end of ARCH_FLAGS diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 5c9b1fc327d09af0d73b39e40e8b54bb106a1b90..07e3fdad17b16ac2c7b0614358884651e92a2386 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -1978,7 +1978,7 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) { return count * 2; } -// Return the number of dwords poped +// Return the number of dwords popped int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { int words_pushed = 0; bool use_sve = false; @@ -2037,6 +2037,80 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { return count * 2; } +// Return the number of dwords pushed +int MacroAssembler::push_p(unsigned int bitset, Register stack) { + bool use_sve = false; + int sve_predicate_size_in_slots = 0; + +#ifdef COMPILER2 + use_sve = Matcher::supports_scalable_vector(); + if (use_sve) { + sve_predicate_size_in_slots = Matcher::scalable_predicate_reg_slots(); + } +#endif + + if (!use_sve) { + return 0; + } + + unsigned char regs[PRegisterImpl::number_of_saved_registers]; + int count = 0; + for (int reg = 0; reg < PRegisterImpl::number_of_saved_registers; reg++) { + if (1 & bitset) + regs[count++] = reg; + bitset >>= 1; + } + + if (count == 0) { + return 0; + } + + int total_push_bytes = align_up(sve_predicate_size_in_slots * + VMRegImpl::stack_slot_size * count, 16); + sub(stack, stack, total_push_bytes); + for (int i = 0; i < count; i++) { + sve_str(as_PRegister(regs[i]), Address(stack, i)); + } + return total_push_bytes / 8; +} + +// Return the number of dwords popped +int MacroAssembler::pop_p(unsigned int bitset, Register stack) { + bool use_sve = false; + int sve_predicate_size_in_slots = 0; + +#ifdef COMPILER2 + use_sve = Matcher::supports_scalable_vector(); + if (use_sve) { + sve_predicate_size_in_slots = Matcher::scalable_predicate_reg_slots(); + } +#endif + + if (!use_sve) { + return 0; + } + + unsigned char regs[PRegisterImpl::number_of_saved_registers]; + int count = 0; + for (int reg = 0; reg < PRegisterImpl::number_of_saved_registers; reg++) { + if (1 & bitset) + regs[count++] = reg; + bitset >>= 1; + } + + if (count == 0) { + return 0; + } + + int total_pop_bytes = align_up(sve_predicate_size_in_slots * + VMRegImpl::stack_slot_size * count, 16); + for (int i = count - 1; i >= 0; i--) { + sve_ldr(as_PRegister(regs[i]), Address(stack, i)); + } + add(stack, stack, total_pop_bytes); + return total_pop_bytes / 8; +} + #ifdef ASSERT void MacroAssembler::verify_heapbase(const char* msg) { #if 0 @@ -2495,7 +2569,7 @@ void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { } void MacroAssembler::push_CPU_state(bool save_vectors, bool use_sve, - int sve_vector_size_in_bytes) { + int sve_vector_size_in_bytes, int total_predicate_in_bytes) { push(RegSet::range(r0, r29), sp); // integer registers except lr & sp if (save_vectors && use_sve && sve_vector_size_in_bytes > 16) { sub(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers); @@ -2512,10 +2586,22 @@ void MacroAssembler::push_CPU_state(bool save_vectors, bool use_sve, } st1(v0, v1, v2, v3, save_vectors ? 
T2D : T1D, sp); } + if (save_vectors && use_sve && total_predicate_in_bytes > 0) { + sub(sp, sp, total_predicate_in_bytes); + for (int i = 0; i < PRegisterImpl::number_of_saved_registers; i++) { + sve_str(as_PRegister(i), Address(sp, i)); + } + } } void MacroAssembler::pop_CPU_state(bool restore_vectors, bool use_sve, - int sve_vector_size_in_bytes) { + int sve_vector_size_in_bytes, int total_predicate_in_bytes) { + if (restore_vectors && use_sve && total_predicate_in_bytes > 0) { + for (int i = PRegisterImpl::number_of_saved_registers - 1; i >= 0; i--) { + sve_ldr(as_PRegister(i), Address(sp, i)); + } + add(sp, sp, total_predicate_in_bytes); + } if (restore_vectors && use_sve && sve_vector_size_in_bytes > 16) { for (int i = FloatRegisterImpl::number_of_registers - 1; i >= 0; i--) { sve_ldr(as_FloatRegister(i), Address(sp, i)); @@ -5154,3 +5240,21 @@ void MacroAssembler::verify_cross_modify_fence_not_required() { } } #endif + +void MacroAssembler::spin_wait() { + for (int i = 0; i < VM_Version::spin_wait_desc().inst_count(); ++i) { + switch (VM_Version::spin_wait_desc().inst()) { + case SpinWait::NOP: + nop(); + break; + case SpinWait::ISB: + isb(); + break; + case SpinWait::YIELD: + yield(); + break; + default: + ShouldNotReachHere(); + } + } +} diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index 3287f153ab8f846c74619b0e774dab5484896201..a9ebc8bdc69196fe97229633737f99992c20701f 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -455,6 +455,9 @@ private: int push_fp(unsigned int bitset, Register stack); int pop_fp(unsigned int bitset, Register stack); + int push_p(unsigned int bitset, Register stack); + int pop_p(unsigned int bitset, Register stack); + void mov(Register dst, Address a); public: @@ -466,6 +469,9 @@ public: static RegSet call_clobbered_registers(); + void push_p(PRegSet regs, Register stack) { if (regs.bits()) push_p(regs.bits(), stack); } + void pop_p(PRegSet regs, Register stack) { if (regs.bits()) pop_p(regs.bits(), stack); } + // Push and pop everything that might be clobbered by a native // runtime call except rscratch1 and rscratch2. (They are always // scratch, so we don't have to protect them.) 
Only save the lower @@ -865,9 +871,9 @@ public: DEBUG_ONLY(void verify_heapbase(const char* msg);) void push_CPU_state(bool save_vectors = false, bool use_sve = false, - int sve_vector_size_in_bytes = 0); + int sve_vector_size_in_bytes = 0, int total_predicate_in_bytes = 0); void pop_CPU_state(bool restore_vectors = false, bool use_sve = false, - int sve_vector_size_in_bytes = 0); + int sve_vector_size_in_bytes = 0, int total_predicate_in_bytes = 0); // Round up to a power of two void round_to(Register reg, int modulus); @@ -1361,9 +1367,14 @@ public: void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) { str(Vx, T, spill_address(1 << (int)T, offset)); } + void spill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) { sve_str(Zx, sve_spill_address(vector_reg_size_in_bytes, offset)); } + void spill_sve_predicate(PRegister pr, int offset, int predicate_reg_size_in_bytes) { + sve_str(pr, sve_spill_address(predicate_reg_size_in_bytes, offset)); + } + void unspill(Register Rx, bool is64, int offset) { if (is64) { ldr(Rx, spill_address(8, offset)); @@ -1374,9 +1385,14 @@ public: void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) { ldr(Vx, T, spill_address(1 << (int)T, offset)); } + void unspill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) { sve_ldr(Zx, sve_spill_address(vector_reg_size_in_bytes, offset)); } + void unspill_sve_predicate(PRegister pr, int offset, int predicate_reg_size_in_bytes) { + sve_ldr(pr, sve_spill_address(predicate_reg_size_in_bytes, offset)); + } + void spill_copy128(int src_offset, int dst_offset, Register tmp1=rscratch1, Register tmp2=rscratch2) { if (src_offset < 512 && (src_offset & 7) == 0 && @@ -1399,9 +1415,18 @@ public: dst_offset += 16; } } + void spill_copy_sve_predicate_stack_to_stack(int src_offset, int dst_offset, + int sve_predicate_reg_size_in_bytes) { + sve_ldr(ptrue, sve_spill_address(sve_predicate_reg_size_in_bytes, src_offset)); + sve_str(ptrue, sve_spill_address(sve_predicate_reg_size_in_bytes, dst_offset)); + reinitialize_ptrue(); + } void cache_wb(Address line); void cache_wbsync(bool is_pre); + // Code for java.lang.Thread::onSpinWait() intrinsic. + void spin_wait(); + private: // Check the current thread doesn't need a cross modify fence. void verify_cross_modify_fence_not_required() PRODUCT_RETURN; diff --git a/src/hotspot/cpu/aarch64/register_aarch64.cpp b/src/hotspot/cpu/aarch64/register_aarch64.cpp index 24c3f32c2b6b60ae53b2bf15147e7ff708859656..b785457ae29ccdc206d839ca0121912366af5a3e 100644 --- a/src/hotspot/cpu/aarch64/register_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/register_aarch64.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -34,7 +34,8 @@ const int ConcreteRegisterImpl::max_fpr FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; const int ConcreteRegisterImpl::max_pr - = ConcreteRegisterImpl::max_fpr + PRegisterImpl::number_of_registers; + = ConcreteRegisterImpl::max_fpr + + PRegisterImpl::number_of_registers * PRegisterImpl::max_slots_per_register; const char* RegisterImpl::name() const { const char* names[number_of_registers] = { diff --git a/src/hotspot/cpu/aarch64/register_aarch64.hpp b/src/hotspot/cpu/aarch64/register_aarch64.hpp index 479bd1f37c43563dcd49e5cc7ab301a3754cce2b..69a0a7eb535de7b546e76610c1209dac70d37c85 100644 --- a/src/hotspot/cpu/aarch64/register_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/register_aarch64.hpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -243,6 +243,11 @@ class PRegisterImpl: public AbstractRegisterImpl { enum { number_of_registers = 16, number_of_governing_registers = 8, + // p0-p7 are governing predicates for load/store and arithmetic, but p7 is + // preserved as an all-true predicate in OpenJDK. Since we don't support + // allocating non-governing predicate registers as non-temporary registers, the + // predicate registers that need to be saved are p0-p6. + number_of_saved_registers = number_of_governing_registers - 1, max_slots_per_register = 1 }; @@ -377,6 +382,7 @@ public: typedef AbstractRegSet RegSet; typedef AbstractRegSet FloatRegSet; +typedef AbstractRegSet PRegSet; template class RegSetIterator { diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp index b6ee437603b234a0a8b0c32fb21db12c37225b48..5c80566aeadd7d3425a82cde23b7c51d558bfc10 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -101,7 +101,10 @@ class RegisterSaver { int reg_offset_in_bytes(Register r); int r0_offset_in_bytes() { return reg_offset_in_bytes(r0); } int rscratch1_offset_in_bytes() { return reg_offset_in_bytes(rscratch1); } - int v0_offset_in_bytes(void) { return 0; } + int v0_offset_in_bytes(); + + // Total stack size in bytes for saving SVE predicate registers. + int total_sve_predicate_in_bytes(); // Capture info about frame layout // Note this is only correct when not saving full vectors. @@ -139,24 +142,49 @@ int RegisterSaver::reg_offset_in_bytes(Register r) { } #endif - int r0_offset = (slots_per_vect * FloatRegisterImpl::number_of_registers) * BytesPerInt; + int r0_offset = v0_offset_in_bytes() + (slots_per_vect * FloatRegisterImpl::number_of_registers) * BytesPerInt; return r0_offset + r->encoding() * wordSize; } +int RegisterSaver::v0_offset_in_bytes() { + // The floating point registers are located above the predicate registers if + // they are present in the stack frame pushed by save_live_registers(). So the + // offset depends on the total size of the predicate registers saved in the stack frame.
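The save-area arithmetic introduced here is easy to sanity-check by hand. Below is a minimal stand-alone sketch (not HotSpot code), assuming a hypothetical 512-bit SVE vector length; the constants mirror the patch (4-byte stack slots, seven saved predicates p0-p6), and `align_up` is a local stand-in for HotSpot's helper. The same `align_up(slots * slot_size * count, 16)` formula also governs `push_p`/`pop_p` earlier in the change.

```cpp
#include <cstdio>

// Stand-in for HotSpot's align_up(); 'a' must be a power of two.
static int align_up(int x, int a) { return (x + a - 1) & ~(a - 1); }

int main() {
  const int vector_bits = 512;   // assumed SVE vector length, not queried
  const int slot_size   = 4;     // VMRegImpl::stack_slot_size
  const int saved_preds = 7;     // PRegisterImpl::number_of_saved_registers (p0-p6)

  // An SVE predicate register holds one bit per vector byte:
  // 512-bit vector -> 64-byte vector -> 64-bit (8-byte) predicate.
  const int pred_bytes = (vector_bits / 8) / 8;
  const int pred_slots = pred_bytes / slot_size;

  // Same formula as total_sve_predicate_in_bytes() and push_p()/pop_p().
  const int pred_area = align_up(pred_slots * slot_size * saved_preds, 16);

  printf("predicate save area: %d bytes (7 x %d, rounded up to 16)\n",
         pred_area, pred_bytes);
  printf("v0_offset_in_bytes(): %d\n", pred_area);  // FP registers sit above
  return 0;
}
```

With these assumptions the seven predicates occupy 56 bytes, rounded up to 64, which is exactly the offset at which v0 is then saved.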
+ return (total_sve_predicate_in_bytes() / VMRegImpl::stack_slot_size) * BytesPerInt; +} + +int RegisterSaver::total_sve_predicate_in_bytes() { +#ifdef COMPILER2 + if (_save_vectors && Matcher::supports_scalable_vector()) { + // The number of total predicate bytes is unlikely to be a multiple + // of 16 bytes so we manually align it up. + return align_up(Matcher::scalable_predicate_reg_slots() * + VMRegImpl::stack_slot_size * + PRegisterImpl::number_of_saved_registers, 16); + } +#endif + return 0; +} + OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { bool use_sve = false; int sve_vector_size_in_bytes = 0; int sve_vector_size_in_slots = 0; + int sve_predicate_size_in_slots = 0; + int total_predicate_in_bytes = total_sve_predicate_in_bytes(); + int total_predicate_in_slots = total_predicate_in_bytes / VMRegImpl::stack_slot_size; #ifdef COMPILER2 use_sve = Matcher::supports_scalable_vector(); - sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); - sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT); + if (use_sve) { + sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); + sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT); + sve_predicate_size_in_slots = Matcher::scalable_predicate_reg_slots(); + } #endif #if COMPILER2_OR_JVMCI if (_save_vectors) { - int vect_words = 0; int extra_save_slots_per_register = 0; // Save upper half of vector registers if (use_sve) { @@ -164,9 +192,10 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ } else { extra_save_slots_per_register = FloatRegisterImpl::extra_save_slots_per_neon_register; } - vect_words = FloatRegisterImpl::number_of_registers * extra_save_slots_per_register / - VMRegImpl::slots_per_word; - additional_frame_words += vect_words; + int extra_vector_bytes = extra_save_slots_per_register * + VMRegImpl::stack_slot_size * + FloatRegisterImpl::number_of_registers; + additional_frame_words += ((extra_vector_bytes + total_predicate_in_bytes) / wordSize); } #else assert(!_save_vectors, "vectors are generated only by C2 and JVMCI"); @@ -184,7 +213,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ // Save Integer and Float registers. __ enter(); - __ push_CPU_state(_save_vectors, use_sve, sve_vector_size_in_bytes); + __ push_CPU_state(_save_vectors, use_sve, sve_vector_size_in_bytes, total_predicate_in_bytes); // Set an oopmap for the call site. This oopmap will map all // oop-registers and debug-info registers as callee-saved. This @@ -201,8 +230,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ // Register slots are 8 bytes wide, 32 floating-point registers. int sp_offset = RegisterImpl::max_slots_per_register * i + FloatRegisterImpl::save_slots_per_register * FloatRegisterImpl::number_of_registers; - oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots), - r->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots), r->as_VMReg()); } } @@ -210,13 +238,20 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ FloatRegister r = as_FloatRegister(i); int sp_offset = 0; if (_save_vectors) { - sp_offset = use_sve ? (sve_vector_size_in_slots * i) : + sp_offset = use_sve ? 
(total_predicate_in_slots + sve_vector_size_in_slots * i) : (FloatRegisterImpl::slots_per_neon_register * i); } else { sp_offset = FloatRegisterImpl::save_slots_per_register * i; } - oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), - r->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); + } + + if (_save_vectors && use_sve) { + for (int i = 0; i < PRegisterImpl::number_of_saved_registers; i++) { + PRegister r = as_PRegister(i); + int sp_offset = sve_predicate_size_in_slots * i; + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); + } } return oop_map; @@ -225,7 +260,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ void RegisterSaver::restore_live_registers(MacroAssembler* masm) { #ifdef COMPILER2 __ pop_CPU_state(_save_vectors, Matcher::supports_scalable_vector(), - Matcher::scalable_vector_reg_size(T_BYTE)); + Matcher::scalable_vector_reg_size(T_BYTE), total_sve_predicate_in_bytes()); #else #if !INCLUDE_JVMCI assert(!_save_vectors, "vectors are generated only by C2 and JVMCI"); @@ -238,8 +273,10 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm) { // Is vector's size (in bytes) bigger than a size saved by default? // 8 bytes vector registers are saved by default on AArch64. +// The minimum vector size supported by SVE is 8 bytes, and predicate +// registers need to be saved even at that vector size. bool SharedRuntime::is_wide_vector(int size) { - return size > 8; + return size > 8 || (UseSVE > 0 && size >= 8); } // The java_calling_convention describes stack locations as ideal slots on @@ -1112,69 +1149,6 @@ static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMR } } -// Unpack an array argument into a pointer to the body and the length -// if the array is non-null, otherwise pass 0 for both.
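The revised `is_wide_vector()` above reads more clearly in isolation: with SVE enabled, the added `size >= 8` arm makes every SVE vector count as wide, since predicate registers must be saved even at SVE's 8-byte minimum. A trivial stand-alone restatement, with `UseSVE` modeled as a plain parameter:

```cpp
// Stand-alone restatement of the new predicate; 'use_sve' models UseSVE.
bool is_wide_vector(int size_in_bytes, int use_sve) {
  // NEON: only vectors larger than the 8 bytes saved by default are wide.
  // SVE: an 8-byte vector already requires predicate saving, so >= 8 is wide.
  return size_in_bytes > 8 || (use_sve > 0 && size_in_bytes >= 8);
}
```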
-static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); } - - -class ComputeMoveOrder: public StackObj { - class MoveOperation: public ResourceObj { - friend class ComputeMoveOrder; - private: - VMRegPair _src; - VMRegPair _dst; - int _src_index; - int _dst_index; - bool _processed; - MoveOperation* _next; - MoveOperation* _prev; - - static int get_id(VMRegPair r) { Unimplemented(); return 0; } - - public: - MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst): - _src(src) - , _dst(dst) - , _src_index(src_index) - , _dst_index(dst_index) - , _processed(false) - , _next(NULL) - , _prev(NULL) { Unimplemented(); } - - VMRegPair src() const { Unimplemented(); return _src; } - int src_id() const { Unimplemented(); return 0; } - int src_index() const { Unimplemented(); return 0; } - VMRegPair dst() const { Unimplemented(); return _src; } - void set_dst(int i, VMRegPair dst) { Unimplemented(); } - int dst_index() const { Unimplemented(); return 0; } - int dst_id() const { Unimplemented(); return 0; } - MoveOperation* next() const { Unimplemented(); return 0; } - MoveOperation* prev() const { Unimplemented(); return 0; } - void set_processed() { Unimplemented(); } - bool is_processed() const { Unimplemented(); return 0; } - - // insert - void break_cycle(VMRegPair temp_register) { Unimplemented(); } - - void link(GrowableArray& killer) { Unimplemented(); } - }; - - private: - GrowableArray edges; - - public: - ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs, - BasicType* in_sig_bt, GrowableArray& arg_order, VMRegPair tmp_vmreg) { Unimplemented(); } - - // Collected all the move operations - void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { Unimplemented(); } - - // Walk the edges breaking cycles between moves. 
The result list - // can be walked in order to produce the proper set of loads - GrowableArray* get_store_order(VMRegPair temp_register) { Unimplemented(); return 0; } -}; - - static void rt_call(MacroAssembler* masm, address dest) { CodeBlob *cb = CodeCache::find_blob(dest); if (cb) { @@ -1287,8 +1261,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, int compile_id, BasicType* in_sig_bt, VMRegPair* in_regs, - BasicType ret_type, - address critical_entry) { + BasicType ret_type) { if (method->is_method_handle_intrinsic()) { vmIntrinsics::ID iid = method->intrinsic_id(); intptr_t start = (intptr_t)__ pc(); @@ -1313,12 +1286,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, in_ByteSize(-1), (OopMapSet*)NULL); } - bool is_critical_native = true; - address native_func = critical_entry; - if (native_func == NULL) { - native_func = method->native_function(); - is_critical_native = false; - } + address native_func = method->native_function(); assert(native_func != NULL, "must have function"); // An OopMap for lock (and class if static) @@ -1332,55 +1300,20 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // the hidden arguments as arg[0] and possibly arg[1] (static method) const int total_in_args = method->size_of_parameters(); - int total_c_args = total_in_args; - if (!is_critical_native) { - total_c_args += 1; - if (method->is_static()) { - total_c_args++; - } - } else { - for (int i = 0; i < total_in_args; i++) { - if (in_sig_bt[i] == T_ARRAY) { - total_c_args++; - } - } - } + int total_c_args = total_in_args + (method->is_static() ? 2 : 1); BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); BasicType* in_elem_bt = NULL; int argc = 0; - if (!is_critical_native) { - out_sig_bt[argc++] = T_ADDRESS; - if (method->is_static()) { - out_sig_bt[argc++] = T_OBJECT; - } + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } - for (int i = 0; i < total_in_args ; i++ ) { - out_sig_bt[argc++] = in_sig_bt[i]; - } - } else { - in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); - SignatureStream ss(method->signature()); - for (int i = 0; i < total_in_args ; i++ ) { - if (in_sig_bt[i] == T_ARRAY) { - // Arrays are passed as int, elem* pair - out_sig_bt[argc++] = T_INT; - out_sig_bt[argc++] = T_ADDRESS; - ss.skip_array_prefix(1); // skip one '[' - assert(ss.is_primitive(), "primitive type expected"); - in_elem_bt[i] = ss.type(); - } else { - out_sig_bt[argc++] = in_sig_bt[i]; - in_elem_bt[i] = T_VOID; - } - if (in_sig_bt[i] != T_VOID) { - assert(in_sig_bt[i] == ss.type() || - in_sig_bt[i] == T_ARRAY, "must match"); - ss.next(); - } - } + for (int i = 0; i < total_in_args ; i++ ) { + out_sig_bt[argc++] = in_sig_bt[i]; } // Now figure out where the args must be stored and how much stack space @@ -1402,34 +1335,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Now the space for the inbound oop handle area int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers - if (is_critical_native) { - // Critical natives may have to call out so they need a save area - // for register arguments. 
- int double_slots = 0; - int single_slots = 0; - for ( int i = 0; i < total_in_args; i++) { - if (in_regs[i].first()->is_Register()) { - const Register reg = in_regs[i].first()->as_Register(); - switch (in_sig_bt[i]) { - case T_BOOLEAN: - case T_BYTE: - case T_SHORT: - case T_CHAR: - case T_INT: single_slots++; break; - case T_ARRAY: // specific to LP64 (7145024) - case T_LONG: double_slots++; break; - default: ShouldNotReachHere(); - } - } else if (in_regs[i].first()->is_FloatRegister()) { - ShouldNotReachHere(); - } - } - total_save_slots = double_slots * 2 + single_slots; - // align the save area - if (double_slots != 0) { - stack_slots = align_up(stack_slots, 2); - } - } int oop_handle_offset = stack_slots; stack_slots += total_save_slots; @@ -1596,22 +1501,14 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, #endif /* ASSERT */ - // This may iterate in two different directions depending on the - // kind of native it is. The reason is that for regular JNI natives - // the incoming and outgoing registers are offset upwards and for - // critical natives they are offset down. + // For JNI natives the incoming and outgoing registers are offset upwards. GrowableArray arg_order(2 * total_in_args); VMRegPair tmp_vmreg; tmp_vmreg.set2(r19->as_VMReg()); - if (!is_critical_native) { - for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { - arg_order.push(i); - arg_order.push(c_arg); - } - } else { - // Compute a valid move order, using tmp_vmreg to break any cycles - ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); + for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { + arg_order.push(i); + arg_order.push(c_arg); } int temploc = -1; @@ -1619,20 +1516,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, int i = arg_order.at(ai); int c_arg = arg_order.at(ai + 1); __ block_comment(err_msg("move %d -> %d", i, c_arg)); - if (c_arg == -1) { - assert(is_critical_native, "should only be required for critical natives"); - // This arg needs to be moved to a temporary - __ mov(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); - in_regs[i] = tmp_vmreg; - temploc = i; - continue; - } else if (i == -1) { - assert(is_critical_native, "should only be required for critical natives"); - // Read from the temporary location - assert(temploc != -1, "must be valid"); - i = temploc; - temploc = -1; - } + assert(c_arg != -1 && i != -1, "wrong order"); #ifdef ASSERT if (in_regs[i].first()->is_Register()) { assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); @@ -1647,21 +1531,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, #endif /* ASSERT */ switch (in_sig_bt[i]) { case T_ARRAY: - if (is_critical_native) { - unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); - c_arg++; -#ifdef ASSERT - if (out_regs[c_arg].first()->is_Register()) { - reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; - } else if (out_regs[c_arg].first()->is_FloatRegister()) { - freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; - } -#endif - int_args++; - break; - } case T_OBJECT: - assert(!is_critical_native, "no oop arguments"); object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ((i == 0) && (!is_static)), &receiver_offset); @@ -1701,7 +1571,7 @@ nmethod* 
SharedRuntime::generate_native_wrapper(MacroAssembler* masm, int c_arg = total_c_args - total_in_args; // Pre-load a static method's oop into c_rarg1. - if (method->is_static() && !is_critical_native) { + if (method->is_static()) { // load oop into a register __ movoop(c_rarg1, @@ -1759,7 +1629,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, Label lock_done; if (method->is_synchronized()) { - assert(!is_critical_native, "unhandled"); const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); @@ -1813,14 +1682,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Finally just about ready to make the JNI call // get JNIEnv* which is first argument to native - if (!is_critical_native) { - __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset()))); + __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset()))); - // Now set thread in native - __ mov(rscratch1, _thread_in_native); - __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); - __ stlrw(rscratch1, rscratch2); - } + // Now set thread in native + __ mov(rscratch1, _thread_in_native); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); rt_call(masm, native_func); @@ -1851,18 +1718,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, Label safepoint_in_progress, safepoint_in_progress_done; Label after_transition; - // If this is a critical native, check for a safepoint or suspend request after the call. - // If a safepoint is needed, transition to native, then to native_trans to handle - // safepoints like the native methods that are not critical natives. - if (is_critical_native) { - Label needs_safepoint; - __ safepoint_poll(needs_safepoint, false /* at_return */, true /* acquire */, false /* in_nmethod */); - __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset())); - __ cbnzw(rscratch1, needs_safepoint); - __ b(after_transition); - __ bind(needs_safepoint); - } - // Switch thread to "native transition" state before reading the synchronization state. // This additional state is necessary because reading and testing the synchronization // state is not atomic w.r.t. GC, as this scenario demonstrates: @@ -1971,32 +1826,26 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ str(zr, Address(rthread, JavaThread::pending_jni_exception_check_fn_offset())); } - if (!is_critical_native) { - // reset handle block - __ ldr(r2, Address(rthread, JavaThread::active_handles_offset())); - __ str(zr, Address(r2, JNIHandleBlock::top_offset_in_bytes())); - } + // reset handle block + __ ldr(r2, Address(rthread, JavaThread::active_handles_offset())); + __ str(zr, Address(r2, JNIHandleBlock::top_offset_in_bytes())); __ leave(); - if (!is_critical_native) { - // Any exception pending? - __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); - __ cbnz(rscratch1, exception_pending); - } + // Any exception pending? 
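One detail of the simplified wrapper worth calling out: the `stlrw` used above when flipping the thread to `_thread_in_native` is a store-release, so all Java-visible stores precede the state change for any concurrent observer such as the safepoint machinery. A loose C++ analogue of that transition (not HotSpot code; the enum values are invented):

```cpp
#include <atomic>

enum ThreadState { thread_in_Java = 0, thread_in_native = 1 };  // invented values

std::atomic<int> g_thread_state{thread_in_Java};

void enter_native() {
  // Release ordering: every store made by the Java frames is visible
  // before any observer can read the new state -- the same guarantee
  // the stlrw store-release provides on AArch64.
  g_thread_state.store(thread_in_native, std::memory_order_release);
}
```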
+ __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ cbnz(rscratch1, exception_pending); // We're done __ ret(lr); // Unexpected paths are out of line and go here - if (!is_critical_native) { - // forward the exception - __ bind(exception_pending); + // forward the exception + __ bind(exception_pending); - // and forward the exception - __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); - } + // and forward the exception + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); // Slow path locking & unlocking if (method->is_synchronized()) { diff --git a/test/jdk/sun/tools/jps/TestJpsHostName.java b/src/hotspot/cpu/aarch64/spin_wait_aarch64.hpp similarity index 60% rename from test/jdk/sun/tools/jps/TestJpsHostName.java rename to src/hotspot/cpu/aarch64/spin_wait_aarch64.hpp index 4ef9d91ac31b2907a17419aef22d517ce264392f..4edce2642e9ff3e48eee910e7b9bc8f84cb52005 100644 --- a/test/jdk/sun/tools/jps/TestJpsHostName.java +++ b/src/hotspot/cpu/aarch64/spin_wait_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. + * Copyright (c) 2021, Amazon.com Inc. or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -19,28 +19,30 @@ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. + * */ -import jdk.test.lib.process.OutputAnalyzer; +#ifndef CPU_AARCH64_SPIN_WAIT_AARCH64_HPP +#define CPU_AARCH64_SPIN_WAIT_AARCH64_HPP -/* - * @test - * @bug 8251155 - * @summary Test host names starting with digits - * @library /test/lib - * @build JpsHelper - * @run driver TestJpsHostName - */ -public class TestJpsHostName { +class SpinWait { +public: + enum Inst { + NONE = -1, + NOP, + ISB, + YIELD + }; + +private: + Inst _inst; + int _count; - public static void main(String[] args) throws Throwable { - testJpsHostName("12345"); - testJpsHostName("12345:37266"); - } +public: + SpinWait(Inst inst = NONE, int count = 0) : _inst(inst), _count(count) {} - private static void testJpsHostName(String hostname) throws Exception { - OutputAnalyzer output = JpsHelper.jps(hostname); - output.shouldNotContain("Malformed Host Identifier: " + hostname); - } + Inst inst() const { return _inst; } + int inst_count() const { return _count; } +}; -} +#endif // CPU_AARCH64_SPIN_WAIT_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp index a3ac94505b09bb6c71bf328fca9243d606216228..b4e75f88d291774a95942310253d304b4b23a187 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp @@ -46,6 +46,26 @@ int VM_Version::_dcache_line_size; int VM_Version::_icache_line_size; int VM_Version::_initial_sve_vector_length; +SpinWait VM_Version::_spin_wait; + +static SpinWait get_spin_wait_desc() { + if (strcmp(OnSpinWaitInst, "nop") == 0) { + return SpinWait(SpinWait::NOP, OnSpinWaitInstCount); + } else if (strcmp(OnSpinWaitInst, "isb") == 0) { + return SpinWait(SpinWait::ISB, OnSpinWaitInstCount); + } else if (strcmp(OnSpinWaitInst, "yield") == 0) { + return SpinWait(SpinWait::YIELD, OnSpinWaitInstCount); + } else if (strcmp(OnSpinWaitInst, "none") != 0) { + vm_exit_during_initialization("The options for OnSpinWaitInst are nop, isb, yield, and none", OnSpinWaitInst); + } + + 
if (!FLAG_IS_DEFAULT(OnSpinWaitInstCount) && OnSpinWaitInstCount > 0) { + vm_exit_during_initialization("OnSpinWaitInstCount cannot be used for OnSpinWaitInst 'none'"); + } + + return SpinWait{}; +} + void VM_Version::initialize() { _supports_cx8 = true; _supports_atomic_getset4 = true; @@ -182,6 +202,14 @@ void VM_Version::initialize() { if (FLAG_IS_DEFAULT(UseSIMDForMemoryOps)) { FLAG_SET_DEFAULT(UseSIMDForMemoryOps, true); } + + if (FLAG_IS_DEFAULT(OnSpinWaitInst)) { + FLAG_SET_DEFAULT(OnSpinWaitInst, "isb"); + } + + if (FLAG_IS_DEFAULT(OnSpinWaitInstCount)) { + FLAG_SET_DEFAULT(OnSpinWaitInstCount, 1); + } } if (_cpu == CPU_ARM) { @@ -451,5 +479,5 @@ void VM_Version::initialize() { } #endif - UNSUPPORTED_OPTION(CriticalJNINatives); + _spin_wait = get_spin_wait_desc(); } diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp index 6817eed08e95e3016e69e8658cfe24e16a29b091..61f422bd2d38b3348f0294c1a741c8ba059b95a5 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp @@ -26,6 +26,7 @@ #ifndef CPU_AARCH64_VM_VERSION_AARCH64_HPP #define CPU_AARCH64_VM_VERSION_AARCH64_HPP +#include "spin_wait_aarch64.hpp" #include "runtime/abstract_vm_version.hpp" #include "utilities/sizes.hpp" @@ -45,6 +46,8 @@ protected: static int _icache_line_size; static int _initial_sve_vector_length; + static SpinWait _spin_wait; + // Read additional info using OS-specific interfaces static void get_os_cpu_info(); @@ -142,6 +145,10 @@ public: static void get_compatible_board(char *buf, int buflen); + static const SpinWait& spin_wait_desc() { return _spin_wait; } + + static bool supports_on_spin_wait() { return _spin_wait.inst() != SpinWait::NONE; } + #ifdef __APPLE__ // Is the CPU running emulated (for example macOS Rosetta running x86_64 code on M1 ARM (aarch64) static bool is_cpu_emulated(); diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad index e9fc3d75bf9b56e8d014cbb121ae1f7de3f74aaa..9e76f430054ac8a1ccbe3a6324b77812d638d52c 100644 --- a/src/hotspot/cpu/arm/arm.ad +++ b/src/hotspot/cpu/arm/arm.ad @@ -983,6 +983,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType return ret_value; // Per default match rules are supported. 
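Putting the spin-wait pieces above together: `VM_Version::initialize()` turns the two diagnostic flags into a `SpinWait` descriptor, and `MacroAssembler::spin_wait()` then emits `inst_count()` copies of the chosen instruction. A stand-alone sketch of that expansion, with `puts` standing in for the assembler's `nop()`/`isb()`/`yield()` emitters:

```cpp
#include <cstdio>

enum class SpinInst { None, Nop, Isb, Yield };  // mirrors SpinWait::Inst

// Models MacroAssembler::spin_wait(): emit 'count' copies of 'inst'.
void emit_spin_wait(SpinInst inst, int count) {
  for (int i = 0; i < count; ++i) {
    switch (inst) {
      case SpinInst::Nop:   puts("nop");   break;
      case SpinInst::Isb:   puts("isb");   break;
      case SpinInst::Yield: puts("yield"); break;
      default:              break;  // None: the intrinsic is never generated
    }
  }
}

int main() {
  // e.g. -XX:+UnlockDiagnosticVMOptions -XX:OnSpinWaitInst=isb -XX:OnSpinWaitInstCount=2
  emit_spin_wait(SpinInst::Isb, 2);
  return 0;
}
```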
} +const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { + return false; +} + const RegMask* Matcher::predicate_reg_mask(void) { return NULL; } diff --git a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp index b0ace8d21f9d3f17bc8fd9ddac7342d1dd2edc0b..f7a9bb447c2ce7d92616aae11afd112247037a15 100644 --- a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp +++ b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp @@ -64,7 +64,7 @@ RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) } RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) - : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { + : _index(index), _array(), _throw_index_out_of_bounds_exception(true) { assert(info != NULL, "must have info"); _info = new CodeEmitInfo(info); } diff --git a/src/hotspot/cpu/arm/c1_FrameMap_arm.cpp b/src/hotspot/cpu/arm/c1_FrameMap_arm.cpp index bd74c3f83b486d7305f16cc6cfc6c4bce221f1bf..7eb4009be1136d863b4ab8b915086a8f8377ec66 100644 --- a/src/hotspot/cpu/arm/c1_FrameMap_arm.cpp +++ b/src/hotspot/cpu/arm/c1_FrameMap_arm.cpp @@ -66,8 +66,8 @@ LIR_Opr FrameMap::Double_result_opr; LIR_Opr FrameMap::Exception_oop_opr; LIR_Opr FrameMap::Exception_pc_opr; -LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0 }; -LIR_Opr FrameMap::_caller_save_fpu_regs[]; // same as initialize to zero +LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; +LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { LIR_Opr opr = LIR_OprFact::illegalOpr; diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp index 2ee7b68f72037323500dfa5883b02dd9e6e361c6..4d6ce557d0ab056e7946a1fbe4be896b65c5ba21 100644 --- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp @@ -1381,7 +1381,7 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { op->addr()->as_pointer_register() : op->addr()->as_address_ptr()->base()->as_pointer_register(); assert(op->addr()->is_register() || op->addr()->as_address_ptr()->disp() == 0, "unexpected disp"); - assert(op->addr()->is_register() || op->addr()->as_address_ptr()->index() == LIR_OprDesc::illegalOpr(), "unexpected index"); + assert(op->addr()->is_register() || op->addr()->as_address_ptr()->index() == LIR_Opr::illegalOpr(), "unexpected index"); if (op->code() == lir_cas_int || op->code() == lir_cas_obj) { Register cmpval = op->cmp_value()->as_register(); Register newval = op->new_value()->as_register(); diff --git a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp index cfde3451f5b01c4a10ee1e28988d60c6186b765d..0a5b80b23b7659ac35ad4c2cd2fbed4335379503 100644 --- a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp @@ -363,7 +363,7 @@ void LIRGenerator::set_card(LIR_Opr value, LIR_Address* card_addr) { } } -void LIRGenerator::CardTableBarrierSet_post_barrier_helper(LIR_OprDesc* addr, LIR_Const* card_table_base) { +void LIRGenerator::CardTableBarrierSet_post_barrier_helper(LIR_Opr addr, LIR_Const* card_table_base) { assert(addr->is_register(), "must be a register at this point"); LIR_Opr tmp = FrameMap::LR_ptr_opr; @@ -630,7 +630,7 @@ void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { } rlock_result(x); assert(right_arg->is_constant() || right_arg->is_register(), "wrong state of right"); - arithmetic_op_int(x->op(), x->operand(), 
left_arg->result(), right_arg->result(), NULL); + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::nullOpr); } } diff --git a/src/hotspot/cpu/arm/c1_LIR_arm.cpp b/src/hotspot/cpu/arm/c1_LIR_arm.cpp index 60bd5265bfb3339d6d581f01dd98eb107117e309..9d70fd12f3558e3a0bcae40ecbfb923a39dbe986 100644 --- a/src/hotspot/cpu/arm/c1_LIR_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIR_arm.cpp @@ -25,21 +25,21 @@ #include "precompiled.hpp" #include "c1/c1_LIR.hpp" -FloatRegister LIR_OprDesc::as_float_reg() const { +FloatRegister LIR_Opr::as_float_reg() const { return as_FloatRegister(fpu_regnr()); } -FloatRegister LIR_OprDesc::as_double_reg() const { +FloatRegister LIR_Opr::as_double_reg() const { return as_FloatRegister(fpu_regnrLo()); } LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { assert(as_FloatRegister(reg2) != fnoreg, "Arm32 holds double in two regs."); - return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | - (reg2 << LIR_OprDesc::reg2_shift) | - LIR_OprDesc::double_type | - LIR_OprDesc::fpu_register | - LIR_OprDesc::double_size); + return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | + (reg2 << LIR_Opr::reg2_shift) | + LIR_Opr::double_type | + LIR_Opr::fpu_register | + LIR_Opr::double_size); } #ifndef PRODUCT diff --git a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp index 09ae8e0e80461fad65ee6af49a2dddea66e251ff..d9bb8ab9cc417516054cd86b64d79dd5177f6744 100644 --- a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp +++ b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp @@ -750,8 +750,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, int compile_id, BasicType* in_sig_bt, VMRegPair* in_regs, - BasicType ret_type, - address critical_entry) { + BasicType ret_type) { if (method->is_method_handle_intrinsic()) { vmIntrinsics::ID iid = method->intrinsic_id(); intptr_t start = (intptr_t)__ pc(); @@ -777,20 +776,17 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Usage of Rtemp should be OK since scratched by native call - bool is_static = method->is_static(); + bool method_is_static = method->is_static(); const int total_in_args = method->size_of_parameters(); - int total_c_args = total_in_args + 1; - if (is_static) { - total_c_args++; - } + int total_c_args = total_in_args + (method_is_static ? 2 : 1); BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); int argc = 0; out_sig_bt[argc++] = T_ADDRESS; - if (is_static) { + if (method_is_static) { out_sig_bt[argc++] = T_OBJECT; } @@ -881,7 +877,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, OopMapSet* oop_maps = new OopMapSet(); OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); - const int extra_args = is_static ? 2 : 1; + const int extra_args = method_is_static ? 
2 : 1; int receiver_offset = -1; int fp_regs_in_arguments = 0; @@ -904,7 +900,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, int offset = oop_handle_offset * VMRegImpl::stack_slot_size; __ str(src->as_Register(), Address(SP, offset)); map->set_oop(VMRegImpl::stack2reg(oop_handle_offset)); - if ((i == 0) && (!is_static)) { + if ((i == 0) && (!method_is_static)) { receiver_offset = offset; } oop_handle_offset += VMRegImpl::slots_per_word; @@ -1116,7 +1112,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Get Klass mirror int klass_offset = -1; - if (is_static) { + if (method_is_static) { klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size; __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror())); __ add(c_rarg1, SP, klass_offset); @@ -1332,7 +1328,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, vep_offset, frame_complete, stack_slots / VMRegImpl::slots_per_word, - in_ByteSize(is_static ? klass_offset : receiver_offset), + in_ByteSize(method_is_static ? klass_offset : receiver_offset), in_ByteSize(lock_slot_offset * VMRegImpl::stack_slot_size), oop_maps); } diff --git a/src/hotspot/cpu/arm/vm_version_arm_32.cpp b/src/hotspot/cpu/arm/vm_version_arm_32.cpp index 73d64d02fe790027a3b3db27fcf488258b1c9b7b..a31857d9a47227a228f9bd65e3e0e37fdee59d8a 100644 --- a/src/hotspot/cpu/arm/vm_version_arm_32.cpp +++ b/src/hotspot/cpu/arm/vm_version_arm_32.cpp @@ -335,7 +335,6 @@ void VM_Version::initialize() { } UNSUPPORTED_OPTION(TypeProfileLevel); - UNSUPPORTED_OPTION(CriticalJNINatives); FLAG_SET_DEFAULT(TypeProfileLevel, 0); // unsupported diff --git a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp index 65df14c2c8a57f4373cab0038468dfbf5e37a167..4f329d2e63333accbc0d53651f2bc3367e0f7310 100644 --- a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp @@ -71,7 +71,7 @@ RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) } RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) - : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { + : _index(index), _array(), _throw_index_out_of_bounds_exception(true) { assert(info != NULL, "must have info"); _info = new CodeEmitInfo(info); } diff --git a/src/hotspot/cpu/ppc/c1_FrameMap_ppc.cpp b/src/hotspot/cpu/ppc/c1_FrameMap_ppc.cpp index 3c93cdde9177cbd03c221c1228d4916a912bdb1a..a776dbcc4ef58fca25f1cf7124a7465f05d8dec6 100644 --- a/src/hotspot/cpu/ppc/c1_FrameMap_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_FrameMap_ppc.cpp @@ -185,8 +185,8 @@ LIR_Opr FrameMap::R3_long_opr; LIR_Opr FrameMap::F1_opr; LIR_Opr FrameMap::F1_double_opr; -LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; -LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; +LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; +LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; FloatRegister FrameMap::nr2floatreg (int rnr) { assert(_init_done, "tables not initialized"); diff --git a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp index b4cd469406829d90e23c46ee956a2a4fb170bf5c..4aa35cf23fd099f6560b74773f485a020b620569 100644 --- a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp @@ -243,7 +243,7 @@ LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_o LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { - LIR_Opr r = NULL; + LIR_Opr r; if (type == T_LONG) 
{ r = LIR_OprFact::longConst(x); } else if (type == T_INT) { diff --git a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp index fb234e8298502c3a60998f8051439077ac4fee9a..d031aaa1e40614f03f3d68d898c041b5e5a64684 100644 --- a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp @@ -27,22 +27,22 @@ #include "asm/register.hpp" #include "c1/c1_LIR.hpp" -FloatRegister LIR_OprDesc::as_float_reg() const { +FloatRegister LIR_Opr::as_float_reg() const { return as_FloatRegister(fpu_regnr()); } -FloatRegister LIR_OprDesc::as_double_reg() const { +FloatRegister LIR_Opr::as_double_reg() const { return as_FloatRegister(fpu_regnrLo()); } // Reg2 unused. LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { assert(!as_FloatRegister(reg2)->is_valid(), "Not used on this platform"); - return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | - (reg1 << LIR_OprDesc::reg2_shift) | - LIR_OprDesc::double_type | - LIR_OprDesc::fpu_register | - LIR_OprDesc::double_size); + return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | + (reg1 << LIR_Opr::reg2_shift) | + LIR_Opr::double_type | + LIR_Opr::fpu_register | + LIR_Opr::double_size); } #ifndef PRODUCT diff --git a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp index 3758cc2fcf7627a618571efeb88589494092d0e5..c4b152a6db390401b0277eee08da2a14f1314f3d 100644 --- a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp @@ -151,6 +151,8 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Register t assert_different_registers(tmp, R0); + __ block_comment("nmethod_entry_barrier (nmethod_entry_barrier) {"); + // Load stub address using toc (fixed instruction size, unlike load_const_optimized) __ calculate_address_from_global_toc(tmp, StubRoutines::ppc::nmethod_entry_barrier(), true, true, false); // 2 instructions @@ -167,6 +169,8 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Register t // Oops may have been changed; exploiting isync semantics (used as acquire) to make those updates observable. __ isync(); + + __ block_comment("} nmethod_entry_barrier (nmethod_entry_barrier)"); } void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler *masm, Register tmp1, Register tmp2, Register tmp3) { @@ -177,6 +181,8 @@ void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler *masm, Register tmp1, assert_different_registers(tmp1, tmp2, tmp3); + __ block_comment("c2i_entry_barrier (c2i_entry_barrier) {"); + Register tmp1_class_loader_data = tmp1; Label bad_call, skip_barrier; @@ -207,4 +213,6 @@ void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler *masm, Register tmp1, __ bctr(); __ bind(skip_barrier); + + __ block_comment("} c2i_entry_barrier (c2i_entry_barrier)"); } diff --git a/src/hotspot/cpu/ppc/gc/shenandoah/c1/shenandoahBarrierSetC1_ppc.cpp b/src/hotspot/cpu/ppc/gc/shenandoah/c1/shenandoahBarrierSetC1_ppc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fc06e1b71e0b8fac8c899ec2a09594fe9d07aa18 --- /dev/null +++ b/src/hotspot/cpu/ppc/gc/shenandoah/c1/shenandoahBarrierSetC1_ppc.cpp @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. + * Copyright (c) 2012, 2021 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" +#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" + +#define __ masm->masm()-> + +void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler *masm) { + __ block_comment("LIR_OpShenandoahCompareAndSwap (shenandaohgc) {"); + + Register addr = _addr->as_register_lo(); + Register new_val = _new_value->as_register(); + Register cmp_val = _cmp_value->as_register(); + Register tmp1 = _tmp1->as_register(); + Register tmp2 = _tmp2->as_register(); + Register result = result_opr()->as_register(); + + if (ShenandoahIUBarrier) { + ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), new_val, tmp1, tmp2, + MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS); + } + + if (UseCompressedOops) { + __ encode_heap_oop(cmp_val, cmp_val); + __ encode_heap_oop(new_val, new_val); + } + + // Due to the memory barriers emitted in ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved, + // there is no need to specify stronger memory semantics. 
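The comment above captures the contract: `atomic_cmpxchg_at_resolved` (later in this file) brackets the operation with `membar_release`/`membar_acquire` (or full membars on IRIW-sensitive CPUs), so the CAS itself may be emitted with weak ordering. In C++11 terms the shape is roughly this loose analogue (not HotSpot code):

```cpp
#include <atomic>
#include <cstdint>

// Fences outside, relaxed CAS inside -- the same shape as the C1 code path.
bool cas_with_external_fences(std::atomic<intptr_t>& slot,
                              intptr_t expected, intptr_t desired) {
  std::atomic_thread_fence(std::memory_order_release);   // membar_release
  bool ok = slot.compare_exchange_strong(expected, desired,
                                         std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_acquire);   // membar_acquire
  return ok;
}
```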
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmp_val, new_val, tmp1, tmp2, + false, result); + + if (UseCompressedOops) { + __ decode_heap_oop(cmp_val); + __ decode_heap_oop(new_val); + } + + __ block_comment("} LIR_OpShenandoahCompareAndSwap (shenandaohgc)"); +} + +#undef __ + +#ifdef ASSERT +#define __ gen->lir(__FILE__, __LINE__)-> +#else +#define __ gen->lir()-> +#endif + +LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess &access, LIRItem &cmp_value, LIRItem &new_value) { + BasicType bt = access.type(); + + if (access.is_oop()) { + LIRGenerator* gen = access.gen(); + + if (ShenandoahCASBarrier) { + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ membar(); + } else { + __ membar_release(); + } + } + + if (ShenandoahSATBBarrier) { + pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(), + LIR_OprFact::illegalOpr); + } + + if (ShenandoahCASBarrier) { + cmp_value.load_item(); + new_value.load_item(); + + LIR_Opr t1 = gen->new_register(T_OBJECT); + LIR_Opr t2 = gen->new_register(T_OBJECT); + LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); + LIR_Opr result = gen->new_register(T_INT); + + __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), t1, t2, result)); + + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ membar_acquire(); + } else { + __ membar(); + } + + return result; + } + } + + return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value); +} + +LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess &access, LIRItem &value) { + LIRGenerator* gen = access.gen(); + BasicType type = access.type(); + + LIR_Opr result = gen->new_register(type); + value.load_item(); + LIR_Opr value_opr = value.result(); + + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ membar(); + } else { + __ membar_release(); + } + + if (access.is_oop()) { + value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators()); + } + + assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type"); + LIR_Opr tmp_xchg = gen->new_register(T_INT); + __ xchg(access.resolved_addr(), value_opr, result, tmp_xchg); + + if (access.is_oop()) { + result = load_reference_barrier_impl(access.gen(), result, LIR_OprFact::addressConst(0), + access.decorators()); + + LIR_Opr tmp_barrier = gen->new_register(type); + __ move(result, tmp_barrier); + result = tmp_barrier; + + if (ShenandoahSATBBarrier) { + pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, result); + } + } + + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ membar_acquire(); + } else { + __ membar(); + } + + return result; +} diff --git a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8c9955078ffad25eee3cd1110db324355e00d020 --- /dev/null +++ b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp @@ -0,0 +1,1012 @@ +/* + * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. + * Copyright (c) 2012, 2021 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "gc/shared/gcArguments.hpp" +#include "gc/shared/gc_globals.hpp" +#include "macroAssembler_ppc.hpp" +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" +#include "gc/shenandoah/shenandoahForwarding.hpp" +#include "gc/shenandoah/shenandoahHeap.hpp" +#include "gc/shenandoah/shenandoahHeap.inline.hpp" +#include "gc/shenandoah/shenandoahHeapRegion.hpp" +#include "gc/shenandoah/shenandoahRuntime.hpp" +#include "gc/shenandoah/shenandoahThreadLocalData.hpp" +#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" +#include "interpreter/interpreter.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.hpp" +#include "utilities/globalDefinitions.hpp" +#include "vm_version_ppc.hpp" + +#ifdef COMPILER1 + +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" + +#endif + +#define __ masm-> + +void ShenandoahBarrierSetAssembler::satb_write_barrier(MacroAssembler *masm, + Register base, RegisterOrConstant ind_or_offs, + Register tmp1, Register tmp2, Register tmp3, + MacroAssembler::PreservationLevel preservation_level) { + if (ShenandoahSATBBarrier) { + __ block_comment("satb_write_barrier (shenandoahgc) {"); + satb_write_barrier_impl(masm, 0, base, ind_or_offs, tmp1, tmp2, tmp3, preservation_level); + __ block_comment("} satb_write_barrier (shenandoahgc)"); + } +} + +void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler *masm, + Register val, + Register tmp1, Register tmp2, + MacroAssembler::PreservationLevel preservation_level, + DecoratorSet decorators) { + // IU barriers are also employed to avoid resurrection of weak references, + // even if Shenandoah does not operate in incremental update mode. 
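Both barrier flavours funnel into `satb_write_barrier_impl`, whose fast path appears further down: while marking is active, the relevant oop is pushed into a thread-local buffer that is handed to the runtime when full. A simplified C++ model of that queue discipline (types invented for illustration; as in the assembly, the buffer is filled downwards through a byte index):

```cpp
#include <cstddef>

typedef void* oop;  // illustrative stand-in for HotSpot's oop

struct SatbQueue {
  char*  _buf;    // buffer base
  size_t _index;  // remaining free bytes; 0 means the buffer is full
};

// Returns false when the slow path (commit the buffer and allocate a new
// one via a runtime call) must be taken instead.
bool try_enqueue(SatbQueue& q, oop pre_val) {
  if (q._index == 0) return false;          // full -> runtime
  q._index -= sizeof(oop);                  // filled downwards
  *reinterpret_cast<oop*>(q._buf + q._index) = pre_val;
  return true;
}
```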
+ if (ShenandoahIUBarrier || ShenandoahSATBBarrier) { + __ block_comment("iu_barrier (shenandoahgc) {"); + satb_write_barrier_impl(masm, decorators, noreg, noreg, val, tmp1, tmp2, preservation_level); + __ block_comment("} iu_barrier (shenandoahgc)"); + } +} + +void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler *masm, DecoratorSet decorators, + Register base, RegisterOrConstant ind_or_offs, + Register dst, + Register tmp1, Register tmp2, + MacroAssembler::PreservationLevel preservation_level) { + if (ShenandoahLoadRefBarrier) { + __ block_comment("load_reference_barrier (shenandoahgc) {"); + load_reference_barrier_impl(masm, decorators, base, ind_or_offs, dst, tmp1, tmp2, preservation_level); + __ block_comment("} load_reference_barrier (shenandoahgc)"); + } +} + +void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler *masm, DecoratorSet decorators, BasicType type, + Register src, Register dst, Register count, + Register preserve1, Register preserve2) { + __ block_comment("arraycopy_prologue (shenandoahgc) {"); + + Register R11_tmp = R11_scratch1; + + assert_different_registers(src, dst, count, R11_tmp, noreg); + if (preserve1 != noreg) { + // Technically not required, but likely to indicate an error. + assert_different_registers(preserve1, preserve2); + } + + /* ==== Check whether barrier is required (optimizations) ==== */ + // Fast path: Component type of array is not a reference type. + if (!is_reference_type(type)) { + return; + } + + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; + + // Fast path: No barrier required if for every barrier type, it is either disabled or would not store + // any useful information. + if ((!ShenandoahSATBBarrier || dest_uninitialized) && !ShenandoahIUBarrier && !ShenandoahLoadRefBarrier) { + return; + } + + Label skip_prologue; + + // Fast path: Array is of length zero. + __ cmpdi(CCR0, count, 0); + __ beq(CCR0, skip_prologue); + + /* ==== Check whether barrier is required (gc state) ==== */ + __ lbz(R11_tmp, in_bytes(ShenandoahThreadLocalData::gc_state_offset()), + R16_thread); + + // The set of garbage collection states requiring barriers depends on the available barrier types and the + // type of the reference in question. + // For instance, satb barriers may be skipped if it is certain that the overridden values are not relevant + // for the garbage collector. + const int required_states = ShenandoahSATBBarrier && dest_uninitialized + ? ShenandoahHeap::HAS_FORWARDED + : ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING; + + __ andi_(R11_tmp, R11_tmp, required_states); + __ beq(CCR0, skip_prologue); + + /* ==== Invoke runtime ==== */ + // Save to-be-preserved registers. + int highest_preserve_register_index = 0; + { + if (preserve1 != noreg && preserve1->is_volatile()) { + __ std(preserve1, -BytesPerWord * ++highest_preserve_register_index, R1_SP); + } + if (preserve2 != noreg && preserve2 != preserve1 && preserve2->is_volatile()) { + __ std(preserve2, -BytesPerWord * ++highest_preserve_register_index, R1_SP); + } + + __ std(src, -BytesPerWord * ++highest_preserve_register_index, R1_SP); + __ std(dst, -BytesPerWord * ++highest_preserve_register_index, R1_SP); + __ std(count, -BytesPerWord * ++highest_preserve_register_index, R1_SP); + + __ save_LR_CR(R11_tmp); + __ push_frame_reg_args(-BytesPerWord * highest_preserve_register_index, + R11_tmp); + } + + // Invoke runtime. 
+ address jrt_address = NULL; + if (UseCompressedOops) { + jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry); + } else { + jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry); + } + assert(jrt_address != nullptr, "jrt routine cannot be found"); + + __ call_VM_leaf(jrt_address, src, dst, count); + + // Restore to-be-preserved registers. + { + __ pop_frame(); + __ restore_LR_CR(R11_tmp); + + __ ld(count, -BytesPerWord * highest_preserve_register_index--, R1_SP); + __ ld(dst, -BytesPerWord * highest_preserve_register_index--, R1_SP); + __ ld(src, -BytesPerWord * highest_preserve_register_index--, R1_SP); + + if (preserve2 != noreg && preserve2 != preserve1 && preserve2->is_volatile()) { + __ ld(preserve2, -BytesPerWord * highest_preserve_register_index--, R1_SP); + } + if (preserve1 != noreg && preserve1->is_volatile()) { + __ ld(preserve1, -BytesPerWord * highest_preserve_register_index--, R1_SP); + } + } + + __ bind(skip_prologue); + __ block_comment("} arraycopy_prologue (shenandoahgc)"); +} + +// The to-be-enqueued value can either be determined +// - dynamically by passing the reference's address information (load mode) or +// - statically by passing a register the value is stored in (preloaded mode) +// - for performance optimizations in cases where the previous value is known (currently not implemented) and +// - for incremental-update barriers. +// +// decorators: The previous value's decorator set. +// In "load mode", the value must equal '0'. +// base: Base register of the reference's address (load mode). +// In "preloaded mode", the register must equal 'noreg'. +// ind_or_offs: Index or offset of the reference's address (load mode). +// If 'base' equals 'noreg' (preloaded mode), the passed value is ignored. +// pre_val: Register holding the to-be-stored value (preloaded mode). +// In "load mode", this register acts as a temporary register and must +// thus not be 'noreg'. In "preloaded mode", its content will be sustained. +// tmp1/tmp2: Temporary registers, one of which must be non-volatile in "preloaded mode". +void ShenandoahBarrierSetAssembler::satb_write_barrier_impl(MacroAssembler *masm, DecoratorSet decorators, + Register base, RegisterOrConstant ind_or_offs, + Register pre_val, + Register tmp1, Register tmp2, + MacroAssembler::PreservationLevel preservation_level) { + assert_different_registers(tmp1, tmp2, pre_val, noreg); + + Label skip_barrier; + + /* ==== Determine necessary runtime invocation preservation measures ==== */ + const bool needs_frame = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR; + const bool preserve_gp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS; + const bool preserve_fp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS; + + // Check whether marking is active. + __ lbz(tmp1, in_bytes(ShenandoahThreadLocalData::gc_state_offset()), R16_thread); + + __ andi_(tmp1, tmp1, ShenandoahHeap::MARKING); + __ beq(CCR0, skip_barrier); + + /* ==== Determine the reference's previous value ==== */ + bool preloaded_mode = base == noreg; + Register pre_val_save = noreg; + + if (preloaded_mode) { + // Previous value has been passed to the method, so it must not be determined manually. + // In case 'pre_val' is a volatile register, it must be saved across the C-call + // as callers may depend on its value. 
+    // Unless the general-purpose registers are saved anyway, one of the temporary registers
+    // (i.e., 'tmp1' and 'tmp2') is used to preserve 'pre_val'.
+    if (!preserve_gp_registers && pre_val->is_volatile()) {
+      pre_val_save = !tmp1->is_volatile() ? tmp1 : tmp2;
+      assert(!pre_val_save->is_volatile(), "at least one of the temporary registers must be non-volatile");
+    }
+
+    if ((decorators & IS_NOT_NULL) != 0) {
+#ifdef ASSERT
+      __ cmpdi(CCR0, pre_val, 0);
+      __ asm_assert_ne("null oop is not allowed");
+#endif // ASSERT
+    } else {
+      __ cmpdi(CCR0, pre_val, 0);
+      __ beq(CCR0, skip_barrier);
+    }
+  } else {
+    // Load from the reference address to determine the reference's current value (before the store is performed).
+    // Unlike the value passed in "preloaded mode", it does not need to be preserved.
+    assert(decorators == 0, "decorator set must be empty");
+    assert(base != noreg, "base must be a register");
+    assert(!ind_or_offs.is_register() || ind_or_offs.as_register() != noreg, "ind_or_offs must be a register");
+    if (UseCompressedOops) {
+      __ lwz(pre_val, ind_or_offs, base);
+    } else {
+      __ ld(pre_val, ind_or_offs, base);
+    }
+
+    __ cmpdi(CCR0, pre_val, 0);
+    __ beq(CCR0, skip_barrier);
+
+    if (UseCompressedOops) {
+      __ decode_heap_oop_not_null(pre_val);
+    }
+  }
+
+  /* ==== Try to enqueue the to-be-stored value directly into thread's local SATB mark queue ==== */
+  {
+    Label runtime;
+    Register Rbuffer = tmp1, Rindex = tmp2;
+
+    // Check whether the queue has enough capacity to store another oop.
+    // If not, jump to the runtime to commit the buffer and to allocate a new one.
+    // (The buffer's index corresponds to the amount of remaining free space.)
+    __ ld(Rindex, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()), R16_thread);
+    __ cmpdi(CCR0, Rindex, 0);
+    __ beq(CCR0, runtime); // If index == 0 (buffer is full), goto runtime.
+
+    // Capacity suffices. Decrement the queue's size by the size of one oop.
+    // (The buffer is filled contrary to the heap's growing direction, i.e., it is filled downwards.)
+    __ addi(Rindex, Rindex, -wordSize);
+    __ std(Rindex, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()), R16_thread);
+
+    // Enqueue the previous value and skip the invocation of the runtime.
+    __ ld(Rbuffer, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()), R16_thread);
+    __ stdx(pre_val, Rbuffer, Rindex);
+    __ b(skip_barrier);
+
+    __ bind(runtime);
+  }
+
+  /* ==== Invoke runtime to commit SATB mark queue to gc and allocate a new buffer ==== */
+  // Save to-be-preserved registers.
+  int nbytes_save = 0;
+
+  if (needs_frame) {
+    if (preserve_gp_registers) {
+      nbytes_save = (preserve_fp_registers
+                     ? MacroAssembler::num_volatile_gp_regs + MacroAssembler::num_volatile_fp_regs
+                     : MacroAssembler::num_volatile_gp_regs) * BytesPerWord;
+      __ save_volatile_gprs(R1_SP, -nbytes_save, preserve_fp_registers);
+    }
+
+    __ save_LR_CR(tmp1);
+    __ push_frame_reg_args(nbytes_save, tmp2);
+  }
+
+  if (!preserve_gp_registers && preloaded_mode && pre_val->is_volatile()) {
+    assert(pre_val_save != noreg, "pre_val_save must not be noreg");
+
+    // The 'pre_val' register must be saved manually unless the general-purpose registers are preserved anyway.
+    __ mr(pre_val_save, pre_val);
+  }
+
+  // Invoke runtime.
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, R16_thread);
+
+  // Restore to-be-preserved registers.
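+  // (Mirrors the save sequence above: 'pre_val' is first recovered from its non-volatile
+  // temporary, then the frame is popped and any saved volatile registers are reloaded.)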
+  if (!preserve_gp_registers && preloaded_mode && pre_val->is_volatile()) {
+    __ mr(pre_val, pre_val_save);
+  }
+
+  if (needs_frame) {
+    __ pop_frame();
+    __ restore_LR_CR(tmp1);
+
+    if (preserve_gp_registers) {
+      __ restore_volatile_gprs(R1_SP, -nbytes_save, preserve_fp_registers);
+    }
+  }
+
+  __ bind(skip_barrier);
+}
+
+void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler *masm, Register dst, Register tmp) {
+  __ block_comment("resolve_forward_pointer_not_null (shenandoahgc) {");
+
+  Register tmp1    = tmp,
+           R0_tmp2 = R0;
+  assert_different_registers(dst, tmp1, R0_tmp2, noreg);
+
+  // If the object has been evacuated, the mark word layout is as follows:
+  // | forwarding pointer (62-bit) | '11' (2-bit) |
+
+  // The invariant that stack/thread pointers have the lowest two bits cleared permits retrieving
+  // the forwarding pointer solely by inverting the lowest two bits.
+  // This invariant follows inevitably from hotspot's minimal alignment.
+  assert(markWord::marked_value <= (unsigned long) MinObjAlignmentInBytes,
+         "marked value must not be higher than hotspot's minimal alignment");
+
+  Label done;
+
+  // Load the object's mark word.
+  __ ld(tmp1, oopDesc::mark_offset_in_bytes(), dst);
+
+  // Load the bit mask for the lock bits.
+  __ li(R0_tmp2, markWord::lock_mask_in_place);
+
+  // Check whether all bits matching the bit mask are set.
+  // If that is the case, the object has been evacuated and the most significant bits form the forward pointer.
+  __ andc_(R0_tmp2, R0_tmp2, tmp1);
+
+  assert(markWord::lock_mask_in_place == markWord::marked_value,
+         "marked value must equal the value obtained when all lock bits are being set");
+  if (VM_Version::has_isel()) {
+    __ xori(tmp1, tmp1, markWord::lock_mask_in_place);
+    __ isel(dst, CCR0, Assembler::equal, false, tmp1);
+  } else {
+    __ bne(CCR0, done);
+    __ xori(dst, tmp1, markWord::lock_mask_in_place);
+  }
+
+  __ bind(done);
+  __ block_comment("} resolve_forward_pointer_not_null (shenandoahgc)");
+}
+
+// base:        Base register of the reference's address.
+// ind_or_offs: Index or offset of the reference's address (load mode).
+// dst:         The loaded reference itself (i.e., the address of the referenced object). In case the object
+//              has been evacuated, the barrier replaces it with the to-space version of that object.
+void ShenandoahBarrierSetAssembler::load_reference_barrier_impl(
+    MacroAssembler *masm, DecoratorSet decorators,
+    Register base, RegisterOrConstant ind_or_offs,
+    Register dst,
+    Register tmp1, Register tmp2,
+    MacroAssembler::PreservationLevel preservation_level) {
+  if (ind_or_offs.is_register()) {
+    assert_different_registers(tmp1, tmp2, base, ind_or_offs.as_register(), dst, noreg);
+  } else {
+    assert_different_registers(tmp1, tmp2, base, dst, noreg);
+  }
+
+  Label skip_barrier;
+
+  bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
+  bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
+  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
+  bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
+  bool is_narrow  = UseCompressedOops && !is_native;
+
+  /* ==== Check whether heap is stable ==== */
+  __ lbz(tmp2, in_bytes(ShenandoahThreadLocalData::gc_state_offset()), R16_thread);
+
+  if (is_strong) {
+    // For strong references, the heap is considered stable if "has forwarded" is not active.
+    __ andi_(tmp1, tmp2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::EVACUATION);
+    __ beq(CCR0, skip_barrier);
+#ifdef ASSERT
+    // "evacuation" -> (implies) "has forwarded". If we reach this code, "has forwarded" must thus be set.
+    __ andi_(tmp1, tmp1, ShenandoahHeap::HAS_FORWARDED);
+    __ asm_assert_ne("'has forwarded' is missing");
+#endif // ASSERT
+  } else {
+    // For all non-strong references, the heap is considered stable if none of "has forwarded",
+    // "root set processing", and "weak reference processing" is active.
+    // The additional phase conditions are in place to avoid the resurrection of weak references (see JDK-8266440).
+    Label skip_fastpath;
+    __ andi_(tmp1, tmp2, ShenandoahHeap::WEAK_ROOTS);
+    __ bne(CCR0, skip_fastpath);
+
+    __ andi_(tmp1, tmp2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::EVACUATION);
+    __ beq(CCR0, skip_barrier);
+#ifdef ASSERT
+    // "evacuation" -> (implies) "has forwarded". If we reach this code, "has forwarded" must thus be set.
+    __ andi_(tmp1, tmp1, ShenandoahHeap::HAS_FORWARDED);
+    __ asm_assert_ne("'has forwarded' is missing");
+#endif // ASSERT
+
+    __ bind(skip_fastpath);
+  }
+
+  /* ==== Check whether region is in collection set ==== */
+  if (is_strong) {
+    // Shenandoah stores metadata on regions in a contiguous area of memory in which a single byte corresponds to
+    // an entire region of the shenandoah heap. At present, only the least significant bit is of significance
+    // and indicates whether the region is part of the collection set.
+    //
+    // All regions are of the same size and are always aligned by a power of two.
+    // Any address can thus be shifted by a fixed number of bits to retrieve the address prefix shared by
+    // all objects within that region (region identification bits).
+    //
+    //  | unused bits | region identification bits | object identification bits |
+    //
+    // (Region size depends on a couple of criteria, such as page size, user-provided arguments and the max heap size.
+    //  The number of object identification bits thus cannot be determined at compile time.)
+    //
+    //  ------------------------------------------------------  <--- cs (collection set) base address
+    // | lost space due to heap space base address               -> 'ShenandoahHeap::in_cset_fast_test_addr()'
+    // | (region identification bits contain heap base offset)
+    // |------------------------------------------------------  <--- cs base address + (heap_base >> region size shift)
+    // | collection set proper                                   -> shift: 'region_size_bytes_shift_jint()'
+    // |
+    // |------------------------------------------------------  <--- cs base address + (heap_base >> region size shift)
+    //                                                               + number of regions
+    __ load_const_optimized(tmp2, ShenandoahHeap::in_cset_fast_test_addr(), tmp1);
+    __ srdi(tmp1, dst, ShenandoahHeapRegion::region_size_bytes_shift_jint());
+    __ lbzx(tmp2, tmp1, tmp2);
+    __ andi_(tmp2, tmp2, 1);
+    __ beq(CCR0, skip_barrier);
+  }
+
+  /* ==== Invoke runtime ==== */
+  // Save to-be-preserved registers.
+  int nbytes_save = 0;
+
+  const bool needs_frame           = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR;
+  const bool preserve_gp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS;
+  const bool preserve_fp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS;
+
+  if (needs_frame) {
+    if (preserve_gp_registers) {
+      nbytes_save = (preserve_fp_registers
+                     ?
MacroAssembler::num_volatile_gp_regs + MacroAssembler::num_volatile_fp_regs
+                     : MacroAssembler::num_volatile_gp_regs) * BytesPerWord;
+      __ save_volatile_gprs(R1_SP, -nbytes_save, preserve_fp_registers);
+    }
+
+    __ save_LR_CR(tmp1);
+    __ push_frame_reg_args(nbytes_save, tmp1);
+  }
+
+  // Calculate the reference's absolute address.
+  __ add(R4_ARG2, ind_or_offs, base);
+
+  // Invoke runtime.
+  address jrt_address = nullptr;
+
+  if (is_strong) {
+    if (is_narrow) {
+      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
+    } else {
+      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
+    }
+  } else if (is_weak) {
+    if (is_narrow) {
+      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
+    } else {
+      jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
+    }
+  } else {
+    assert(is_phantom, "only remaining strength");
+    assert(!is_narrow, "phantom access cannot be narrow");
+    jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
+  }
+  assert(jrt_address != nullptr, "jrt routine cannot be found");
+
+  __ call_VM_leaf(jrt_address, dst /* reference */, R4_ARG2 /* reference address */);
+
+  // Restore to-be-preserved registers.
+  if (preserve_gp_registers) {
+    __ mr(R0, R3_RET);
+  } else {
+    __ mr_if_needed(dst, R3_RET);
+  }
+
+  if (needs_frame) {
+    __ pop_frame();
+    __ restore_LR_CR(tmp1);
+
+    if (preserve_gp_registers) {
+      __ restore_volatile_gprs(R1_SP, -nbytes_save, preserve_fp_registers);
+      __ mr(dst, R0);
+    }
+  }
+
+  __ bind(skip_barrier);
+}
+
+// base:          Base register of the reference's address.
+// ind_or_offs:   Index or offset of the reference's address.
+// L_handle_null: An optional label that will be jumped to if the reference is null.
+void ShenandoahBarrierSetAssembler::load_at(
+    MacroAssembler *masm, DecoratorSet decorators, BasicType type,
+    Register base, RegisterOrConstant ind_or_offs, Register dst,
+    Register tmp1, Register tmp2,
+    MacroAssembler::PreservationLevel preservation_level, Label *L_handle_null) {
+  // Registers must not clash, except 'base' and 'dst'.
+  if (ind_or_offs.is_register()) {
+    if (base != noreg) {
+      assert_different_registers(tmp1, tmp2, base, ind_or_offs.register_or_noreg(), R0, noreg);
+    }
+    assert_different_registers(tmp1, tmp2, dst, ind_or_offs.register_or_noreg(), R0, noreg);
+  } else {
+    if (base != noreg) {
+      assert_different_registers(tmp1, tmp2, base, R0, noreg);
+    }
+    assert_different_registers(tmp1, tmp2, dst, R0, noreg);
+  }
+
+  /* ==== Apply load barrier, if required ==== */
+  if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
+    assert(is_reference_type(type), "need_load_reference_barrier must check whether type is a reference type");
+
+    // If 'dst' clashes with either 'base' or 'ind_or_offs', use an intermediate result register
+    // to keep the values of those alive until the load reference barrier is applied.
+    Register intermediate_dst = (dst == base || (ind_or_offs.is_register() && dst == ind_or_offs.as_register()))
+                                ?
tmp2 + : dst; + + BarrierSetAssembler::load_at(masm, decorators, type, + base, ind_or_offs, + intermediate_dst, + tmp1, noreg, + preservation_level, L_handle_null); + + load_reference_barrier(masm, decorators, + base, ind_or_offs, + intermediate_dst, + tmp1, R0, + preservation_level); + + __ mr_if_needed(dst, intermediate_dst); + } else { + BarrierSetAssembler::load_at(masm, decorators, type, + base, ind_or_offs, + dst, + tmp1, tmp2, + preservation_level, L_handle_null); + } + + /* ==== Apply keep-alive barrier, if required (e.g., to inhibit weak reference resurrection) ==== */ + if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { + iu_barrier(masm, dst, tmp1, tmp2, preservation_level); + } +} + +// base: Base register of the reference's address. +// ind_or_offs: Index or offset of the reference's address. +// val: To-be-stored value/reference's new value. +void ShenandoahBarrierSetAssembler::store_at(MacroAssembler *masm, DecoratorSet decorators, BasicType type, + Register base, RegisterOrConstant ind_or_offs, Register val, + Register tmp1, Register tmp2, Register tmp3, + MacroAssembler::PreservationLevel preservation_level) { + if (is_reference_type(type)) { + if (ShenandoahSATBBarrier) { + satb_write_barrier(masm, base, ind_or_offs, tmp1, tmp2, tmp3, preservation_level); + } + + if (ShenandoahIUBarrier && val != noreg) { + iu_barrier(masm, val, tmp1, tmp2, preservation_level, decorators); + } + } + + BarrierSetAssembler::store_at(masm, decorators, type, + base, ind_or_offs, + val, + tmp1, tmp2, tmp3, + preservation_level); +} + +void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler *masm, + Register dst, Register jni_env, Register obj, + Register tmp, Label &slowpath) { + __ block_comment("try_resolve_jobject_in_native (shenandoahgc) {"); + + assert_different_registers(jni_env, obj, tmp); + + Label done; + + // Fast path: Reference is null (JNI tags are zero for null pointers). + __ cmpdi(CCR0, obj, 0); + __ beq(CCR0, done); + + // Resolve jobject using standard implementation. + BarrierSetAssembler::try_resolve_jobject_in_native(masm, dst, jni_env, obj, tmp, slowpath); + + // Check whether heap is stable. + __ lbz(tmp, + in_bytes(ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()), + jni_env); + + __ andi_(tmp, tmp, ShenandoahHeap::EVACUATION | ShenandoahHeap::HAS_FORWARDED); + __ bne(CCR0, slowpath); + + __ bind(done); + __ block_comment("} try_resolve_jobject_in_native (shenandoahgc)"); +} + +// Special shenandoah CAS implementation that handles false negatives due +// to concurrent evacuation. That is, the CAS operation is intended to succeed in +// the following scenarios (success criteria): +// s1) The reference pointer ('base_addr') equals the expected ('expected') pointer. +// s2) The reference pointer refers to the from-space version of an already-evacuated +// object, whereas the expected pointer refers to the to-space version of the same object. +// Situations in which the reference pointer refers to the to-space version of an object +// and the expected pointer refers to the from-space version of the same object can not occur due to +// shenandoah's strong to-space invariant. This also implies that the reference stored in 'new_val' +// can not refer to the from-space version of an already-evacuated object. 
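+// Example for s2: let T be the to-space copy of an already-evacuated object and F its stale
+// from-space address. If 'expected' holds T while the field at 'base_addr' still holds F, a plain
+// CAS would fail even though both pointers denote the same object; the retry logic below resolves
+// F's forwarding pointer and reissues the CAS so that this case succeeds.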
+//
+// To guarantee correct behavior in concurrent environments, two races must be addressed:
+// r1) A concurrent thread may heal the reference pointer (i.e., it is no longer referring to the
+//     from-space version but to the to-space version of the object in question).
+//     In this case, the CAS operation should succeed.
+// r2) A concurrent thread may mutate the reference (i.e., the reference pointer refers to an entirely different object).
+//     In this case, the CAS operation should fail.
+//
+// By default, the value held in the 'result' register is zero to indicate failure of CAS,
+// non-zero to indicate success. If 'is_cae' is set, the result is the most recently fetched
+// value from 'base_addr' rather than a boolean success indicator.
+void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler *masm, Register base_addr,
+                                                Register expected, Register new_val, Register tmp1, Register tmp2,
+                                                bool is_cae, Register result) {
+  __ block_comment("cmpxchg_oop (shenandoahgc) {");
+
+  assert_different_registers(base_addr, new_val, tmp1, tmp2, result, R0);
+  assert_different_registers(base_addr, expected, tmp1, tmp2, result, R0);
+
+  // A potential clash of 'success_flag' and 'tmp' is accounted for.
+  Register success_flag  = is_cae ? noreg  : result,
+           current_value = is_cae ? result : tmp1,
+           tmp           = is_cae ? tmp1   : result,
+           initial_value = tmp2;
+
+  Label done, step_four;
+
+  __ bind(step_four);
+
+  /* ==== Step 1 ("Standard" CAS) ==== */
+  // Fast path: The values stored in 'expected' and 'base_addr' are equal.
+  // Given that 'expected' must refer to the to-space object of an evacuated object (strong to-space invariant),
+  // no special processing is required.
+  if (UseCompressedOops) {
+    __ cmpxchgw(CCR0, current_value, expected, new_val, base_addr, MacroAssembler::MemBarNone,
+                false, success_flag, true);
+  } else {
+    __ cmpxchgd(CCR0, current_value, expected, new_val, base_addr, MacroAssembler::MemBarNone,
+                false, success_flag, NULL, true);
+  }
+
+  // Skip the rest of the barrier if the CAS operation succeeds immediately.
+  // If it does not, the value stored at the address is either the from-space pointer of the
+  // referenced object (success criterion s2) or simply another object.
+  __ beq(CCR0, done);
+
+  /* ==== Step 2 (Null check) ==== */
+  // The success criterion s2 cannot be matched with a null pointer
+  // (null pointers cannot be subject to concurrent evacuation). The failure of the CAS operation is thus legitimate.
+  __ cmpdi(CCR0, current_value, 0);
+  __ beq(CCR0, done);
+
+  /* ==== Step 3 (reference pointer refers to from-space version; success criterion s2) ==== */
+  // To check whether the reference pointer refers to the from-space version, the forward
+  // pointer of the object referred to by the reference is resolved and compared against the expected pointer.
+  // If this check succeeds, another CAS operation is issued with the from-space pointer being the expected pointer.
+  //
+  // Save the potential from-space pointer.
+  __ mr(initial_value, current_value);
+
+  // Resolve forward pointer.
+  if (UseCompressedOops) { __ decode_heap_oop_not_null(current_value); }
+  resolve_forward_pointer_not_null(masm, current_value, tmp);
+  if (UseCompressedOops) { __ encode_heap_oop_not_null(current_value); }
+
+  if (!is_cae) {
+    // 'success_flag' was overwritten by the call to 'resolve_forward_pointer_not_null'.
+    // Load zero into the register for the potential failure case.
+    __ li(success_flag, 0);
+  }
+  __ cmpd(CCR0, current_value, expected);
+  __ bne(CCR0, done);
+
+  // Discard the fetched value as it might be a reference to the from-space version of an object.
+  if (UseCompressedOops) {
+    __ cmpxchgw(CCR0, R0, initial_value, new_val, base_addr, MacroAssembler::MemBarNone,
+                false, success_flag);
+  } else {
+    __ cmpxchgd(CCR0, R0, initial_value, new_val, base_addr, MacroAssembler::MemBarNone,
+                false, success_flag);
+  }
+
+  /* ==== Step 4 (Retry CAS with to-space pointer; success criterion s2 under race r1) ==== */
+  // The reference pointer could have been healed whilst the previous CAS operation was being performed.
+  // Another CAS operation must thus be issued with the to-space pointer being the expected pointer.
+  // If that CAS operation fails as well, race r2 must have occurred, indicating that
+  // the operation failure is legitimate.
+  //
+  // To keep the code's size small and thus improve instruction cache (icache) performance, this highly
+  // unlikely case should be handled by the smallest possible code. Instead of emitting a third,
+  // explicit CAS operation, the code jumps back and reuses the first CAS operation (step 1)
+  // (passed arguments are identical).
+  //
+  // A failure of the CAS operation in step 1 would imply that the overall CAS operation is supposed
+  // to fail. Jumping back to step 1 requires, however, that step 2 and step 3 are re-executed as well.
+  // It is thus important to ensure that a re-execution of those steps does not put program correctness
+  // at risk:
+  // - Step 2: Either terminates in failure (desired result) or falls through to step 3.
+  // - Step 3: Terminates if the comparison between the forwarded, fetched pointer and the expected value
+  //           fails. Unless the reference has been updated once again in the meantime, this is
+  //           guaranteed to be the case.
+  //           In case of a concurrent update, the CAS would be retried again. This is legitimate
+  //           in terms of program correctness (even though it is not desired).
+  __ bne(CCR0, step_four);
+
+  __ bind(done);
+  __ block_comment("} cmpxchg_oop (shenandoahgc)");
+}
+
+#undef __
+
+#ifdef COMPILER1
+
+#define __ ce->masm()->
+
+void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler *ce, ShenandoahPreBarrierStub *stub) {
+  __ block_comment("gen_pre_barrier_stub (shenandoahgc) {");
+
+  ShenandoahBarrierSetC1 *bs = (ShenandoahBarrierSetC1*) BarrierSet::barrier_set()->barrier_set_c1();
+  __ bind(*stub->entry());
+
+  // GC status has already been verified by 'ShenandoahBarrierSetC1::pre_barrier'.
+  // This stub is the slowpath of that function.
+
+  assert(stub->pre_val()->is_register(), "pre_val must be a register");
+  Register pre_val = stub->pre_val()->as_register();
+
+  // If 'do_load()' returns false, the to-be-stored value is already available in 'stub->pre_val()'
+  // ("preloaded mode" of the store barrier).
+  if (stub->do_load()) {
+    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false);
+  }
+
+  // Fast path: Reference is null.
+  __ cmpdi(CCR0, pre_val, 0);
+  __ bc_far_optimized(Assembler::bcondCRbiIs1_bhintNoHint, __ bi0(CCR0, Assembler::equal), *stub->continuation());
+
+  // Argument passing via the stack.
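+  // ('pre_val' is spilled to a fixed slot below R1_SP; the runtime stub generated by
+  // 'generate_c1_pre_barrier_runtime_stub' reloads it from the same slot.)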
+ __ std(pre_val, -8, R1_SP); + + __ load_const_optimized(R0, bs->pre_barrier_c1_runtime_code_blob()->code_begin()); + __ call_stub(R0); + + __ b(*stub->continuation()); + __ block_comment("} gen_pre_barrier_stub (shenandoahgc)"); +} + +void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler *ce, + ShenandoahLoadReferenceBarrierStub *stub) { + __ block_comment("gen_load_reference_barrier_stub (shenandoahgc) {"); + + ShenandoahBarrierSetC1 *bs = (ShenandoahBarrierSetC1*) BarrierSet::barrier_set()->barrier_set_c1(); + __ bind(*stub->entry()); + + Register obj = stub->obj()->as_register(); + Register res = stub->result()->as_register(); + Register addr = stub->addr()->as_pointer_register(); + Register tmp1 = stub->tmp1()->as_register(); + Register tmp2 = stub->tmp2()->as_register(); + assert_different_registers(addr, res, tmp1, tmp2); + +#ifdef ASSERT + // Ensure that 'res' is 'R3_ARG1' and contains the same value as 'obj' to reduce the number of required + // copy instructions. + assert(R3_RET == res, "res must be r3"); + __ cmpd(CCR0, res, obj); + __ asm_assert_eq("result register must contain the reference stored in obj"); +#endif + + DecoratorSet decorators = stub->decorators(); + + /* ==== Check whether region is in collection set ==== */ + // GC status (unstable) has already been verified by 'ShenandoahBarrierSetC1::load_reference_barrier_impl'. + // This stub is the slowpath of that function. + + bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); + bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); + bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); + bool is_native = ShenandoahBarrierSet::is_native_access(decorators); + + if (is_strong) { + // Check whether object is in collection set. + __ load_const_optimized(tmp2, ShenandoahHeap::in_cset_fast_test_addr(), tmp1); + __ srdi(tmp1, obj, ShenandoahHeapRegion::region_size_bytes_shift_jint()); + __ lbzx(tmp2, tmp1, tmp2); + + __ andi_(tmp2, tmp2, 1); + __ bc_far_optimized(Assembler::bcondCRbiIs1_bhintNoHint, __ bi0(CCR0, Assembler::equal), *stub->continuation()); + } + + address blob_addr = nullptr; + + if (is_strong) { + if (is_native) { + blob_addr = bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin(); + } else { + blob_addr = bs->load_reference_barrier_strong_rt_code_blob()->code_begin(); + } + } else if (is_weak) { + blob_addr = bs->load_reference_barrier_weak_rt_code_blob()->code_begin(); + } else { + assert(is_phantom, "only remaining strength"); + blob_addr = bs->load_reference_barrier_phantom_rt_code_blob()->code_begin(); + } + + assert(blob_addr != nullptr, "code blob cannot be found"); + + // Argument passing via the stack. 'obj' is passed implicitly (as asserted above). + __ std(addr, -8, R1_SP); + + __ load_const_optimized(tmp1, blob_addr, tmp2); + __ call_stub(tmp1); + + // 'res' is 'R3_RET'. The result is thus already in the correct register. + + __ b(*stub->continuation()); + __ block_comment("} gen_load_reference_barrier_stub (shenandoahgc)"); +} + +#undef __ + +#define __ sasm-> + +void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler *sasm) { + __ block_comment("generate_c1_pre_barrier_runtime_stub (shenandoahgc) {"); + + Label runtime, skip_barrier; + BarrierSet *bs = BarrierSet::barrier_set(); + + // Argument passing via the stack. 
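+  // Three caller slots are live below R1_SP: the 'pre_val' argument at -8 and the two
+  // scratch-register spill slots at -16 and -24 established below.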
+ const int caller_stack_slots = 3; + + Register R0_pre_val = R0; + __ ld(R0, -8, R1_SP); + Register R11_tmp1 = R11_scratch1; + __ std(R11_tmp1, -16, R1_SP); + Register R12_tmp2 = R12_scratch2; + __ std(R12_tmp2, -24, R1_SP); + + /* ==== Check whether marking is active ==== */ + // Even though gc status was checked in 'ShenandoahBarrierSetAssembler::gen_pre_barrier_stub', + // another check is required as a safepoint might have been reached in the meantime (JDK-8140588). + __ lbz(R12_tmp2, in_bytes(ShenandoahThreadLocalData::gc_state_offset()), R16_thread); + + __ andi_(R12_tmp2, R12_tmp2, ShenandoahHeap::MARKING); + __ beq(CCR0, skip_barrier); + + /* ==== Add previous value directly to thread-local SATB mark queue ==== */ + // Check queue's capacity. Jump to runtime if no free slot is available. + __ ld(R12_tmp2, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()), R16_thread); + __ cmpdi(CCR0, R12_tmp2, 0); + __ beq(CCR0, runtime); + + // Capacity suffices. Decrement the queue's size by one slot (size of one oop). + __ addi(R12_tmp2, R12_tmp2, -wordSize); + __ std(R12_tmp2, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()), R16_thread); + + // Enqueue the previous value and skip the runtime invocation. + __ ld(R11_tmp1, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()), R16_thread); + __ stdx(R0_pre_val, R11_tmp1, R12_tmp2); + __ b(skip_barrier); + + __ bind(runtime); + + /* ==== Invoke runtime to commit SATB mark queue to gc and allocate a new buffer ==== */ + // Save to-be-preserved registers. + const int nbytes_save = (MacroAssembler::num_volatile_regs + caller_stack_slots) * BytesPerWord; + __ save_volatile_gprs(R1_SP, -nbytes_save); + __ save_LR_CR(R11_tmp1); + __ push_frame_reg_args(nbytes_save, R11_tmp1); + + // Invoke runtime. + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), R0_pre_val, R16_thread); + + // Restore to-be-preserved registers. + __ pop_frame(); + __ restore_LR_CR(R11_tmp1); + __ restore_volatile_gprs(R1_SP, -nbytes_save); + + __ bind(skip_barrier); + + // Restore spilled registers. + __ ld(R11_tmp1, -16, R1_SP); + __ ld(R12_tmp2, -24, R1_SP); + + __ blr(); + __ block_comment("} generate_c1_pre_barrier_runtime_stub (shenandoahgc)"); +} + +void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler *sasm, + DecoratorSet decorators) { + __ block_comment("generate_c1_load_reference_barrier_runtime_stub (shenandoahgc) {"); + + // Argument passing via the stack. + const int caller_stack_slots = 1; + + // Save to-be-preserved registers. + const int nbytes_save = (MacroAssembler::num_volatile_regs - 1 // 'R3_ARG1' is skipped + + caller_stack_slots) * BytesPerWord; + __ save_volatile_gprs(R1_SP, -nbytes_save, true, false); + + // Load arguments from stack. + // No load required, as assured by assertions in 'ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub'. 
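+  // ('obj' already resides in R3_ARG1, which 'gen_load_reference_barrier_stub' asserted to be
+  // its result register; only the load address has to be fetched from the caller's stack slot.)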
+ Register R3_obj = R3_ARG1; + Register R4_load_addr = R4_ARG2; + __ ld(R4_load_addr, -8, R1_SP); + + Register R11_tmp = R11_scratch1; + + /* ==== Invoke runtime ==== */ + bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); + bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); + bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); + bool is_native = ShenandoahBarrierSet::is_native_access(decorators); + + address jrt_address = NULL; + + if (is_strong) { + if (is_native) { + jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong); + } else { + if (UseCompressedOops) { + jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow); + } else { + jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong); + } + } + } else if (is_weak) { + assert(!is_native, "weak load reference barrier must not be called off-heap"); + if (UseCompressedOops) { + jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow); + } else { + jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak); + } + } else { + assert(is_phantom, "reference type must be phantom"); + assert(is_native, "phantom load reference barrier must be called off-heap"); + jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom); + } + assert(jrt_address != NULL, "load reference barrier runtime routine cannot be found"); + + __ save_LR_CR(R11_tmp); + __ push_frame_reg_args(nbytes_save, R11_tmp); + + // Invoke runtime. Arguments are already stored in the corresponding registers. + __ call_VM_leaf(jrt_address, R3_obj, R4_load_addr); + + // Restore to-be-preserved registers. + __ pop_frame(); + __ restore_LR_CR(R11_tmp); + __ restore_volatile_gprs(R1_SP, -nbytes_save, true, false); // Skip 'R3_RET' register. + + __ blr(); + __ block_comment("} generate_c1_load_reference_barrier_runtime_stub (shenandoahgc)"); +} + +#undef __ + +#endif // COMPILER1 diff --git a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cf55f505b22070712b9c2b4f77bfc78000896d76 --- /dev/null +++ b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.hpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. + * Copyright (c) 2012, 2021 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_PPC_HPP +#define CPU_PPC_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_PPC_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shenandoah/shenandoahBarrierSet.hpp" + +#ifdef COMPILER1 + +class LIR_Assembler; +class ShenandoahPreBarrierStub; +class ShenandoahLoadReferenceBarrierStub; +class StubAssembler; + +#endif + +class StubCodeGenerator; + +class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { +private: + + /* ==== Actual barrier implementations ==== */ + void satb_write_barrier_impl(MacroAssembler* masm, DecoratorSet decorators, + Register base, RegisterOrConstant ind_or_offs, + Register pre_val, + Register tmp1, Register tmp2, + MacroAssembler::PreservationLevel preservation_level); + + void load_reference_barrier_impl(MacroAssembler* masm, DecoratorSet decorators, + Register base, RegisterOrConstant ind_or_offs, + Register dst, + Register tmp1, Register tmp2, + MacroAssembler::PreservationLevel preservation_level); + + /* ==== Helper methods for barrier implementations ==== */ + void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp); + +public: + + /* ==== C1 stubs ==== */ +#ifdef COMPILER1 + + void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); + + void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); + + void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); + + void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); + +#endif + + /* ==== Available barriers (facades of the actual implementations) ==== */ + void satb_write_barrier(MacroAssembler* masm, + Register base, RegisterOrConstant ind_or_offs, + Register tmp1, Register tmp2, Register tmp3, + MacroAssembler::PreservationLevel preservation_level); + + void iu_barrier(MacroAssembler* masm, + Register val, + Register tmp1, Register tmp2, + MacroAssembler::PreservationLevel preservation_level, DecoratorSet decorators = 0); + + void load_reference_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register base, RegisterOrConstant ind_or_offs, + Register dst, + Register tmp1, Register tmp2, + MacroAssembler::PreservationLevel preservation_level); + + /* ==== Helper methods used by C1 and C2 ==== */ + void cmpxchg_oop(MacroAssembler* masm, Register base_addr, Register expected, Register new_val, + Register tmp1, Register tmp2, + bool is_cae, Register result); + + /* ==== Access api ==== */ + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register src, Register dst, Register count, Register preserve1, Register preserve2); + + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register base, RegisterOrConstant ind_or_offs, Register val, + Register tmp1, Register tmp2, Register tmp3, + MacroAssembler::PreservationLevel preservation_level); + + virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register base, RegisterOrConstant ind_or_offs, Register dst, + Register tmp1, Register tmp2, + MacroAssembler::PreservationLevel preservation_level, Label* L_handle_null = NULL); + + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register dst, Register 
jni_env, + Register obj, Register tmp, Label& slowpath); +}; + +#endif // CPU_PPC_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_PPC_HPP diff --git a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoah_ppc.ad b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoah_ppc.ad new file mode 100644 index 0000000000000000000000000000000000000000..4825ca9cf81cdf4bfb1aaf4ffbe6ab87ddcc2d9b --- /dev/null +++ b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoah_ppc.ad @@ -0,0 +1,217 @@ +// +// Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. +// Copyright (c) 2012, 2021 SAP SE. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +source_hpp %{ +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" +%} + +// Weak compareAndSwap operations are treated as strong compareAndSwap operations. +// This is motivated by the retry logic of ShenandoahBarrierSetAssembler::cmpxchg_oop which is hard to realise +// using weak CAS operations. 
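+//
+// Each operation is provided in a plain and an acquire ("acq") variant, selected via the node's
+// memory order: the acquire variants additionally emit a trailing isync when
+// 'support_IRIW_for_not_multiple_copy_atomic_cpu' is set, and a full sync otherwise.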
+ +instruct compareAndSwapP_shenandoah(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, + iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr) %{ + match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); + match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr); + + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire + && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst); + + format %{ "CMPXCHG $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + ShenandoahBarrierSet::assembler()->cmpxchg_oop( + &_masm, + $mem$$Register, $oldval$$Register, $newval$$Register, + $tmp1$$Register, $tmp2$$Register, + false, $res$$Register + ); + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndSwapN_shenandoah(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, + iRegNdst tmp1, iRegNdst tmp2, flagsRegCR0 cr) %{ + match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); + match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr); + + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire + && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst); + + format %{ "CMPXCHG $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + ShenandoahBarrierSet::assembler()->cmpxchg_oop( + &_masm, + $mem$$Register, $oldval$$Register, $newval$$Register, + $tmp1$$Register, $tmp2$$Register, + false, $res$$Register + ); + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndSwapP_acq_shenandoah(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, + iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr) %{ + match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); + match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr); + + predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire + || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst); + + format %{ "CMPXCHGD acq $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + ShenandoahBarrierSet::assembler()->cmpxchg_oop( + &_masm, + $mem$$Register, $oldval$$Register, $newval$$Register, + $tmp1$$Register, $tmp2$$Register, + false, $res$$Register + ); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndSwapN_acq_shenandoah(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, + iRegNdst tmp1, iRegNdst tmp2, flagsRegCR0 cr) %{ + match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); + match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr); + + predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire + || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst); + + format %{ "CMPXCHGD acq $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + ShenandoahBarrierSet::assembler()->cmpxchg_oop( + &_masm, + $mem$$Register, $oldval$$Register, $newval$$Register, + $tmp1$$Register, $tmp2$$Register, + false, $res$$Register + ); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndExchangeP_shenandoah(iRegPdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, + iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr) %{ + match(Set 
res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr); + + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire + && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst); + + format %{ "CMPXCHGD $res, $mem, $oldval, $newval; as ptr; ptr" %} + ins_encode %{ + ShenandoahBarrierSet::assembler()->cmpxchg_oop( + &_masm, + $mem$$Register, $oldval$$Register, $newval$$Register, + $tmp1$$Register, $tmp2$$Register, + true, $res$$Register + ); + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndExchangeN_shenandoah(iRegNdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, + iRegNdst tmp1, iRegNdst tmp2, flagsRegCR0 cr) %{ + match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr); + + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire + && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst); + + format %{ "CMPXCHGD $res, $mem, $oldval, $newval; as ptr; ptr" %} + ins_encode %{ + ShenandoahBarrierSet::assembler()->cmpxchg_oop( + &_masm, + $mem$$Register, $oldval$$Register, $newval$$Register, + $tmp1$$Register, $tmp2$$Register, + true, $res$$Register + ); + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndExchangePAcq_shenandoah(iRegPdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, + iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr) %{ + match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr); + + predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire + || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst); + + format %{ "CMPXCHGD acq $res, $mem, $oldval, $newval; as ptr; ptr" %} + ins_encode %{ + ShenandoahBarrierSet::assembler()->cmpxchg_oop( + &_masm, + $mem$$Register, $oldval$$Register, $newval$$Register, + $tmp1$$Register, $tmp2$$Register, + true, $res$$Register + ); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndExchangeNAcq_shenandoah(iRegNdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, + iRegNdst tmp1, iRegNdst tmp2, flagsRegCR0 cr) %{ + match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr); + + predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire + || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst); + + format %{ "CMPXCHGD acq $res, $mem, $oldval, $newval; as ptr; ptr" %} + ins_encode %{ + ShenandoahBarrierSet::assembler()->cmpxchg_oop( + &_masm, + $mem$$Register, $oldval$$Register, $newval$$Register, + $tmp1$$Register, $tmp2$$Register, + true, $res$$Register + ); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} diff --git a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp index e2ff1bf53ae8052310663a8b18b44c7313f161e5..bd23a62083cdff26fb56e83f2f29433202366d2a 100644 --- a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp @@ -33,8 +33,7 @@ #ifdef COMPILER1 class LIR_Assembler; -class LIR_OprDesc; -typedef LIR_OprDesc* LIR_Opr; +class LIR_Opr; class StubAssembler; class ZLoadBarrierStubC1; #endif // COMPILER1 diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index 
f9aa60ca15a9bc404ac6f095e89a691fdf773e7d..958059e1ca254ad9218d7134b81eb6e517cf7c77 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -2177,6 +2177,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType return true; // Per default match rules are supported. } +const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { + return false; +} + const RegMask* Matcher::predicate_reg_mask(void) { return NULL; } diff --git a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp index 377df777511573212b9ea114e848d3bb8fef7942..a834fa1af36f2fc617c29b1fafc939ea1295bc85 100644 --- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp +++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp @@ -1533,57 +1533,6 @@ void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_ty } } -static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst, Register r_caller_sp, Register r_temp) { - if (src.first()->is_stack()) { - if (dst.first()->is_stack()) { - // stack to stack - __ ld(r_temp, reg2offset(src.first()), r_caller_sp); - __ std(r_temp, reg2offset(dst.first()), R1_SP); - } else { - // stack to reg - __ ld(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp); - } - } else if (dst.first()->is_stack()) { - // reg to stack - __ std(src.first()->as_Register(), reg2offset(dst.first()), R1_SP); - } else { - if (dst.first() != src.first()) { - __ mr(dst.first()->as_Register(), src.first()->as_Register()); - } - } -} - -// Unpack an array argument into a pointer to the body and the length -// if the array is non-null, otherwise pass 0 for both. -static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, - VMRegPair body_arg, VMRegPair length_arg, Register r_caller_sp, - Register tmp_reg, Register tmp2_reg) { - assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg, - "possible collision"); - assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg, - "possible collision"); - - // Pass the length, ptr pair. - Label set_out_args; - VMRegPair tmp, tmp2; - tmp.set_ptr(tmp_reg->as_VMReg()); - tmp2.set_ptr(tmp2_reg->as_VMReg()); - if (reg.first()->is_stack()) { - // Load the arg up from the stack. - move_ptr(masm, reg, tmp, r_caller_sp, /*unused*/ R0); - reg = tmp; - } - __ li(tmp2_reg, 0); // Pass zeros if Array=null. - if (tmp_reg != reg.first()->as_Register()) __ li(tmp_reg, 0); - __ cmpdi(CCR0, reg.first()->as_Register(), 0); - __ beq(CCR0, set_out_args); - __ lwa(tmp2_reg, arrayOopDesc::length_offset_in_bytes(), reg.first()->as_Register()); - __ addi(tmp_reg, reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type)); - __ bind(set_out_args); - move_ptr(masm, tmp, body_arg, r_caller_sp, /*unused*/ R0); - move_ptr(masm, tmp2, length_arg, r_caller_sp, /*unused*/ R0); // Same as move32_64 on PPC64. 
-} - static void verify_oop_args(MacroAssembler* masm, const methodHandle& method, const BasicType* sig_bt, @@ -1685,8 +1634,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, int compile_id, BasicType *in_sig_bt, VMRegPair *in_regs, - BasicType ret_type, - address critical_entry) { + BasicType ret_type) { if (method->is_method_handle_intrinsic()) { vmIntrinsics::ID iid = method->intrinsic_id(); intptr_t start = (intptr_t)__ pc(); @@ -1709,12 +1657,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, (OopMapSet*)NULL); } - bool is_critical_native = true; - address native_func = critical_entry; - if (native_func == NULL) { - native_func = method->native_function(); - is_critical_native = false; - } + address native_func = method->native_function(); assert(native_func != NULL, "must have function"); // First, create signature for outgoing C call @@ -1733,19 +1676,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // some floating-point arguments must be passed in registers _and_ // in stack locations. bool method_is_static = method->is_static(); - int total_c_args = total_in_args; - - if (!is_critical_native) { - int n_hidden_args = method_is_static ? 2 : 1; - total_c_args += n_hidden_args; - } else { - // No JNIEnv*, no this*, but unpacked arrays (base+length). - for (int i = 0; i < total_in_args; i++) { - if (in_sig_bt[i] == T_ARRAY) { - total_c_args++; - } - } - } + int total_c_args = total_in_args + (method_is_static ? 2 : 1); BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); VMRegPair *out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); @@ -1759,44 +1690,13 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // hidden arguments). int argc = 0; - if (!is_critical_native) { - out_sig_bt[argc++] = T_ADDRESS; - if (method->is_static()) { - out_sig_bt[argc++] = T_OBJECT; - } - - for (int i = 0; i < total_in_args ; i++ ) { - out_sig_bt[argc++] = in_sig_bt[i]; - } - } else { - in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); - SignatureStream ss(method->signature()); - int o = 0; - for (int i = 0; i < total_in_args ; i++, o++) { - if (in_sig_bt[i] == T_ARRAY) { - // Arrays are passed as int, elem* pair - ss.skip_array_prefix(1); // skip one '[' - assert(ss.is_primitive(), "primitive type expected"); - in_elem_bt[o] = ss.type(); - } else { - in_elem_bt[o] = T_VOID; - } - if (in_sig_bt[i] != T_VOID) { - assert(in_sig_bt[i] == ss.type() || - in_sig_bt[i] == T_ARRAY, "must match"); - ss.next(); - } - } + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } - for (int i = 0; i < total_in_args ; i++ ) { - if (in_sig_bt[i] == T_ARRAY) { - // Arrays are passed as int, elem* pair. 
- out_sig_bt[argc++] = T_INT; - out_sig_bt[argc++] = T_ADDRESS; - } else { - out_sig_bt[argc++] = in_sig_bt[i]; - } - } + for (int i = 0; i < total_in_args ; i++ ) { + out_sig_bt[argc++] = in_sig_bt[i]; } @@ -1823,7 +1723,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // // NW [ABI_REG_ARGS] <-- 1) R1_SP // [outgoing arguments] <-- 2) R1_SP + out_arg_slot_offset - // [oopHandle area] <-- 3) R1_SP + oop_handle_offset (save area for critical natives) + // [oopHandle area] <-- 3) R1_SP + oop_handle_offset // klass <-- 4) R1_SP + klass_offset // lock <-- 5) R1_SP + lock_offset // [workspace] <-- 6) R1_SP + workspace_offset @@ -1838,42 +1738,13 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // Now the space for the inbound oop handle area. int total_save_slots = num_java_iarg_registers * VMRegImpl::slots_per_word; - if (is_critical_native) { - // Critical natives may have to call out so they need a save area - // for register arguments. - int double_slots = 0; - int single_slots = 0; - for (int i = 0; i < total_in_args; i++) { - if (in_regs[i].first()->is_Register()) { - const Register reg = in_regs[i].first()->as_Register(); - switch (in_sig_bt[i]) { - case T_BOOLEAN: - case T_BYTE: - case T_SHORT: - case T_CHAR: - case T_INT: - // Fall through. - case T_ARRAY: - case T_LONG: double_slots++; break; - default: ShouldNotReachHere(); - } - } else if (in_regs[i].first()->is_FloatRegister()) { - switch (in_sig_bt[i]) { - case T_FLOAT: single_slots++; break; - case T_DOUBLE: double_slots++; break; - default: ShouldNotReachHere(); - } - } - } - total_save_slots = double_slots * 2 + align_up(single_slots, 2); // round to even - } int oop_handle_slot_offset = stack_slots; stack_slots += total_save_slots; // 3) int klass_slot_offset = 0; int klass_offset = -1; - if (method_is_static && !is_critical_native) { // 4) + if (method_is_static) { // 4) klass_slot_offset = stack_slots; klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; stack_slots += VMRegImpl::slots_per_word; @@ -1919,10 +1790,8 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, Register r_carg1_jnienv = noreg; Register r_carg2_classorobject = noreg; - if (!is_critical_native) { - r_carg1_jnienv = out_regs[0].first()->as_Register(); - r_carg2_classorobject = out_regs[1].first()->as_Register(); - } + r_carg1_jnienv = out_regs[0].first()->as_Register(); + r_carg2_classorobject = out_regs[1].first()->as_Register(); // Generate the Unverified Entry Point (UEP). @@ -2058,15 +1927,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, long_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1); break; case T_ARRAY: - if (is_critical_native) { - int body_arg = out; - out -= 1; // Point to length arg. - unpack_array_argument(masm, in_regs[in], in_elem_bt[in], out_regs[body_arg], out_regs[out], - r_callers_sp, r_temp_1, r_temp_2); - break; - } case T_OBJECT: - assert(!is_critical_native, "no oop arguments"); object_move(masm, stack_slots, oop_map, oop_handle_slot_offset, ((in == 0) && (!method_is_static)), &receiver_offset, @@ -2098,7 +1959,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // Pre-load a static method's oop into ARG2. // Used both by locking code and the normal JNI call code. 
- if (method_is_static && !is_critical_native) { + if (method_is_static) { __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), r_carg2_classorobject); @@ -2109,9 +1970,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, } // Get JNIEnv* which is first argument to native. - if (!is_critical_native) { - __ addi(r_carg1_jnienv, R16_thread, in_bytes(JavaThread::jni_environment_offset())); - } + __ addi(r_carg1_jnienv, R16_thread, in_bytes(JavaThread::jni_environment_offset())); // NOTE: // @@ -2140,7 +1999,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // -------------------------------------------------------------------------- if (method->is_synchronized()) { - assert(!is_critical_native, "unhandled"); ConditionRegister r_flag = CCR1; Register r_oop = r_temp_4; const Register r_box = r_temp_5; @@ -2185,16 +2043,14 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // Use that pc we placed in r_return_pc a while back as the current frame anchor. __ set_last_Java_frame(R1_SP, r_return_pc); - if (!is_critical_native) { - // Publish thread state - // -------------------------------------------------------------------------- + // Publish thread state + // -------------------------------------------------------------------------- - // Transition from _thread_in_Java to _thread_in_native. - __ li(R0, _thread_in_native); - __ release(); - // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size"); - __ stw(R0, thread_(thread_state)); - } + // Transition from _thread_in_Java to _thread_in_native. + __ li(R0, _thread_in_native); + __ release(); + // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size"); + __ stw(R0, thread_(thread_state)); // The JNI call @@ -2256,24 +2112,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, Label after_transition; - // If this is a critical native, check for a safepoint or suspend request after the call. - // If a safepoint is needed, transition to native, then to native_trans to handle - // safepoints like the native methods that are not critical natives. - if (is_critical_native) { - Label needs_safepoint; - Register sync_state = r_temp_5; - // Note: We should not reach here with active stack watermark. There's no safepoint between - // start of the native wrapper and this check where it could have been added. - // We don't check the watermark in the fast path. - __ safepoint_poll(needs_safepoint, sync_state, false /* at_return */, false /* in_nmethod */); - - Register suspend_flags = r_temp_6; - __ lwz(suspend_flags, thread_(suspend_flags)); - __ cmpwi(CCR1, suspend_flags, 0); - __ beq(CCR1, after_transition); - __ bind(needs_safepoint); - } - // Publish thread state // -------------------------------------------------------------------------- @@ -2443,7 +2281,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // Reset handle block. 
// -------------------------------------------------------------------------- - if (!is_critical_native) { __ ld(r_temp_1, thread_(active_handles)); // TODO: PPC port assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size"); __ li(r_temp_2, 0); @@ -2455,7 +2292,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, __ ld(r_temp_2, thread_(pending_exception)); __ cmpdi(CCR0, r_temp_2, 0); __ bne(CCR0, handle_pending_exception); - } // Return // -------------------------------------------------------------------------- @@ -2470,14 +2306,12 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // Since this is a native call, we know the proper exception handler // is the empty function. We just pop this frame and then jump to // forward_exception_entry. - if (!is_critical_native) { __ bind(handle_pending_exception); __ pop_frame(); __ restore_LR_CR(R11); __ b64_patchable((address)StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); - } // Handler for a cache miss (out-of-line). // -------------------------------------------------------------------------- diff --git a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp index ccc2364007b8d168031497a0522e1c34482c6735..6d4a081b24fc46f7683ef29a9c42960d7d86221a 100644 --- a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp +++ b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp @@ -52,7 +52,7 @@ RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) } RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) - : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { + : _index(index), _array(), _throw_index_out_of_bounds_exception(true) { assert(info != NULL, "must have info"); _info = new CodeEmitInfo(info); } diff --git a/src/hotspot/cpu/s390/c1_FrameMap_s390.cpp b/src/hotspot/cpu/s390/c1_FrameMap_s390.cpp index 1ed6ad6ed4d711240e5904019dd054252045aaaa..802d794c4e0614f59679500cc695c5589b80e251 100644 --- a/src/hotspot/cpu/s390/c1_FrameMap_s390.cpp +++ b/src/hotspot/cpu/s390/c1_FrameMap_s390.cpp @@ -138,8 +138,8 @@ LIR_Opr FrameMap::Z_F0_opr; LIR_Opr FrameMap::Z_F0_double_opr; -LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; -LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; +LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; +LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; // c1 rnr -> FloatRegister diff --git a/src/hotspot/cpu/s390/c1_LIR_s390.cpp b/src/hotspot/cpu/s390/c1_LIR_s390.cpp index 3c46915e47538109e844821879613a1f0453c6cc..4788a398de8ab6a5af956aaf0158e7fac7b05f69 100644 --- a/src/hotspot/cpu/s390/c1_LIR_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIR_s390.cpp @@ -29,22 +29,22 @@ #include "c1/c1_LIR.hpp" -FloatRegister LIR_OprDesc::as_float_reg() const { +FloatRegister LIR_Opr::as_float_reg() const { return FrameMap::nr2floatreg(fpu_regnr()); } -FloatRegister LIR_OprDesc::as_double_reg() const { +FloatRegister LIR_Opr::as_double_reg() const { return FrameMap::nr2floatreg(fpu_regnrHi()); } // Reg2 unused. 
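// (Illustrative note: on s390 a double fits in a single 64-bit FPU register,
//  so no hi/lo register pair is needed; the factory below therefore encodes
//  reg1 into both register fields of the operand.)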
LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { assert(!as_FloatRegister(reg2)->is_valid(), "Not used on this platform"); - return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | - (reg1 << LIR_OprDesc::reg2_shift) | - LIR_OprDesc::double_type | - LIR_OprDesc::fpu_register | - LIR_OprDesc::double_size); + return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | + (reg1 << LIR_Opr::reg2_shift) | + LIR_Opr::double_type | + LIR_Opr::fpu_register | + LIR_Opr::double_size); } #ifndef PRODUCT diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad index 8a2dde76202308a142ce444b14fbf21d55c0d128..60c27c61a514c4cdcb190b661dab10f4d27e4b5c 100644 --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -1536,6 +1536,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType return true; // Per default match rules are supported. } +const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { + return false; +} + const RegMask* Matcher::predicate_reg_mask(void) { return NULL; } diff --git a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp index 949f3d206e63fef79e08d5d02a55d15f4ecfb818..2b310f6e44f75921f50ac022201587ca855096e5 100644 --- a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp +++ b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp @@ -1286,76 +1286,6 @@ static void move32_64(MacroAssembler *masm, } } -static void move_ptr(MacroAssembler *masm, - VMRegPair src, - VMRegPair dst, - int framesize_in_slots) { - int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size; - - if (src.first()->is_stack()) { - if (dst.first()->is_stack()) { - // stack to stack - __ mem2reg_opt(Z_R0_scratch, Address(Z_SP, reg2offset(src.first()) + frame_offset)); - __ reg2mem_opt(Z_R0_scratch, Address(Z_SP, reg2offset(dst.first()))); - } else { - // stack to reg - __ mem2reg_opt(dst.first()->as_Register(), - Address(Z_SP, reg2offset(src.first()) + frame_offset)); - } - } else { - if (dst.first()->is_stack()) { - // reg to stack - __ reg2mem_opt(src.first()->as_Register(), Address(Z_SP, reg2offset(dst.first()))); - } else { - __ lgr_if_needed(dst.first()->as_Register(), src.first()->as_Register()); - } - } -} - -// Unpack an array argument into a pointer to the body and the length -// if the array is non-null, otherwise pass 0 for both. -static void unpack_array_argument(MacroAssembler *masm, - VMRegPair reg, - BasicType in_elem_type, - VMRegPair body_arg, - VMRegPair length_arg, - int framesize_in_slots) { - Register tmp_reg = Z_tmp_2; - Register tmp2_reg = Z_tmp_1; - - assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg, - "possible collision"); - assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg, - "possible collision"); - - // Pass the length, ptr pair. - NearLabel set_out_args; - VMRegPair tmp, tmp2; - - tmp.set_ptr(tmp_reg->as_VMReg()); - tmp2.set_ptr(tmp2_reg->as_VMReg()); - if (reg.first()->is_stack()) { - // Load the arg up from the stack. - move_ptr(masm, reg, tmp, framesize_in_slots); - reg = tmp; - } - - const Register first = reg.first()->as_Register(); - - // Don't set CC, indicate unused result. - (void) __ clear_reg(tmp2_reg, true, false); - if (tmp_reg != first) { - __ clear_reg(tmp_reg, true, false); // Don't set CC. 
- } - __ compare64_and_branch(first, (RegisterOrConstant)0L, Assembler::bcondEqual, set_out_args); - __ z_lgf(tmp2_reg, Address(first, arrayOopDesc::length_offset_in_bytes())); - __ add2reg(tmp_reg, arrayOopDesc::base_offset_in_bytes(in_elem_type), first); - - __ bind(set_out_args); - move_ptr(masm, tmp, body_arg, framesize_in_slots); - move32_64(masm, tmp2, length_arg, framesize_in_slots); -} - //---------------------------------------------------------------------- // Wrap a JNI call. //---------------------------------------------------------------------- @@ -1365,8 +1295,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, int compile_id, BasicType *in_sig_bt, VMRegPair *in_regs, - BasicType ret_type, - address critical_entry) { + BasicType ret_type) { int total_in_args = method->size_of_parameters(); if (method->is_method_handle_intrinsic()) { vmIntrinsics::ID iid = method->intrinsic_id(); @@ -1400,12 +1329,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // /////////////////////////////////////////////////////////////////////// - bool is_critical_native = true; - address native_func = critical_entry; - if (native_func == NULL) { - native_func = method->native_function(); - is_critical_native = false; - } + address native_func = method->native_function(); assert(native_func != NULL, "must have function"); //--------------------------------------------------------------------- @@ -1430,19 +1354,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // So convert the signature and registers now, and adjust the total number // of in-arguments accordingly. bool method_is_static = method->is_static(); - int total_c_args = total_in_args; - - if (!is_critical_native) { - int n_hidden_args = method_is_static ? 2 : 1; - total_c_args += n_hidden_args; - } else { - // No JNIEnv*, no this*, but unpacked arrays (base+length). - for (int i = 0; i < total_in_args; i++) { - if (in_sig_bt[i] == T_ARRAY) { - total_c_args ++; - } - } - } + int total_c_args = total_in_args + (method_is_static ? 2 : 1); BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); VMRegPair *out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); @@ -1455,45 +1367,13 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // hidden arguments) int argc = 0; - if (!is_critical_native) { - out_sig_bt[argc++] = T_ADDRESS; - if (method->is_static()) { - out_sig_bt[argc++] = T_OBJECT; - } - - for (int i = 0; i < total_in_args; i++) { - out_sig_bt[argc++] = in_sig_bt[i]; - } - } else { - in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); - SignatureStream ss(method->signature()); - int o = 0; - for (int i = 0; i < total_in_args; i++, o++) { - if (in_sig_bt[i] == T_ARRAY) { - // Arrays are passed as tuples (int, elem*). - ss.skip_array_prefix(1); // skip one '[' - assert(ss.is_primitive(), "primitive type expected"); - in_elem_bt[o] = ss.type(); - } else { - in_elem_bt[o] = T_VOID; - } - if (in_sig_bt[i] != T_VOID) { - assert(in_sig_bt[i] == ss.type() || - in_sig_bt[i] == T_ARRAY, "must match"); - ss.next(); - } - } - assert(total_in_args == o, "must match"); + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } - for (int i = 0; i < total_in_args; i++) { - if (in_sig_bt[i] == T_ARRAY) { - // Arrays are passed as tuples (int, elem*). 
- out_sig_bt[argc++] = T_INT; - out_sig_bt[argc++] = T_ADDRESS; - } else { - out_sig_bt[argc++] = in_sig_bt[i]; - } - } + for (int i = 0; i < total_in_args; i++) { + out_sig_bt[argc++] = in_sig_bt[i]; } /////////////////////////////////////////////////////////////////////// @@ -1550,8 +1430,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // 4| klass (if static) | // |---------------------| <- klass_slot_offset // 3| oopHandle area | - // | (save area for | - // | critical natives) | // | | // | | // |---------------------| <- oop_handle_offset @@ -1579,44 +1457,13 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // Now the space for the inbound oop handle area. int total_save_slots = RegisterImpl::number_of_arg_registers * VMRegImpl::slots_per_word; - if (is_critical_native) { - // Critical natives may have to call out so they need a save area - // for register arguments. - int double_slots = 0; - int single_slots = 0; - for (int i = 0; i < total_in_args; i++) { - if (in_regs[i].first()->is_Register()) { - const Register reg = in_regs[i].first()->as_Register(); - switch (in_sig_bt[i]) { - case T_BOOLEAN: - case T_BYTE: - case T_SHORT: - case T_CHAR: - case T_INT: - // Fall through. - case T_ARRAY: - case T_LONG: double_slots++; break; - default: ShouldNotReachHere(); - } - } else { - if (in_regs[i].first()->is_FloatRegister()) { - switch (in_sig_bt[i]) { - case T_FLOAT: single_slots++; break; - case T_DOUBLE: double_slots++; break; - default: ShouldNotReachHere(); - } - } - } - } // for - total_save_slots = double_slots * 2 + align_up(single_slots, 2); // Round to even. - } int oop_handle_slot_offset = stack_slots; stack_slots += total_save_slots; // 3) int klass_slot_offset = 0; int klass_offset = -1; - if (method_is_static && !is_critical_native) { // 4) + if (method_is_static) { // 4) klass_slot_offset = stack_slots; klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; stack_slots += VMRegImpl::slots_per_word; @@ -1783,15 +1630,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, break; case T_ARRAY: - if (is_critical_native) { - int body_arg = cix; - cix -= 1; // Point to length arg. - unpack_array_argument(masm, in_regs[jix], in_elem_bt[jix], out_regs[body_arg], out_regs[cix], stack_slots); - break; - } - // else fallthrough case T_OBJECT: - assert(!is_critical_native, "no oop arguments"); object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix], ((jix == 0) && (!method_is_static)), &receiver_offset); @@ -1821,7 +1660,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // Pre-load a static method's oop into ARG2. // Used both by locking code and the normal JNI call code. //-------------------------------------------------------------------- - if (method_is_static && !is_critical_native) { + if (method_is_static) { __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2); // Now handlize the static class mirror in ARG2. It's known not-null. @@ -1831,9 +1670,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, } // Get JNIEnv* which is first argument to native. - if (!is_critical_native) { - __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread); - } + __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread); ////////////////////////////////////////////////////////////////////// // We have all of the arguments setup at this point. 
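  // (Illustrative example, assuming a method 'static native int m(int a)':
  //  in_sig_bt is { T_INT }, and with the hidden JNIEnv* and class-mirror
  //  arguments prepended, out_sig_bt becomes { T_ADDRESS, T_OBJECT, T_INT },
  //  i.e. total_c_args == total_in_args + 2 for a static method.)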
@@ -1855,7 +1692,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // Lock a synchronized method. if (method->is_synchronized()) { - assert(!is_critical_native, "unhandled"); // ATTENTION: args and Z_R10 must be preserved. Register r_oop = Z_R11; @@ -1923,10 +1759,8 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // Use that pc we placed in Z_R10 a while back as the current frame anchor. __ set_last_Java_frame(Z_SP, Z_R10); - if (!is_critical_native) { - // Transition from _thread_in_Java to _thread_in_native. - __ set_thread_state(_thread_in_native); - } + // Transition from _thread_in_Java to _thread_in_native. + __ set_thread_state(_thread_in_native); ////////////////////////////////////////////////////////////////////// // This is the JNI call. @@ -1974,18 +1808,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, Label after_transition; - // If this is a critical native, check for a safepoint or suspend request after the call. - // If a safepoint is needed, transition to native, then to native_trans to handle - // safepoints like the native methods that are not critical natives. - if (is_critical_native) { - Label needs_safepoint; - // Does this need to save_native_result and fences? - __ safepoint_poll(needs_safepoint, Z_R1); - __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset())); - __ z_bre(after_transition); - __ bind(needs_safepoint); - } - // Switch thread to "native transition" state before reading the synchronization state. // This additional state is necessary because reading and testing the synchronization // state is not atomic w.r.t. GC, as this scenario demonstrates: @@ -2156,14 +1978,12 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, } // Reset handle block. - if (!is_critical_native) { - __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset())); - __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset_in_bytes()), 4); + __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset())); + __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset_in_bytes()), 4); - // Check for pending exceptions. - __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset())); - __ z_brne(handle_pending_exception); - } + // Check for pending exceptions. + __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset())); + __ z_brne(handle_pending_exception); ////////////////////////////////////////////////////////////////////// @@ -2185,26 +2005,23 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, ////////////////////////////////////////////////////////////////////// - if (!is_critical_native) { - - //--------------------------------------------------------------------- - // Handler for pending exceptions (out-of-line). - //--------------------------------------------------------------------- - // Since this is a native call, we know the proper exception handler - // is the empty function. We just pop this frame and then jump to - // forward_exception_entry. Z_R14 will contain the native caller's - // return PC. 
- __ bind(handle_pending_exception); - __ pop_frame(); - __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry()); - __ restore_return_pc(); - __ z_br(Z_R1_scratch); - - //--------------------------------------------------------------------- - // Handler for a cache miss (out-of-line) - //--------------------------------------------------------------------- - __ call_ic_miss_handler(ic_miss, 0x77, 0, Z_R1_scratch); - } + //--------------------------------------------------------------------- + // Handler for pending exceptions (out-of-line). + //--------------------------------------------------------------------- + // Since this is a native call, we know the proper exception handler + // is the empty function. We just pop this frame and then jump to + // forward_exception_entry. Z_R14 will contain the native caller's + // return PC. + __ bind(handle_pending_exception); + __ pop_frame(); + __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry()); + __ restore_return_pc(); + __ z_br(Z_R1_scratch); + + //--------------------------------------------------------------------- + // Handler for a cache miss (out-of-line) + //--------------------------------------------------------------------- + __ call_ic_miss_handler(ic_miss, 0x77, 0, Z_R1_scratch); __ flush(); diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index aee326133e0a7af7ade25262fc983b95a06e7525..2119c0797a63ea49f4ae85e93c3ce841b3c9a6ff 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -2458,6 +2458,13 @@ void Assembler::movddup(XMMRegister dst, XMMRegister src) { emit_int16(0x12, 0xC0 | encode); } +void Assembler::kmovbl(KRegister dst, KRegister src) { + assert(VM_Version::supports_avx512dq(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0x90, (0xC0 | encode)); +} + void Assembler::kmovbl(KRegister dst, Register src) { assert(VM_Version::supports_avx512dq(), ""); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); @@ -2505,7 +2512,7 @@ void Assembler::kmovwl(Address dst, KRegister src) { } void Assembler::kmovwl(KRegister dst, KRegister src) { - assert(VM_Version::supports_avx512bw(), ""); + assert(VM_Version::supports_evex(), ""); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0x90, (0xC0 | encode)); @@ -2571,6 +2578,104 @@ void Assembler::knotwl(KRegister dst, KRegister src) { emit_int16(0x44, (0xC0 | encode)); } +void Assembler::knotbl(KRegister dst, KRegister src) { + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x44, (0xC0 | encode)); +} + +void Assembler::korbl(KRegister dst, KRegister src1, KRegister src2) { + assert(VM_Version::supports_avx512dq(), ""); + InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* 
legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x45, (0xC0 | encode)); +} + +void Assembler::korwl(KRegister dst, KRegister src1, KRegister src2) { + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int16(0x45, (0xC0 | encode)); +} + +void Assembler::kordl(KRegister dst, KRegister src1, KRegister src2) { + assert(VM_Version::supports_avx512bw(), ""); + InstructionAttr attributes(AVX_256bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x45, (0xC0 | encode)); +} + +void Assembler::korql(KRegister dst, KRegister src1, KRegister src2) { + assert(VM_Version::supports_avx512bw(), ""); + InstructionAttr attributes(AVX_256bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int16(0x45, (0xC0 | encode)); +} + +void Assembler::kxorbl(KRegister dst, KRegister src1, KRegister src2) { + assert(VM_Version::supports_avx512dq(), ""); + InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x47, (0xC0 | encode)); +} + +void Assembler::kxorwl(KRegister dst, KRegister src1, KRegister src2) { + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int16(0x47, (0xC0 | encode)); +} + +void Assembler::kxordl(KRegister dst, KRegister src1, KRegister src2) { + assert(VM_Version::supports_avx512bw(), ""); + InstructionAttr attributes(AVX_256bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x47, (0xC0 | encode)); +} + +void Assembler::kxorql(KRegister dst, KRegister src1, KRegister src2) { + assert(VM_Version::supports_avx512bw(), ""); + InstructionAttr attributes(AVX_256bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int16(0x47, (0xC0 | encode)); +} + +void Assembler::kandbl(KRegister dst, KRegister src1, KRegister src2) { + assert(VM_Version::supports_avx512dq(), ""); + InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, 
VEX_OPCODE_0F, &attributes); + emit_int16(0x41, (0xC0 | encode)); +} + +void Assembler::kandwl(KRegister dst, KRegister src1, KRegister src2) { + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int16(0x41, (0xC0 | encode)); +} + +void Assembler::kanddl(KRegister dst, KRegister src1, KRegister src2) { + assert(VM_Version::supports_avx512bw(), ""); + InstructionAttr attributes(AVX_256bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x41, (0xC0 | encode)); +} + +void Assembler::kandql(KRegister dst, KRegister src1, KRegister src2) { + assert(VM_Version::supports_avx512bw(), ""); + InstructionAttr attributes(AVX_256bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int16(0x41, (0xC0 | encode)); +} + +void Assembler::knotdl(KRegister dst, KRegister src) { + assert(VM_Version::supports_avx512bw(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x44, (0xC0 | encode)); +} + void Assembler::knotql(KRegister dst, KRegister src) { assert(VM_Version::supports_avx512bw(), ""); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); @@ -2618,6 +2723,27 @@ void Assembler::ktestql(KRegister src1, KRegister src2) { emit_int16((unsigned char)0x99, (0xC0 | encode)); } +void Assembler::ktestdl(KRegister src1, KRegister src2) { + assert(VM_Version::supports_avx512bw(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0x99, (0xC0 | encode)); +} + +void Assembler::ktestwl(KRegister src1, KRegister src2) { + assert(VM_Version::supports_avx512dq(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0x99, (0xC0 | encode)); +} + +void Assembler::ktestbl(KRegister src1, KRegister src2) { + assert(VM_Version::supports_avx512dq(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0x99, (0xC0 | encode)); +} + void Assembler::ktestq(KRegister src1, KRegister src2) { assert(VM_Version::supports_avx512bw(), ""); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); 
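  // (Illustrative note on the ktest family: like TEST/VPTEST, ktest computes
  //  flags only; ZF is set when (src1 & src2) == 0 and CF is set when
  //  (src1 & ~src2) == 0, which is how all-true/all-false masks are checked.)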
@@ -2632,6 +2758,53 @@ void Assembler::ktestd(KRegister src1, KRegister src2) {
   emit_int16((unsigned char)0x99, (0xC0 | encode));
 }
 
+void Assembler::kxnorbl(KRegister dst, KRegister src1, KRegister src2) {
+  assert(VM_Version::supports_avx512dq(), "");
+  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int16(0x46, (0xC0 | encode));
+}
+
+void Assembler::kshiftlbl(KRegister dst, KRegister src, int imm8) {
+  assert(VM_Version::supports_avx512dq(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0 , src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int16(0x32, (0xC0 | encode));
+  emit_int8(imm8);
+}
+
+void Assembler::kshiftrbl(KRegister dst, KRegister src, int imm8) {
+  assert(VM_Version::supports_avx512dq(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0 , src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int16(0x30, (0xC0 | encode));
+  emit_int8(imm8);
+}
+
+void Assembler::kshiftrwl(KRegister dst, KRegister src, int imm8) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0 , src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int16(0x30, (0xC0 | encode));
+  emit_int8(imm8);
+}
+
+void Assembler::kshiftrdl(KRegister dst, KRegister src, int imm8) {
+  assert(VM_Version::supports_avx512bw(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0 , src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int16(0x31, (0xC0 | encode));
+  emit_int8(imm8);
+}
+
+void Assembler::kshiftrql(KRegister dst, KRegister src, int imm8) {
+  assert(VM_Version::supports_avx512bw(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0 , src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int16(0x31, (0xC0 | encode));
+  emit_int8(imm8);
+}
+
 void Assembler::movb(Address dst, int imm8) {
   InstructionMark im(this);
   prefix(dst);
@@ -4112,24 +4284,6 @@ void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vect
   emit_operand(as_Register(dst_enc), src);
 }
 
-void Assembler::evpmovd2m(KRegister kdst, XMMRegister src, int vector_len) {
-  assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
-  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
-  attributes.set_is_evex_instruction();
-  int encode = vex_prefix_and_encode(kdst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
-  emit_int16(0x39, (0xC0 | encode));
-}
-
-void Assembler::evpmovq2m(KRegister kdst, XMMRegister src, int vector_len) {
-  assert(UseAVX > 2 &&
VM_Version::supports_avx512dq(), ""); - assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); - InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); - attributes.set_is_evex_instruction(); - int encode = vex_prefix_and_encode(kdst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); - emit_int16(0x39, (0xC0 | encode)); -} - void Assembler::pcmpgtq(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); @@ -7419,8 +7573,8 @@ void Assembler::vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve } void Assembler::evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { - assert(VM_Version::supports_evex(), ""); // Encoding: EVEX.NDS.XXX.66.0F.W0 EF /r + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); attributes.set_embedded_opmask_register_specifier(mask); @@ -7431,6 +7585,118 @@ void Assembler::evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMReg emit_int16((unsigned char)0xEF, (0xC0 | encode)); } +void Assembler::evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEF); + emit_operand(dst, src); +} + +void Assembler::evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + // Encoding: EVEX.NDS.XXX.66.0F.W1 EF /r + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xEF, (0xC0 | encode)); +} + +void Assembler::evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, 
nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEF); + emit_operand(dst, src); +} + +void Assembler::evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xDB); + emit_operand(dst, src); +} + +void Assembler::evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xDB, (0xC0 | encode)); +} + +void Assembler::evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xDB); + emit_operand(dst, src); +} + +void Assembler::evporq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xEB, (0xC0 | encode)); +} + +void Assembler::evporq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + 
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEB); + emit_operand(dst, src); +} + void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_evex(), "requires EVEX support"); InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); @@ -7975,29 +8241,1468 @@ void Assembler::vpbroadcastw(XMMRegister dst, Address src, int vector_len) { // xmm/mem sourced byte/word/dword/qword replicate -// duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL -void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) { - assert(UseAVX >= 2, ""); - InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); - int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); - emit_int16(0x58, (0xC0 | encode)); +void Assembler::evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xFC, (0xC0 | encode)); } -void Assembler::vpbroadcastd(XMMRegister dst, Address src, int vector_len) { - assert(VM_Version::supports_avx2(), ""); - assert(dst != xnoreg, "sanity"); +void Assembler::evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { InstructionMark im(this); - InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); - // swap src<->dst for encoding - vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); - emit_int8(0x58); - emit_operand(dst, src); -} - -// duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL -void Assembler::vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) { - assert(VM_Version::supports_avx2(), ""); + assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFC); + emit_operand(dst, src); +} + +void Assembler::evpaddw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_avx512bw() && (vector_len == 
AVX_512bit || VM_Version::supports_avx512vl()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xFD, (0xC0 | encode)); +} + +void Assembler::evpaddw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + InstructionMark im(this); + assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFD); + emit_operand(dst, src); +} + +void Assembler::evpaddd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xFE, (0xC0 | encode)); +} + +void Assembler::evpaddd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFE); + emit_operand(dst, src); +} + +void Assembler::evpaddq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xD4, (0xC0 | 
encode)); +} + +void Assembler::evpaddq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xD4); + emit_operand(dst, src); +} + +void Assembler::evaddps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int16(0x58, (0xC0 | encode)); +} + +void Assembler::evaddps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_operand(dst, src); +} + +void Assembler::evaddpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x58, (0xC0 | encode)); +} + +void Assembler::evaddpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + 
attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_operand(dst, src); +} + +void Assembler::evpsubb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xF8, (0xC0 | encode)); +} + +void Assembler::evpsubb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + InstructionMark im(this); + assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF8); + emit_operand(dst, src); +} + +void Assembler::evpsubw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xF9, (0xC0 | encode)); +} + +void Assembler::evpsubw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + InstructionMark im(this); + assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xF9); + emit_operand(dst, src); +} + +void Assembler::evpsubd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr 
attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xFA, (0xC0 | encode)); +} + +void Assembler::evpsubd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFA); + emit_operand(dst, src); +} + +void Assembler::evpsubq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xFB, (0xC0 | encode)); +} + +void Assembler::evpsubq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xFB); + emit_operand(dst, src); +} + +void Assembler::evsubps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int16(0x5C, (0xC0 | encode)); +} + +void Assembler::evsubps(XMMRegister dst, KRegister mask, XMMRegister 
nds, Address src, bool merge, int vector_len) { + InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_operand(dst, src); +} + +void Assembler::evsubpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x5C, (0xC0 | encode)); +} + +void Assembler::evsubpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + InstructionMark im(this); + assert(VM_Version::supports_evex(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x5C); + emit_operand(dst, src); +} + +void Assembler::evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xD5, (0xC0 | encode)); +} + +void Assembler::evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + InstructionMark im(this); + assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + 
+void Assembler::evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int16((unsigned char)0xD5, (0xC0 | encode));
+}
+
+void Assembler::evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD5);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpmulld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x40, (0xC0 | encode));
+}
+
+void Assembler::evpmulld(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x40);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpmullq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512dq() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x40, (0xC0 | encode));
+}
+
+void Assembler::evpmullq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_avx512dq() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x40);
+  emit_operand(dst, src);
+}
+
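+// Note: vpmulld and vpmullq share opcode 0x40 in the 0F38 map; EVEX.W selects
+// the lane width, and the 64-bit form additionally requires AVX512DQ.
+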
+void Assembler::evmulps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int16(0x59, (0xC0 | encode));
+}
+
+void Assembler::evmulps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
+}
+
+void Assembler::evmulpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int16(0x59, (0xC0 | encode));
+}
+
+void Assembler::evmulpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
+}
+
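+// sqrt is unary: the nds argument below is unused and 0 is encoded in EVEX.vvvv.
+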
+void Assembler::evsqrtps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int16(0x51, (0xC0 | encode));
+}
+
+void Assembler::evsqrtps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_operand(dst, src);
+}
+
+void Assembler::evsqrtpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int16(0x51, (0xC0 | encode));
+}
+
+void Assembler::evsqrtpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_operand(dst, src);
+}
+
+
+void Assembler::evdivps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int16(0x5E, (0xC0 | encode));
+}
+
+void Assembler::evdivps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
+}
+
+void Assembler::evdivpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int16(0x5E, (0xC0 | encode));
+}
+
+void Assembler::evdivpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpabsb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x1C, (0xC0 | encode));
+}
+
+
+void Assembler::evpabsb(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x1C);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpabsw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x1D, (0xC0 | encode));
+}
+
+
+void Assembler::evpabsw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x1D);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpabsd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x1E, (0xC0 | encode));
+}
+
+
+void Assembler::evpabsd(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x1E);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpabsq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x1F, (0xC0 | encode));
+}
+
+void Assembler::evpabsq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x1F);
+  emit_operand(dst, src);
+}
+
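+// FMA 213 form: dst = nds * dst + src, per element, under the opmask.
+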
+void Assembler::evpfma213ps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16((unsigned char)0xA8, (0xC0 | encode));
+}
+
+void Assembler::evpfma213ps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8((unsigned char)0xA8);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpfma213pd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16((unsigned char)0xA8, (0xC0 | encode));
+}
+
+void Assembler::evpfma213pd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8((unsigned char)0xA8);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpermb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512_vbmi() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16((unsigned char)0x8D, (0xC0 | encode));
+}
+
+void Assembler::evpermb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512_vbmi() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8((unsigned char)0x8D);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpermw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16((unsigned char)0x8D, (0xC0 | encode));
+}
+
+void Assembler::evpermw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8((unsigned char)0x8D);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpermd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex() && vector_len > AVX_128bit, "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x36, (0xC0 | encode));
+}
+
+void Assembler::evpermd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex() && vector_len > AVX_128bit, "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x36);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpermq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex() && vector_len > AVX_128bit, "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x36, (0xC0 | encode));
+}
+
+void Assembler::evpermq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex() && vector_len > AVX_128bit, "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x36);
+  emit_operand(dst, src);
+}
+
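+// Immediate shift forms: the opcode extension travels in the ModRM reg field
+// (xmm2 => /2 logical right, xmm4 => /4 arithmetic right, xmm6 => /6 left),
+// so dst is encoded in EVEX.vvvv and src in ModRM.rm.
+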
+void Assembler::evpsllw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
+}
+
+void Assembler::evpslld(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
+}
+
+void Assembler::evpsllq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
+}
+
+void Assembler::evpsrlw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
+}
+
+void Assembler::evpsrld(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
+}
+
+void Assembler::evpsrlq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
+}
+
+void Assembler::evpsraw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
+}
+
+void Assembler::evpsrad(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
+}
+
+void Assembler::evpsraq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
+}
+
+void Assembler::evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int16((unsigned char)0xF1, (0xC0 | encode));
+}
+
+void Assembler::evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int16((unsigned char)0xF2, (0xC0 | encode));
+}
+
+void Assembler::evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int16((unsigned char)0xF3, (0xC0 | encode));
+}
+
+void Assembler::evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int16((unsigned char)0xD1, (0xC0 | encode));
+}
+
+void Assembler::evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int16((unsigned char)0xD2, (0xC0 | encode));
+}
+
+void Assembler::evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int16((unsigned char)0xD3, (0xC0 | encode));
+}
+
+void Assembler::evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int16((unsigned char)0xE1, (0xC0 | encode));
+}
+
+void Assembler::evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int16((unsigned char)0xE2, (0xC0 | encode));
+}
+
+void Assembler::evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int16((unsigned char)0xE2, (0xC0 | encode));
+}
+
+void Assembler::evpsllvw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x12, (0xC0 | encode));
+}
+
+void Assembler::evpsllvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x47, (0xC0 | encode));
+}
+
+void Assembler::evpsllvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x47, (0xC0 | encode));
+}
+
+void Assembler::evpsrlvw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x10, (0xC0 | encode));
+}
+
+void Assembler::evpsrlvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x45, (0xC0 | encode));
+}
+
+void Assembler::evpsrlvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x45, (0xC0 | encode));
+}
+
+void Assembler::evpsravw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x11, (0xC0 | encode));
+}
+
+void Assembler::evpsravd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x46, (0xC0 | encode));
+}
+
+void Assembler::evpsravq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x46, (0xC0 | encode));
+}
+
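+// The vector-shift forms above (sllv/srlv/srav) take per-element shift counts
+// from src; EVEX.W picks 32- vs 64-bit lanes, and the 16-bit variants
+// additionally require AVX512BW.
+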
+void Assembler::evpminsb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x38, (0xC0 | encode));
+}
+
+void Assembler::evpminsb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x38);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpminsw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int16((unsigned char)0xEA, (0xC0 | encode));
+}
+
+void Assembler::evpminsw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEA);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpminsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x39, (0xC0 | encode));
+}
+
+void Assembler::evpminsd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x39);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpminsq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x39, (0xC0 | encode));
+}
+
+void Assembler::evpminsq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x39);
+  emit_operand(dst, src);
+}
+
+
+void Assembler::evpmaxsb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x3C, (0xC0 | encode));
+}
+
+void Assembler::evpmaxsb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x3C);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpmaxsw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int16((unsigned char)0xEE, (0xC0 | encode));
+}
+
+void Assembler::evpmaxsw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEE);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x3D, (0xC0 | encode));
+}
+
+void Assembler::evpmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x3D);
+  emit_operand(dst, src);
+}
+
+void Assembler::evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x3D, (0xC0 | encode));
+}
+
+void Assembler::evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask);
+  if (merge) {
+    attributes.reset_is_clear_context();
+  }
+  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x3D);
+  emit_operand(dst, src);
+}
+
+// duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
+void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(UseAVX >= 2, "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int16(0x58, (0xC0 | encode));
+}
+
+void Assembler::vpbroadcastd(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_avx2(), "");
+  assert(dst != xnoreg, "sanity");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  // swap src<->dst for encoding
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
+}
+
+// duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
+void Assembler::vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx2(), "");
   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_rex_vex_w_reverted();
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@@ -9367,6 +11072,102 @@ void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address
   emit_int8((unsigned char)comparison);
 }
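+// Vector rotates below: the immediate forms reuse opcode 0x72 with ModRM /0
+// (rotate right) and /1 (rotate left), passed as xmm0/xmm1; the v-forms
+// (0F38 0x14/0x15) take per-element rotate counts from src.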
VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x14, (0xC0 | encode)); +} + +void Assembler::evprorvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x14, (0xC0 | encode)); +} + +void Assembler::evprold(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len) { + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(xmm1->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int24(0x72, (0xC0 | encode), shift & 0xFF); +} + +void Assembler::evprolq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len) { + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(xmm1->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int24(0x72, (0xC0 | encode), shift & 0xFF); +} + +void Assembler::evprolvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x15, (0xC0 | encode)); +} + +void Assembler::evprolvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + 
attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x15, (0xC0 | encode)); +} + void Assembler::vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len) { assert(VM_Version::supports_avx(), ""); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); @@ -9490,6 +11291,30 @@ void Assembler::shrxq(Register dst, Register src1, Register src2) { emit_int16((unsigned char)0xF7, (0xC0 | encode)); } +void Assembler::evpmovq2m(KRegister dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx512vldq(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x39, (0xC0 | encode)); +} + +void Assembler::evpmovd2m(KRegister dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx512vldq(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x39, (0xC0 | encode)); +} + +void Assembler::evpmovw2m(KRegister dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx512vlbw(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x29, (0xC0 | encode)); +} + void Assembler::evpmovb2m(KRegister dst, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx512vlbw(), ""); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); @@ -9498,6 +11323,37 @@ void Assembler::evpmovb2m(KRegister dst, XMMRegister src, int vector_len) { emit_int16(0x29, (0xC0 | encode)); } +void Assembler::evpmovm2q(XMMRegister dst, KRegister src, int vector_len) { + assert(VM_Version::supports_avx512vldq(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x38, (0xC0 | encode)); +} + +void Assembler::evpmovm2d(XMMRegister dst, KRegister src, int vector_len) { + assert(VM_Version::supports_avx512vldq(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x38, (0xC0 | encode)); +} + +void Assembler::evpmovm2w(XMMRegister dst, KRegister src, int vector_len) { + assert(VM_Version::supports_avx512vlbw(), ""); + InstructionAttr attributes(vector_len, /* 
vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x28, (0xC0 | encode)); +} + +void Assembler::evpmovm2b(XMMRegister dst, KRegister src, int vector_len) { + assert(VM_Version::supports_avx512vlbw(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x28, (0xC0 | encode)); +} #ifndef _LP64 void Assembler::incl(Register dst) { diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 5976597019b160de6f46ca7e866db2b39431457c..09b2a392d305801f990beb003460ae9e311accf0 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -1462,8 +1462,23 @@ private: void movddup(XMMRegister dst, XMMRegister src); + void kandbl(KRegister dst, KRegister src1, KRegister src2); + void kandwl(KRegister dst, KRegister src1, KRegister src2); + void kanddl(KRegister dst, KRegister src1, KRegister src2); + void kandql(KRegister dst, KRegister src1, KRegister src2); + + void korbl(KRegister dst, KRegister src1, KRegister src2); + void korwl(KRegister dst, KRegister src1, KRegister src2); + void kordl(KRegister dst, KRegister src1, KRegister src2); + void korql(KRegister dst, KRegister src1, KRegister src2); + + void kxorbl(KRegister dst, KRegister src1, KRegister src2); + void kxorwl(KRegister dst, KRegister src1, KRegister src2); + void kxordl(KRegister dst, KRegister src1, KRegister src2); + void kxorql(KRegister dst, KRegister src1, KRegister src2); void kmovbl(KRegister dst, Register src); void kmovbl(Register dst, KRegister src); + void kmovbl(KRegister dst, KRegister src); void kmovwl(KRegister dst, Register src); void kmovwl(KRegister dst, Address src); void kmovwl(Register dst, KRegister src); @@ -1477,7 +1492,9 @@ private: void kmovql(KRegister dst, Register src); void kmovql(Register dst, KRegister src); + void knotbl(KRegister dst, KRegister src); void knotwl(KRegister dst, KRegister src); + void knotdl(KRegister dst, KRegister src); void knotql(KRegister dst, KRegister src); void kortestbl(KRegister dst, KRegister src); @@ -1485,10 +1502,19 @@ private: void kortestdl(KRegister dst, KRegister src); void kortestql(KRegister dst, KRegister src); + void kxnorbl(KRegister dst, KRegister src1, KRegister src2); + void kshiftlbl(KRegister dst, KRegister src, int imm8); + void kshiftrbl(KRegister dst, KRegister src, int imm8); + void kshiftrwl(KRegister dst, KRegister src, int imm8); + void kshiftrdl(KRegister dst, KRegister src, int imm8); + void kshiftrql(KRegister dst, KRegister src, int imm8); void ktestq(KRegister src1, KRegister src2); void ktestd(KRegister src1, KRegister src2); void ktestql(KRegister dst, KRegister src); + void ktestdl(KRegister dst, KRegister src); + void ktestwl(KRegister dst, KRegister src); + void ktestbl(KRegister dst, KRegister src); void movdl(XMMRegister dst, Register src); void movdl(Register dst, XMMRegister src); @@ -2152,9 +2178,6 @@ private: void bzhiq(Register dst, Register src1, Register src2); //====================VECTOR ARITHMETIC===================================== - void evpmovd2m(KRegister kdst, XMMRegister src, int vector_len); - 
void evpmovq2m(KRegister kdst, XMMRegister src, int vector_len); - // Add Packed Floating-Point Values void addpd(XMMRegister dst, XMMRegister src); void addpd(XMMRegister dst, Address src); @@ -2246,6 +2269,136 @@ private: void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + // Leaf level assembler routines for masked operations. + void evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpaddw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpaddw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpaddd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpaddd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpaddq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpaddq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evaddps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evaddps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evaddpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evaddpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpsubb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsubb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpsubw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsubw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpsubd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsubd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpsubq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsubq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evsubps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evsubps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evsubpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evsubpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpmulld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpmulld(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpmullq(XMMRegister dst, KRegister mask, XMMRegister 
nds, XMMRegister src, bool merge, int vector_len); + void evpmullq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evmulps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evmulps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evmulpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evmulpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evdivps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evdivps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evdivpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evdivpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpabsb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len); + void evpabsb(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len); + void evpabsw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len); + void evpabsw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len); + void evpabsd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len); + void evpabsd(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len); + void evpabsq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len); + void evpabsq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len); + void evpfma213ps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpfma213ps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpfma213pd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpfma213pd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpermb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpermb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpermw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpermw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpermd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpermd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpermq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpermq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void 
evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evsqrtps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evsqrtps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evsqrtpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evsqrtpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + + void evpsllw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len); + void evpslld(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len); + void evpsllq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len); + void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len); + void evpsrld(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len); + void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len); + void evpsraw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len); + void evpsrad(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len); + void evpsraq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len); + + void evpsllvw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsllvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsllvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsrlvw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsrlvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsrlvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsravw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsravd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpsravq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpmaxsb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpmaxsw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpminsb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpminsw(XMMRegister dst, KRegister mask, 
XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpminsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpminsq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpmaxsb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpmaxsw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpminsb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpminsw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpminsd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpminsq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evporq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evporq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + + void evprold(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len); + void evprolq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len); + void evprolvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evprolvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evprord(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len); + void evprorq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len); + void evprorvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evprorvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + // Sub packed integers void psubb(XMMRegister dst, XMMRegister src); void psubw(XMMRegister dst, XMMRegister src); @@ -2364,7 +2517,6 @@ private: void pand(XMMRegister dst, XMMRegister src); void vpand(XMMRegister dst, 
XMMRegister nds, XMMRegister src, int vector_len); void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len); - void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); // Andn packed integers @@ -2377,15 +2529,11 @@ private: void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); - void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); - void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); - // Xor packed integers void pxor(XMMRegister dst, XMMRegister src); void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); - void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len); @@ -2527,6 +2675,13 @@ private: int comparison, bool is_signed, int vector_len); void evpmovb2m(KRegister dst, XMMRegister src, int vector_len); + void evpmovw2m(KRegister dst, XMMRegister src, int vector_len); + void evpmovd2m(KRegister dst, XMMRegister src, int vector_len); + void evpmovq2m(KRegister dst, XMMRegister src, int vector_len); + void evpmovm2b(XMMRegister dst, KRegister src, int vector_len); + void evpmovm2w(XMMRegister dst, KRegister src, int vector_len); + void evpmovm2d(XMMRegister dst, KRegister src, int vector_len); + void evpmovm2q(XMMRegister dst, KRegister src, int vector_len); // Vector blends void blendvps(XMMRegister dst, XMMRegister src); diff --git a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp index 0656b204f2565b827fd568fd82d88bc215803a10..51909e5fa1473b070d298fa569e8b6dee8bace7e 100644 --- a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp +++ b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp @@ -123,7 +123,7 @@ RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) } RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) - : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { + : _index(index), _array(), _throw_index_out_of_bounds_exception(true) { assert(info != NULL, "must have info"); _info = new CodeEmitInfo(info); } diff --git a/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp b/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp index a7411412d18f9bde10269021d142bdf4ae962f59..7b036a24277ac857879fa9e2a03b08dcd4f182e3 100644 --- a/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp +++ b/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp @@ -142,9 +142,9 @@ LIR_Opr FrameMap::r13_metadata_opr; LIR_Opr FrameMap::r14_metadata_opr; #endif // _LP64 -LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; -LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; -LIR_Opr FrameMap::_caller_save_xmm_regs[] = { 0, }; +LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; +LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; +LIR_Opr FrameMap::_caller_save_xmm_regs[] = {}; XMMRegister FrameMap::_xmm_regs [] = { 0, }; diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index 
6f1cbe17395dfbc5b18479bc0e6401fcc47b09c6..0b62108c79fa266265fd50aff85b9b57798440b5 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -212,7 +212,7 @@ LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_o LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { - LIR_Opr r = NULL; + LIR_Opr r; if (type == T_LONG) { r = LIR_OprFact::longConst(x); } else if (type == T_INT) { diff --git a/src/hotspot/cpu/x86/c1_LIR_x86.cpp b/src/hotspot/cpu/x86/c1_LIR_x86.cpp index f7e3392d2e5c6b0fe8c7e9f895402f5d354b7d95..6bdbfd1824caacc026ecfe64630c737267dc7c52 100644 --- a/src/hotspot/cpu/x86/c1_LIR_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIR_x86.cpp @@ -28,21 +28,21 @@ #include "c1/c1_LIR.hpp" -FloatRegister LIR_OprDesc::as_float_reg() const { +FloatRegister LIR_Opr::as_float_reg() const { ShouldNotReachHere(); return fnoreg; } -FloatRegister LIR_OprDesc::as_double_reg() const { +FloatRegister LIR_Opr::as_double_reg() const { ShouldNotReachHere(); return fnoreg; } -XMMRegister LIR_OprDesc::as_xmm_float_reg() const { +XMMRegister LIR_Opr::as_xmm_float_reg() const { return FrameMap::nr2xmmreg(xmm_regnr()); } -XMMRegister LIR_OprDesc::as_xmm_double_reg() const { +XMMRegister LIR_Opr::as_xmm_double_reg() const { assert(xmm_regnrLo() == xmm_regnrHi(), "assumed in calculation"); return FrameMap::nr2xmmreg(xmm_regnrLo()); } @@ -50,11 +50,11 @@ XMMRegister LIR_OprDesc::as_xmm_double_reg() const { // Reg2 unused. LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); - return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | - (reg1 << LIR_OprDesc::reg2_shift) | - LIR_OprDesc::double_type | - LIR_OprDesc::fpu_register | - LIR_OprDesc::double_size); + return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | + (reg1 << LIR_Opr::reg2_shift) | + LIR_Opr::double_type | + LIR_Opr::fpu_register | + LIR_Opr::double_size); } #ifndef PRODUCT diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 9b891ed727bd9b44027c1c9c0dc374e08d1f7735..5e4c8917612982827749f09cdbd6a787e47f40e4 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -574,8 +574,13 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp // Unconditionally set box->_displaced_header = markWord::unused_mark(). // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value())); - // Intentional fall-through into DONE_LABEL ... // Propagate ICC.ZF from CAS above into DONE_LABEL. + jcc(Assembler::equal, DONE_LABEL); // CAS above succeeded; propagate ZF = 1 (success) + + cmpptr(r15_thread, rax); // Check if we are already the owner (recursive lock) + jcc(Assembler::notEqual, DONE_LABEL); // If not recursive, ZF = 0 at this point (fail) + incq(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); + xorq(rax, rax); // Set ZF = 1 (success) for recursive lock, denoting locking success #endif // _LP64 #if INCLUDE_RTM_OPT } // use_rtm() @@ -670,10 +675,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t // Refer to the comments in synchronizer.cpp for how we might encode extra // state in _succ so we can avoid fetching EntryList|cxq. 
// - // I'd like to add more cases in fast_lock() and fast_unlock() -- - // such as recursive enter and exit -- but we have to be wary of - // I$ bloat, T$ effects and BP$ effects. - // // If there's no contention try a 1-0 exit. That is, exit without // a costly MEMBAR or CAS. See synchronizer.cpp for details on how // we detect and recover from the race that the 1-0 exit admits. @@ -721,9 +722,16 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t bind (CheckSucc); #else // _LP64 // It's inflated - xorptr(boxReg, boxReg); - orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); - jccb (Assembler::notZero, DONE_LABEL); + Label LNotRecursive, LSuccess, LGoSlowPath; + + cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0); + jccb(Assembler::equal, LNotRecursive); + + // Recursive inflated unlock + decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); + jmpb(LSuccess); + + bind(LNotRecursive); movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); jccb (Assembler::notZero, CheckSucc); @@ -732,7 +740,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t jmpb (DONE_LABEL); // Try to avoid passing control into the slow_path ... - Label LSuccess, LGoSlowPath ; bind (CheckSucc); // The following optional optimization can be elided if necessary @@ -1461,6 +1468,19 @@ void C2_MacroAssembler::load_vector_mask(XMMRegister dst, XMMRegister src, int v } } +void C2_MacroAssembler::load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, + Register tmp, bool novlbwdq, int vlen_enc) { + if (novlbwdq) { + vpmovsxbd(xtmp, src, vlen_enc); + evpcmpd(dst, k0, xtmp, ExternalAddress(StubRoutines::x86::vector_int_mask_cmp_bits()), + Assembler::eq, true, vlen_enc, tmp); + } else { + vpxor(xtmp, xtmp, xtmp, vlen_enc); + vpsubb(xtmp, xtmp, src, vlen_enc); + evpmovb2m(dst, xtmp, vlen_enc); + } +} + void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes) { ExternalAddress addr(StubRoutines::x86::vector_iota_indices()); if (vlen_in_bytes == 4) { @@ -3827,14 +3847,231 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register } } +void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst, + XMMRegister src1, int imm8, bool merge, int vlen_enc) { + switch(ideal_opc) { + case Op_LShiftVS: + Assembler::evpsllw(dst, mask, src1, imm8, merge, vlen_enc); break; + case Op_LShiftVI: + Assembler::evpslld(dst, mask, src1, imm8, merge, vlen_enc); break; + case Op_LShiftVL: + Assembler::evpsllq(dst, mask, src1, imm8, merge, vlen_enc); break; + case Op_RShiftVS: + Assembler::evpsraw(dst, mask, src1, imm8, merge, vlen_enc); break; + case Op_RShiftVI: + Assembler::evpsrad(dst, mask, src1, imm8, merge, vlen_enc); break; + case Op_RShiftVL: + Assembler::evpsraq(dst, mask, src1, imm8, merge, vlen_enc); break; + case Op_URShiftVS: + Assembler::evpsrlw(dst, mask, src1, imm8, merge, vlen_enc); break; + case Op_URShiftVI: + Assembler::evpsrld(dst, mask, src1, imm8, merge, vlen_enc); break; + case Op_URShiftVL: + Assembler::evpsrlq(dst, mask, src1, imm8, merge, vlen_enc); break; + case Op_RotateRightV: + evrord(eType, dst, mask, src1, imm8, merge, vlen_enc); break; + case Op_RotateLeftV: + evrold(eType, dst, mask, src1, imm8, merge, vlen_enc); break; + default: + fatal("Unsupported masked operation"); break; + } +} + +void 
C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst, + XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc, + bool is_varshift) { + switch (ideal_opc) { + case Op_AddVB: + evpaddb(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_AddVS: + evpaddw(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_AddVI: + evpaddd(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_AddVL: + evpaddq(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_AddVF: + evaddps(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_AddVD: + evaddpd(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_SubVB: + evpsubb(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_SubVS: + evpsubw(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_SubVI: + evpsubd(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_SubVL: + evpsubq(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_SubVF: + evsubps(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_SubVD: + evsubpd(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_MulVS: + evpmullw(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_MulVI: + evpmulld(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_MulVL: + evpmullq(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_MulVF: + evmulps(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_MulVD: + evmulpd(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_DivVF: + evdivps(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_DivVD: + evdivpd(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_SqrtVF: + evsqrtps(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_SqrtVD: + evsqrtpd(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_AbsVB: + evpabsb(dst, mask, src2, merge, vlen_enc); break; + case Op_AbsVS: + evpabsw(dst, mask, src2, merge, vlen_enc); break; + case Op_AbsVI: + evpabsd(dst, mask, src2, merge, vlen_enc); break; + case Op_AbsVL: + evpabsq(dst, mask, src2, merge, vlen_enc); break; + case Op_FmaVF: + evpfma213ps(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_FmaVD: + evpfma213pd(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_VectorRearrange: + evperm(eType, dst, mask, src2, src1, merge, vlen_enc); break; + case Op_LShiftVS: + evpsllw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; + case Op_LShiftVI: + evpslld(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; + case Op_LShiftVL: + evpsllq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; + case Op_RShiftVS: + evpsraw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; + case Op_RShiftVI: + evpsrad(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; + case Op_RShiftVL: + evpsraq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; + case Op_URShiftVS: + evpsrlw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; + case Op_URShiftVI: + evpsrld(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; + case Op_URShiftVL: + evpsrlq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; + case Op_RotateLeftV: + evrold(eType, dst, mask, src1, src2, merge, vlen_enc); break; + case Op_RotateRightV: + evrord(eType, dst, mask, src1, src2, merge, vlen_enc); break; + case Op_MaxV: + evpmaxs(eType, dst, mask, src1, src2, merge, vlen_enc); break; + case Op_MinV: + evpmins(eType, dst, mask, src1, src2, merge, vlen_enc); break; + case Op_XorV: + evxor(eType, dst, mask, src1, src2, merge, vlen_enc); break; + 
case Op_OrV: + evor(eType, dst, mask, src1, src2, merge, vlen_enc); break; + case Op_AndV: + evand(eType, dst, mask, src1, src2, merge, vlen_enc); break; + default: + fatal("Unsupported masked operation"); break; + } +} + +void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst, + XMMRegister src1, Address src2, bool merge, int vlen_enc) { + switch (ideal_opc) { + case Op_AddVB: + evpaddb(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_AddVS: + evpaddw(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_AddVI: + evpaddd(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_AddVL: + evpaddq(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_AddVF: + evaddps(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_AddVD: + evaddpd(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_SubVB: + evpsubb(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_SubVS: + evpsubw(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_SubVI: + evpsubd(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_SubVL: + evpsubq(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_SubVF: + evsubps(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_SubVD: + evsubpd(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_MulVS: + evpmullw(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_MulVI: + evpmulld(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_MulVL: + evpmullq(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_MulVF: + evmulps(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_MulVD: + evmulpd(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_DivVF: + evdivps(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_DivVD: + evdivpd(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_FmaVF: + evpfma213ps(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_FmaVD: + evpfma213pd(dst, mask, src1, src2, merge, vlen_enc); break; + case Op_MaxV: + evpmaxs(eType, dst, mask, src1, src2, merge, vlen_enc); break; + case Op_MinV: + evpmins(eType, dst, mask, src1, src2, merge, vlen_enc); break; + case Op_XorV: + evxor(eType, dst, mask, src1, src2, merge, vlen_enc); break; + case Op_OrV: + evor(eType, dst, mask, src1, src2, merge, vlen_enc); break; + case Op_AndV: + evand(eType, dst, mask, src1, src2, merge, vlen_enc); break; + default: + fatal("Unsupported masked operation"); break; + } +} + +void C2_MacroAssembler::masked_op(int ideal_opc, int mask_len, KRegister dst, + KRegister src1, KRegister src2) { + BasicType etype = T_ILLEGAL; + switch(mask_len) { + case 2: + case 4: + case 8: etype = T_BYTE; break; + case 16: etype = T_SHORT; break; + case 32: etype = T_INT; break; + case 64: etype = T_LONG; break; + default: fatal("Unsupported type"); break; + } + assert(etype != T_ILLEGAL, ""); + switch(ideal_opc) { + case Op_AndVMask: + kand(etype, dst, src1, src2); break; + case Op_OrVMask: + kor(etype, dst, src1, src2); break; + case Op_XorVMask: + kxor(etype, dst, src1, src2); break; + default: + fatal("Unsupported masked operation"); break; + } +} + #ifdef _LP64 -void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, - Register tmp, KRegister ktmp, int masklen, int vec_enc) { - assert(VM_Version::supports_avx512vlbw(), ""); - vpxor(xtmp, xtmp, xtmp, vec_enc); - vpsubb(xtmp, xtmp, mask, vec_enc); - evpmovb2m(ktmp, xtmp, vec_enc); - kmovql(tmp, ktmp); +void C2_MacroAssembler::vector_mask_operation(int opc, Register 
dst, KRegister mask, + Register tmp, int masklen, int masksize, + int vec_enc) { + if(VM_Version::supports_avx512bw()) { + kmovql(tmp, mask); + } else { + assert(masklen <= 16, ""); + kmovwl(tmp, mask); + } + if (masksize < 16) { + andq(tmp, (((jlong)1 << masklen) - 1)); + } switch(opc) { case Op_VectorMaskTrueCount: popcntq(dst, tmp); @@ -3854,12 +4091,13 @@ void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister } void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, - XMMRegister xtmp1, Register tmp, int masklen, int vec_enc) { + XMMRegister xtmp1, Register tmp, int masklen, int masksize, + int vec_enc) { assert(VM_Version::supports_avx(), ""); vpxor(xtmp, xtmp, xtmp, vec_enc); vpsubb(xtmp, xtmp, mask, vec_enc); vpmovmskb(tmp, xtmp, vec_enc); - if (masklen < 64) { + if (masksize < 16) { andq(tmp, (((jlong)1 << masklen) - 1)); } switch(opc) { diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index 2ef8e27cadce2fd08778cfef37cb490527c70384..8efa36a8101db5843323e66a00ffbbe1cf18433a 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -142,6 +142,8 @@ public: void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len); void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy); + void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, Register tmp, bool novlbwdq, int vlen_enc); + void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes); // vector compare @@ -222,11 +224,10 @@ public: public: #ifdef _LP64 - void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, Register tmp, - KRegister ktmp, int masklen, int vec_enc); + void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc); void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, XMMRegister xtmp1, - Register tmp, int masklen, int vec_enc); + Register tmp, int masklen, int masksize, int vec_enc); #endif void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); @@ -273,4 +274,18 @@ public: Register limit, Register result, Register chr, XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask = knoreg); + + void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, + XMMRegister dst, XMMRegister src1, XMMRegister src2, + bool merge, int vlen_enc, bool is_varshift = false); + + void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, + XMMRegister dst, XMMRegister src1, Address src2, + bool merge, int vlen_enc); + + void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst, + XMMRegister src1, int imm8, bool merge, int vlen_enc); + + void masked_op(int ideal_opc, int mask_len, KRegister dst, + KRegister src1, KRegister src2); #endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp index c83783d3e3f52222ad7dbbf3b9eb316313e0a4ea..134f7e6c9e2e5951a63a8a816c3d1ddd311332df 100644 --- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp @@ -34,8 +34,7 @@ class MacroAssembler; 
#ifdef COMPILER1 class LIR_Assembler; -class LIR_OprDesc; -typedef LIR_OprDesc* LIR_Opr; +class LIR_Opr; class StubAssembler; class ZLoadBarrierStubC1; #endif // COMPILER1 diff --git a/src/hotspot/cpu/x86/jvmciCodeInstaller_x86.cpp b/src/hotspot/cpu/x86/jvmciCodeInstaller_x86.cpp index 38f696b50c2f833bc1141534ef6de87ff4190343..7d93ed522ba0146fe31e434716cd2afe900670a5 100644 --- a/src/hotspot/cpu/x86/jvmciCodeInstaller_x86.cpp +++ b/src/hotspot/cpu/x86/jvmciCodeInstaller_x86.cpp @@ -155,14 +155,15 @@ void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &, JVMCIObject hotspot_met method = JVMCIENV->asMethod(hotspot_method); } #endif + NativeCall* call = NULL; switch (_next_call_type) { case INLINE_INVOKE: - break; + return; case INVOKEVIRTUAL: case INVOKEINTERFACE: { assert(method == NULL || !method->is_static(), "cannot call static method with invokeinterface"); - NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); + call = nativeCall_at(_instructions->start() + pc_offset); call->set_destination(SharedRuntime::get_resolve_virtual_call_stub()); _instructions->relocate(call->instruction_address(), virtual_call_Relocation::spec(_invoke_mark_pc), @@ -172,7 +173,7 @@ void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &, JVMCIObject hotspot_met case INVOKESTATIC: { assert(method == NULL || method->is_static(), "cannot call non-static method with invokestatic"); - NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); + call = nativeCall_at(_instructions->start() + pc_offset); call->set_destination(SharedRuntime::get_resolve_static_call_stub()); _instructions->relocate(call->instruction_address(), relocInfo::static_call_type, Assembler::call32_operand); @@ -180,15 +181,18 @@ void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &, JVMCIObject hotspot_met } case INVOKESPECIAL: { assert(method == NULL || !method->is_static(), "cannot call static method with invokespecial"); - NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); + call = nativeCall_at(_instructions->start() + pc_offset); call->set_destination(SharedRuntime::get_resolve_opt_virtual_call_stub()); _instructions->relocate(call->instruction_address(), relocInfo::opt_virtual_call_type, Assembler::call32_operand); break; } default: - JVMCI_ERROR("invalid _next_call_type value"); - break; + JVMCI_ERROR("invalid _next_call_type value: %d", _next_call_type); + return; + } + if (!call->is_displacement_aligned()) { + JVMCI_ERROR("unaligned displacement for call at offset %d", pc_offset); } } diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 30056be9a658a1c427e71d91a2e7e12b770ed958..b44e8ee15c84183d10ad003b74b0b6f2af91c9f8 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -4786,8 +4786,6 @@ void MacroAssembler::encode_and_move_klass_not_null(Register dst, Register src) } } -// !!! If the instructions that get generated here change then function -// instr_size_for_decode_klass_not_null() needs to get updated. 
void MacroAssembler::decode_klass_not_null(Register r, Register tmp) { assert_different_registers(r, tmp); // Note: it will change flags @@ -8267,6 +8265,379 @@ void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMR } } +void MacroAssembler::knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp, Register rtmp) { + switch(masklen) { + case 2: + knotbl(dst, src); + movl(rtmp, 3); + kmovbl(ktmp, rtmp); + kandbl(dst, ktmp, dst); + break; + case 4: + knotbl(dst, src); + movl(rtmp, 15); + kmovbl(ktmp, rtmp); + kandbl(dst, ktmp, dst); + break; + case 8: + knotbl(dst, src); + break; + case 16: + knotwl(dst, src); + break; + case 32: + knotdl(dst, src); + break; + case 64: + knotql(dst, src); + break; + default: + fatal("Unexpected vector length %d", masklen); + break; + } +} + +void MacroAssembler::kand(BasicType type, KRegister dst, KRegister src1, KRegister src2) { + switch(type) { + case T_BOOLEAN: + case T_BYTE: + kandbl(dst, src1, src2); + break; + case T_CHAR: + case T_SHORT: + kandwl(dst, src1, src2); + break; + case T_INT: + case T_FLOAT: + kanddl(dst, src1, src2); + break; + case T_LONG: + case T_DOUBLE: + kandql(dst, src1, src2); + break; + default: + fatal("Unexpected type argument %s", type2name(type)); + break; + } +} + +void MacroAssembler::kor(BasicType type, KRegister dst, KRegister src1, KRegister src2) { + switch(type) { + case T_BOOLEAN: + case T_BYTE: + korbl(dst, src1, src2); + break; + case T_CHAR: + case T_SHORT: + korwl(dst, src1, src2); + break; + case T_INT: + case T_FLOAT: + kordl(dst, src1, src2); + break; + case T_LONG: + case T_DOUBLE: + korql(dst, src1, src2); + break; + default: + fatal("Unexpected type argument %s", type2name(type)); + break; + } +} + +void MacroAssembler::kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2) { + switch(type) { + case T_BOOLEAN: + case T_BYTE: + kxorbl(dst, src1, src2); + break; + case T_CHAR: + case T_SHORT: + kxorwl(dst, src1, src2); + break; + case T_INT: + case T_FLOAT: + kxordl(dst, src1, src2); + break; + case T_LONG: + case T_DOUBLE: + kxorql(dst, src1, src2); + break; + default: + fatal("Unexpected type argument %s", type2name(type)); + break; + } +} + +void MacroAssembler::evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + switch(type) { + case T_BOOLEAN: + case T_BYTE: + evpermb(dst, mask, nds, src, merge, vector_len); break; + case T_CHAR: + case T_SHORT: + evpermw(dst, mask, nds, src, merge, vector_len); break; + case T_INT: + case T_FLOAT: + evpermd(dst, mask, nds, src, merge, vector_len); break; + case T_LONG: + case T_DOUBLE: + evpermq(dst, mask, nds, src, merge, vector_len); break; + default: + fatal("Unexpected type argument %s", type2name(type)); break; + } +} + +void MacroAssembler::evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + switch(type) { + case T_BOOLEAN: + case T_BYTE: + evpermb(dst, mask, nds, src, merge, vector_len); break; + case T_CHAR: + case T_SHORT: + evpermw(dst, mask, nds, src, merge, vector_len); break; + case T_INT: + case T_FLOAT: + evpermd(dst, mask, nds, src, merge, vector_len); break; + case T_LONG: + case T_DOUBLE: + evpermq(dst, mask, nds, src, merge, vector_len); break; + default: + fatal("Unexpected type argument %s", type2name(type)); break; + } +} + +void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + 
switch(type) { + case T_BYTE: + evpminsb(dst, mask, nds, src, merge, vector_len); break; + case T_SHORT: + evpminsw(dst, mask, nds, src, merge, vector_len); break; + case T_INT: + evpminsd(dst, mask, nds, src, merge, vector_len); break; + case T_LONG: + evpminsq(dst, mask, nds, src, merge, vector_len); break; + default: + fatal("Unexpected type argument %s", type2name(type)); break; + } +} + +void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + switch(type) { + case T_BYTE: + evpmaxsb(dst, mask, nds, src, merge, vector_len); break; + case T_SHORT: + evpmaxsw(dst, mask, nds, src, merge, vector_len); break; + case T_INT: + evpmaxsd(dst, mask, nds, src, merge, vector_len); break; + case T_LONG: + evpmaxsq(dst, mask, nds, src, merge, vector_len); break; + default: + fatal("Unexpected type argument %s", type2name(type)); break; + } +} + +void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + switch(type) { + case T_BYTE: + evpminsb(dst, mask, nds, src, merge, vector_len); break; + case T_SHORT: + evpminsw(dst, mask, nds, src, merge, vector_len); break; + case T_INT: + evpminsd(dst, mask, nds, src, merge, vector_len); break; + case T_LONG: + evpminsq(dst, mask, nds, src, merge, vector_len); break; + default: + fatal("Unexpected type argument %s", type2name(type)); break; + } +} + +void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + switch(type) { + case T_BYTE: + evpmaxsb(dst, mask, nds, src, merge, vector_len); break; + case T_SHORT: + evpmaxsw(dst, mask, nds, src, merge, vector_len); break; + case T_INT: + evpmaxsd(dst, mask, nds, src, merge, vector_len); break; + case T_LONG: + evpmaxsq(dst, mask, nds, src, merge, vector_len); break; + default: + fatal("Unexpected type argument %s", type2name(type)); break; + } +} + +void MacroAssembler::evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + switch(type) { + case T_INT: + evpxord(dst, mask, nds, src, merge, vector_len); break; + case T_LONG: + evpxorq(dst, mask, nds, src, merge, vector_len); break; + default: + fatal("Unexpected type argument %s", type2name(type)); break; + } +} + +void MacroAssembler::evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + switch(type) { + case T_INT: + evpxord(dst, mask, nds, src, merge, vector_len); break; + case T_LONG: + evpxorq(dst, mask, nds, src, merge, vector_len); break; + default: + fatal("Unexpected type argument %s", type2name(type)); break; + } +} + +void MacroAssembler::evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + switch(type) { + case T_INT: + Assembler::evpord(dst, mask, nds, src, merge, vector_len); break; + case T_LONG: + evporq(dst, mask, nds, src, merge, vector_len); break; + default: + fatal("Unexpected type argument %s", type2name(type)); break; + } +} + +void MacroAssembler::evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + switch(type) { + case T_INT: + Assembler::evpord(dst, mask, nds, src, merge, vector_len); break; + case T_LONG: + evporq(dst, mask, nds, src, merge, vector_len); break; + default: + fatal("Unexpected type argument %s", type2name(type)); 
break; + } +} + +void MacroAssembler::evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + switch(type) { + case T_INT: + evpandd(dst, mask, nds, src, merge, vector_len); break; + case T_LONG: + evpandq(dst, mask, nds, src, merge, vector_len); break; + default: + fatal("Unexpected type argument %s", type2name(type)); break; + } +} + +void MacroAssembler::evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + switch(type) { + case T_INT: + evpandd(dst, mask, nds, src, merge, vector_len); break; + case T_LONG: + evpandq(dst, mask, nds, src, merge, vector_len); break; + default: + fatal("Unexpected type argument %s", type2name(type)); break; + } +} + +void MacroAssembler::anytrue(Register dst, uint masklen, KRegister src1, KRegister src2) { + masklen = masklen < 8 ? 8 : masklen; + ktest(masklen, src1, src2); + setb(Assembler::notZero, dst); + movzbl(dst, dst); +} + +void MacroAssembler::alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch) { + if (masklen < 8) { + knotbl(kscratch, src2); + kortestbl(src1, kscratch); + setb(Assembler::carrySet, dst); + movzbl(dst, dst); + } else { + ktest(masklen, src1, src2); + setb(Assembler::carrySet, dst); + movzbl(dst, dst); + } +} + +void MacroAssembler::kortest(uint masklen, KRegister src1, KRegister src2) { + switch(masklen) { + case 8: + kortestbl(src1, src2); + break; + case 16: + kortestwl(src1, src2); + break; + case 32: + kortestdl(src1, src2); + break; + case 64: + kortestql(src1, src2); + break; + default: + fatal("Unexpected mask length %d", masklen); + break; + } +} + +void MacroAssembler::ktest(uint masklen, KRegister src1, KRegister src2) { + switch(masklen) { + case 8: + ktestbl(src1, src2); + break; + case 16: + ktestwl(src1, src2); + break; + case 32: + ktestdl(src1, src2); + break; + case 64: + ktestql(src1, src2); + break; + default: + fatal("Unexpected mask length %d", masklen); + break; + } +} + +void MacroAssembler::evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc) { + switch(type) { + case T_INT: + evprold(dst, mask, src, shift, merge, vlen_enc); break; + case T_LONG: + evprolq(dst, mask, src, shift, merge, vlen_enc); break; + default: + fatal("Unexpected type argument %s", type2name(type)); break; + } +} + +void MacroAssembler::evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc) { + switch(type) { + case T_INT: + evprord(dst, mask, src, shift, merge, vlen_enc); break; + case T_LONG: + evprorq(dst, mask, src, shift, merge, vlen_enc); break; + default: + fatal("Unexpected type argument %s", type2name(type)); break; + } +} + +void MacroAssembler::evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc) { + switch(type) { + case T_INT: + evprolvd(dst, mask, src1, src2, merge, vlen_enc); break; + case T_LONG: + evprolvq(dst, mask, src1, src2, merge, vlen_enc); break; + default: + fatal("Unexpected type argument %s", type2name(type)); break; + } +} + +void MacroAssembler::evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc) { + switch(type) { + case T_INT: + evprorvd(dst, mask, src1, src2, merge, vlen_enc); break; + case T_LONG: + evprorvq(dst, mask, src1, src2, merge, vlen_enc); break; + default: + fatal("Unexpected type
argument %s", type2name(type)); break; + } +} #if COMPILER2_OR_JVMCI void MacroAssembler::fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask, diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 415223253c7b49c8d3f13d47aaae71d7d9a119fc..392ff61d87fd4351f90a300d16aca5fcc45cd762 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -1338,6 +1338,75 @@ public: void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); void evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len); + void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { + if (!is_varshift) { + Assembler::evpsllw(dst, mask, nds, src, merge, vector_len); + } else { + Assembler::evpsllvw(dst, mask, nds, src, merge, vector_len); + } + } + void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { + if (!is_varshift) { + Assembler::evpslld(dst, mask, nds, src, merge, vector_len); + } else { + Assembler::evpsllvd(dst, mask, nds, src, merge, vector_len); + } + } + void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { + if (!is_varshift) { + Assembler::evpsllq(dst, mask, nds, src, merge, vector_len); + } else { + Assembler::evpsllvq(dst, mask, nds, src, merge, vector_len); + } + } + void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { + if (!is_varshift) { + Assembler::evpsrlw(dst, mask, nds, src, merge, vector_len); + } else { + Assembler::evpsrlvw(dst, mask, nds, src, merge, vector_len); + } + } + void evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { + if (!is_varshift) { + Assembler::evpsrld(dst, mask, nds, src, merge, vector_len); + } else { + Assembler::evpsrlvd(dst, mask, nds, src, merge, vector_len); + } + } + void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { + if (!is_varshift) { + Assembler::evpsrlq(dst, mask, nds, src, merge, vector_len); + } else { + Assembler::evpsrlvq(dst, mask, nds, src, merge, vector_len); + } + } + void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { + if (!is_varshift) { + Assembler::evpsraw(dst, mask, nds, src, merge, vector_len); + } else { + Assembler::evpsravw(dst, mask, nds, src, merge, vector_len); + } + } + void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { + if (!is_varshift) { + Assembler::evpsrad(dst, mask, nds, src, merge, vector_len); + } else { + Assembler::evpsravd(dst, mask, nds, src, merge, vector_len); + } + } + void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { + if (!is_varshift) { + Assembler::evpsraq(dst, mask, nds, src, merge, vector_len); + } else { + Assembler::evpsravq(dst, mask, nds, src, merge, vector_len); + } + } + + void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int 
vector_len); + void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); @@ -1627,7 +1696,33 @@ public: Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len); } - // Data + // AVX-512 mask operations. + void kand(BasicType etype, KRegister dst, KRegister src1, KRegister src2); + void kor(BasicType type, KRegister dst, KRegister src1, KRegister src2); + void knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp = knoreg, Register rtmp = noreg); + void kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2); + void kortest(uint masklen, KRegister src1, KRegister src2); + void ktest(uint masklen, KRegister src1, KRegister src2); + + void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + + void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + + void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + + void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + + void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc); + void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc); + void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc); + void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc); + + void alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch); + void anytrue(Register dst, uint masklen, KRegister src, KRegister kscratch); void cmov32( Condition cc, Register dst, Address src); void cmov32( Condition cc, Register dst, Register src); diff --git a/src/hotspot/cpu/x86/nativeInst_x86.cpp b/src/hotspot/cpu/x86/nativeInst_x86.cpp index fb00defc99e61e672f569e9ecc019183e16d53ee..0374a9cadeaaa06b3fb7234c1c8682569b553d12 100644 --- a/src/hotspot/cpu/x86/nativeInst_x86.cpp +++ b/src/hotspot/cpu/x86/nativeInst_x86.cpp @@ -260,6 +260,9 @@ void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { } +bool NativeCall::is_displacement_aligned() { + return (uintptr_t) displacement_address() % 4 == 0; +} // Similar to replace_mt_safe, but just changes the destination. 
The // important thing is that free-running threads are able to execute this @@ -282,8 +285,7 @@ void NativeCall::set_destination_mt_safe(address dest) { CompiledICLocker::is_safe(instruction_address()), "concurrent code patching"); // Both C1 and C2 should now be generating code which aligns the patched address // to be within a single cache line. - bool is_aligned = ((uintptr_t)displacement_address() + 0) / cache_line_size == - ((uintptr_t)displacement_address() + 3) / cache_line_size; + bool is_aligned = is_displacement_aligned(); guarantee(is_aligned, "destination must be aligned"); diff --git a/src/hotspot/cpu/x86/nativeInst_x86.hpp b/src/hotspot/cpu/x86/nativeInst_x86.hpp index 94f8b5e637c958af88a1852971dd8602fb60e9c0..a86128e7e4c02523327948b44884edc444f860e3 100644 --- a/src/hotspot/cpu/x86/nativeInst_x86.hpp +++ b/src/hotspot/cpu/x86/nativeInst_x86.hpp @@ -160,8 +160,6 @@ class NativeCall: public NativeInstruction { return_address_offset = 5 }; - enum { cache_line_size = BytesPerWord }; // conservative estimate! - address instruction_address() const { return addr_at(instruction_offset); } address next_instruction_address() const { return addr_at(return_address_offset); } int displacement() const { return (jint) int_at(displacement_offset); } @@ -175,9 +173,11 @@ class NativeCall: public NativeInstruction { #endif // AMD64 set_int_at(displacement_offset, dest - return_address()); } + // Returns whether the 4-byte displacement operand is 4-byte aligned. + bool is_displacement_aligned(); void set_destination_mt_safe(address dest); - void verify_alignment() { assert((intptr_t)addr_at(displacement_offset) % BytesPerInt == 0, "must be aligned"); } + void verify_alignment() { assert(is_displacement_aligned(), "displacement of call is not aligned"); } void verify(); void print(); diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp index cad8207e5b9f3bdacc1385627dc6dd39dffd61b8..a00af0e4af0d61c1ccf64598a5ef314a3a2e2930 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp @@ -1236,40 +1236,6 @@ void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_ty } } -// Unpack an array argument into a pointer to the body and the length -// if the array is non-null, otherwise pass 0 for both. -static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { - Register tmp_reg = rax; - assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg, - "possible collision"); - assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg, - "possible collision"); - - // Pass the length, ptr pair - Label is_null, done; - VMRegPair tmp(tmp_reg->as_VMReg()); - if (reg.first()->is_stack()) { - // Load the arg up from the stack - simple_move32(masm, reg, tmp); - reg = tmp; - } - __ testptr(reg.first()->as_Register(), reg.first()->as_Register()); - __ jccb(Assembler::equal, is_null); - __ lea(tmp_reg, Address(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type))); - simple_move32(masm, tmp, body_arg); - // load the length relative to the body. 
- __ movl(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() - - arrayOopDesc::base_offset_in_bytes(in_elem_type))); - simple_move32(masm, tmp, length_arg); - __ jmpb(done); - __ bind(is_null); - // Pass zeros - __ xorptr(tmp_reg, tmp_reg); - simple_move32(masm, tmp, body_arg); - simple_move32(masm, tmp, length_arg); - __ bind(done); -} - static void verify_oop_args(MacroAssembler* masm, const methodHandle& method, const BasicType* sig_bt, @@ -1372,8 +1338,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, int compile_id, BasicType* in_sig_bt, VMRegPair* in_regs, - BasicType ret_type, - address critical_entry) { + BasicType ret_type) { if (method->is_method_handle_intrinsic()) { vmIntrinsics::ID iid = method->intrinsic_id(); intptr_t start = (intptr_t)__ pc(); @@ -1395,12 +1360,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, in_ByteSize(-1), (OopMapSet*)NULL); } - bool is_critical_native = true; - address native_func = critical_entry; - if (native_func == NULL) { - native_func = method->native_function(); - is_critical_native = false; - } + address native_func = method->native_function(); assert(native_func != NULL, "must have function"); // An OopMap for lock (and class if static) @@ -1413,55 +1373,20 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // the hidden arguments as arg[0] and possibly arg[1] (static method) const int total_in_args = method->size_of_parameters(); - int total_c_args = total_in_args; - if (!is_critical_native) { - total_c_args += 1; - if (method->is_static()) { - total_c_args++; - } - } else { - for (int i = 0; i < total_in_args; i++) { - if (in_sig_bt[i] == T_ARRAY) { - total_c_args++; - } - } - } + int total_c_args = total_in_args + (method->is_static() ? 2 : 1); BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); BasicType* in_elem_bt = NULL; int argc = 0; - if (!is_critical_native) { - out_sig_bt[argc++] = T_ADDRESS; - if (method->is_static()) { - out_sig_bt[argc++] = T_OBJECT; - } + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } - for (int i = 0; i < total_in_args ; i++ ) { - out_sig_bt[argc++] = in_sig_bt[i]; - } - } else { - in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); - SignatureStream ss(method->signature()); - for (int i = 0; i < total_in_args ; i++ ) { - if (in_sig_bt[i] == T_ARRAY) { - // Arrays are passed as int, elem* pair - out_sig_bt[argc++] = T_INT; - out_sig_bt[argc++] = T_ADDRESS; - ss.skip_array_prefix(1); // skip one '[' - assert(ss.is_primitive(), "primitive type expected"); - in_elem_bt[i] = ss.type(); - } else { - out_sig_bt[argc++] = in_sig_bt[i]; - in_elem_bt[i] = T_VOID; - } - if (in_sig_bt[i] != T_VOID) { - assert(in_sig_bt[i] == ss.type() || - in_sig_bt[i] == T_ARRAY, "must match"); - ss.next(); - } - } + for (int i = 0; i < total_in_args ; i++ ) { + out_sig_bt[argc++] = in_sig_bt[i]; } // Now figure out where the args must be stored and how much stack space @@ -1479,40 +1404,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Now the space for the inbound oop handle area int total_save_slots = 2 * VMRegImpl::slots_per_word; // 2 arguments passed in registers - if (is_critical_native) { - // Critical natives may have to call out so they need a save area - // for register arguments. 
- int double_slots = 0; - int single_slots = 0; - for ( int i = 0; i < total_in_args; i++) { - if (in_regs[i].first()->is_Register()) { - const Register reg = in_regs[i].first()->as_Register(); - switch (in_sig_bt[i]) { - case T_ARRAY: // critical array (uses 2 slots on LP64) - case T_BOOLEAN: - case T_BYTE: - case T_SHORT: - case T_CHAR: - case T_INT: single_slots++; break; - case T_LONG: double_slots++; break; - default: ShouldNotReachHere(); - } - } else if (in_regs[i].first()->is_XMMRegister()) { - switch (in_sig_bt[i]) { - case T_FLOAT: single_slots++; break; - case T_DOUBLE: double_slots++; break; - default: ShouldNotReachHere(); - } - } else if (in_regs[i].first()->is_FloatRegister()) { - ShouldNotReachHere(); - } - } - total_save_slots = double_slots * 2 + single_slots; - // align the save area - if (double_slots != 0) { - stack_slots = align_up(stack_slots, 2); - } - } int oop_handle_offset = stack_slots; stack_slots += total_save_slots; @@ -1691,7 +1582,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // vectors we have in our possession. We simply walk the java vector to // get the source locations and the c vector to get the destinations. - int c_arg = is_critical_native ? 0 : (method->is_static() ? 2 : 1 ); + int c_arg = method->is_static() ? 2 : 1; // Record rsp-based slot for receiver on stack for non-static methods int receiver_offset = -1; @@ -1714,14 +1605,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, for (int i = 0; i < total_in_args ; i++, c_arg++ ) { switch (in_sig_bt[i]) { case T_ARRAY: - if (is_critical_native) { - VMRegPair in_arg = in_regs[i]; - unpack_array_argument(masm, in_arg, in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); - c_arg++; - break; - } case T_OBJECT: - assert(!is_critical_native, "no oop arguments"); object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ((i == 0) && (!is_static)), &receiver_offset); @@ -1753,7 +1637,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Pre-load a static method's oop into rsi. Used both by locking code and // the normal JNI call code. 
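Aside (reviewer note, not part of the patch): the "method->is_static() ? 2 : 1" offset between the C argument index and the Java argument index used throughout this wrapper reflects JNI's hidden leading parameters. A minimal sketch of the corresponding native signatures, with hypothetical class and method names:

    #include <jni.h>
    // Java: class C { static native void s(int x);  native void i(int x); }
    JNIEXPORT void JNICALL Java_C_s(JNIEnv* env, jclass clazz, jint x);   // static: JNIEnv* + class mirror
    JNIEXPORT void JNICALL Java_C_i(JNIEnv* env, jobject recv, jint x);   // instance: JNIEnv* + receiver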
- if (method->is_static() && !is_critical_native) { + if (method->is_static()) { // load opp into a register __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror())); @@ -1808,8 +1692,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Lock a synchronized method if (method->is_synchronized()) { - assert(!is_critical_native, "unhandled"); - const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); @@ -1861,13 +1743,11 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Finally just about ready to make the JNI call // get JNIEnv* which is first argument to native - if (!is_critical_native) { - __ lea(rdx, Address(thread, in_bytes(JavaThread::jni_environment_offset()))); - __ movptr(Address(rsp, 0), rdx); + __ lea(rdx, Address(thread, in_bytes(JavaThread::jni_environment_offset()))); + __ movptr(Address(rsp, 0), rdx); - // Now set thread in native - __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native); - } + // Now set thread in native + __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native); __ call(RuntimeAddress(native_func)); @@ -1900,17 +1780,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, Label after_transition; - // If this is a critical native, check for a safepoint or suspend request after the call. - // If a safepoint is needed, transition to native, then to native_trans to handle - // safepoints like the native methods that are not critical natives. - if (is_critical_native) { - Label needs_safepoint; - __ safepoint_poll(needs_safepoint, thread, false /* at_return */, false /* in_nmethod */); - __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0); - __ jcc(Assembler::equal, after_transition); - __ bind(needs_safepoint); - } - // Switch thread to "native transition" state before reading the synchronization state. // This additional state is necessary because reading and testing the synchronization // state is not atomic w.r.t. GC, as this scenario demonstrates: @@ -2043,15 +1912,13 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ movptr(Address(thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD); } - if (!is_critical_native) { - // reset handle block - __ movptr(rcx, Address(thread, JavaThread::active_handles_offset())); - __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD); + // reset handle block + __ movptr(rcx, Address(thread, JavaThread::active_handles_offset())); + __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD); - // Any exception pending? - __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); - __ jcc(Assembler::notEqual, exception_pending); - } + // Any exception pending? 
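+    // (set, e.g., by the native code via the JNI Throw/ThrowNew functions)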
+ __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); + __ jcc(Assembler::notEqual, exception_pending); // no exception, we're almost done @@ -2165,18 +2032,16 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // BEGIN EXCEPTION PROCESSING - if (!is_critical_native) { - // Forward the exception - __ bind(exception_pending); + // Forward the exception + __ bind(exception_pending); - // remove possible return value from FPU register stack - __ empty_FPU_stack(); + // remove possible return value from FPU register stack + __ empty_FPU_stack(); - // pop our frame - __ leave(); - // and forward the exception - __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); - } + // pop our frame + __ leave(); + // and forward the exception + __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); __ flush(); diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp index 8dcb557fd9384e8b70b7f4b016cabdbd09d9bafc..f78ec39c25e42c7dc8a3c3d55e776546cec92091 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp @@ -1235,46 +1235,6 @@ static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMR } } -// Unpack an array argument into a pointer to the body and the length -// if the array is non-null, otherwise pass 0 for both. -static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { - Register tmp_reg = rax; - assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg, - "possible collision"); - assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg, - "possible collision"); - - __ block_comment("unpack_array_argument {"); - - // Pass the length, ptr pair - Label is_null, done; - VMRegPair tmp; - tmp.set_ptr(tmp_reg->as_VMReg()); - if (reg.first()->is_stack()) { - // Load the arg up from the stack - __ move_ptr(reg, tmp); - reg = tmp; - } - __ testptr(reg.first()->as_Register(), reg.first()->as_Register()); - __ jccb(Assembler::equal, is_null); - __ lea(tmp_reg, Address(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type))); - __ move_ptr(tmp, body_arg); - // load the length relative to the body. - __ movl(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() - - arrayOopDesc::base_offset_in_bytes(in_elem_type))); - __ move32_64(tmp, length_arg); - __ jmpb(done); - __ bind(is_null); - // Pass zeros - __ xorptr(tmp_reg, tmp_reg); - __ move_ptr(tmp, body_arg); - __ move32_64(tmp, length_arg); - __ bind(done); - - __ block_comment("} unpack_array_argument"); -} - - // Different signatures may require very different orders for the move // to avoid clobbering other arguments. There's no simple way to // order them safely. 
Compute a safe order for issuing stores and @@ -1550,8 +1510,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, int compile_id, BasicType* in_sig_bt, VMRegPair* in_regs, - BasicType ret_type, - address critical_entry) { + BasicType ret_type) { if (method->is_method_handle_intrinsic()) { vmIntrinsics::ID iid = method->intrinsic_id(); intptr_t start = (intptr_t)__ pc(); @@ -1573,12 +1532,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, in_ByteSize(-1), (OopMapSet*)NULL); } - bool is_critical_native = true; - address native_func = critical_entry; - if (native_func == NULL) { - native_func = method->native_function(); - is_critical_native = false; - } + address native_func = method->native_function(); assert(native_func != NULL, "must have function"); // An OopMap for lock (and class if static) @@ -1592,55 +1546,20 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // the hidden arguments as arg[0] and possibly arg[1] (static method) const int total_in_args = method->size_of_parameters(); - int total_c_args = total_in_args; - if (!is_critical_native) { - total_c_args += 1; - if (method->is_static()) { - total_c_args++; - } - } else { - for (int i = 0; i < total_in_args; i++) { - if (in_sig_bt[i] == T_ARRAY) { - total_c_args++; - } - } - } + int total_c_args = total_in_args + (method->is_static() ? 2 : 1); BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); BasicType* in_elem_bt = NULL; int argc = 0; - if (!is_critical_native) { - out_sig_bt[argc++] = T_ADDRESS; - if (method->is_static()) { - out_sig_bt[argc++] = T_OBJECT; - } + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } - for (int i = 0; i < total_in_args ; i++ ) { - out_sig_bt[argc++] = in_sig_bt[i]; - } - } else { - in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); - SignatureStream ss(method->signature()); - for (int i = 0; i < total_in_args ; i++ ) { - if (in_sig_bt[i] == T_ARRAY) { - // Arrays are passed as int, elem* pair - out_sig_bt[argc++] = T_INT; - out_sig_bt[argc++] = T_ADDRESS; - ss.skip_array_prefix(1); // skip one '[' - assert(ss.is_primitive(), "primitive type expected"); - in_elem_bt[i] = ss.type(); - } else { - out_sig_bt[argc++] = in_sig_bt[i]; - in_elem_bt[i] = T_VOID; - } - if (in_sig_bt[i] != T_VOID) { - assert(in_sig_bt[i] == ss.type() || - in_sig_bt[i] == T_ARRAY, "must match"); - ss.next(); - } - } + for (int i = 0; i < total_in_args ; i++ ) { + out_sig_bt[argc++] = in_sig_bt[i]; } // Now figure out where the args must be stored and how much stack space @@ -1658,40 +1577,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Now the space for the inbound oop handle area int total_save_slots = 6 * VMRegImpl::slots_per_word; // 6 arguments passed in registers - if (is_critical_native) { - // Critical natives may have to call out so they need a save area - // for register arguments. 
- int double_slots = 0; - int single_slots = 0; - for ( int i = 0; i < total_in_args; i++) { - if (in_regs[i].first()->is_Register()) { - const Register reg = in_regs[i].first()->as_Register(); - switch (in_sig_bt[i]) { - case T_BOOLEAN: - case T_BYTE: - case T_SHORT: - case T_CHAR: - case T_INT: single_slots++; break; - case T_ARRAY: // specific to LP64 (7145024) - case T_LONG: double_slots++; break; - default: ShouldNotReachHere(); - } - } else if (in_regs[i].first()->is_XMMRegister()) { - switch (in_sig_bt[i]) { - case T_FLOAT: single_slots++; break; - case T_DOUBLE: double_slots++; break; - default: ShouldNotReachHere(); - } - } else if (in_regs[i].first()->is_FloatRegister()) { - ShouldNotReachHere(); - } - } - total_save_slots = double_slots * 2 + single_slots; - // align the save area - if (double_slots != 0) { - stack_slots = align_up(stack_slots, 2); - } - } int oop_handle_offset = stack_slots; stack_slots += total_save_slots; @@ -1886,23 +1771,15 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, #endif /* ASSERT */ - // This may iterate in two different directions depending on the - // kind of native it is. The reason is that for regular JNI natives - // the incoming and outgoing registers are offset upwards and for - // critical natives they are offset down. + // For JNI natives the incoming and outgoing registers are offset upwards. GrowableArray arg_order(2 * total_in_args); VMRegPair tmp_vmreg; tmp_vmreg.set2(rbx->as_VMReg()); - if (!is_critical_native) { - for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { - arg_order.push(i); - arg_order.push(c_arg); - } - } else { - // Compute a valid move order, using tmp_vmreg to break any cycles - ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); + for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { + arg_order.push(i); + arg_order.push(c_arg); } int temploc = -1; @@ -1910,20 +1787,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, int i = arg_order.at(ai); int c_arg = arg_order.at(ai + 1); __ block_comment(err_msg("move %d -> %d", i, c_arg)); - if (c_arg == -1) { - assert(is_critical_native, "should only be required for critical natives"); - // This arg needs to be moved to a temporary - __ mov(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); - in_regs[i] = tmp_vmreg; - temploc = i; - continue; - } else if (i == -1) { - assert(is_critical_native, "should only be required for critical natives"); - // Read from the temporary location - assert(temploc != -1, "must be valid"); - i = temploc; - temploc = -1; - } #ifdef ASSERT if (in_regs[i].first()->is_Register()) { assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); @@ -1938,20 +1801,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, #endif /* ASSERT */ switch (in_sig_bt[i]) { case T_ARRAY: - if (is_critical_native) { - unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); - c_arg++; -#ifdef ASSERT - if (out_regs[c_arg].first()->is_Register()) { - reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; - } else if (out_regs[c_arg].first()->is_XMMRegister()) { - freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true; - } -#endif - break; - } case T_OBJECT: - assert(!is_critical_native, "no oop arguments"); __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], 
out_regs[c_arg], ((i == 0) && (!is_static)), &receiver_offset); @@ -1985,30 +1835,25 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Pre-load a static method's oop into r14. Used both by locking code and // the normal JNI call code. - if (!is_critical_native) { - // point c_arg at the first arg that is already loaded in case we - // need to spill before we call out - c_arg = total_c_args - total_in_args; - - if (method->is_static()) { - - // load oop into a register - __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror())); - - // Now handlize the static class mirror it's known not-null. - __ movptr(Address(rsp, klass_offset), oop_handle_reg); - map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); - - // Now get the handle - __ lea(oop_handle_reg, Address(rsp, klass_offset)); - // store the klass handle as second argument - __ movptr(c_rarg1, oop_handle_reg); - // and protect the arg if we must spill - c_arg--; - } - } else { - // For JNI critical methods we need to save all registers in save_args. - c_arg = 0; + // point c_arg at the first arg that is already loaded in case we + // need to spill before we call out + c_arg = total_c_args - total_in_args; + + if (method->is_static()) { + + // load oop into a register + __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror())); + + // Now handlize the static class mirror it's known not-null. + __ movptr(Address(rsp, klass_offset), oop_handle_reg); + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + + // Now get the handle + __ lea(oop_handle_reg, Address(rsp, klass_offset)); + // store the klass handle as second argument + __ movptr(c_rarg1, oop_handle_reg); + // and protect the arg if we must spill + c_arg--; } // Change state to native (we save the return address in the thread, since it might not @@ -2060,8 +1905,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, Label lock_done; if (method->is_synchronized()) { - assert(!is_critical_native, "unhandled"); - const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); @@ -2115,12 +1958,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Finally just about ready to make the JNI call // get JNIEnv* which is first argument to native - if (!is_critical_native) { - __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset()))); + __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset()))); - // Now set thread in native - __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native); - } + // Now set thread in native + __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native); __ call(RuntimeAddress(native_func)); @@ -2148,17 +1989,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, Label after_transition; - // If this is a critical native, check for a safepoint or suspend request after the call. - // If a safepoint is needed, transition to native, then to native_trans to handle - // safepoints like the native methods that are not critical natives. 
- if (is_critical_native) { - Label needs_safepoint; - __ safepoint_poll(needs_safepoint, r15_thread, false /* at_return */, false /* in_nmethod */); - __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0); - __ jcc(Assembler::equal, after_transition); - __ bind(needs_safepoint); - } - // Switch thread to "native transition" state before reading the synchronization state. // This additional state is necessary because reading and testing the synchronization // state is not atomic w.r.t. GC, as this scenario demonstrates: @@ -2279,21 +2109,17 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ movptr(Address(r15_thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD); } - if (!is_critical_native) { - // reset handle block - __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset())); - __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD); - } + // reset handle block + __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset())); + __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD); // pop our frame __ leave(); - if (!is_critical_native) { - // Any exception pending? - __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); - __ jcc(Assembler::notEqual, exception_pending); - } + // Any exception pending? + __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); + __ jcc(Assembler::notEqual, exception_pending); // Return @@ -2301,13 +2127,11 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Unexpected paths are out of line and go here - if (!is_critical_native) { - // forward the exception - __ bind(exception_pending); + // forward the exception + __ bind(exception_pending); - // and forward the exception - __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); - } + // and forward the exception + __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); // Slow path locking & unlocking if (method->is_synchronized()) { diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp index 654066ac87262194a4e87e9ee68b7ad699a41f07..1525d10e5b5f3dea743318ca097dd1b3098dd6f5 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp @@ -4001,6 +4001,7 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask"); StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask_long_double("vector_long_sign_mask", 0x80000000, 0x00000000); StubRoutines::x86::_vector_all_bits_set = generate_vector_mask("vector_all_bits_set", 0xFFFFFFFF); + StubRoutines::x86::_vector_int_mask_cmp_bits = generate_vector_mask("vector_int_mask_cmp_bits", 0x00000001); StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices"); // support for verify_oop (must happen after universe_init) diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp index 24509e694a63fb9eefa9131198a30dd8ea6e5c43..f5ef24ddf4cecfc19794c57155364d4aa2a32cac 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp @@ -7676,6 +7676,7 @@ address generate_avx_ghash_processBlocks() { StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask("vector_double_sign_mask", 0x7FFFFFFFFFFFFFFF); 
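Aside (reviewer note, not part of the patch): judging by the existing entries in this table, generate_vector_mask appears to broadcast its 64-bit argument across a vector-wide stub constant, so the vector_int_mask_cmp_bits value 0x0000000100000001 added just below should produce a constant whose every 32-bit lane holds 1. A host-side sketch of that lane layout, illustrative only:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t pattern = 0x0000000100000001ULL;  // per-int lane value 1
      uint64_t stub[8];                                // 64 bytes, one ZMM-wide constant
      for (int i = 0; i < 8; i++) stub[i] = pattern;   // emulate the broadcast
      const int32_t* lane = reinterpret_cast<const int32_t*>(stub);
      for (int i = 0; i < 16; i++) {
        std::printf("lane %2d = %d\n", i, (int)lane[i]);  // prints 1 for all 16 lanes
      }
      return 0;
    }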
StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask("vector_double_sign_flip", 0x8000000000000000); StubRoutines::x86::_vector_all_bits_set = generate_vector_mask("vector_all_bits_set", 0xFFFFFFFFFFFFFFFF); + StubRoutines::x86::_vector_int_mask_cmp_bits = generate_vector_mask("vector_int_mask_cmp_bits", 0x0000000100000001); StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff00ff00ff); StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask"); StubRoutines::x86::_vector_int_to_byte_mask = generate_vector_mask("vector_int_to_byte_mask", 0x000000ff000000ff); diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.cpp b/src/hotspot/cpu/x86/stubRoutines_x86.cpp index 9a4523cd06f124fd40f104b355daf56e3c94ec0e..ec5a5d0f1433b7fd3a395b40414e31cd15051d6f 100644 --- a/src/hotspot/cpu/x86/stubRoutines_x86.cpp +++ b/src/hotspot/cpu/x86/stubRoutines_x86.cpp @@ -48,6 +48,7 @@ address StubRoutines::x86::_vector_int_to_byte_mask = NULL; address StubRoutines::x86::_vector_int_to_short_mask = NULL; address StubRoutines::x86::_vector_all_bits_set = NULL; address StubRoutines::x86::_vector_byte_shuffle_mask = NULL; +address StubRoutines::x86::_vector_int_mask_cmp_bits = NULL; address StubRoutines::x86::_vector_short_shuffle_mask = NULL; address StubRoutines::x86::_vector_int_shuffle_mask = NULL; address StubRoutines::x86::_vector_long_shuffle_mask = NULL; diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.hpp b/src/hotspot/cpu/x86/stubRoutines_x86.hpp index 4134990dcbb8c5686ad21a4ffadfdaa691703e98..1ef8377dfc2376b7f52901831673d44b2b8a39ee 100644 --- a/src/hotspot/cpu/x86/stubRoutines_x86.hpp +++ b/src/hotspot/cpu/x86/stubRoutines_x86.hpp @@ -165,6 +165,7 @@ class x86 { static address _vector_double_sign_flip; static address _vector_long_sign_mask; static address _vector_all_bits_set; + static address _vector_int_mask_cmp_bits; static address _vector_byte_perm_mask; static address _vector_int_to_byte_mask; static address _vector_int_to_short_mask; @@ -289,6 +290,10 @@ class x86 { return _vector_all_bits_set; } + static address vector_int_mask_cmp_bits() { + return _vector_int_mask_cmp_bits; + } + static address vector_byte_perm_mask() { return _vector_byte_perm_mask; } diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp index ec82ecd81c706475dbf31afdfdbd0aa1f922a8cb..6feb3b8c1a0a6ec18ce98ecb47d7f05250cd65e0 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.hpp +++ b/src/hotspot/cpu/x86/vm_version_x86.hpp @@ -884,6 +884,7 @@ public: static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; } static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; } static bool supports_avx512vlbw() { return (supports_evex() && supports_avx512bw() && supports_avx512vl()); } + static bool supports_avx512bwdq() { return (supports_evex() && supports_avx512bw() && supports_avx512dq()); } static bool supports_avx512vldq() { return (supports_evex() && supports_avx512dq() && supports_avx512vl()); } static bool supports_avx512vlbwdq() { return (supports_evex() && supports_avx512vl() && supports_avx512bw() && supports_avx512dq()); } diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 1ea22eee800a986d21bea5ba80cce2529277e1b8..5a8569dc6e0f290eb7d68b850bdd60636fad16df 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -1374,6 +1374,7 @@ Assembler::Width widthForType(BasicType bt) { static address 
vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
+static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
@@ -1556,6 +1557,7 @@ const bool Matcher::match_rule_supported(int opcode) {
     case Op_VectorMaskFirstTrue:
     case Op_VectorMaskLastTrue:
     case Op_VectorMaskTrueCount:
+    case Op_VectorMaskToLong:
       if (!is_LP64 || UseAVX < 1) {
         return false;
       }
@@ -1802,8 +1804,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
         return false;
       }
       break;
+    case Op_LoadVectorGatherMasked:
+    case Op_StoreVectorScatterMasked:
     case Op_StoreVectorScatter:
-      if(bt == T_BYTE || bt == T_SHORT) {
+      if(is_subword_type(bt)) {
         return false;
       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
         return false;
       }
@@ -1814,6 +1818,17 @@
         return false;
       }
       break;
+    case Op_MaskAll:
+      if (!is_LP64 || !VM_Version::supports_evex()) {
+        return false;
+      }
+      if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
+        return false;
+      }
+      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
+        return false;
+      }
+      break;
     case Op_VectorMaskCmp:
       if (vlen < 2 || size_in_bits < 32) {
         return false;
       }
@@ -1823,6 +1838,148 @@
   return true;  // Per default match rules are supported.
 }
 
+const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
+  // The ADLC-based match_rule_supported routine checks for the existence of a pattern
+  // based on the IR opcode. Most unary/binary/ternary masked operations share the IR
+  // nodes of their non-masked counterparts, with the mask edge being the differentiator.
+  // This routine therefore does a strict check on the existence of masked operation
+  // patterns: it returns false by default for every opcode apart from the ones whose
+  // masked instruction patterns are defined in this file.
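+  // For example, a masked AddVI is still an AddVI node (the mask is just an
+  // additional input edge), so relying on match_rule_supported_vector() alone
+  // would over-approximate; only opcodes accepted by the switch below actually
+  // have predicated instruction patterns.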
+ if (!match_rule_supported_vector(opcode, vlen, bt)) { + return false; + } + + const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); + int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; + if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { + return false; + } + switch(opcode) { + // Unary masked operations + case Op_AbsVB: + case Op_AbsVS: + if(!VM_Version::supports_avx512bw()) { + return false; // Implementation limitation + } + case Op_AbsVI: + case Op_AbsVL: + return true; + + // Ternary masked operations + case Op_FmaVF: + case Op_FmaVD: + return true; + + // Binary masked operations + case Op_AddVB: + case Op_AddVS: + case Op_SubVB: + case Op_SubVS: + case Op_MulVS: + case Op_LShiftVS: + case Op_RShiftVS: + case Op_URShiftVS: + assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); + if (!VM_Version::supports_avx512bw()) { + return false; // Implementation limitation + } + return true; + + case Op_MulVL: + assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); + if (!VM_Version::supports_avx512dq()) { + return false; // Implementation limitation + } + return true; + + case Op_AndV: + case Op_OrV: + case Op_XorV: + case Op_RotateRightV: + case Op_RotateLeftV: + if (bt != T_INT && bt != T_LONG) { + return false; // Implementation limitation + } + return true; + + case Op_VectorLoadMask: + assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); + if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { + return false; + } + return true; + + case Op_AddVI: + case Op_AddVL: + case Op_AddVF: + case Op_AddVD: + case Op_SubVI: + case Op_SubVL: + case Op_SubVF: + case Op_SubVD: + case Op_MulVI: + case Op_MulVF: + case Op_MulVD: + case Op_DivVF: + case Op_DivVD: + case Op_SqrtVF: + case Op_SqrtVD: + case Op_LShiftVI: + case Op_LShiftVL: + case Op_RShiftVI: + case Op_RShiftVL: + case Op_URShiftVI: + case Op_URShiftVL: + case Op_LoadVectorMasked: + case Op_StoreVectorMasked: + case Op_LoadVectorGatherMasked: + case Op_StoreVectorScatterMasked: + return true; + + case Op_MaxV: + case Op_MinV: + if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { + return false; // Implementation limitation + } + if (is_floating_point_type(bt)) { + return false; // Implementation limitation + } + return true; + + case Op_VectorMaskCmp: + if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { + return false; // Implementation limitation + } + return true; + + case Op_VectorRearrange: + if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { + return false; // Implementation limitation + } + if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { + return false; // Implementation limitation + } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { + return false; // Implementation limitation + } + return true; + + // Binary Logical operations + case Op_AndVMask: + case Op_OrVMask: + case Op_XorVMask: + if (vlen > 16 && !VM_Version::supports_avx512bw()) { + return false; // Implementation limitation + } + return true; + + case Op_MaskAll: + return true; + + default: + return false; + } +} + MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { assert(Matcher::is_generic_vector(generic_opnd), "not generic"); bool legacy = (generic_opnd->opcode() == LEGVEC); @@ -1887,7 +2044,7 @@ const RegMask* Matcher::predicate_reg_mask(void) { } const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) { - return new TypeVectMask(TypeInt::BOOL, length); + 
return new TypeVectMask(elemTy, length); } // Max vector size in bytes. 0 if not supported. @@ -3310,10 +3467,85 @@ instruct sqrtD_reg(regD dst) %{ ins_pipe(pipe_slow); %} + // ---------------------------------------- VectorReinterpret ------------------------------------ +instruct reinterpret_mask(kReg dst) %{ + predicate(n->bottom_type()->isa_vectmask() && + Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src + match(Set dst (VectorReinterpret dst)); + ins_cost(125); + format %{ "vector_reinterpret $dst\t!" %} + ins_encode %{ + // empty + %} + ins_pipe( pipe_slow ); +%} + +instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ + predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && + n->bottom_type()->isa_vectmask() && + n->in(1)->bottom_type()->isa_vectmask() && + n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && + n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src + match(Set dst (VectorReinterpret src)); + effect(TEMP xtmp); + format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %} + ins_encode %{ + int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); + int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); + assert(src_sz == dst_sz , "src and dst size mismatch"); + int vlen_enc = vector_length_encoding(src_sz); + __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); + __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ + predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && + n->bottom_type()->isa_vectmask() && + n->in(1)->bottom_type()->isa_vectmask() && + (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || + n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && + n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src + match(Set dst (VectorReinterpret src)); + effect(TEMP xtmp); + format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} + ins_encode %{ + int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); + int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); + assert(src_sz == dst_sz , "src and dst size mismatch"); + int vlen_enc = vector_length_encoding(src_sz); + __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); + __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ + predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && + n->bottom_type()->isa_vectmask() && + n->in(1)->bottom_type()->isa_vectmask() && + (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || + n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && + n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src + match(Set dst (VectorReinterpret src)); + effect(TEMP xtmp); + format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" 
%} + ins_encode %{ + int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); + int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); + assert(src_sz == dst_sz , "src and dst size mismatch"); + int vlen_enc = vector_length_encoding(src_sz); + __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); + __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} instruct reinterpret(vec dst) %{ - predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src + predicate(!n->bottom_type()->isa_vectmask() && + Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src match(Set dst (VectorReinterpret dst)); ins_cost(125); format %{ "vector_reinterpret $dst\t!" %} @@ -3348,6 +3580,7 @@ instruct reinterpret_expand(vec dst, vec src, rRegP scratch) %{ instruct vreinterpret_expand4(legVec dst, vec src, rRegP scratch) %{ predicate(UseAVX > 0 && + !n->bottom_type()->isa_vectmask() && (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst match(Set dst (VectorReinterpret src)); @@ -3363,6 +3596,7 @@ instruct vreinterpret_expand4(legVec dst, vec src, rRegP scratch) %{ instruct vreinterpret_expand(legVec dst, vec src) %{ predicate(UseAVX > 0 && + !n->bottom_type()->isa_vectmask() && (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst match(Set dst (VectorReinterpret src)); @@ -3380,7 +3614,8 @@ instruct vreinterpret_expand(legVec dst, vec src) %{ %} instruct reinterpret_shrink(vec dst, legVec src) %{ - predicate(Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst + predicate(!n->bottom_type()->isa_vectmask() && + Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst match(Set dst (VectorReinterpret src)); ins_cost(125); format %{ "vector_reinterpret_shrink $dst,$src\t!" %} @@ -3582,7 +3817,7 @@ instruct storeV(memory mem, vec src) %{ // Gather INT, LONG, FLOAT, DOUBLE instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ - predicate(Matcher::vector_length_in_bytes(n) <= 32); + predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); match(Set dst (LoadVectorGather mem idx)); effect(TEMP dst, TEMP tmp, TEMP mask); format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %} @@ -3607,10 +3842,10 @@ instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ %} instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ - predicate(Matcher::vector_length_in_bytes(n) == 64); + predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); match(Set dst (LoadVectorGather mem idx)); effect(TEMP dst, TEMP tmp, TEMP ktmp); - format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and k2 as TEMP" %} + format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and ktmp as TEMP" %}
   ins_encode %{
     assert(UseAVX > 2, "sanity");
@@ -3626,6 +3861,24 @@ instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
   ins_pipe( pipe_slow );
 %}
 
+instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
+  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
+  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
+  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
+  ins_encode %{
+    assert(UseAVX > 2, "sanity");
+    int vlen_enc = vector_length_encoding(this);
+    BasicType elem_bt = Matcher::vector_element_basic_type(this);
+    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
+    // Note: The gather instruction partially updates the opmask register used
+    // for predication, hence the mask operand is first copied to a temporary.
+    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+    __ lea($tmp$$Register, $mem$$Address);
+    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
+  %}
+  ins_pipe( pipe_slow );
+%}
 
 // ====================Scatter=======================================
 
 // Scatter INT, LONG, FLOAT, DOUBLE
@@ -3649,6 +3902,24 @@ instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
   ins_pipe( pipe_slow );
 %}
 
+instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
+  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
+  effect(TEMP tmp, TEMP ktmp);
+  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
+  ins_encode %{
+    int vlen_enc = vector_length_encoding(this, $src);
+    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
+    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
+    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
+    // Note: The scatter instruction partially updates the opmask register used
+    // for predication, hence the mask operand is first copied to a temporary.
+    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
+    __ lea($tmp$$Register, $mem$$Address);
+    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 // ====================REPLICATE=======================================
 
 // Replicate byte scalar to be vector
@@ -3894,7 +4165,7 @@ instruct ReplI_zero(vec dst, immI_0 zero) %{
 %}
 
 instruct ReplI_M1(vec dst, immI_M1 con) %{
-  predicate(UseAVX > 0);
+  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) >= 16);
   match(Set dst (ReplicateB con));
   match(Set dst (ReplicateS con));
   match(Set dst (ReplicateI con));
@@ -5860,6 +6131,7 @@ instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
 
 instruct vsqrtF_reg(vec dst, vec src) %{
   match(Set dst (SqrtVF src));
+  ins_cost(400);
   format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
   ins_encode %{
     assert(UseAVX > 0, "required");
@@ -5872,6 +6144,7 @@ instruct vsqrtF_reg(vec dst, vec src) %{
 instruct vsqrtF_mem(vec dst, memory mem) %{
   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
   match(Set dst (SqrtVF (LoadVector mem)));
+  ins_cost(400);
   format %{ "vsqrtps $dst,$mem\t!
sqrt packedF" %} ins_encode %{ assert(UseAVX > 0, "required"); @@ -5884,6 +6157,7 @@ instruct vsqrtF_mem(vec dst, memory mem) %{ // Floating point vector sqrt instruct vsqrtD_reg(vec dst, vec src) %{ match(Set dst (SqrtVD src)); + ins_cost(400); format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} ins_encode %{ assert(UseAVX > 0, "required"); @@ -5896,6 +6170,7 @@ instruct vsqrtD_reg(vec dst, vec src) %{ instruct vsqrtD_mem(vec dst, memory mem) %{ predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); match(Set dst (SqrtVD (LoadVector mem))); + ins_cost(400); format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} ins_encode %{ assert(UseAVX > 0, "required"); @@ -6904,7 +7179,8 @@ instruct vcastDtoF_reg(vec dst, vec src) %{ // --------------------------------- VectorMaskCmp -------------------------------------- instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ - predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 + predicate(n->bottom_type()->isa_vectmask() == NULL && + Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); @@ -6921,8 +7197,9 @@ instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ ins_pipe( pipe_slow ); %} -instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{ +instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{ predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 + n->bottom_type()->isa_vectmask() == NULL && is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); effect(TEMP scratch, TEMP ktmp); @@ -6942,8 +7219,27 @@ instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg kt ins_pipe( pipe_slow ); %} +instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ + predicate(n->bottom_type()->isa_vectmask() && + is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} + ins_encode %{ + assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); + int vlen_enc = vector_length_encoding(this, $src1); + Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); + KRegister mask = k0; // The comparison itself is not being masked. 
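+    // Dispatch on the element type: evcmpps compares packed floats, evcmppd packed
+    // doubles; both write the comparison result directly into the $dst mask register.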
+ if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { + __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); + } else { + __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); + } + %} + ins_pipe( pipe_slow ); +%} + instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) %{ - predicate((UseAVX <= 2 || !VM_Version::supports_avx512vl()) && + predicate(n->bottom_type()->isa_vectmask() == NULL && !is_unsigned_booltest_pred(n->in(2)->get_int()) && Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 @@ -6961,7 +7257,7 @@ instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) % %} instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec vtmp1, legVec vtmp2, rRegP scratch) %{ - predicate((UseAVX == 2 || !VM_Version::supports_avx512vl()) && + predicate(n->bottom_type()->isa_vectmask() == NULL && is_unsigned_booltest_pred(n->in(2)->get_int()) && Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 16 && // src1 @@ -6980,7 +7276,7 @@ instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec vtmp1, l %} instruct vcmpu32(legVec dst, legVec src1, legVec src2, immI8 cond, legVec vtmp1, legVec vtmp2, legVec vtmp3, rRegP scratch) %{ - predicate((UseAVX == 2 || !VM_Version::supports_avx512vl()) && + predicate(n->bottom_type()->isa_vectmask() == NULL && is_unsigned_booltest_pred(n->in(2)->get_int()) && Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 32 && // src1 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 @@ -6997,9 +7293,8 @@ instruct vcmpu32(legVec dst, legVec src1, legVec src2, immI8 cond, legVec vtmp1, ins_pipe( pipe_slow ); %} -instruct evcmp(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{ - predicate(UseAVX > 2 && - (VM_Version::supports_avx512vl() || +instruct vcmpu64(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{ + predicate((n->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); @@ -7015,25 +7310,54 @@ instruct evcmp(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp bool merge = false; BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); + switch (src1_elem_bt) { + case T_INT: { + __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); + __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); + break; + } + case T_LONG: { + __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); + __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); + break; + } + default: assert(false, "%s", type2name(src1_elem_bt)); + } + %} + ins_pipe( pipe_slow ); +%} + + +instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{ + predicate(n->bottom_type()->isa_vectmask() && + is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" 
 %} + ins_encode %{ + assert(UseAVX > 2, "required"); + assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); + + int vlen_enc = vector_length_encoding(this, $src1); + Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); + bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); + BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); + + // Dispatch the comparison on the element basic type. switch (src1_elem_bt) { case T_BYTE: { - __ evpcmpb($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); - __ evmovdqub($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); + __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); break; } case T_SHORT: { - __ evpcmpw($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); - __ evmovdquw($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); + __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); break; } case T_INT: { - __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); - __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); + __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); break; } case T_LONG: { - __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); - __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); + __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); break; } default: assert(false, "%s", type2name(src1_elem_bt)); @@ -7186,6 +7510,7 @@ instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ predicate(UseAVX > 0 && + n->in(2)->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length_in_bytes(n) <= 32 && is_integral_type(Matcher::vector_element_basic_type(n))); match(Set dst (VectorBlend (Binary src1 src2) mask)); @@ -7199,6 +7524,7 @@ instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ predicate(UseAVX > 0 && + n->in(2)->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length_in_bytes(n) <= 32 && !is_integral_type(Matcher::vector_element_basic_type(n))); match(Set dst (VectorBlend (Binary src1 src2) mask)); @@ -7211,7 +7537,8 @@ instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ %} instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch, kReg ktmp) %{ - predicate(Matcher::vector_length_in_bytes(n) == 64); + predicate(Matcher::vector_length_in_bytes(n) == 64 && + n->in(2)->bottom_type()->isa_vectmask() == NULL); match(Set dst (VectorBlend (Binary src1 src2) mask)); format %{ "vector_blend $dst,$src1,$src2,$mask\t!
using $scratch and k2 as TEMP" %} effect(TEMP scratch, TEMP ktmp); @@ -7224,10 +7551,27 @@ instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch, kReg ins_pipe( pipe_slow ); %} + +instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask, rRegP scratch) %{ + predicate(n->in(2)->bottom_type()->isa_vectmask() && + (!is_subword_type(Matcher::vector_element_basic_type(n)) || + VM_Version::supports_avx512bw())); + match(Set dst (VectorBlend (Binary src1 src2) mask)); + format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $scratch and k2 as TEMP" %} + effect(TEMP scratch); + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType elem_bt = Matcher::vector_element_basic_type(this); + __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + // --------------------------------- ABS -------------------------------------- // a = |a| instruct vabsB_reg(vec dst, vec src) %{ match(Set dst (AbsVB src)); + ins_cost(450); format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} ins_encode %{ uint vlen = Matcher::vector_length(this); @@ -7243,6 +7587,7 @@ instruct vabsB_reg(vec dst, vec src) %{ instruct vabsS_reg(vec dst, vec src) %{ match(Set dst (AbsVS src)); + ins_cost(450); format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} ins_encode %{ uint vlen = Matcher::vector_length(this); @@ -7259,6 +7604,7 @@ instruct vabsS_reg(vec dst, vec src) %{ instruct vabsI_reg(vec dst, vec src) %{ match(Set dst (AbsVI src)); format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} + ins_cost(250); ins_encode %{ uint vlen = Matcher::vector_length(this); if (vlen <= 4) { @@ -7273,6 +7619,7 @@ instruct vabsI_reg(vec dst, vec src) %{ instruct vabsL_reg(vec dst, vec src) %{ match(Set dst (AbsVL src)); + ins_cost(450); format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} ins_encode %{ assert(UseAVX > 2, "required"); @@ -7345,12 +7692,13 @@ instruct vabsnegD(vec dst, vec src, rRegI scratch) %{ #ifdef _LP64 instruct vptest_alltrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp1, legVec vtmp2, rFlagsReg cr) %{ - predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 4 && + predicate(!VM_Version::supports_avx512bwdq() && + Matcher::vector_length_in_bytes(n->in(1)) >= 4 && Matcher::vector_length_in_bytes(n->in(1)) < 16 && static_cast<VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); match(Set dst (VectorTest src1 src2 )); effect(TEMP vtmp1, TEMP vtmp2, KILL cr); - format %{ "vector_test $dst,$src1, $src2\t! using $vtmp1, $vtmp2 and $cr as TEMP" %} + format %{ "vptest_alltrue_lt16 $dst,$src1, $src2\t!
using $vtmp1, $vtmp2 and $cr as TEMP" %} ins_encode %{ int vlen = Matcher::vector_length_in_bytes(this, $src1); __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); @@ -7360,13 +7708,14 @@ instruct vptest_alltrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp1, ins_pipe( pipe_slow ); %} -instruct vptest_alltrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ - predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16 && +instruct vptest_alltrue_ge16(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ + predicate(!VM_Version::supports_avx512bwdq() && + Matcher::vector_length_in_bytes(n->in(1)) >= 16 && Matcher::vector_length_in_bytes(n->in(1)) < 64 && static_cast<VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); match(Set dst (VectorTest src1 src2 )); effect(KILL cr); - format %{ "vector_test $dst,$src1, $src2\t! using $cr as TEMP" %} + format %{ "vptest_alltrue_ge16 $dst,$src1, $src2\t! using $cr as TEMP" %} ins_encode %{ int vlen = Matcher::vector_length_in_bytes(this, $src1); __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); @@ -7376,28 +7725,52 @@ instruct vptest_alltrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ ins_pipe( pipe_slow ); %} -instruct vptest_alltrue_evex(rRegI dst, legVec src1, legVec src2, kReg ktmp, rFlagsReg cr) %{ - predicate(Matcher::vector_length_in_bytes(n->in(1)) == 64 && - static_cast<VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); - match(Set dst (VectorTest src1 src2 )); - effect(KILL cr, TEMP ktmp); - format %{ "vector_test $dst,$src1, $src2\t! using $cr as TEMP" %} +instruct vptest_alltrue_lt8_evex(rRegI dst, kReg src1, kReg src2, kReg kscratch, rFlagsReg cr) %{ + predicate(VM_Version::supports_avx512bwdq() && + static_cast<VectorTestNode*>(n)->get_predicate() == BoolTest::overflow && + n->in(1)->bottom_type()->isa_vectmask() && + Matcher::vector_length(n->in(1)) < 8); + match(Set dst (VectorTest src1 src2)); + effect(KILL cr, TEMP kscratch); + format %{ "vptest_alltrue_lt8_evex $dst,$src1,$src2\t! using $cr as TEMP" %} ins_encode %{ - int vlen = Matcher::vector_length_in_bytes(this, $src1); - __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, $ktmp$$KRegister); - __ setb(Assembler::carrySet, $dst$$Register); - __ movzbl($dst$$Register, $dst$$Register); + const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); + const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); + assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); + uint masklen = Matcher::vector_length(this, $src1); + __ alltrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister, $kscratch$$KRegister); + %} + ins_pipe( pipe_slow ); +%} + + +instruct vptest_alltrue_ge8_evex(rRegI dst, kReg src1, kReg src2, rFlagsReg cr) %{ + predicate(VM_Version::supports_avx512bwdq() && + static_cast<VectorTestNode*>(n)->get_predicate() == BoolTest::overflow && + n->in(1)->bottom_type()->isa_vectmask() && + Matcher::vector_length(n->in(1)) >= 8); + match(Set dst (VectorTest src1 src2)); + effect(KILL cr); + format %{ "vptest_alltrue_ge8_evex $dst,$src1,$src2\t!
using $cr as TEMP" %} + ins_encode %{ + const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); + const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); + assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); + uint masklen = Matcher::vector_length(this, $src1); + __ alltrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister, knoreg); %} ins_pipe( pipe_slow ); %} + instruct vptest_anytrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp, rFlagsReg cr) %{ - predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 4 && + predicate(!VM_Version::supports_avx512bwdq() && + Matcher::vector_length_in_bytes(n->in(1)) >= 4 && Matcher::vector_length_in_bytes(n->in(1)) < 16 && static_cast<VectorTestNode*>(n)->get_predicate() == BoolTest::ne); match(Set dst (VectorTest src1 src2 )); effect(TEMP vtmp, KILL cr); - format %{ "vector_test_any_true $dst,$src1,$src2\t! using $vtmp, $cr as TEMP" %} + format %{ "vptest_anytrue_lt16 $dst,$src1,$src2\t! using $vtmp, $cr as TEMP" %} ins_encode %{ int vlen = Matcher::vector_length_in_bytes(this, $src1); __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); @@ -7407,13 +7780,14 @@ instruct vptest_anytrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp, r ins_pipe( pipe_slow ); %} -instruct vptest_anytrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ - predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16 && +instruct vptest_anytrue_ge16(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ + predicate(!VM_Version::supports_avx512bwdq() && + Matcher::vector_length_in_bytes(n->in(1)) >= 16 && Matcher::vector_length_in_bytes(n->in(1)) < 64 && static_cast<VectorTestNode*>(n)->get_predicate() == BoolTest::ne); match(Set dst (VectorTest src1 src2 )); effect(KILL cr); - format %{ "vector_test_any_true $dst,$src1,$src2\t! using $cr as TEMP" %} + format %{ "vptest_anytrue_ge16 $dst,$src1,$src2\t! using $cr as TEMP" %} ins_encode %{ int vlen = Matcher::vector_length_in_bytes(this, $src1); __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); @@ -7423,28 +7797,30 @@ instruct vptest_anytrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ ins_pipe( pipe_slow ); %} -instruct vptest_anytrue_evex(rRegI dst, legVec src1, legVec src2, kReg ktmp, rFlagsReg cr) %{ - predicate(Matcher::vector_length_in_bytes(n->in(1)) == 64 && +instruct vptest_anytrue_evex(rRegI dst, kReg src1, kReg src2, rFlagsReg cr) %{ + predicate(VM_Version::supports_avx512bwdq() && static_cast<VectorTestNode*>(n)->get_predicate() == BoolTest::ne); - match(Set dst (VectorTest src1 src2 )); - effect(KILL cr, TEMP ktmp); - format %{ "vector_test_any_true $dst,$src1,$src2\t! using $cr as TEMP" %} + match(Set dst (VectorTest src1 src2)); + effect(KILL cr); + format %{ "vptest_anytrue_lt8_evex $dst,$src1,$src2\t!
using $cr as TEMP" %} ins_encode %{ - int vlen = Matcher::vector_length_in_bytes(this, $src1); - __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, $ktmp$$KRegister); - __ setb(Assembler::notZero, $dst$$Register); - __ movzbl($dst$$Register, $dst$$Register); + const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); + const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); + assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); + uint masklen = Matcher::vector_length(this, $src1); + __ anytrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister); %} ins_pipe( pipe_slow ); %} instruct cmpvptest_anytrue_lt16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, legVec vtmp) %{ - predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && + predicate(!VM_Version::supports_avx512bwdq() && + Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && Matcher::vector_length_in_bytes(n->in(1)->in(1)) < 16 && static_cast<VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); match(Set cr (CmpI (VectorTest src1 src2) zero)); effect(TEMP vtmp); - format %{ "cmp_vector_test_any_true $src1,$src2\t! using $vtmp as TEMP" %} + format %{ "cmpvptest_anytrue_lt16 $src1,$src2\t! using $vtmp as TEMP" %} ins_encode %{ int vlen = Matcher::vector_length_in_bytes(this, $src1); __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); @@ -7452,12 +7828,13 @@ instruct cmpvptest_anytrue_lt16(rFlagsReg cr, legVec src1, legVec src2, immI_0 z ins_pipe( pipe_slow ); %} -instruct cmpvptest_anytrue(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero) %{ - predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 16 && +instruct cmpvptest_anytrue_ge16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero) %{ + predicate(!VM_Version::supports_avx512bwdq() && + Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 16 && Matcher::vector_length_in_bytes(n->in(1)->in(1)) < 64 && static_cast<VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); match(Set cr (CmpI (VectorTest src1 src2) zero)); - format %{ "cmp_vector_test_any_true $src1,$src2\t!" %} + format %{ "cmpvptest_anytrue_ge16 $src1,$src2\t!" %} ins_encode %{ int vlen = Matcher::vector_length_in_bytes(this, $src1); __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); @@ -7465,15 +7842,18 @@ instruct cmpvptest_anytrue(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero) ins_pipe( pipe_slow ); %} -instruct cmpvptest_anytrue_evex(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, kReg ktmp) %{ - predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && +instruct cmpvptest_anytrue_evex(rFlagsReg cr, kReg src1, kReg src2, immI_0 zero) %{ + predicate(VM_Version::supports_avx512bwdq() && static_cast<VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); match(Set cr (CmpI (VectorTest src1 src2) zero)); - effect(TEMP ktmp); - format %{ "cmp_vector_test_any_true $src1,$src2\t!" %} + format %{ "cmpvptest_anytrue_evex $src1,$src2\t!"
 %} ins_encode %{ - int vlen = Matcher::vector_length_in_bytes(this, $src1); - __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, $ktmp$$KRegister); + uint masklen = Matcher::vector_length(this, $src1); + const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); + const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); + assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); + masklen = masklen < 8 ? 8 : masklen; + __ ktest(masklen, $src1$$KRegister, $src2$$KRegister); %} ins_pipe( pipe_slow ); %} @@ -7482,45 +7862,56 @@ instruct cmpvptest_anytrue_evex(rFlagsReg cr, legVec src1, legVec src2, immI_0 z //------------------------------------- LoadMask -------------------------------------------- instruct loadMask(legVec dst, legVec src) %{ - predicate(!VM_Version::supports_avx512vlbw()); + predicate(n->bottom_type()->isa_vectmask() == NULL && !VM_Version::supports_avx512vlbw()); match(Set dst (VectorLoadMask src)); effect(TEMP dst); - format %{ "vector_loadmask_byte $dst,$src\n\t" %} + format %{ "vector_loadmask_byte $dst, $src\n\t" %} ins_encode %{ int vlen_in_bytes = Matcher::vector_length_in_bytes(this); BasicType elem_bt = Matcher::vector_element_basic_type(this); - __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); %} ins_pipe( pipe_slow ); %} -instruct loadMask_evex(vec dst, vec src) %{ - predicate(VM_Version::supports_avx512vlbw()); +instruct loadMask64(kReg dst, vec src, vec xtmp, rRegI tmp) %{ + predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); match(Set dst (VectorLoadMask src)); - effect(TEMP dst); - format %{ "vector_loadmask_byte $dst,$src\n\t" %} + effect(TEMP xtmp, TEMP tmp); + format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp and $tmp as TEMP" %} ins_encode %{ - int vlen_in_bytes = Matcher::vector_length_in_bytes(this); - BasicType elem_bt = Matcher::vector_element_basic_type(this); + __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, + $tmp$$Register, true, Assembler::AVX_512bit); + %} + ins_pipe( pipe_slow ); +%} - __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, false); +instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ + predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); + match(Set dst (VectorLoadMask src)); + effect(TEMP xtmp); + format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(in(1)); + __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, + noreg, false, vlen_enc); %} ins_pipe( pipe_slow ); %} //------------------------------------- StoreMask -------------------------------------------- -instruct storeMask1B(vec dst, vec src, immI_1 size) %{ - predicate(Matcher::vector_length(n) < 64 || VM_Version::supports_avx512vlbw()); +instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ + predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == NULL); match(Set dst (VectorStoreMask src size)); - format %{ "vector_store_mask $dst,$src\t!" %} + format %{ "vector_store_mask $dst, $src \t!
elem size is $size byte[s]" %} ins_encode %{ - assert(UseSSE >= 3, "required"); - if (Matcher::vector_length_in_bytes(this) <= 16) { + int vlen = Matcher::vector_length(this); + if (vlen <= 16 && UseAVX <= 2) { + assert(UseSSE >= 3, "required"); __ pabsb($dst$$XMMRegister, $src$$XMMRegister); } else { - assert(UseAVX >= 2, "required"); + assert(UseAVX > 0, "required"); int src_vlen_enc = vector_length_encoding(this, $src); __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); } @@ -7528,77 +7919,93 @@ instruct storeMask1B(vec dst, vec src, immI_1 size) %{ ins_pipe( pipe_slow ); %} -instruct storeMask2B(vec dst, vec src, immI_2 size) %{ - predicate(Matcher::vector_length(n) <= 8); +instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ + predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == NULL); match(Set dst (VectorStoreMask src size)); - format %{ "vector_store_mask $dst,$src\n\t" %} - ins_encode %{ - assert(UseSSE >= 3, "required"); - __ pabsw($dst$$XMMRegister, $src$$XMMRegister); - __ packsswb($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct vstoreMask2B(vec dst, vec src, immI_2 size) %{ - predicate(Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); - match(Set dst (VectorStoreMask src size)); - effect(TEMP dst); - format %{ "vector_store_mask $dst,$src\t!" %} + effect(TEMP_DEF dst, TEMP xtmp); + format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} ins_encode %{ int vlen_enc = Assembler::AVX_128bit; - __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); - __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister,vlen_enc); - __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); + int vlen = Matcher::vector_length(this); + if (vlen <= 8) { + assert(UseSSE >= 3, "required"); + __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); + __ pabsw($dst$$XMMRegister, $src$$XMMRegister); + __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); + } else { + assert(UseAVX > 0, "required"); + __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); + __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); + __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); + } %} ins_pipe( pipe_slow ); %} -instruct vstoreMask2B_evex(vec dst, vec src, immI_2 size) %{ - predicate(VM_Version::supports_avx512bw()); +instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ + predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == NULL); match(Set dst (VectorStoreMask src size)); - format %{ "vector_store_mask $dst,$src\t!" %} + format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} + effect(TEMP_DEF dst, TEMP xtmp); ins_encode %{ - int src_vlen_enc = vector_length_encoding(this, $src); - int dst_vlen_enc = vector_length_encoding(this); - __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); - __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); + int vlen_enc = Assembler::AVX_128bit; + int vlen = Matcher::vector_length(this); + if (vlen <= 4) { + assert(UseSSE >= 3, "required"); + __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); + __ pabsd($dst$$XMMRegister, $src$$XMMRegister); + __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); + __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); + } else { + assert(UseAVX > 0, "required"); + __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); + __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); + __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); + __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); + __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); + } %} ins_pipe( pipe_slow ); %} -instruct storeMask4B(vec dst, vec src, immI_4 size) %{ - predicate(Matcher::vector_length(n) <= 4 && UseAVX <= 2); +instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ + predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); match(Set dst (VectorStoreMask src size)); - format %{ "vector_store_mask $dst,$src\t!" %} + effect(TEMP_DEF dst, TEMP xtmp); + format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} ins_encode %{ assert(UseSSE >= 3, "required"); - __ pabsd($dst$$XMMRegister, $src$$XMMRegister); - __ packssdw($dst$$XMMRegister, $dst$$XMMRegister); - __ packsswb($dst$$XMMRegister, $dst$$XMMRegister); + __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); + __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); + __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); + __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vstoreMask4B(vec dst, vec src, immI_4 size) %{ - predicate(Matcher::vector_length(n) == 8 && UseAVX <= 2); +instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ + predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); match(Set dst (VectorStoreMask src size)); - format %{ "vector_store_mask $dst,$src\t!" %} - effect(TEMP dst); + format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s], using $vtmp as TEMP" %} + effect(TEMP_DEF dst, TEMP vtmp); ins_encode %{ int vlen_enc = Assembler::AVX_128bit; - __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); - __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); - __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); + __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); + __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); + __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); + __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); + __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); + __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); %} ins_pipe( pipe_slow ); %} -instruct vstoreMask4B_evex(vec dst, vec src, immI_4 size) %{ - predicate(UseAVX > 2); +instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ + predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == NULL); match(Set dst (VectorStoreMask src size)); - format %{ "vector_store_mask $dst,$src\t!" %} + format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} ins_encode %{ int src_vlen_enc = vector_length_encoding(this, $src); int dst_vlen_enc = vector_length_encoding(this); @@ -7611,53 +8018,60 @@ instruct vstoreMask4B_evex(vec dst, vec src, immI_4 size) %{ ins_pipe( pipe_slow ); %} -instruct storeMask8B(vec dst, vec src, immI_8 size) %{ - predicate(Matcher::vector_length(n) == 2 && UseAVX <= 2); +instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ + predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == NULL); match(Set dst (VectorStoreMask src size)); - format %{ "vector_store_mask $dst,$src\t!" %} + format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} ins_encode %{ - assert(UseSSE >= 3, "required"); - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); - __ packssdw($dst$$XMMRegister, $dst$$XMMRegister); - __ packsswb($dst$$XMMRegister, $dst$$XMMRegister); - __ pabsb($dst$$XMMRegister, $dst$$XMMRegister); - %} + int src_vlen_enc = vector_length_encoding(this, $src); + int dst_vlen_enc = vector_length_encoding(this); + if (!VM_Version::supports_avx512vl()) { + src_vlen_enc = Assembler::AVX_512bit; + } + __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); + __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); + %} ins_pipe( pipe_slow ); %} -instruct storeMask8B_avx(vec dst, vec src, immI_8 size, legVec vtmp) %{ - predicate(Matcher::vector_length(n) == 4 && UseAVX <= 2); - match(Set dst (VectorStoreMask src size)); - format %{ "vector_store_mask $dst,$src\t! using $vtmp as TEMP" %} - effect(TEMP dst, TEMP vtmp); +instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size, rRegI tmp) %{ + predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); + match(Set dst (VectorStoreMask mask size)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %} ins_encode %{ - int vlen_enc = Assembler::AVX_128bit; - __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); - __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); - __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); - __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); - __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); - __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); + assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); + __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), + false, Assembler::AVX_512bit, $tmp$$Register); + __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); %} ins_pipe( pipe_slow ); %} -instruct vstoreMask8B_evex(vec dst, vec src, immI_8 size) %{ - predicate(UseAVX > 2); - match(Set dst (VectorStoreMask src size)); - format %{ "vector_store_mask $dst,$src\t!" %} +instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ + predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); + match(Set dst (VectorStoreMask mask size)); + effect(TEMP_DEF dst); + format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} ins_encode %{ - int src_vlen_enc = vector_length_encoding(this, $src); int dst_vlen_enc = vector_length_encoding(this); - if (!VM_Version::supports_avx512vl()) { - src_vlen_enc = Assembler::AVX_512bit; - } - __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); + __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); %} ins_pipe( pipe_slow ); %} +instruct vmaskcast_evex(kReg dst) %{ + predicate(Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); + match(Set dst (VectorMaskCast dst)); + ins_cost(0); + format %{ "vector_mask_cast $dst" %} + ins_encode %{ + // empty + %} + ins_pipe(empty); +%} + instruct vmaskcast(vec dst) %{ predicate((Matcher::vector_length(n) == Matcher::vector_length(n->in(1))) && (Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)))); @@ -8227,69 +8641,793 @@ instruct vmasked_store64(memory mem, vec src, kReg mask) %{ ins_pipe( pipe_slow ); %} -instruct vmask_truecount_evex(rRegI dst, vec mask, rRegL tmp, kReg ktmp, vec xtmp) %{ - predicate(VM_Version::supports_avx512vlbw()); +instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ + predicate(n->in(1)->bottom_type()->isa_vectmask()); + match(Set dst (VectorMaskToLong mask)); + effect(TEMP dst, KILL cr); + format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} + ins_encode %{ + int mask_len = Matcher::vector_length(this, $mask); + BasicType mbt = Matcher::vector_element_basic_type(this, $mask); + if (VM_Version::supports_avx512vlbw()) { + __ kmovql($dst$$Register, $mask$$KRegister); + } else { + assert(mask_len <= 16, ""); + __ kmovwl($dst$$Register, $mask$$KRegister); + } + // Mask generated out of partial vector comparisons/replicate/mask manipulation + // operations needs to be clipped. 
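+    // For example, a mask over 4 T_INT lanes spans 16 bytes and needs no clipping, while a mask over 4 T_BYTE lanes spans only 4 bytes, so just the low 4 result bits are kept.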
+ int mask_size = mask_len * type2aelembytes(mbt); + if (mask_size < 16) { + __ andq($dst$$Register, (((jlong)1 << mask_len) - 1)); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct vmask_tolong_avx(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ + predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL && + n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN); + match(Set dst (VectorMaskToLong mask)); + format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %} + effect(TEMP_DEF dst, TEMP xtmp, KILL cr); + ins_encode %{ + int mask_len = Matcher::vector_length(this, $mask); + BasicType mbt = Matcher::vector_element_basic_type(this, $mask); + int vlen_enc = vector_length_encoding(this, $mask); + __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); + __ vpsubb($xtmp$$XMMRegister, $xtmp$$XMMRegister, $mask$$XMMRegister, vlen_enc); + __ vpmovmskb($dst$$Register, $xtmp$$XMMRegister, vlen_enc); + // Mask generated out of partial vector comparisons/replicate/mask manipulation + // operations needs to be clipped. + int mask_size = mask_len * type2aelembytes(mbt); + if (mask_size < 16) { + __ andq($dst$$Register, (((jlong)1 << mask_len) - 1)); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ + predicate(n->in(1)->bottom_type()->isa_vectmask()); match(Set dst (VectorMaskTrueCount mask)); - effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp, TEMP xtmp); - format %{ "vector_truecount_evex $mask \t! vector mask true count" %} + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen_enc = vector_length_encoding(this, $mask); + BasicType mbt = Matcher::vector_element_basic_type(this, $mask); int mask_len = Matcher::vector_length(this, $mask); - __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, - $tmp$$Register, $ktmp$$KRegister, mask_len, vlen_enc); + int mask_size = mask_len * type2aelembytes(mbt); + int vlen_enc = vector_length_encoding(this, $mask); + __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, $tmp$$Register, + mask_len, mask_size, vlen_enc); %} ins_pipe( pipe_slow ); %} -instruct vmask_first_or_last_true_evex(rRegI dst, vec mask, rRegL tmp, kReg ktmp, vec xtmp, rFlagsReg cr) %{ - predicate(VM_Version::supports_avx512vlbw()); - match(Set dst (VectorMaskFirstTrue mask)); - match(Set dst (VectorMaskLastTrue mask)); - effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp, TEMP xtmp, KILL cr); - format %{ "vector_mask_first_or_last_true_evex $mask \t! vector first/last true location" %} +instruct vmask_truecount_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1, rFlagsReg cr) %{ + predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); + match(Set dst (VectorMaskTrueCount mask)); + effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, TEMP xtmp1, KILL cr); + format %{ "vector_truecount_avx $dst, $mask \t! 
using $tmp, $xtmp and $xtmp1 as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen_enc = vector_length_encoding(this, $mask); + BasicType mbt = Matcher::vector_element_basic_type(this, $mask); int mask_len = Matcher::vector_length(this, $mask); + int mask_size = mask_len * type2aelembytes(mbt); + int vlen_enc = vector_length_encoding(this, $mask); __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, - $tmp$$Register, $ktmp$$KRegister, mask_len, vlen_enc); + $xtmp1$$XMMRegister, $tmp$$Register, mask_len, mask_size, vlen_enc); %} ins_pipe( pipe_slow ); %} -instruct vmask_truecount_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1) %{ - predicate(!VM_Version::supports_avx512vlbw()); - match(Set dst (VectorMaskTrueCount mask)); - effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, TEMP xtmp1); - format %{ "vector_truecount_avx $mask \t! vector mask true count" %} +instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ + predicate(n->in(1)->bottom_type()->isa_vectmask()); + match(Set dst (VectorMaskFirstTrue mask)); + match(Set dst (VectorMaskLastTrue mask)); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen_enc = vector_length_encoding(this, $mask); + BasicType mbt = Matcher::vector_element_basic_type(this, $mask); int mask_len = Matcher::vector_length(this, $mask); - __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, - $xtmp1$$XMMRegister, $tmp$$Register, mask_len, vlen_enc); + int mask_size = mask_len * type2aelembytes(mbt); + int vlen_enc = vector_length_encoding(this, $mask); + __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, $tmp$$Register, mask_len, + mask_size, vlen_enc); %} ins_pipe( pipe_slow ); %} instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1, rFlagsReg cr) %{ - predicate(!VM_Version::supports_avx512vlbw()); + predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); match(Set dst (VectorMaskFirstTrue mask)); match(Set dst (VectorMaskLastTrue mask)); effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, TEMP xtmp1, KILL cr); - format %{ "vector_mask_first_or_last_true_avx $mask \t! vector first/last true location" %} + format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! 
using $tmp, $xtmp and $xtmp1 as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - int vlen_enc = vector_length_encoding(this, $mask); + BasicType mbt = Matcher::vector_element_basic_type(this, $mask); int mask_len = Matcher::vector_length(this, $mask); + int mask_size = mask_len * type2aelembytes(mbt); + int vlen_enc = vector_length_encoding(this, $mask); __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, - $xtmp1$$XMMRegister, $tmp$$Register, mask_len, vlen_enc); + $xtmp1$$XMMRegister, $tmp$$Register, mask_len, mask_size, vlen_enc); %} ins_pipe( pipe_slow ); %} #endif // _LP64 +// ---------------------------------- Vector Masked Operations ------------------------------------ + +instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ + match(Set dst (AddVB (Binary dst src2) mask)); + match(Set dst (AddVS (Binary dst src2) mask)); + match(Set dst (AddVI (Binary dst src2) mask)); + match(Set dst (AddVL (Binary dst src2) mask)); + match(Set dst (AddVF (Binary dst src2) mask)); + match(Set dst (AddVD (Binary dst src2) mask)); + format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ + match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); + match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); + match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); + match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); + match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); + match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); + format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$Address, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ + match(Set dst (XorV (Binary dst src2) mask)); + format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ + match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); + format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$Address, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ + match(Set dst (OrV (Binary dst src2) mask)); + format %{ "vor_masked $dst, $dst, $src2, $mask\t! 
or masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ + match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); + format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$Address, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ + match(Set dst (AndV (Binary dst src2) mask)); + format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ + match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); + format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$Address, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ + match(Set dst (SubVB (Binary dst src2) mask)); + match(Set dst (SubVS (Binary dst src2) mask)); + match(Set dst (SubVI (Binary dst src2) mask)); + match(Set dst (SubVL (Binary dst src2) mask)); + match(Set dst (SubVF (Binary dst src2) mask)); + match(Set dst (SubVD (Binary dst src2) mask)); + format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{ + match(Set dst (SubVB (Binary dst (LoadVector src2)) mask)); + match(Set dst (SubVS (Binary dst (LoadVector src2)) mask)); + match(Set dst (SubVI (Binary dst (LoadVector src2)) mask)); + match(Set dst (SubVL (Binary dst (LoadVector src2)) mask)); + match(Set dst (SubVF (Binary dst (LoadVector src2)) mask)); + match(Set dst (SubVD (Binary dst (LoadVector src2)) mask)); + format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! 
sub masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$Address, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{ + match(Set dst (MulVS (Binary dst src2) mask)); + match(Set dst (MulVI (Binary dst src2) mask)); + match(Set dst (MulVL (Binary dst src2) mask)); + match(Set dst (MulVF (Binary dst src2) mask)); + match(Set dst (MulVD (Binary dst src2) mask)); + format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{ + match(Set dst (MulVS (Binary dst (LoadVector src2)) mask)); + match(Set dst (MulVI (Binary dst (LoadVector src2)) mask)); + match(Set dst (MulVL (Binary dst (LoadVector src2)) mask)); + match(Set dst (MulVF (Binary dst (LoadVector src2)) mask)); + match(Set dst (MulVD (Binary dst (LoadVector src2)) mask)); + format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$Address, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsqrt_reg_masked(vec dst, kReg mask) %{ + match(Set dst (SqrtVF dst mask)); + match(Set dst (SqrtVD dst mask)); + ins_cost(100); + format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{ + match(Set dst (DivVF (Binary dst src2) mask)); + match(Set dst (DivVD (Binary dst src2) mask)); + format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{ + match(Set dst (DivVF (Binary dst (LoadVector src2)) mask)); + match(Set dst (DivVD (Binary dst (LoadVector src2)) mask)); + format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! 
div masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$Address, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + + +instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{ + match(Set dst (RotateLeftV (Binary dst shift) mask)); + match(Set dst (RotateRightV (Binary dst shift) mask)); + format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $shift$$constant, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{ + match(Set dst (RotateLeftV (Binary dst src2) mask)); + match(Set dst (RotateRightV (Binary dst src2) mask)); + format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ + match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask)); + match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask)); + match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask)); + format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $shift$$constant, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{ + match(Set dst (LShiftVS (Binary dst src2) mask)); + match(Set dst (LShiftVI (Binary dst src2) mask)); + match(Set dst (LShiftVL (Binary dst src2) mask)); + format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + bool is_varshift = !VectorNode::is_vshift_cnt_opcode(in(2)->isa_Mach()->ideal_Opcode()); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, is_varshift); + %} + ins_pipe( pipe_slow ); +%} + +instruct vlshift_mem_masked(vec dst, memory src2, kReg mask) %{ + match(Set dst (LShiftVS (Binary dst (LoadVector src2)) mask)); + match(Set dst (LShiftVI (Binary dst (LoadVector src2)) mask)); + match(Set dst (LShiftVL (Binary dst (LoadVector src2)) mask)); + format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! 
lshift masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$Address, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ + match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask)); + match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask)); + match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask)); + format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $shift$$constant, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{ + match(Set dst (RShiftVS (Binary dst src2) mask)); + match(Set dst (RShiftVI (Binary dst src2) mask)); + match(Set dst (RShiftVL (Binary dst src2) mask)); + format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + bool is_varshift = !VectorNode::is_vshift_cnt_opcode(in(2)->isa_Mach()->ideal_Opcode()); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, is_varshift); + %} + ins_pipe( pipe_slow ); +%} + +instruct vrshift_mem_masked(vec dst, memory src2, kReg mask) %{ + match(Set dst (RShiftVS (Binary dst (LoadVector src2)) mask)); + match(Set dst (RShiftVI (Binary dst (LoadVector src2)) mask)); + match(Set dst (RShiftVL (Binary dst (LoadVector src2)) mask)); + format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$Address, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ + match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask)); + match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask)); + match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask)); + format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $shift$$constant, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{ + match(Set dst (URShiftVS (Binary dst src2) mask)); + match(Set dst (URShiftVI (Binary dst src2) mask)); + match(Set dst (URShiftVL (Binary dst src2) mask)); + format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! 
urshift masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + bool is_varshift = !VectorNode::is_vshift_cnt_opcode(in(2)->isa_Mach()->ideal_Opcode()); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, is_varshift); + %} + ins_pipe( pipe_slow ); +%} + +instruct vurshift_mem_masked(vec dst, memory src2, kReg mask) %{ + match(Set dst (URShiftVS (Binary dst (LoadVector src2)) mask)); + match(Set dst (URShiftVI (Binary dst (LoadVector src2)) mask)); + match(Set dst (URShiftVL (Binary dst (LoadVector src2)) mask)); + format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$Address, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{ + match(Set dst (MaxV (Binary dst src2) mask)); + format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{ + match(Set dst (MaxV (Binary dst (LoadVector src2)) mask)); + format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$Address, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{ + match(Set dst (MinV (Binary dst src2) mask)); + format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{ + match(Set dst (MinV (Binary dst (LoadVector src2)) mask)); + format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$Address, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{ + match(Set dst (VectorRearrange (Binary dst src2) mask)); + format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! 
rearrange masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs_masked(vec dst, kReg mask) %{ + match(Set dst (AbsVB dst mask)); + match(Set dst (AbsVS dst mask)); + match(Set dst (AbsVI dst mask)); + match(Set dst (AbsVL dst mask)); + format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %} + ins_cost(100); + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{ + match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask))); + match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask))); + format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{ + match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask))); + match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask))); + format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + BasicType bt = Matcher::vector_element_basic_type(this); + int opc = this->ideal_Opcode(); + __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, + $src2$$XMMRegister, $src3$$Address, true, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask, rRegP scratch) %{ + match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask))); + effect(TEMP scratch); + format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask\t! 
using $scratch as TEMP" %} + ins_encode %{ + assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); + int vlen_enc = vector_length_encoding(this, $src1); + BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); + + // Comparison + switch (src1_elem_bt) { + case T_BYTE: { + bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); + Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); + __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); + break; + } + case T_SHORT: { + bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); + Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); + __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); + break; + } + case T_INT: { + bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); + Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); + __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); + break; + } + case T_LONG: { + bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); + Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); + __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); + break; + } + case T_FLOAT: { + Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); + __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); + break; + } + case T_DOUBLE: { + Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); + __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); + break; + } + default: assert(false, "%s", type2name(src1_elem_bt)); break; + } + %} + ins_pipe( pipe_slow ); +%} + +#ifdef _LP64 +instruct mask_all_evexI_imm(kReg dst, immI cnt, rRegL tmp) %{ + match(Set dst (MaskAll cnt)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "mask_all_evexI $dst, $cnt \t! using $tmp as TEMP" %} + ins_encode %{ + int vec_len = Matcher::vector_length(this); + if (VM_Version::supports_avx512bw()) { + __ movq($tmp$$Register, $cnt$$constant); + __ kmovql($dst$$KRegister, $tmp$$Register); + __ kshiftrql($dst$$KRegister, $dst$$KRegister, 64 - vec_len); + } else { + assert(vec_len <= 16, ""); + __ movq($tmp$$Register, $cnt$$constant); + __ kmovwl($dst$$KRegister, $tmp$$Register); + __ kshiftrwl($dst$$KRegister, $dst$$KRegister, 16 - vec_len); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct mask_all_evexI(kReg dst, rRegI src, rRegL tmp) %{ + match(Set dst (MaskAll src)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "mask_all_evexI $dst, $src \t! using $tmp as TEMP" %} + ins_encode %{ + int vec_len = Matcher::vector_length(this); + if (VM_Version::supports_avx512bw()) { + __ movslq($tmp$$Register, $src$$Register); + __ kmovql($dst$$KRegister, $tmp$$Register); + __ kshiftrql($dst$$KRegister, $dst$$KRegister, 64 - vec_len); + } else { + assert(vec_len <= 16, ""); + __ kmovwl($dst$$KRegister, $src$$Register); + __ kshiftrwl($dst$$KRegister, $dst$$KRegister, 16 - vec_len); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct mask_all_evexL(kReg dst, rRegL src) %{ + match(Set dst (MaskAll src)); + effect(TEMP_DEF dst); + format %{ "mask_all_evexL $dst, $src \t!
mask all operation" %} + ins_encode %{ + int vec_len = Matcher::vector_length(this); + if (VM_Version::supports_avx512bw()) { + __ kmovql($dst$$KRegister, $src$$Register); + __ kshiftrql($dst$$KRegister, $dst$$KRegister, 64 - vec_len); + } else { + assert(vec_len <= 16, ""); + __ kmovwl($dst$$KRegister, $src$$Register); + __ kshiftrwl($dst$$KRegister, $dst$$KRegister, 16 - vec_len); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ + predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); + match(Set dst (XorVMask src (MaskAll cnt))); + effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp); + format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %} + ins_encode %{ + uint masklen = Matcher::vector_length(this); + __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{ + predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) || + (Matcher::vector_length(n) == 16) || + (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw())); + match(Set dst (XorVMask src (MaskAll cnt))); + format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %} + ins_encode %{ + uint masklen = Matcher::vector_length(this); + __ knot(masklen, $dst$$KRegister, $src$$KRegister); + %} + ins_pipe( pipe_slow ); +%} +#endif + +instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{ + match(Set dst (AndVMask src1 src2)); + match(Set dst (OrVMask src1 src2)); + match(Set dst (XorVMask src1 src2)); + effect(TEMP kscratch); + format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %} + ins_encode %{ + const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); + const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); + assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); + uint masklen = Matcher::vector_length(this); + masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ?
16 : masklen; + __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct castMM(kReg dst) +%{ + match(Set dst (CastVV dst)); + + size(0); + format %{ "# castVV of $dst" %} + ins_encode(/* empty encoding */); + ins_cost(0); + ins_pipe(empty); +%} + instruct castVV(vec dst) %{ match(Set dst (CastVV dst)); diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad index e4738af43210810d29406f071148910a83fe7464..4475f053fd035e310e174f48af42bace2b2fd212 100644 --- a/src/hotspot/cpu/x86/x86_32.ad +++ b/src/hotspot/cpu/x86/x86_32.ad @@ -7204,7 +7204,7 @@ instruct castLL( eRegL dst ) %{ %} instruct castFF( regF dst ) %{ - predicate(UseSSE >= 2); + predicate(UseSSE >= 1); match(Set dst (CastFF dst)); format %{ "#castFF of $dst" %} ins_encode( /*empty encoding*/ ); @@ -7222,7 +7222,7 @@ instruct castDD( regD dst ) %{ %} instruct castFF_PR( regFPR dst ) %{ - predicate(UseSSE < 2); + predicate(UseSSE < 1); match(Set dst (CastFF dst)); format %{ "#castFF of $dst" %} ins_encode( /*empty encoding*/ ); diff --git a/src/hotspot/cpu/zero/sharedRuntime_zero.cpp b/src/hotspot/cpu/zero/sharedRuntime_zero.cpp index f2b90c3a6275a9490d737ac90c1c413545468fc3..30c3df8adee199c5665f2992c1f989b343cf30ac 100644 --- a/src/hotspot/cpu/zero/sharedRuntime_zero.cpp +++ b/src/hotspot/cpu/zero/sharedRuntime_zero.cpp @@ -73,8 +73,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, int compile_id, BasicType *sig_bt, VMRegPair *regs, - BasicType ret_type, - address critical_entry) { + BasicType ret_type) { ShouldNotCallThis(); return NULL; } diff --git a/src/hotspot/cpu/zero/vm_version_zero.cpp b/src/hotspot/cpu/zero/vm_version_zero.cpp index 333857afa690f506c0125a44c6ba5228921d2009..6fa56c24cce14c265ab5324e97f3f00c1536d781 100644 --- a/src/hotspot/cpu/zero/vm_version_zero.cpp +++ b/src/hotspot/cpu/zero/vm_version_zero.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. * Copyright 2009 Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -116,7 +116,6 @@ void VM_Version::initialize() { } // Not implemented - UNSUPPORTED_OPTION(CriticalJNINatives); UNSUPPORTED_OPTION(UseCompiler); #ifdef ASSERT UNSUPPORTED_OPTION(CountCompiledCalls); diff --git a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp index f2e8dc2fe2322566bf20fa34ea0c70f90ac44153..f3f1327a62969a264f8c58697d4d6e67b1adb899 100644 --- a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp +++ b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp @@ -186,9 +186,17 @@ void ZeroInterpreter::main_loop(int recurse, TRAPS) { // Call the interpreter if (JvmtiExport::can_post_interpreter_events()) { - BytecodeInterpreter::run<true>(istate); + if (RewriteBytecodes) { + BytecodeInterpreter::run<true, true>(istate); + } else { + BytecodeInterpreter::run<true, false>(istate); + } } else { - BytecodeInterpreter::run<false>(istate); + if (RewriteBytecodes) { + BytecodeInterpreter::run<false, true>(istate); + } else { + BytecodeInterpreter::run<false, false>(istate); + } } fixup_after_potential_safepoint(); diff --git a/src/hotspot/share/adlc/forms.cpp b/src/hotspot/share/adlc/forms.cpp index cdd053f25b5fe0808a04c4f601c49f6f8dddfbbf..3a246285b3a6975d28de7cd5804cf695bda54761 100644 --- a/src/hotspot/share/adlc/forms.cpp +++ b/src/hotspot/share/adlc/forms.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2012, Oracle and/or its affiliates.
All rights reserved. + * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -269,6 +269,7 @@ Form::DataType Form::is_load_from_memory(const char *opType) const { if( strcmp(opType,"LoadS")==0 ) return Form::idealS; if( strcmp(opType,"LoadVector")==0 ) return Form::idealV; if( strcmp(opType,"LoadVectorGather")==0 ) return Form::idealV; + if( strcmp(opType,"LoadVectorGatherMasked")==0 ) return Form::idealV; if( strcmp(opType,"LoadVectorMasked")==0 ) return Form::idealV; assert( strcmp(opType,"Load") != 0, "Must type Loads" ); return Form::none; @@ -287,6 +288,7 @@ Form::DataType Form::is_store_to_memory(const char *opType) const { if( strcmp(opType,"StoreNKlass")==0) return Form::idealNKlass; if( strcmp(opType,"StoreVector")==0 ) return Form::idealV; if( strcmp(opType,"StoreVectorScatter")==0 ) return Form::idealV; + if( strcmp(opType,"StoreVectorScatterMasked")==0 ) return Form::idealV; if( strcmp(opType,"StoreVectorMasked")==0 ) return Form::idealV; assert( strcmp(opType,"Store") != 0, "Must type Stores" ); return Form::none; diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp index 10886e233a2a57a92b5d9fd97786a362ea90c361..651511f375185feff85a4d5b0aee0bb25e2b12ef 100644 --- a/src/hotspot/share/adlc/formssel.cpp +++ b/src/hotspot/share/adlc/formssel.cpp @@ -2281,6 +2281,7 @@ bool OperandForm::is_bound_register() const { if (strcmp(name, "RegD") == 0) size = 2; if (strcmp(name, "RegL") == 0) size = 2; if (strcmp(name, "RegN") == 0) size = 1; + if (strcmp(name, "RegVectMask") == 0) size = globalAD->get_preproc_def("AARCH64") ? 1 : 2; if (strcmp(name, "VecX") == 0) size = 4; if (strcmp(name, "VecY") == 0) size = 8; if (strcmp(name, "VecZ") == 0) size = 16; @@ -3514,7 +3515,8 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const { "StoreB","StoreC","Store" ,"StoreFP", "LoadI", "LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF" , "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load" , - "StoreVector", "LoadVector", "LoadVectorGather", "StoreVectorScatter", "LoadVectorMasked", "StoreVectorMasked", + "StoreVector", "LoadVector", "LoadVectorMasked", "StoreVectorMasked", + "LoadVectorGather", "StoreVectorScatter", "LoadVectorGatherMasked", "StoreVectorScatterMasked", "LoadRange", "LoadKlass", "LoadNKlass", "LoadL_unaligned", "LoadD_unaligned", "LoadPLocked", "StorePConditional", "StoreIConditional", "StoreLConditional", @@ -3818,51 +3820,77 @@ bool MatchNode::equivalent(FormDict &globals, MatchNode *mNode2) { return true; } -//-------------------------- has_commutative_op ------------------------------- +//-------------------------- count_commutative_op ------------------------------- // Recursively check for commutative operations with subtree operands // which could be swapped. 
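The masked instructs above and the count_commutative_op() change just below are two sides of the same design point: once a vector node carries a mask, its original first two inputs are packed into a single Binary node and the mask becomes a separate input, so the matcher can no longer treat the node's children as two freely swappable operands. As a minimal scalar model of the merge-masked semantics that evmasked_op emits (an illustrative sketch only, not HotSpot code):

    #include <algorithm>
    #include <cstdint>

    // Merge masking: lanes whose mask bit is set receive the computed result;
    // all other lanes keep the previous contents of dst. The dst operand is
    // therefore both an input and the pass-through value, which is why the
    // matched (Binary dst src2) pair is tied to the destination register.
    static void masked_max(int32_t* dst, const int32_t* src2,
                           uint64_t mask, int vlen) {
      for (int i = 0; i < vlen; i++) {
        if ((mask >> i) & 1) {
          dst[i] = std::max(dst[i], src2[i]);
        }
      }
    }
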
void MatchNode::count_commutative_op(int& count) { static const char *commut_op_list[] = { "AddI","AddL","AddF","AddD", - "AddVB","AddVS","AddVI","AddVL","AddVF","AddVD", "AndI","AndL", - "AndV", "MaxI","MinI","MaxF","MinF","MaxD","MinD", - "MaxV", "MinV", "MulI","MulL","MulF","MulD", - "MulVB","MulVS","MulVI","MulVL","MulVF","MulVD", "OrI","OrL", - "OrV", - "XorI","XorL", - "XorV" + "XorI","XorL" }; - int cnt = sizeof(commut_op_list)/sizeof(char*); - if( _lChild && _rChild && (_lChild->_lChild || _rChild->_lChild) ) { + static const char *commut_vector_op_list[] = { + "AddVB", "AddVS", "AddVI", "AddVL", "AddVF", "AddVD", + "MulVB", "MulVS", "MulVI", "MulVL", "MulVF", "MulVD", + "AndV", "OrV", "XorV", + "MaxV", "MinV" + }; + + if (_lChild && _rChild && (_lChild->_lChild || _rChild->_lChild)) { // Don't swap if right operand is an immediate constant. bool is_const = false; - if( _rChild->_lChild == NULL && _rChild->_rChild == NULL ) { + if (_rChild->_lChild == NULL && _rChild->_rChild == NULL) { FormDict &globals = _AD.globalNames(); const Form *form = globals[_rChild->_opType]; - if ( form ) { - OperandForm *oper = form->is_operand(); - if( oper && oper->interface_type(globals) == Form::constant_interface ) + if (form) { + OperandForm *oper = form->is_operand(); + if (oper && oper->interface_type(globals) == Form::constant_interface) is_const = true; } } - if( !is_const ) { - for( int i=0; i<cnt; i++ ) { - if( strcmp(_opType, commut_op_list[i]) == 0 ) { - count++; - _commutative_id = count; // id should be > 0 + + if (!is_const) { + int scalar_cnt = sizeof(commut_op_list)/sizeof(char*); + int vector_cnt = sizeof(commut_vector_op_list)/sizeof(char*); + bool matched = false; + + // Check the commutative vector op first. It's noncommutative if + // the current node is a masked vector op, since a mask value + // is added to the original vector node's input list and the original + // first two inputs are packed into one BinaryNode. So don't swap + // if one of the operands is a BinaryNode. + for (int i = 0; i < vector_cnt; i++) { + if (strcmp(_opType, commut_vector_op_list[i]) == 0) { + if (strcmp(_lChild->_opType, "Binary") != 0 && + strcmp(_rChild->_opType, "Binary") != 0) { + count++; + _commutative_id = count; // id should be > 0 + } + matched = true; + break; + } + } + + // Then check the scalar op if the current op is not in + // the commut_vector_op_list.
+ if (!matched) { + for (int i = 0; i < scalar_cnt; i++) { + if (strcmp(_opType, commut_op_list[i]) == 0) { + count++; + _commutative_id = count; // id should be > 0 + break; + } + } + } } } - if( _lChild ) + if (_lChild) _lChild->count_commutative_op(count); - if( _rChild ) + if (_rChild) _rChild->count_commutative_op(count); } @@ -4088,6 +4116,7 @@ int MatchRule::is_expensive() const { strcmp(opType,"AndReductionV")==0 || strcmp(opType,"OrReductionV")==0 || strcmp(opType,"XorReductionV")==0 || + strcmp(opType,"MaskAll")==0 || 0 /* 0 to line up columns nicely */ ) return 1; } @@ -4200,17 +4229,18 @@ bool MatchRule::is_vector() const { "URShiftVB","URShiftVS","URShiftVI","URShiftVL", "ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD", "RoundDoubleModeV","RotateLeftV" , "RotateRightV", "LoadVector","StoreVector", - "LoadVectorGather", "StoreVectorScatter", + "LoadVectorGather", "StoreVectorScatter", "LoadVectorGatherMasked", "StoreVectorScatterMasked", "VectorTest", "VectorLoadMask", "VectorStoreMask", "VectorBlend", "VectorInsert", "VectorRearrange","VectorLoadShuffle", "VectorLoadConst", "VectorCastB2X", "VectorCastS2X", "VectorCastI2X", "VectorCastL2X", "VectorCastF2X", "VectorCastD2X", "VectorMaskWrapper", "VectorMaskCmp", "VectorReinterpret","LoadVectorMasked","StoreVectorMasked", "FmaVD", "FmaVF","PopCountVI", + // Next are vector mask ops. + "MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast", // Next are not supported currently. "PackB","PackS","PackI","PackL","PackF","PackD","Pack2L","Pack2D", - "ExtractB","ExtractUB","ExtractC","ExtractS","ExtractI","ExtractL","ExtractF","ExtractD", - "VectorMaskCast" + "ExtractB","ExtractUB","ExtractC","ExtractS","ExtractI","ExtractL","ExtractF","ExtractD" }; int cnt = sizeof(vector_list)/sizeof(char*); if (_rChild) { diff --git a/src/hotspot/share/c1/c1_Compilation.hpp b/src/hotspot/share/c1/c1_Compilation.hpp index 3c13a261cc1a050e0a042abdae359778daa06a3f..f3be9ed7cee295410cfbc521dd1a415f3e899757 100644 --- a/src/hotspot/share/c1/c1_Compilation.hpp +++ b/src/hotspot/share/c1/c1_Compilation.hpp @@ -49,11 +49,9 @@ class CodeEmitInfo; class ciEnv; class ciMethod; class ValueStack; -class LIR_OprDesc; class C1_MacroAssembler; class CFGPrinter; class CFGPrinterOutput; -typedef LIR_OprDesc* LIR_Opr; typedef GrowableArray<BasicType> BasicTypeArray; typedef GrowableArray<BasicType> BasicTypeList; diff --git a/src/hotspot/share/c1/c1_FrameMap.hpp b/src/hotspot/share/c1/c1_FrameMap.hpp index 5e6524f872764358fcf1d5043fc5609ffed3e6dd..5cbb37422dcbbee4b7c5f95f31b21af259853ad9 100644 --- a/src/hotspot/share/c1/c1_FrameMap.hpp +++ b/src/hotspot/share/c1/c1_FrameMap.hpp @@ -62,10 +62,6 @@ class CallingConvention; // ABI = ABI area (SPARC) or nothing (i486) -class LIR_OprDesc; -typedef LIR_OprDesc* LIR_Opr; - - class FrameMap : public CompilationResourceObj { public: enum { @@ -83,7 +79,7 @@ class FrameMap : public CompilationResourceObj { #include CPU_HEADER(c1_FrameMap) - friend class LIR_OprDesc; + friend class LIR_Opr; private: static bool _init_done; diff --git a/src/hotspot/share/c1/c1_Instruction.hpp b/src/hotspot/share/c1/c1_Instruction.hpp index 6eb080841a960a0d124a41cb32693e79f92a54f8..6adaae6f30f6869a7737166f9aa5a463e2635e5a 100644 --- a/src/hotspot/share/c1/c1_Instruction.hpp +++ b/src/hotspot/share/c1/c1_Instruction.hpp @@ -35,8 +35,6 @@ class ciField; class ValueStack; class InstructionPrinter; class IRScope; -class LIR_OprDesc; -typedef LIR_OprDesc* LIR_Opr; // Instruction class hierarchy diff --git
a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 1e4529d7c2b418f20e14907be562eda86ff18036..4ab1d887e3ffe2e00b8c1a1a653399105dbd64fb 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -33,19 +33,20 @@ #include "runtime/sharedRuntime.hpp" #include "runtime/vm_version.hpp" -Register LIR_OprDesc::as_register() const { +Register LIR_Opr::as_register() const { return FrameMap::cpu_rnr2reg(cpu_regnr()); } -Register LIR_OprDesc::as_register_lo() const { +Register LIR_Opr::as_register_lo() const { return FrameMap::cpu_rnr2reg(cpu_regnrLo()); } -Register LIR_OprDesc::as_register_hi() const { +Register LIR_Opr::as_register_hi() const { return FrameMap::cpu_rnr2reg(cpu_regnrHi()); } LIR_Opr LIR_OprFact::illegalOpr = LIR_OprFact::illegal(); +LIR_Opr LIR_OprFact::nullOpr = LIR_Opr(); LIR_Opr LIR_OprFact::value_type(ValueType* type) { ValueTag tag = type->tag(); @@ -92,7 +93,7 @@ LIR_Address::Scale LIR_Address::scale(BasicType type) { //--------------------------------------------------- -char LIR_OprDesc::type_char(BasicType t) { +char LIR_Opr::type_char(BasicType t) { switch (t) { case T_ARRAY: t = T_OBJECT; @@ -120,7 +121,7 @@ char LIR_OprDesc::type_char(BasicType t) { } #ifndef PRODUCT -void LIR_OprDesc::validate_type() const { +void LIR_Opr::validate_type() const { #ifdef ASSERT if (!is_pointer() && !is_illegal()) { @@ -172,7 +173,7 @@ void LIR_OprDesc::validate_type() const { #endif // PRODUCT -bool LIR_OprDesc::is_oop() const { +bool LIR_Opr::is_oop() const { if (is_pointer()) { return pointer()->is_oop_pointer(); } else { @@ -1372,7 +1373,7 @@ void LIR_List::unlock_object(LIR_Opr hdr, LIR_Opr obj, LIR_Opr lock, LIR_Opr scr void check_LIR() { // cannot do the proper checking as PRODUCT and other modes return different results - // guarantee(sizeof(LIR_OprDesc) == wordSize, "may not have a v-table"); + // guarantee(sizeof(LIR_Opr) == wordSize, "may not have a v-table"); } @@ -1447,12 +1448,12 @@ void print_LIR(BlockList* blocks) { } #else -// LIR_OprDesc -void LIR_OprDesc::print() const { +// LIR_Opr +void LIR_Opr::print() const { print(tty); } -void LIR_OprDesc::print(outputStream* out) const { +void LIR_Opr::print(outputStream* out) const { if (is_illegal()) { return; } diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 43f400653b4e4bc7642ce3ecb0335ea6d8fb6b62..9334ae273d009c2f369866cf3e7140bff891a96a 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -46,26 +46,23 @@ class FpuStackSim; //--------------------------------------------------------------------- // LIR Operands -// LIR_OprDesc // LIR_OprPtr // LIR_Const // LIR_Address //--------------------------------------------------------------------- -class LIR_OprDesc; class LIR_OprPtr; class LIR_Const; class LIR_Address; class LIR_OprVisitor; +class LIR_Opr; - -typedef LIR_OprDesc* LIR_Opr; typedef int RegNr; typedef GrowableArray<LIR_Opr> LIR_OprList; typedef GrowableArray<LIR_Op*> LIR_OpArray; typedef GrowableArray<LIR_Op*> LIR_OpList; -// define LIR_OprPtr early so LIR_OprDesc can refer to it +// define LIR_OprPtr early so LIR_Opr can refer to it class LIR_OprPtr: public CompilationResourceObj { public: bool is_oop_pointer() const { return (type() == T_OBJECT); } @@ -184,14 +181,21 @@ class LIR_Const: public LIR_OprPtr { //---------------------LIR Operand descriptor------------------------------------ // -// The class LIR_OprDesc represents a LIR instruction operand; +// The class LIR_Opr represents a LIR instruction operand; // it can be a
register (ALU/FPU), stack location or a constant; // Constants and addresses are represented as resource area allocated -// structures (see above). -// Registers and stack locations are inlined into the this pointer +// structures (see above), and pointers are stored in the _value field (cast to +// an intptr_t). +// Registers and stack locations are represented inline as integers. // (see value function). -class LIR_OprDesc: public CompilationResourceObj { +// Previously, this class was derived from CompilationResourceObj. +// However, deriving from any of the "Obj" types in allocation.hpp seems +// detrimental, since in some build modes it would add a vtable to this class, +// which makes it no longer a 1-word trivially-copyable wrapper object, +// which is the entire point of it. + +class LIR_Opr { public: // value structure: // data opr-type opr-kind @@ -206,8 +210,9 @@ private: friend class LIR_OprFact; + intptr_t _value; // Conversion - intptr_t value() const { return (intptr_t) this; } + intptr_t value() const { return _value; } bool check_value_mask(intptr_t mask, intptr_t masked_value) const { return (value() & mask) == masked_value; @@ -279,12 +284,26 @@ class LIR_OprDesc: public CompilationResourceObj { static char type_char(BasicType t); public: + LIR_Opr() : _value(0) {} + LIR_Opr(intptr_t val) : _value(val) {} + LIR_Opr(LIR_OprPtr *val) : _value(reinterpret_cast<intptr_t>(val)) {} + bool operator==(const LIR_Opr &other) const { return _value == other._value; } + bool operator!=(const LIR_Opr &other) const { return _value != other._value; } + explicit operator bool() const { return _value != 0; } + + // UGLY HACK: make this value object look like a pointer (to itself). This + // operator overload should be removed, and all callers updated from + // `opr->fn()` to `opr.fn()`. + const LIR_Opr* operator->() const { return this; } + LIR_Opr* operator->() { return this; } + enum { vreg_base = ConcreteRegisterImpl::number_of_registers, vreg_max = (1 << data_bits) - 1 }; static inline LIR_Opr illegalOpr(); + static inline LIR_Opr nullOpr(); enum OprType { unknown_type = 0 << type_shift // means: not set (catch uninitialized types) @@ -343,7 +362,7 @@ class LIR_OprDesc: public CompilationResourceObj { char type_char() const { return type_char((is_pointer()) ?
pointer()->type() : type()); } - bool is_equal(LIR_Opr opr) const { return this == opr; } + bool is_equal(LIR_Opr opr) const { return *this == opr; } // checks whether types are same bool is_same_type(LIR_Opr opr) const { assert(type_field() != unknown_type && @@ -422,7 +441,7 @@ class LIR_OprDesc: public CompilationResourceObj { RegNr xmm_regnrHi() const { assert(is_double_xmm() && !is_virtual(), "type check"); return (RegNr)hi_reg_half(); } int vreg_number() const { assert(is_virtual(), "type check"); return (RegNr)data(); } - LIR_OprPtr* pointer() const { assert(is_pointer(), "type check"); return (LIR_OprPtr*)this; } + LIR_OprPtr* pointer() const { assert(_value != 0 && is_pointer(), "nullness and type check"); return (LIR_OprPtr*)_value; } LIR_Const* as_constant_ptr() const { return pointer()->as_constant(); } LIR_Address* as_address_ptr() const { return pointer()->as_address(); } @@ -459,32 +478,31 @@ class LIR_OprDesc: public CompilationResourceObj { void print(outputStream* out) const PRODUCT_RETURN; }; - -inline LIR_OprDesc::OprType as_OprType(BasicType type) { +inline LIR_Opr::OprType as_OprType(BasicType type) { switch (type) { - case T_INT: return LIR_OprDesc::int_type; - case T_LONG: return LIR_OprDesc::long_type; - case T_FLOAT: return LIR_OprDesc::float_type; - case T_DOUBLE: return LIR_OprDesc::double_type; + case T_INT: return LIR_Opr::int_type; + case T_LONG: return LIR_Opr::long_type; + case T_FLOAT: return LIR_Opr::float_type; + case T_DOUBLE: return LIR_Opr::double_type; case T_OBJECT: - case T_ARRAY: return LIR_OprDesc::object_type; - case T_ADDRESS: return LIR_OprDesc::address_type; - case T_METADATA: return LIR_OprDesc::metadata_type; + case T_ARRAY: return LIR_Opr::object_type; + case T_ADDRESS: return LIR_Opr::address_type; + case T_METADATA: return LIR_Opr::metadata_type; case T_ILLEGAL: // fall through - default: ShouldNotReachHere(); return LIR_OprDesc::unknown_type; + default: ShouldNotReachHere(); return LIR_Opr::unknown_type; } } -inline BasicType as_BasicType(LIR_OprDesc::OprType t) { +inline BasicType as_BasicType(LIR_Opr::OprType t) { switch (t) { - case LIR_OprDesc::int_type: return T_INT; - case LIR_OprDesc::long_type: return T_LONG; - case LIR_OprDesc::float_type: return T_FLOAT; - case LIR_OprDesc::double_type: return T_DOUBLE; - case LIR_OprDesc::object_type: return T_OBJECT; - case LIR_OprDesc::address_type: return T_ADDRESS; - case LIR_OprDesc::metadata_type:return T_METADATA; - case LIR_OprDesc::unknown_type: // fall through + case LIR_Opr::int_type: return T_INT; + case LIR_Opr::long_type: return T_LONG; + case LIR_Opr::float_type: return T_FLOAT; + case LIR_Opr::double_type: return T_DOUBLE; + case LIR_Opr::object_type: return T_OBJECT; + case LIR_Opr::address_type: return T_ADDRESS; + case LIR_Opr::metadata_type:return T_METADATA; + case LIR_Opr::unknown_type: // fall through default: ShouldNotReachHere(); return T_ILLEGAL; } } @@ -522,14 +540,14 @@ class LIR_Address: public LIR_OprPtr { LIR_Address(LIR_Opr base, intx disp, BasicType type): _base(base) - , _index(LIR_OprDesc::illegalOpr()) + , _index(LIR_Opr::illegalOpr()) , _scale(times_1) , _disp(disp) , _type(type) { verify(); } LIR_Address(LIR_Opr base, BasicType type): _base(base) - , _index(LIR_OprDesc::illegalOpr()) + , _index(LIR_Opr::illegalOpr()) , _scale(times_1) , _disp(0) , _type(type) { verify(); } @@ -570,45 +588,46 @@ class LIR_OprFact: public AllStatic { public: static LIR_Opr illegalOpr; + static LIR_Opr nullOpr; static LIR_Opr single_cpu(int reg) { - return 
(LIR_Opr)(intptr_t)((reg << LIR_OprDesc::reg1_shift) | - LIR_OprDesc::int_type | - LIR_OprDesc::cpu_register | - LIR_OprDesc::single_size); + return (LIR_Opr)(intptr_t)((reg << LIR_Opr::reg1_shift) | + LIR_Opr::int_type | + LIR_Opr::cpu_register | + LIR_Opr::single_size); } static LIR_Opr single_cpu_oop(int reg) { - return (LIR_Opr)(intptr_t)((reg << LIR_OprDesc::reg1_shift) | - LIR_OprDesc::object_type | - LIR_OprDesc::cpu_register | - LIR_OprDesc::single_size); + return (LIR_Opr)(intptr_t)((reg << LIR_Opr::reg1_shift) | + LIR_Opr::object_type | + LIR_Opr::cpu_register | + LIR_Opr::single_size); } static LIR_Opr single_cpu_address(int reg) { - return (LIR_Opr)(intptr_t)((reg << LIR_OprDesc::reg1_shift) | - LIR_OprDesc::address_type | - LIR_OprDesc::cpu_register | - LIR_OprDesc::single_size); + return (LIR_Opr)(intptr_t)((reg << LIR_Opr::reg1_shift) | + LIR_Opr::address_type | + LIR_Opr::cpu_register | + LIR_Opr::single_size); } static LIR_Opr single_cpu_metadata(int reg) { - return (LIR_Opr)(intptr_t)((reg << LIR_OprDesc::reg1_shift) | - LIR_OprDesc::metadata_type | - LIR_OprDesc::cpu_register | - LIR_OprDesc::single_size); + return (LIR_Opr)(intptr_t)((reg << LIR_Opr::reg1_shift) | + LIR_Opr::metadata_type | + LIR_Opr::cpu_register | + LIR_Opr::single_size); } static LIR_Opr double_cpu(int reg1, int reg2) { LP64_ONLY(assert(reg1 == reg2, "must be identical")); - return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | - (reg2 << LIR_OprDesc::reg2_shift) | - LIR_OprDesc::long_type | - LIR_OprDesc::cpu_register | - LIR_OprDesc::double_size); + return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | + (reg2 << LIR_Opr::reg2_shift) | + LIR_Opr::long_type | + LIR_Opr::cpu_register | + LIR_Opr::double_size); } static LIR_Opr single_fpu(int reg) { - return (LIR_Opr)(intptr_t)((reg << LIR_OprDesc::reg1_shift) | - LIR_OprDesc::float_type | - LIR_OprDesc::fpu_register | - LIR_OprDesc::single_size); + return (LIR_Opr)(intptr_t)((reg << LIR_Opr::reg1_shift) | + LIR_Opr::float_type | + LIR_Opr::fpu_register | + LIR_Opr::single_size); } // Platform dependant. 
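All of these factory methods build an operand the same way: shift the register or stack index into the data field and OR in tag bits for type, kind, and size, producing a value that fits in one machine word. A standalone sketch of the pattern (the field widths and shifts below are invented for illustration; c1's real layout is defined by the LIR_Opr enums above):

    #include <cstdint>
    #include <type_traits>

    // Assumed toy layout: tag bits in the low 9 bits, payload above them.
    enum : std::intptr_t { kind_cpu = 0x0, type_int = 0x1 << 3, size_single = 0x1 << 6, data_shift = 9 };

    class Opr {  // stand-in for the pointer-sized LIR_Opr value class
      std::intptr_t _value;
     public:
      explicit Opr(std::intptr_t v = 0) : _value(v) {}
      int data() const { return (int)(_value >> data_shift); }
      const Opr* operator->() const { return this; }  // value object posing as a pointer
    };

    static Opr single_cpu(int reg) {
      return Opr(((std::intptr_t)reg << data_shift) | type_int | kind_cpu | size_single);
    }

    static_assert(sizeof(Opr) == sizeof(std::intptr_t), "must stay one word");
    static_assert(std::is_trivially_copyable<Opr>::value, "cheap to pass by value");
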
@@ -616,40 +635,40 @@ class LIR_OprFact: public AllStatic { #ifdef ARM32 static LIR_Opr single_softfp(int reg) { - return (LIR_Opr)(intptr_t)((reg << LIR_OprDesc::reg1_shift) | - LIR_OprDesc::float_type | - LIR_OprDesc::cpu_register | - LIR_OprDesc::single_size); + return (LIR_Opr)(intptr_t)((reg << LIR_Opr::reg1_shift) | + LIR_Opr::float_type | + LIR_Opr::cpu_register | + LIR_Opr::single_size); } static LIR_Opr double_softfp(int reg1, int reg2) { - return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | - (reg2 << LIR_OprDesc::reg2_shift) | - LIR_OprDesc::double_type | - LIR_OprDesc::cpu_register | - LIR_OprDesc::double_size); + return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | + (reg2 << LIR_Opr::reg2_shift) | + LIR_Opr::double_type | + LIR_Opr::cpu_register | + LIR_Opr::double_size); } #endif // ARM32 #if defined(X86) static LIR_Opr single_xmm(int reg) { - return (LIR_Opr)(intptr_t)((reg << LIR_OprDesc::reg1_shift) | - LIR_OprDesc::float_type | - LIR_OprDesc::fpu_register | - LIR_OprDesc::single_size | - LIR_OprDesc::is_xmm_mask); + return (LIR_Opr)(intptr_t)((reg << LIR_Opr::reg1_shift) | + LIR_Opr::float_type | + LIR_Opr::fpu_register | + LIR_Opr::single_size | + LIR_Opr::is_xmm_mask); } static LIR_Opr double_xmm(int reg) { - return (LIR_Opr)(intptr_t)((reg << LIR_OprDesc::reg1_shift) | - (reg << LIR_OprDesc::reg2_shift) | - LIR_OprDesc::double_type | - LIR_OprDesc::fpu_register | - LIR_OprDesc::double_size | - LIR_OprDesc::is_xmm_mask); + return (LIR_Opr)(intptr_t)((reg << LIR_Opr::reg1_shift) | + (reg << LIR_Opr::reg2_shift) | + LIR_Opr::double_type | + LIR_Opr::fpu_register | + LIR_Opr::double_size | + LIR_Opr::is_xmm_mask); } #endif // X86 static LIR_Opr virtual_register(int index, BasicType type) { - if (index > LIR_OprDesc::vreg_max) { + if (index > LIR_Opr::vreg_max) { // Running out of virtual registers. Caller should bailout. 
return illegalOpr; } @@ -658,75 +677,75 @@ class LIR_OprFact: public AllStatic { switch (type) { case T_OBJECT: // fall through case T_ARRAY: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::object_type | - LIR_OprDesc::cpu_register | - LIR_OprDesc::single_size | - LIR_OprDesc::virtual_mask); + res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::object_type | + LIR_Opr::cpu_register | + LIR_Opr::single_size | + LIR_Opr::virtual_mask); break; case T_METADATA: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::metadata_type| - LIR_OprDesc::cpu_register | - LIR_OprDesc::single_size | - LIR_OprDesc::virtual_mask); + res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::metadata_type| + LIR_Opr::cpu_register | + LIR_Opr::single_size | + LIR_Opr::virtual_mask); break; case T_INT: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::int_type | - LIR_OprDesc::cpu_register | - LIR_OprDesc::single_size | - LIR_OprDesc::virtual_mask); + res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::int_type | + LIR_Opr::cpu_register | + LIR_Opr::single_size | + LIR_Opr::virtual_mask); break; case T_ADDRESS: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::address_type | - LIR_OprDesc::cpu_register | - LIR_OprDesc::single_size | - LIR_OprDesc::virtual_mask); + res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::address_type | + LIR_Opr::cpu_register | + LIR_Opr::single_size | + LIR_Opr::virtual_mask); break; case T_LONG: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::long_type | - LIR_OprDesc::cpu_register | - LIR_OprDesc::double_size | - LIR_OprDesc::virtual_mask); + res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::long_type | + LIR_Opr::cpu_register | + LIR_Opr::double_size | + LIR_Opr::virtual_mask); break; #ifdef __SOFTFP__ case T_FLOAT: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::float_type | - LIR_OprDesc::cpu_register | - LIR_OprDesc::single_size | - LIR_OprDesc::virtual_mask); + res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::float_type | + LIR_Opr::cpu_register | + LIR_Opr::single_size | + LIR_Opr::virtual_mask); break; case T_DOUBLE: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::double_type | - LIR_OprDesc::cpu_register | - LIR_OprDesc::double_size | - LIR_OprDesc::virtual_mask); + res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::double_type | + LIR_Opr::cpu_register | + LIR_Opr::double_size | + LIR_Opr::virtual_mask); break; #else // __SOFTFP__ case T_FLOAT: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::float_type | - LIR_OprDesc::fpu_register | - LIR_OprDesc::single_size | - LIR_OprDesc::virtual_mask); + res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::float_type | + LIR_Opr::fpu_register | + LIR_Opr::single_size | + LIR_Opr::virtual_mask); break; case - T_DOUBLE: res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::double_type | - LIR_OprDesc::fpu_register | - LIR_OprDesc::double_size | - LIR_OprDesc::virtual_mask); + T_DOUBLE: res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::double_type | + LIR_Opr::fpu_register | + LIR_Opr::double_size | + LIR_Opr::virtual_mask); break; #endif // __SOFTFP__ default: ShouldNotReachHere(); res = illegalOpr; @@ -735,20 
+754,20 @@ class LIR_OprFact: public AllStatic { #ifdef ASSERT res->validate_type(); assert(res->vreg_number() == index, "conversion check"); - assert(index >= LIR_OprDesc::vreg_base, "must start at vreg_base"); - assert(index <= (max_jint >> LIR_OprDesc::data_shift), "index is too big"); + assert(index >= LIR_Opr::vreg_base, "must start at vreg_base"); + assert(index <= (max_jint >> LIR_Opr::data_shift), "index is too big"); // old-style calculation; check if old and new method are equal - LIR_OprDesc::OprType t = as_OprType(type); + LIR_Opr::OprType t = as_OprType(type); #ifdef __SOFTFP__ - LIR_Opr old_res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | + LIR_Opr old_res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | t | - LIR_OprDesc::cpu_register | - LIR_OprDesc::size_for(type) | LIR_OprDesc::virtual_mask); + LIR_Opr::cpu_register | + LIR_Opr::size_for(type) | LIR_Opr::virtual_mask); #else // __SOFTFP__ - LIR_Opr old_res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | t | - ((type == T_FLOAT || type == T_DOUBLE) ? LIR_OprDesc::fpu_register : LIR_OprDesc::cpu_register) | - LIR_OprDesc::size_for(type) | LIR_OprDesc::virtual_mask); + LIR_Opr old_res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | t | + ((type == T_FLOAT || type == T_DOUBLE) ? LIR_Opr::fpu_register : LIR_Opr::cpu_register) | + LIR_Opr::size_for(type) | LIR_Opr::virtual_mask); assert(res == old_res, "old and new method not equal"); #endif // __SOFTFP__ #endif // ASSERT @@ -764,50 +783,50 @@ class LIR_OprFact: public AllStatic { switch (type) { case T_OBJECT: // fall through case T_ARRAY: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::object_type | - LIR_OprDesc::stack_value | - LIR_OprDesc::single_size); + res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::object_type | + LIR_Opr::stack_value | + LIR_Opr::single_size); break; case T_METADATA: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::metadata_type | - LIR_OprDesc::stack_value | - LIR_OprDesc::single_size); + res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::metadata_type | + LIR_Opr::stack_value | + LIR_Opr::single_size); break; case T_INT: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::int_type | - LIR_OprDesc::stack_value | - LIR_OprDesc::single_size); + res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::int_type | + LIR_Opr::stack_value | + LIR_Opr::single_size); break; case T_ADDRESS: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::address_type | - LIR_OprDesc::stack_value | - LIR_OprDesc::single_size); + res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::address_type | + LIR_Opr::stack_value | + LIR_Opr::single_size); break; case T_LONG: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::long_type | - LIR_OprDesc::stack_value | - LIR_OprDesc::double_size); + res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::long_type | + LIR_Opr::stack_value | + LIR_Opr::double_size); break; case T_FLOAT: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::float_type | - LIR_OprDesc::stack_value | - LIR_OprDesc::single_size); + res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::float_type | + LIR_Opr::stack_value | + LIR_Opr::single_size); break; case T_DOUBLE: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - 
LIR_OprDesc::double_type | - LIR_OprDesc::stack_value | - LIR_OprDesc::double_size); + res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::double_type | + LIR_Opr::stack_value | + LIR_Opr::double_size); break; default: ShouldNotReachHere(); res = illegalOpr; @@ -815,12 +834,12 @@ class LIR_OprFact: public AllStatic { #ifdef ASSERT assert(index >= 0, "index must be positive"); - assert(index <= (max_jint >> LIR_OprDesc::data_shift), "index is too big"); + assert(index <= (max_jint >> LIR_Opr::data_shift), "index is too big"); - LIR_Opr old_res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::stack_value | + LIR_Opr old_res = (LIR_Opr)(intptr_t)((index << LIR_Opr::data_shift) | + LIR_Opr::stack_value | as_OprType(type) | - LIR_OprDesc::size_for(type)); + LIR_Opr::size_for(type)); assert(res == old_res, "old and new method not equal"); #endif @@ -2446,6 +2465,8 @@ class LIR_OpVisitState: public StackObj { }; -inline LIR_Opr LIR_OprDesc::illegalOpr() { return LIR_OprFact::illegalOpr; }; +inline LIR_Opr LIR_Opr::illegalOpr() { return LIR_OprFact::illegalOpr; }; + +inline LIR_Opr LIR_Opr::nullOpr() { return LIR_OprFact::nullOpr; }; #endif // SHARE_C1_C1_LIR_HPP diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index f9a7bf6907875ce34363652f8d899af1902bf10e..bca42cd7cf56e5b3fb5dfd9269f47e54b35d0094 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -1019,12 +1019,12 @@ LIR_Opr LIRGenerator::new_register(BasicType type) { int vreg_num = _virtual_register_number; // Add a little fudge factor for the bailout since the bailout is only checked periodically. This allows us to hand out // a few extra registers before we really run out which helps to avoid to trip over assertions. - if (vreg_num + 20 >= LIR_OprDesc::vreg_max) { + if (vreg_num + 20 >= LIR_Opr::vreg_max) { bailout("out of virtual registers in LIR generator"); - if (vreg_num + 2 >= LIR_OprDesc::vreg_max) { + if (vreg_num + 2 >= LIR_Opr::vreg_max) { // Wrap it around and continue until bailout really happens to avoid hitting assertions. 
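The soft-limit pattern in new_register() here (and again in MoveResolver::get_virtual_register further down) deserves a note: the bailout is only polled periodically, so the allocator keeps handing out register numbers for a short while after requesting one. The code bails out about 20 numbers before the hard encoding limit and, if asked again within 2 of it, wraps the counter back to the base so no out-of-range number is ever encoded into an operand. A condensed sketch with placeholder limits (kVregBase/kVregMax are assumptions, not c1's real values):

    // Sketch only: models the guarded hand-out, not HotSpot's implementation.
    static const int kVregBase = 100;
    static const int kVregMax  = 10000;

    static int next_vreg(int& counter, bool& bailout_requested) {
      if (counter + 20 >= kVregMax) {    // soft limit: request bailout early
        bailout_requested = true;
        if (counter + 2 >= kVregMax) {   // nearly at the hard limit: wrap so the
          counter = kVregBase;           // numbers stay encodable until bailout hits
        }
      }
      return counter++;
    }

The replaced lines that follow are exactly this wrap-around step.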
- _virtual_register_number = LIR_OprDesc::vreg_base; - vreg_num = LIR_OprDesc::vreg_base; + _virtual_register_number = LIR_Opr::vreg_base; + vreg_num = LIR_Opr::vreg_base; } } _virtual_register_number += 1; @@ -1865,7 +1865,7 @@ void LIRGenerator::do_PreconditionsCheckIndex(Intrinsic* x, BasicType type) { CodeEmitInfo* info = state_for(x, state); LIR_Opr len = length.result(); - LIR_Opr zero = NULL; + LIR_Opr zero; if (type == T_INT) { zero = LIR_OprFact::intConst(0); if (length.result()->is_constant()){ @@ -3271,7 +3271,7 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, assert(level > CompLevel_simple, "Shouldn't be here"); int offset = -1; - LIR_Opr counter_holder = NULL; + LIR_Opr counter_holder; if (level == CompLevel_limited_profile) { MethodCounters* counters_adr = method->ensure_method_counters(); if (counters_adr == NULL) { diff --git a/src/hotspot/share/c1/c1_LIRGenerator.hpp b/src/hotspot/share/c1/c1_LIRGenerator.hpp index cefadc9b11443d1eea0ce9c2b9c86d1c55b60880..ffe7108c34e572a1d2870527bc317d187162a0a1 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.hpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.hpp @@ -309,7 +309,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { LIR_Opr atomic_add(BasicType type, LIR_Opr addr, LIRItem& new_value); #ifdef CARDTABLEBARRIERSET_POST_BARRIER_HELPER - virtual void CardTableBarrierSet_post_barrier_helper(LIR_OprDesc* addr, LIR_Const* card_table_base); + virtual void CardTableBarrierSet_post_barrier_helper(LIR_Opr addr, LIR_Const* card_table_base); #endif // specific implementations @@ -502,7 +502,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { LIRGenerator(Compilation* compilation, ciMethod* method) : _compilation(compilation) , _method(method) - , _virtual_register_number(LIR_OprDesc::vreg_base) + , _virtual_register_number(LIR_Opr::vreg_base) , _vreg_flags(num_vreg_flags) , _barrier_set(BarrierSet::barrier_set()->barrier_set_c1()) { } diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp index 84846e8e43d54a7b140b8b8a6c9b7e37ae3412fd..27b11d05c8b7e19ea521305fb6bf3c9f4d9904ff 100644 --- a/src/hotspot/share/c1/c1_LinearScan.cpp +++ b/src/hotspot/share/c1/c1_LinearScan.cpp @@ -173,7 +173,7 @@ bool LinearScan::is_precolored_interval(const Interval* i) { } bool LinearScan::is_virtual_interval(const Interval* i) { - return i->reg_num() >= LIR_OprDesc::vreg_base; + return i->reg_num() >= LIR_Opr::vreg_base; } bool LinearScan::is_precolored_cpu_interval(const Interval* i) { @@ -182,9 +182,9 @@ bool LinearScan::is_precolored_cpu_interval(const Interval* i) { bool LinearScan::is_virtual_cpu_interval(const Interval* i) { #if defined(__SOFTFP__) || defined(E500V2) - return i->reg_num() >= LIR_OprDesc::vreg_base; + return i->reg_num() >= LIR_Opr::vreg_base; #else - return i->reg_num() >= LIR_OprDesc::vreg_base && (i->type() != T_FLOAT && i->type() != T_DOUBLE); + return i->reg_num() >= LIR_Opr::vreg_base && (i->type() != T_FLOAT && i->type() != T_DOUBLE); #endif // __SOFTFP__ or E500V2 } @@ -196,7 +196,7 @@ bool LinearScan::is_virtual_fpu_interval(const Interval* i) { #if defined(__SOFTFP__) || defined(E500V2) return false; #else - return i->reg_num() >= LIR_OprDesc::vreg_base && (i->type() == T_FLOAT || i->type() == T_DOUBLE); + return i->reg_num() >= LIR_Opr::vreg_base && (i->type() == T_FLOAT || i->type() == T_DOUBLE); #endif // __SOFTFP__ or E500V2 } @@ -274,7 +274,7 @@ Interval* LinearScan::create_interval(int reg_num) { 
_intervals.at_put(reg_num, interval); // assign register number for precolored intervals - if (reg_num < LIR_OprDesc::vreg_base) { + if (reg_num < LIR_Opr::vreg_base) { interval->assign_reg(reg_num); } return interval; @@ -819,7 +819,7 @@ void LinearScan::compute_global_live_sets() { // (live set must be empty at fixed intervals) for (int i = 0; i < num_blocks; i++) { BlockBegin* block = block_at(i); - for (int j = 0; j < LIR_OprDesc::vreg_base; j++) { + for (int j = 0; j < LIR_Opr::vreg_base; j++) { assert(block->live_in().at(j) == false, "live_in set of fixed register must be empty"); assert(block->live_out().at(j) == false, "live_out set of fixed register must be empty"); assert(block->live_gen().at(j) == false, "live_gen set of fixed register must be empty"); @@ -1333,7 +1333,7 @@ void LinearScan::build_intervals() { int size = (int)live.size(); for (int number = (int)live.get_next_one_offset(0, size); number < size; number = (int)live.get_next_one_offset(number + 1, size)) { assert(live.at(number), "should not stop here otherwise"); - assert(number >= LIR_OprDesc::vreg_base, "fixed intervals must not be live on block bounds"); + assert(number >= LIR_Opr::vreg_base, "fixed intervals must not be live on block bounds"); TRACE_LINEAR_SCAN(2, tty->print_cr("live in %d to %d", number, block_to + 2)); add_use(number, block_from, block_to + 2, noUse, T_ILLEGAL); @@ -1706,7 +1706,7 @@ Interval* LinearScan::split_child_at_op_id(Interval* interval, int op_id, LIR_Op } assert(false, "must find an interval, but do a clean bailout in product mode"); - result = new Interval(LIR_OprDesc::vreg_base); + result = new Interval(LIR_Opr::vreg_base); result->assign_reg(0); result->set_type(T_INT); BAILOUT_("LinearScan: interval is NULL", result); @@ -2435,7 +2435,7 @@ OopMap* LinearScan::compute_oop_map(IntervalWalker* iw, LIR_Op* op, CodeEmitInfo assert(interval->current_from() <= op->id() && op->id() <= interval->current_to(), "interval should not be active otherwise"); assert(interval->assigned_regHi() == any_reg, "oop must be single word"); - assert(interval->reg_num() >= LIR_OprDesc::vreg_base, "fixed interval found"); + assert(interval->reg_num() >= LIR_Opr::vreg_base, "fixed interval found"); // Check if this range covers the instruction. Intervals that // start or end at the current operation are not included in the @@ -3218,7 +3218,7 @@ void LinearScan::print_reg_num(outputStream* out, int reg_num) { if (reg_num == -1) { out->print("[ANY]"); return; - } else if (reg_num >= LIR_OprDesc::vreg_base) { + } else if (reg_num >= LIR_Opr::vreg_base) { out->print("[VREG %d]", reg_num); return; } @@ -3298,7 +3298,7 @@ void LinearScan::verify_intervals() { has_error = true; } - if (i1->reg_num() >= LIR_OprDesc::vreg_base && i1->type() == T_ILLEGAL) { + if (i1->reg_num() >= LIR_Opr::vreg_base && i1->type() == T_ILLEGAL) { tty->print_cr("Interval %d has no type assigned", i1->reg_num()); i1->print(); tty->cr(); has_error = true; } @@ -3969,11 +3969,11 @@ LIR_Opr MoveResolver::get_virtual_register(Interval* interval) { // Add a little fudge factor for the bailout since the bailout is only checked periodically. This allows us to hand out // a few extra registers before we really run out which helps to avoid to trip over assertions. 
int reg_num = interval->reg_num(); - if (reg_num + 20 >= LIR_OprDesc::vreg_max) { + if (reg_num + 20 >= LIR_Opr::vreg_max) { _allocator->bailout("out of virtual registers in linear scan"); - if (reg_num + 2 >= LIR_OprDesc::vreg_max) { + if (reg_num + 2 >= LIR_Opr::vreg_max) { // Wrap it around and continue until bailout really happens to avoid hitting assertions. - reg_num = LIR_OprDesc::vreg_base; + reg_num = LIR_Opr::vreg_base; } } LIR_Opr vreg = LIR_OprFact::virtual_register(reg_num, interval->type()); @@ -4405,7 +4405,7 @@ void Interval::add_use_pos(int pos, IntervalUseKind use_kind) { // do not add use positions for precolored intervals because // they are never used - if (use_kind != noUse && reg_num() >= LIR_OprDesc::vreg_base) { + if (use_kind != noUse && reg_num() >= LIR_Opr::vreg_base) { #ifdef ASSERT assert(_use_pos_and_kinds.length() % 2 == 0, "must be"); for (int i = 0; i < _use_pos_and_kinds.length(); i += 2) { @@ -4635,7 +4635,7 @@ void Interval::print_on(outputStream* out, bool is_cfg_printer) const { const char* UseKind2Name[] = { "N", "L", "S", "M" }; const char* type_name; - if (reg_num() < LIR_OprDesc::vreg_base) { + if (reg_num() < LIR_Opr::vreg_base) { type_name = "fixed"; } else { type_name = type2name(type()); @@ -4652,7 +4652,7 @@ void Interval::print_on(outputStream* out, bool is_cfg_printer) const { } } else { // Improved output for normal debugging. - if (reg_num() < LIR_OprDesc::vreg_base) { + if (reg_num() < LIR_Opr::vreg_base) { LinearScan::print_reg_num(out, assigned_reg()); } else if (assigned_reg() != -1 && (LinearScan::num_physical_regs(type()) == 1 || assigned_regHi() != -1)) { LinearScan::calc_operand_for_interval(this)->print(out); diff --git a/src/hotspot/share/c1/c1_LinearScan.hpp b/src/hotspot/share/c1/c1_LinearScan.hpp index 20c1a66f40febbba561c04ae2b227c1577189742..5808675a5436943da367a4306c27804994d62c0d 100644 --- a/src/hotspot/share/c1/c1_LinearScan.hpp +++ b/src/hotspot/share/c1/c1_LinearScan.hpp @@ -547,8 +547,8 @@ class Interval : public CompilationResourceObj { // accessors int reg_num() const { return _reg_num; } void set_reg_num(int r) { assert(_reg_num == -1, "cannot change reg_num"); _reg_num = r; } - BasicType type() const { assert(_reg_num == -1 || _reg_num >= LIR_OprDesc::vreg_base, "cannot access type for fixed interval"); return _type; } - void set_type(BasicType type) { assert(_reg_num < LIR_OprDesc::vreg_base || _type == T_ILLEGAL || _type == type, "overwriting existing type"); _type = type; } + BasicType type() const { assert(_reg_num == -1 || _reg_num >= LIR_Opr::vreg_base, "cannot access type for fixed interval"); return _type; } + void set_type(BasicType type) { assert(_reg_num < LIR_Opr::vreg_base || _type == T_ILLEGAL || _type == type, "overwriting existing type"); _type = type; } Range* first() const { return _first; } int from() const { return _first->from(); } diff --git a/src/hotspot/share/cds/dumpAllocStats.cpp b/src/hotspot/share/cds/dumpAllocStats.cpp index 0568bc550e67df7e7d8913b9d124e82d0a85dcce..72bb80fd17a2b1c4f494973ebadbbdc3765b9257 100644 --- a/src/hotspot/share/cds/dumpAllocStats.cpp +++ b/src/hotspot/share/cds/dumpAllocStats.cpp @@ -42,14 +42,6 @@ void DumpAllocStats::print_stats(int ro_all, int rw_all) { _counts[RO][StringBucketType] = _string_stats.bucket_count; _bytes [RO][StringBucketType] = _string_stats.bucket_bytes; - // prevent divide-by-zero - if (ro_all < 1) { - ro_all = 1; - } - if (rw_all < 1) { - rw_all = 1; - } - int all_ro_count = 0; int all_ro_bytes = 0; int all_rw_count = 0; @@ -102,8 +94,11 
@@ void DumpAllocStats::print_stats(int ro_all, int rw_all) { all_rw_count, all_rw_bytes, all_rw_perc, all_count, all_bytes, all_perc); - assert(all_ro_bytes == ro_all, "everything should have been counted"); - assert(all_rw_bytes == rw_all, "everything should have been counted"); + msg.flush(); + + assert(all_ro_bytes == ro_all && all_rw_bytes == rw_all, + "everything should have been counted (used/counted: ro %d/%d, rw %d/%d)", + ro_all, all_ro_bytes, rw_all, all_rw_bytes); #undef fmt_stats } diff --git a/src/hotspot/share/cds/dynamicArchive.cpp b/src/hotspot/share/cds/dynamicArchive.cpp index 33da23e10fec65ba096c4465640914b467d52179..c821d46d4d0de45461ee3265f2ba30a13316766d 100644 --- a/src/hotspot/share/cds/dynamicArchive.cpp +++ b/src/hotspot/share/cds/dynamicArchive.cpp @@ -50,7 +50,9 @@ class DynamicArchiveBuilder : public ArchiveBuilder { + const char* _archive_name; public: + DynamicArchiveBuilder(const char* archive_name) : _archive_name(archive_name) {} void mark_pointer(address* ptr_loc) { ArchivePtrMarker::mark_pointer(ptr_loc); } @@ -112,7 +114,6 @@ public: // Block concurrent class unloading from changing the _dumptime_table MutexLocker ml(DumpTimeTable_lock, Mutex::_no_safepoint_check_flag); SystemDictionaryShared::check_excluded_classes(); - SystemDictionaryShared::cleanup_lambda_proxy_class_dictionary(); // save dumptime tables SystemDictionaryShared::clone_dumptime_tables(); @@ -319,7 +320,7 @@ void DynamicArchiveBuilder::write_archive(char* serialized_data) { FileMapInfo* dynamic_info = FileMapInfo::dynamic_info(); assert(dynamic_info != NULL, "Sanity"); - dynamic_info->open_for_write(Arguments::GetSharedDynamicArchivePath()); + dynamic_info->open_for_write(_archive_name); ArchiveBuilder::write_archive(dynamic_info, NULL, NULL, NULL, NULL); address base = _requested_dynamic_archive_bottom; @@ -334,9 +335,10 @@ void DynamicArchiveBuilder::write_archive(char* serialized_data) { } class VM_PopulateDynamicDumpSharedSpace: public VM_GC_Sync_Operation { - DynamicArchiveBuilder builder; + DynamicArchiveBuilder _builder; public: - VM_PopulateDynamicDumpSharedSpace() : VM_GC_Sync_Operation() {} + VM_PopulateDynamicDumpSharedSpace(const char* archive_name) + : VM_GC_Sync_Operation(), _builder(archive_name) {} VMOp_Type type() const { return VMOp_PopulateDumpSharedSpace; } void doit() { ResourceMark rm; @@ -350,11 +352,30 @@ public: } FileMapInfo::check_nonempty_dir_in_shared_path_table(); - builder.doit(); + _builder.doit(); } }; -void DynamicArchive::prepare_for_dynamic_dumping() { +void DynamicArchive::check_for_dynamic_dump() { + if (DynamicDumpSharedSpaces && !UseSharedSpaces) { + // This could happen if SharedArchiveFile has failed to load: + // - -Xshare:off was specified + // - SharedArchiveFile points to a non-existent file. + // - SharedArchiveFile points to an archive that has failed CRC check + // - SharedArchiveFile is not specified and the VM doesn't have a compatible default archive + +#define __THEMSG " is unsupported when base CDS archive is not loaded. Run with -Xlog:cds for more info."
+ if (RecordDynamicDumpInfo) { + vm_exit_during_initialization("-XX:+RecordDynamicDumpInfo" __THEMSG, NULL); + } else { + assert(ArchiveClassesAtExit != nullptr, "sanity"); + vm_exit_during_initialization("-XX:ArchiveClassesAtExit" __THEMSG, NULL); +#undef __THEMSG + } + } +} + +void DynamicArchive::prepare_for_dump_at_exit() { EXCEPTION_MARK; ResourceMark rm(THREAD); MetaspaceShared::link_shared_classes(THREAD); @@ -368,41 +389,27 @@ void DynamicArchive::prepare_for_dynamic_dumping() { } } -void DynamicArchive::dump(const char* archive_name, TRAPS) { - assert(UseSharedSpaces && RecordDynamicDumpInfo, "already checked in arguments.cpp?"); - assert(ArchiveClassesAtExit == nullptr, "already checked in arguments.cpp?"); - ArchiveClassesAtExit = archive_name; - if (Arguments::init_shared_archive_paths()) { - prepare_for_dynamic_dumping(); - if (DynamicDumpSharedSpaces) { - dump(CHECK); - } - } else { - ArchiveClassesAtExit = nullptr; - THROW_MSG(vmSymbols::java_lang_RuntimeException(), - "Could not setup SharedDynamicArchivePath"); - } - // prevent do dynamic dump at exit. - ArchiveClassesAtExit = nullptr; - if (!Arguments::init_shared_archive_paths()) { - THROW_MSG(vmSymbols::java_lang_RuntimeException(), - "Could not restore SharedDynamicArchivePath"); - } +// This is called by "jcmd VM.cds dynamic_dump" +void DynamicArchive::dump_for_jcmd(const char* archive_name, TRAPS) { + assert(UseSharedSpaces && RecordDynamicDumpInfo, "already checked in arguments.cpp"); + assert(ArchiveClassesAtExit == nullptr, "already checked in arguments.cpp"); + assert(DynamicDumpSharedSpaces, "already checked by check_for_dynamic_dump() during VM startup"); + MetaspaceShared::link_shared_classes(CHECK); + dump(archive_name, THREAD); } -void DynamicArchive::dump(TRAPS) { - if (Arguments::GetSharedDynamicArchivePath() == NULL) { - log_warning(cds, dynamic)("SharedDynamicArchivePath is not specified"); - return; - } - +void DynamicArchive::dump(const char* archive_name, TRAPS) { // copy shared path table to saved. 
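Taken together, these changes split dynamic dumping into explicit entry points: check_for_dynamic_dump() fails fast at startup when the base archive is missing, dump_for_jcmd() serves the on-demand `jcmd VM.cds dynamic_dump` path, and should_dump_at_vm_exit() gates the -XX:ArchiveClassesAtExit path. A condensed model of the decision logic (a sketch under assumed stand-in names, not the real control flow):

    // Stand-ins for the VM flags consulted above.
    struct CdsFlags {
      bool dynamic_dump_shared_spaces;      // DynamicDumpSharedSpaces
      bool use_shared_spaces;               // UseSharedSpaces (base archive mapped)
      const char* archive_classes_at_exit;  // ArchiveClassesAtExit, may be null
    };

    // Mirrors check_for_dynamic_dump(): a dynamic dump without a loaded base
    // archive is a startup error.
    static bool dynamic_dump_is_viable(const CdsFlags& f) {
      return !f.dynamic_dump_shared_spaces || f.use_shared_spaces;
    }

    // Mirrors should_dump_at_vm_exit().
    static bool should_dump_at_vm_exit(const CdsFlags& f) {
      return f.dynamic_dump_shared_spaces && f.archive_classes_at_exit != nullptr;
    }

Both dump paths funnel into dump(), whose first step is the clone_shared_path_table() call that follows.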
FileMapInfo::clone_shared_path_table(CHECK); - VM_PopulateDynamicDumpSharedSpace op; + VM_PopulateDynamicDumpSharedSpace op(archive_name); VMThread::execute(&op); } +bool DynamicArchive::should_dump_at_vm_exit() { + return DynamicDumpSharedSpaces && (ArchiveClassesAtExit != nullptr); +} + bool DynamicArchive::validate(FileMapInfo* dynamic_info) { assert(!dynamic_info->is_static(), "must be"); // Check if the recorded base archive matches with the current one diff --git a/src/hotspot/share/cds/dynamicArchive.hpp b/src/hotspot/share/cds/dynamicArchive.hpp index 0371b94849752aad06eaf08d38333b7291637113..fbb831ae1953b9b4afa65b8bc853fbdd38b13d12 100644 --- a/src/hotspot/share/cds/dynamicArchive.hpp +++ b/src/hotspot/share/cds/dynamicArchive.hpp @@ -59,9 +59,11 @@ public: class DynamicArchive : AllStatic { public: - static void prepare_for_dynamic_dumping(); + static void check_for_dynamic_dump(); + static bool should_dump_at_vm_exit(); + static void prepare_for_dump_at_exit(); + static void dump_for_jcmd(const char* archive_name, TRAPS); static void dump(const char* archive_name, TRAPS); - static void dump(TRAPS); static bool is_mapped() { return FileMapInfo::dynamic_info() != NULL; } static bool validate(FileMapInfo* dynamic_info); }; diff --git a/src/hotspot/share/cds/metaspaceShared.cpp b/src/hotspot/share/cds/metaspaceShared.cpp index 5dd296410c783adfd41f99fba6b2421734f22a79..17e27a5372f6a65fced5c0f6a7af3a94c1c486af 100644 --- a/src/hotspot/share/cds/metaspaceShared.cpp +++ b/src/hotspot/share/cds/metaspaceShared.cpp @@ -521,7 +521,6 @@ void VM_PopulateDumpSharedSpace::doit() { // Block concurrent class unloading from changing the _dumptime_table MutexLocker ml(DumpTimeTable_lock, Mutex::_no_safepoint_check_flag); SystemDictionaryShared::check_excluded_classes(); - SystemDictionaryShared::cleanup_lambda_proxy_class_dictionary(); StaticArchiveBuilder builder; builder.gather_source_objs(); diff --git a/src/hotspot/share/ci/ciEnv.cpp b/src/hotspot/share/ci/ciEnv.cpp index 0c6f2b5fa2b4c5e54eb5b5e238253755d38513fe..7fae95ae6329483bf70a24e4d38bdf0f452d281b 100644 --- a/src/hotspot/share/ci/ciEnv.cpp +++ b/src/hotspot/share/ci/ciEnv.cpp @@ -1643,6 +1643,7 @@ void ciEnv::dump_replay_data_helper(outputStream* out) { NoSafepointVerifier no_safepoint; ResourceMark rm; + out->print_cr("version %d", REPLAY_VERSION); #if INCLUDE_JVMTI out->print_cr("JvmtiExport can_access_local_variables %d", _jvmti_can_access_local_variables); out->print_cr("JvmtiExport can_hotswap_or_post_breakpoint %d", _jvmti_can_hotswap_or_post_breakpoint); @@ -1680,8 +1681,8 @@ void ciEnv::dump_replay_data(outputStream* out) { } void ciEnv::dump_replay_data(int compile_id) { - static char buffer[O_BUFLEN]; - int ret = jio_snprintf(buffer, O_BUFLEN, "replay_pid%p_compid%d.log", os::current_process_id(), compile_id); + char buffer[64]; + int ret = jio_snprintf(buffer, sizeof(buffer), "replay_pid%d_compid%d.log", os::current_process_id(), compile_id); if (ret > 0) { int fd = os::open(buffer, O_RDWR | O_CREAT | O_TRUNC, 0666); if (fd != -1) { @@ -1698,8 +1699,8 @@ void ciEnv::dump_replay_data(int compile_id) { } void ciEnv::dump_inline_data(int compile_id) { - static char buffer[O_BUFLEN]; - int ret = jio_snprintf(buffer, O_BUFLEN, "inline_pid%p_compid%d.log", os::current_process_id(), compile_id); + char buffer[64]; + int ret = jio_snprintf(buffer, sizeof(buffer), "inline_pid%d_compid%d.log", os::current_process_id(), compile_id); if (ret > 0) { int fd = os::open(buffer, O_RDWR | O_CREAT | O_TRUNC, 0666); if (fd != -1) { diff --git 
a/src/hotspot/share/ci/ciInstanceKlass.cpp b/src/hotspot/share/ci/ciInstanceKlass.cpp index cc361744149b5a45bb71f0a7a3e17bda1a28f78c..88c475d93d8127fb142fb894402533c9d4e3cfad 100644 --- a/src/hotspot/share/ci/ciInstanceKlass.cpp +++ b/src/hotspot/share/ci/ciInstanceKlass.cpp @@ -87,7 +87,7 @@ ciInstanceKlass::ciInstanceKlass(Klass* k) : (void)CURRENT_ENV->get_object(holder); } - Thread *thread = Thread::current(); + JavaThread *thread = JavaThread::current(); if (ciObjectFactory::is_initialized()) { _loader = JNIHandles::make_local(thread, ik->class_loader()); _protection_domain = JNIHandles::make_local(thread, diff --git a/src/hotspot/share/ci/ciMethodData.cpp b/src/hotspot/share/ci/ciMethodData.cpp index cdf85bb1b4757ce4ed21a3440210c28c8bb955f2..d8b77ab2f5193e6b55863982243600ca0df6afc5 100644 --- a/src/hotspot/share/ci/ciMethodData.cpp +++ b/src/hotspot/share/ci/ciMethodData.cpp @@ -49,10 +49,7 @@ ciMethodData::ciMethodData(MethodData* md) _saw_free_extra_data(false), // Initialize the escape information (to "don't know."); _eflags(0), _arg_local(0), _arg_stack(0), _arg_returned(0), - _creation_mileage(0), - _current_mileage(0), _invocation_counter(0), - _backedge_counter(0), _orig(), _parameters(NULL) {} @@ -250,10 +247,7 @@ bool ciMethodData::load_data() { load_remaining_extra_data(); // Note: Extra data are all BitData, and do not need translation. - _creation_mileage = mdo->creation_mileage(); - _current_mileage = MethodData::mileage_of(mdo->method()); _invocation_counter = mdo->invocation_count(); - _backedge_counter = mdo->backedge_count(); _state = mdo->is_mature()? mature_state: immature_state; _eflags = mdo->eflags(); @@ -706,7 +700,7 @@ void ciMethodData::dump_replay_data(outputStream* out) { Method* method = mdo->method(); out->print("ciMethodData "); ciMethod::dump_name_as_ascii(out, method); - out->print(" %d %d", _state, current_mileage()); + out->print(" %d %d", _state, _invocation_counter); // dump the contents of the MDO header as raw data unsigned char* orig = (unsigned char*)&_orig; diff --git a/src/hotspot/share/ci/ciMethodData.hpp b/src/hotspot/share/ci/ciMethodData.hpp index c19bbfc2dc496e45296682b657ef307a19783cfb..783b2b8d94f79f83372bc644715d2f69e5f795a0 100644 --- a/src/hotspot/share/ci/ciMethodData.hpp +++ b/src/hotspot/share/ci/ciMethodData.hpp @@ -395,16 +395,10 @@ private: intx _arg_stack; // bit set of stack-allocatable arguments intx _arg_returned; // bit set of returned arguments - int _creation_mileage; // method mileage at MDO creation - - // Maturity of the oop when the snapshot is taken. - int _current_mileage; - // These counters hold the age of MDO in tiered. In tiered we can have the same method // running at different compilation levels concurrently. So, in order to precisely measure // its maturity we need separate counters. int _invocation_counter; - int _backedge_counter; // Coherent snapshot of original header. 
MethodData::CompilerCounters _orig; @@ -477,11 +471,7 @@ public: bool is_empty() { return _state == empty_state; } bool is_mature() { return _state == mature_state; } - int creation_mileage() { return _creation_mileage; } - int current_mileage() { return _current_mileage; } - int invocation_count() { return _invocation_counter; } - int backedge_count() { return _backedge_counter; } #if INCLUDE_RTM_OPT // return cached value diff --git a/src/hotspot/share/ci/ciReplay.cpp b/src/hotspot/share/ci/ciReplay.cpp index b1f5b82c678ef34681ef13904ade522b1a64551f..04b9b0a83f1fda5ee0b17f93521dc36094bd9baf 100644 --- a/src/hotspot/share/ci/ciReplay.cpp +++ b/src/hotspot/share/ci/ciReplay.cpp @@ -65,7 +65,7 @@ typedef struct _ciMethodDataRecord { const char* _signature; int _state; - int _current_mileage; + int _invocation_counter; intptr_t* _data; char* _orig_data; @@ -115,6 +115,7 @@ class CompileReplay : public StackObj { Handle _protection_domain; bool _protection_domain_initialized; Handle _loader; + int _version; GrowableArray<ciMethodRecord*> _ci_method_records; GrowableArray<ciMethodDataRecord*> _ci_method_data_records; @@ -159,6 +160,7 @@ class CompileReplay : public StackObj { _iklass = NULL; _entry_bci = 0; _comp_level = 0; + _version = 0; test(); } @@ -434,7 +436,7 @@ class CompileReplay : public StackObj { } else if (strcmp(dyno_ref, "<bsm>") == 0) { int pool_index = cp_cache_entry->constant_pool_index(); BootstrapInfo bootstrap_specifier(cp, pool_index, index); - obj = cp->resolve_possibly_cached_constant_at(bootstrap_specifier.bsm_index(), thread); + obj = cp->resolve_possibly_cached_constant_at(bootstrap_specifier.bsm_index(), CHECK_NULL); } else { report_error("unrecognized token"); return NULL; @@ -463,57 +465,57 @@ class CompileReplay : public StackObj { report_error("no method handle found at cpi"); return NULL; } - { - bool found_it; - obj = cp->find_cached_constant_at(cpi, found_it, thread); - } + ik->link_class(CHECK_NULL); + obj = cp->resolve_possibly_cached_constant_at(cpi, CHECK_NULL); + } + if (obj == NULL) { + report_error("null cp object found"); + return NULL; } Klass* k = NULL; - if (obj != NULL) { - skip_ws(); - // loop: read fields - char* field = NULL; - do { - field = parse_string(); - if (field == NULL) { - report_error("no field found"); + skip_ws(); + // loop: read fields + char* field = NULL; + do { + field = parse_string(); + if (field == NULL) { + report_error("no field found"); + return NULL; + } + if (strcmp(field, ";") == 0) { + break; + } + // raw Method* + if (strcmp(field, "<vmtarget>") == 0) { + Method* vmtarget = java_lang_invoke_MemberName::vmtarget(obj); + k = (vmtarget == NULL) ? NULL : vmtarget->method_holder(); + if (k == NULL) { + report_error("null vmtarget found"); + return NULL; + } - if (strcmp(field, ";") == 0) { - break; - } - // raw Method* - if (strcmp(field, "<vmtarget>") == 0) { - Method* vmtarget = java_lang_invoke_MemberName::vmtarget(obj); - k = (vmtarget == NULL) ?
NULL : vmtarget->method_holder(); - if (k == NULL) { - report_error("null vmtarget found"); - return NULL; - } - if (!parse_terminator()) { - report_error("missing terminator"); - return NULL; - } - return k; + if (!parse_terminator()) { + report_error("missing terminator"); + return NULL; } - obj = ciReplay::obj_field(obj, field); - // array - if (obj != NULL && obj->is_objArray()) { - objArrayOop arr = (objArrayOop)obj; - int index = parse_int("index"); - if (index >= arr->length()) { - report_error("bad array index"); - return NULL; - } - obj = arr->obj_at(index); + return k; + } + obj = ciReplay::obj_field(obj, field); + // array + if (obj != NULL && obj->is_objArray()) { + objArrayOop arr = (objArrayOop)obj; + int index = parse_int("index"); + if (index >= arr->length()) { + report_error("bad array index"); + return NULL; } - } while (obj != NULL); - if (obj == NULL) { - report_error("null field found"); - return NULL; + obj = arr->obj_at(index); } - k = obj->klass(); + } while (obj != NULL); + if (obj == NULL) { + report_error("null field found"); + return NULL; } + k = obj->klass(); return k; } @@ -638,6 +640,11 @@ class CompileReplay : public StackObj { tty->print_cr("# %s", _bufptr); } skip_remaining(); + } else if (strcmp("version", cmd) == 0) { + _version = parse_int("version"); + if (_version < 0 || _version > REPLAY_VERSION) { + tty->print_cr("# unrecognized version %d, expected 0 <= version <= %d", _version, REPLAY_VERSION); + } } else if (strcmp("compile", cmd) == 0) { process_compile(CHECK); } else if (strcmp("ciMethod", cmd) == 0) { @@ -802,7 +809,7 @@ class CompileReplay : public StackObj { rec->_instructions_size = parse_int("instructions_size"); } - // ciMethodData <method> <state> <current_mileage> orig <length> <byte>* data <length> <ptr>* oops <length> (<offset> <klass>)* methods (<offset> <method>)* + // ciMethodData <method> <state> <invocation_counter> orig <length> <byte>* data <length> <ptr>* oops <length> (<offset> <klass>)* methods (<offset> <method>)* void process_ciMethodData(TRAPS) { Method* method = parse_method(CHECK); if (had_error()) return; @@ -827,7 +834,11 @@ class CompileReplay : public StackObj { // collect and record all the needed information for later ciMethodDataRecord* rec = new_ciMethodData(method); rec->_state = parse_int("state"); - rec->_current_mileage = parse_int("current_mileage"); + if (_version < 1) { + parse_int("current_mileage"); + } else { + rec->_invocation_counter = parse_int("invocation_counter"); + } rec->_orig_data = parse_data("orig", rec->_orig_data_length); if (rec->_orig_data == NULL) {
+ // Use the same protection domain to load all subsequent classes in order to resolve all classes + // in signatures of inlinees. This ensures that inlining can be done as stated in the replay file. + _protection_domain = Handle(_thread, k->protection_domain()); + } + + _protection_domain_initialized = true; + } if (k == NULL) { return; @@ -915,6 +927,7 @@ class CompileReplay : public StackObj { void process_ciInstanceKlass(TRAPS) { InstanceKlass* k = (InstanceKlass*)parse_klass(CHECK); if (k == NULL) { + skip_remaining(); return; } int is_linked = parse_int("is_linked"); @@ -1413,7 +1426,7 @@ void ciReplay::initialize(ciMethodData* m) { tty->cr(); } else { m->_state = rec->_state; - m->_current_mileage = rec->_current_mileage; + m->_invocation_counter = rec->_invocation_counter; if (rec->_data_length != 0) { assert(m->_data_size + m->_extra_data_size == rec->_data_length * (int)sizeof(rec->_data[0]) || m->_data_size == rec->_data_length * (int)sizeof(rec->_data[0]), "must agree"); diff --git a/src/hotspot/share/ci/ciReplay.hpp b/src/hotspot/share/ci/ciReplay.hpp index e7bd626fea592097192ecdf636f7d7b5d79030fb..187f47497bdda827474d207c2a14f9a6c88bd3a2 100644 --- a/src/hotspot/share/ci/ciReplay.hpp +++ b/src/hotspot/share/ci/ciReplay.hpp @@ -131,4 +131,10 @@ class ciReplay { }; +// Replay file format version history +// 0: legacy (no version number) +// 1: first instanceKlass sets protection domain (8275868) +// replace current_mileage with invocation_count (8276095) +#define REPLAY_VERSION 1 // current version, bump up for incompatible changes + #endif // SHARE_CI_CIREPLAY_HPP diff --git a/src/hotspot/share/classfile/systemDictionaryShared.cpp b/src/hotspot/share/classfile/systemDictionaryShared.cpp index 48f8147b6ee2c7df63a461bcacbf3d1362273371..3e4bd3cfb34f455dd3b9b0344392045efbfcd7bd 100644 --- a/src/hotspot/share/classfile/systemDictionaryShared.cpp +++ b/src/hotspot/share/classfile/systemDictionaryShared.cpp @@ -245,6 +245,14 @@ bool SystemDictionaryShared::is_registered_lambda_proxy_class(InstanceKlass* ik) return (info != NULL) ? info->_is_archived_lambda_proxy : false; } +void SystemDictionaryShared::reset_registered_lambda_proxy_class(InstanceKlass* ik) { + DumpTimeClassInfo* info = _dumptime_table->get(ik); + if (info != NULL) { + info->_is_archived_lambda_proxy = false; + info->set_excluded(); + } +} + bool SystemDictionaryShared::is_early_klass(InstanceKlass* ik) { DumpTimeClassInfo* info = _dumptime_table->get(ik); return (info != NULL) ? 
info->is_early_klass() : false; @@ -325,6 +333,7 @@ bool SystemDictionaryShared::check_for_exclusion_impl(InstanceKlass* k) { for (int i = 0; i < len; i++) { InstanceKlass* intf = interfaces->at(i); if (check_for_exclusion(intf, NULL)) { + ResourceMark rm; log_warning(cds)("Skipping %s: interface %s is excluded", k->name()->as_C_string(), intf->name()->as_C_string()); return true; } @@ -663,6 +672,8 @@ void SystemDictionaryShared::check_excluded_classes() { ExcludeDumpTimeSharedClasses excl; _dumptime_table->iterate(&excl); _dumptime_table->update_counts(); + + cleanup_lambda_proxy_class_dictionary(); } bool SystemDictionaryShared::is_excluded_class(InstanceKlass* k) { @@ -1602,9 +1613,19 @@ class CleanupDumpTimeLambdaProxyClassTable: StackObj { public: bool do_entry(LambdaProxyClassKey& key, DumpTimeLambdaProxyClassInfo& info) { assert_lock_strong(DumpTimeTable_lock); - for (int i = 0; i < info._proxy_klasses->length(); i++) { + InstanceKlass* caller_ik = key.caller_ik(); + if (SystemDictionaryShared::check_for_exclusion(caller_ik, NULL)) { + // If the caller class is excluded, unregister all the associated lambda proxy classes + // so that they will not be included in the CDS archive. + for (int i = info._proxy_klasses->length() - 1; i >= 0; i--) { + SystemDictionaryShared::reset_registered_lambda_proxy_class(info._proxy_klasses->at(i)); + info._proxy_klasses->remove_at(i); + } + } + for (int i = info._proxy_klasses->length() - 1; i >= 0; i--) { InstanceKlass* ik = info._proxy_klasses->at(i); - if (!ik->can_be_verified_at_dumptime()) { + if (SystemDictionaryShared::check_for_exclusion(ik, NULL)) { + SystemDictionaryShared::reset_registered_lambda_proxy_class(ik); info._proxy_klasses->remove_at(i); } } diff --git a/src/hotspot/share/classfile/systemDictionaryShared.hpp b/src/hotspot/share/classfile/systemDictionaryShared.hpp index 039b9d01e9a3425cab23c662d5a627ed47c6f786..bbd76c8d7b75f429f8a72ebfe2d98e90bb71a6c2 100644 --- a/src/hotspot/share/classfile/systemDictionaryShared.hpp +++ b/src/hotspot/share/classfile/systemDictionaryShared.hpp @@ -136,6 +136,7 @@ class SharedClassLoadingMark { class SystemDictionaryShared: public SystemDictionary { friend class ExcludeDumpTimeSharedClasses; + friend class CleanupDumpTimeLambdaProxyClassTable; public: enum { FROM_FIELD_IS_PROTECTED = 1 << 0, @@ -173,6 +174,8 @@ private: static void write_dictionary(RunTimeSharedDictionary* dictionary, bool is_builtin); static void write_lambda_proxy_class_dictionary(LambdaProxyClassDictionary* dictionary); + static void cleanup_lambda_proxy_class_dictionary(); + static void reset_registered_lambda_proxy_class(InstanceKlass* ik); static bool is_jfr_event_class(InstanceKlass *k); static bool is_registered_lambda_proxy_class(InstanceKlass* ik); static bool check_for_exclusion_impl(InstanceKlass* k); @@ -288,7 +291,6 @@ public: static size_t estimate_size_for_archive(); static void write_to_archive(bool is_static_archive = true); static void adjust_lambda_proxy_class_dictionary(); - static void cleanup_lambda_proxy_class_dictionary(); static void serialize_dictionary_headers(class SerializeClosure* soc, bool is_static_archive = true); static void serialize_vm_classes(class SerializeClosure* soc); diff --git a/src/hotspot/share/classfile/vmIntrinsics.cpp b/src/hotspot/share/classfile/vmIntrinsics.cpp index 88f38f406ad24607405b45daf74e10e7f5dfb7ae..cc3dc1ebdccf58fc689140cfcc02f432ec07dfef 100644 --- a/src/hotspot/share/classfile/vmIntrinsics.cpp +++ b/src/hotspot/share/classfile/vmIntrinsics.cpp @@ -33,13 +33,13 @@ 
// These are flag-matching functions: inline bool match_F_R(jshort flags) { const int req = 0; - const int neg = JVM_ACC_STATIC | JVM_ACC_SYNCHRONIZED; + const int neg = JVM_ACC_STATIC | JVM_ACC_SYNCHRONIZED | JVM_ACC_NATIVE; return (flags & (req | neg)) == req; } inline bool match_F_Y(jshort flags) { const int req = JVM_ACC_SYNCHRONIZED; - const int neg = JVM_ACC_STATIC; + const int neg = JVM_ACC_STATIC | JVM_ACC_NATIVE; return (flags & (req | neg)) == req; } @@ -51,7 +51,7 @@ inline bool match_F_RN(jshort flags) { inline bool match_F_S(jshort flags) { const int req = JVM_ACC_STATIC; - const int neg = JVM_ACC_SYNCHRONIZED; + const int neg = JVM_ACC_SYNCHRONIZED | JVM_ACC_NATIVE; return (flags & (req | neg)) == req; } diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp index 5c79db8a41c585d594b0a0313e9717089734783f..08cc2bec0e697dcb5c767918dd89ce789ea36cfc 100644 --- a/src/hotspot/share/classfile/vmIntrinsics.hpp +++ b/src/hotspot/share/classfile/vmIntrinsics.hpp @@ -107,15 +107,15 @@ class methodHandle; // add the declaration of the intrinsic to the approriate section of the list. #define VM_INTRINSICS_DO(do_intrinsic, do_class, do_name, do_signature, do_alias) \ /* (1) Library intrinsics */ \ - do_intrinsic(_hashCode, java_lang_Object, hashCode_name, void_int_signature, F_R) \ + do_intrinsic(_hashCode, java_lang_Object, hashCode_name, void_int_signature, F_RN) \ do_name( hashCode_name, "hashCode") \ - do_intrinsic(_getClass, java_lang_Object, getClass_name, void_class_signature, F_R) \ + do_intrinsic(_getClass, java_lang_Object, getClass_name, void_class_signature, F_RN) \ do_name( getClass_name, "getClass") \ - do_intrinsic(_clone, java_lang_Object, clone_name, void_object_signature, F_R) \ + do_intrinsic(_clone, java_lang_Object, clone_name, void_object_signature, F_RN) \ do_name( clone_name, "clone") \ - do_intrinsic(_notify, java_lang_Object, notify_name, void_method_signature, F_R) \ + do_intrinsic(_notify, java_lang_Object, notify_name, void_method_signature, F_RN) \ do_name( notify_name, "notify") \ - do_intrinsic(_notifyAll, java_lang_Object, notifyAll_name, void_method_signature, F_R) \ + do_intrinsic(_notifyAll, java_lang_Object, notifyAll_name, void_method_signature, F_RN) \ do_name( notifyAll_name, "notifyAll") \ \ /* Math & StrictMath intrinsics are defined in terms of just a few signatures: */ \ @@ -200,17 +200,17 @@ class methodHandle; /* Special flavor of dsqrt intrinsic to handle the "native" method in StrictMath. Otherwise the same as in Math. 
*/ \ do_intrinsic(_dsqrt_strict, java_lang_StrictMath, sqrt_name, double_double_signature, F_SN) \ \ - do_intrinsic(_floatToRawIntBits, java_lang_Float, floatToRawIntBits_name, float_int_signature, F_S) \ + do_intrinsic(_floatToRawIntBits, java_lang_Float, floatToRawIntBits_name, float_int_signature, F_SN) \ do_name( floatToRawIntBits_name, "floatToRawIntBits") \ do_intrinsic(_floatToIntBits, java_lang_Float, floatToIntBits_name, float_int_signature, F_S) \ do_name( floatToIntBits_name, "floatToIntBits") \ - do_intrinsic(_intBitsToFloat, java_lang_Float, intBitsToFloat_name, int_float_signature, F_S) \ + do_intrinsic(_intBitsToFloat, java_lang_Float, intBitsToFloat_name, int_float_signature, F_SN) \ do_name( intBitsToFloat_name, "intBitsToFloat") \ - do_intrinsic(_doubleToRawLongBits, java_lang_Double, doubleToRawLongBits_name, double_long_signature, F_S) \ + do_intrinsic(_doubleToRawLongBits, java_lang_Double, doubleToRawLongBits_name, double_long_signature, F_SN)\ do_name( doubleToRawLongBits_name, "doubleToRawLongBits") \ do_intrinsic(_doubleToLongBits, java_lang_Double, doubleToLongBits_name, double_long_signature, F_S) \ do_name( doubleToLongBits_name, "doubleToLongBits") \ - do_intrinsic(_longBitsToDouble, java_lang_Double, longBitsToDouble_name, long_double_signature, F_S) \ + do_intrinsic(_longBitsToDouble, java_lang_Double, longBitsToDouble_name, long_double_signature, F_SN)\ do_name( longBitsToDouble_name, "longBitsToDouble") \ \ do_intrinsic(_numberOfLeadingZeros_i, java_lang_Integer, numberOfLeadingZeros_name,int_int_signature, F_S) \ @@ -231,20 +231,20 @@ class methodHandle; do_intrinsic(_reverseBytes_s, java_lang_Short, reverseBytes_name, short_short_signature, F_S) \ /* (symbol reverseBytes_name defined above) */ \ \ - do_intrinsic(_identityHashCode, java_lang_System, identityHashCode_name, object_int_signature, F_S) \ + do_intrinsic(_identityHashCode, java_lang_System, identityHashCode_name, object_int_signature, F_SN) \ do_name( identityHashCode_name, "identityHashCode") \ - do_intrinsic(_currentTimeMillis, java_lang_System, currentTimeMillis_name, void_long_signature, F_S) \ + do_intrinsic(_currentTimeMillis, java_lang_System, currentTimeMillis_name, void_long_signature, F_SN) \ \ do_name( currentTimeMillis_name, "currentTimeMillis") \ - do_intrinsic(_nanoTime, java_lang_System, nanoTime_name, void_long_signature, F_S) \ + do_intrinsic(_nanoTime, java_lang_System, nanoTime_name, void_long_signature, F_SN) \ do_name( nanoTime_name, "nanoTime") \ \ JFR_INTRINSICS(do_intrinsic, do_class, do_name, do_signature, do_alias) \ \ - do_intrinsic(_arraycopy, java_lang_System, arraycopy_name, arraycopy_signature, F_S) \ + do_intrinsic(_arraycopy, java_lang_System, arraycopy_name, arraycopy_signature, F_SN) \ do_name( arraycopy_name, "arraycopy") \ do_signature(arraycopy_signature, "(Ljava/lang/Object;ILjava/lang/Object;II)V") \ - do_intrinsic(_currentThread, java_lang_Thread, currentThread_name, currentThread_signature, F_S) \ + do_intrinsic(_currentThread, java_lang_Thread, currentThread_name, currentThread_signature, F_SN) \ do_name( currentThread_name, "currentThread") \ do_signature(currentThread_signature, "()Ljava/lang/Thread;") \ \ @@ -401,9 +401,9 @@ class methodHandle; do_signature(vectorizedMismatch_signature, "(Ljava/lang/Object;JLjava/lang/Object;JII)I") \ \ /* java/lang/ref/Reference */ \ - do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \ - do_intrinsic(_Reference_refersTo0, java_lang_ref_Reference, refersTo0_name, 
object_boolean_signature, F_R) \ - do_intrinsic(_PhantomReference_refersTo0, java_lang_ref_PhantomReference, refersTo0_name, object_boolean_signature, F_R) \ + do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \ + do_intrinsic(_Reference_refersTo0, java_lang_ref_Reference, refersTo0_name, object_boolean_signature, F_RN) \ + do_intrinsic(_PhantomReference_refersTo0, java_lang_ref_PhantomReference, refersTo0_name, object_boolean_signature, F_RN) \ \ /* support for com.sun.crypto.provider.AESCrypt and some of its callers */ \ do_class(com_sun_crypto_provider_aescrypt, "com/sun/crypto/provider/AESCrypt") \ @@ -832,129 +832,286 @@ class methodHandle; /* Vector API intrinsification support */ \ \ do_intrinsic(_VectorUnaryOp, jdk_internal_vm_vector_VectorSupport, vector_unary_op_name, vector_unary_op_sig, F_S) \ - do_signature(vector_unary_op_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/util/function/Function;)Ljava/lang/Object;") \ + do_signature(vector_unary_op_sig, "(I" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;Ljava/lang/Class;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ + "Ljdk/internal/vm/vector/VectorSupport$UnaryOperation;)" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;") \ do_name(vector_unary_op_name, "unaryOp") \ \ do_intrinsic(_VectorBinaryOp, jdk_internal_vm_vector_VectorSupport, vector_binary_op_name, vector_binary_op_sig, F_S) \ - do_signature(vector_binary_op_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/lang/Object;" \ - "Ljava/util/function/BiFunction;)Ljava/lang/Object;") \ + do_signature(vector_binary_op_sig, "(I" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorPayload;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorPayload;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ + "Ljdk/internal/vm/vector/VectorSupport$BinaryOperation;)" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \ do_name(vector_binary_op_name, "binaryOp") \ \ do_intrinsic(_VectorTernaryOp, jdk_internal_vm_vector_VectorSupport, vector_ternary_op_name, vector_ternary_op_sig, F_S) \ - do_signature(vector_ternary_op_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/lang/Object;" \ - "Ljava/lang/Object;Ljdk/internal/vm/vector/VectorSupport$TernaryOperation;)Ljava/lang/Object;") \ + do_signature(vector_ternary_op_sig, "(I" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ + "Ljdk/internal/vm/vector/VectorSupport$TernaryOperation;)" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;") \ do_name(vector_ternary_op_name, "ternaryOp") \ \ do_intrinsic(_VectorBroadcastCoerced, jdk_internal_vm_vector_VectorSupport, vector_broadcast_coerced_name, vector_broadcast_coerced_sig, F_S)\ - do_signature(vector_broadcast_coerced_sig, "(Ljava/lang/Class;Ljava/lang/Class;IJLjdk/internal/vm/vector/VectorSupport$VectorSpecies;" \ - "Ljdk/internal/vm/vector/VectorSupport$BroadcastOperation;)Ljava/lang/Object;") \ + do_signature(vector_broadcast_coerced_sig, "(Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "J" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \ + 
"Ljdk/internal/vm/vector/VectorSupport$BroadcastOperation;)" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \ do_name(vector_broadcast_coerced_name, "broadcastCoerced") \ \ do_intrinsic(_VectorShuffleIota, jdk_internal_vm_vector_VectorSupport, vector_shuffle_step_iota_name, vector_shuffle_step_iota_sig, F_S) \ - do_signature(vector_shuffle_step_iota_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \ - "IIIILjdk/internal/vm/vector/VectorSupport$ShuffleIotaOperation;)Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;") \ + do_signature(vector_shuffle_step_iota_sig, "(Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \ + "IIII" \ + "Ljdk/internal/vm/vector/VectorSupport$ShuffleIotaOperation;)" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;") \ do_name(vector_shuffle_step_iota_name, "shuffleIota") \ \ do_intrinsic(_VectorShuffleToVector, jdk_internal_vm_vector_VectorSupport, vector_shuffle_to_vector_name, vector_shuffle_to_vector_sig, F_S) \ - do_signature(vector_shuffle_to_vector_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \ - "ILjdk/internal/vm/vector/VectorSupport$ShuffleToVectorOperation;)Ljava/lang/Object;") \ + do_signature(vector_shuffle_to_vector_sig, "(Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \ + "ILjdk/internal/vm/vector/VectorSupport$ShuffleToVectorOperation;)" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;") \ do_name(vector_shuffle_to_vector_name, "shuffleToVector") \ \ do_intrinsic(_VectorLoadOp, jdk_internal_vm_vector_VectorSupport, vector_load_op_name, vector_load_op_sig, F_S) \ - do_signature(vector_load_op_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JLjava/lang/Object;" \ - "ILjdk/internal/vm/vector/VectorSupport$VectorSpecies;Ljdk/internal/vm/vector/VectorSupport$LoadOperation;)Ljava/lang/Object;") \ + do_signature(vector_load_op_sig, "(Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljava/lang/Object;" \ + "J" \ + "Ljava/lang/Object;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \ + "Ljdk/internal/vm/vector/VectorSupport$LoadOperation;)" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \ do_name(vector_load_op_name, "load") \ \ + do_intrinsic(_VectorLoadMaskedOp, jdk_internal_vm_vector_VectorSupport, vector_load_masked_op_name, vector_load_masked_op_sig, F_S) \ + do_signature(vector_load_masked_op_sig, "(Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljava/lang/Object;" \ + "J" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ + "Ljava/lang/Object;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \ + "Ljdk/internal/vm/vector/VectorSupport$LoadVectorMaskedOperation;)" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;") \ + do_name(vector_load_masked_op_name, "loadMasked") \ + \ do_intrinsic(_VectorStoreOp, jdk_internal_vm_vector_VectorSupport, vector_store_op_name, vector_store_op_sig, F_S) \ - do_signature(vector_store_op_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$Vector;" \ - "Ljava/lang/Object;ILjdk/internal/vm/vector/VectorSupport$StoreVectorOperation;)V") \ + do_signature(vector_store_op_sig, "(Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljava/lang/Object;" \ + "J" \ + 
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljava/lang/Object;ILjdk/internal/vm/vector/VectorSupport$StoreVectorOperation;)" \ + "V") \ do_name(vector_store_op_name, "store") \ \ - do_intrinsic(_VectorReductionCoerced, jdk_internal_vm_vector_VectorSupport, vector_reduction_coerced_name, vector_reduction_coerced_sig, F_S) \ - do_signature(vector_reduction_coerced_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjdk/internal/vm/vector/VectorSupport$Vector;Ljava/util/function/Function;)J") \ + do_intrinsic(_VectorStoreMaskedOp, jdk_internal_vm_vector_VectorSupport, vector_store_masked_op_name, vector_store_masked_op_sig, F_S) \ + do_signature(vector_store_masked_op_sig, "(Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljava/lang/Object;" \ + "J" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ + "Ljava/lang/Object;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$StoreVectorMaskedOperation;)" \ + "V") \ + do_name(vector_store_masked_op_name, "storeMasked") \ + \ + do_intrinsic(_VectorReductionCoerced, jdk_internal_vm_vector_VectorSupport, vector_reduction_coerced_name, vector_reduction_coerced_sig, F_S)\ + do_signature(vector_reduction_coerced_sig, "(I" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ + "Ljdk/internal/vm/vector/VectorSupport$ReductionOperation;)" \ + "J") \ do_name(vector_reduction_coerced_name, "reductionCoerced") \ \ do_intrinsic(_VectorTest, jdk_internal_vm_vector_VectorSupport, vector_test_name, vector_test_sig, F_S) \ - do_signature(vector_test_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/lang/Object;Ljava/util/function/BiFunction;)Z") \ + do_signature(vector_test_sig, "(I" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ + "Ljava/util/function/BiFunction;)" \ + "Z") \ do_name(vector_test_name, "test") \ \ do_intrinsic(_VectorBlend, jdk_internal_vm_vector_VectorSupport, vector_blend_name, vector_blend_sig, F_S) \ - do_signature(vector_blend_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;I" \ - "Ljdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ - "Ljdk/internal/vm/vector/VectorSupport$VectorBlendOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;") \ + do_signature(vector_blend_sig, "(Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorBlendOp;)" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;") \ do_name(vector_blend_name, "blend") \ \ do_intrinsic(_VectorCompare, jdk_internal_vm_vector_VectorSupport, vector_compare_name, vector_compare_sig, F_S) \ - do_signature(vector_compare_sig, "(ILjava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;I" \ - "Ljdk/internal/vm/vector/VectorSupport$Vector;" "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ - "Ljdk/internal/vm/vector/VectorSupport$VectorCompareOp;" ")" "Ljdk/internal/vm/vector/VectorSupport$VectorMask;") \ + do_signature(vector_compare_sig, "(I" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;Ljava/lang/Class;" \ + 
"I" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorCompareOp;)" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;") \ do_name(vector_compare_name, "compare") \ \ do_intrinsic(_VectorRearrange, jdk_internal_vm_vector_VectorSupport, vector_rearrange_name, vector_rearrange_sig, F_S) \ - do_signature(vector_rearrange_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;I" \ - "Ljdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \ - "Ljdk/internal/vm/vector/VectorSupport$VectorRearrangeOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;") \ + do_signature(vector_rearrange_sig, "(Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorRearrangeOp;)" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;") \ do_name(vector_rearrange_name, "rearrangeOp") \ \ do_intrinsic(_VectorExtract, jdk_internal_vm_vector_VectorSupport, vector_extract_name, vector_extract_sig, F_S) \ - do_signature(vector_extract_sig, "(Ljava/lang/Class;Ljava/lang/Class;I" \ - "Ljdk/internal/vm/vector/VectorSupport$Vector;I" \ - "Ljdk/internal/vm/vector/VectorSupport$VecExtractOp;)J") \ + do_signature(vector_extract_sig, "(Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$VecExtractOp;)" \ + "J") \ do_name(vector_extract_name, "extract") \ \ do_intrinsic(_VectorInsert, jdk_internal_vm_vector_VectorSupport, vector_insert_name, vector_insert_sig, F_S) \ - do_signature(vector_insert_sig, "(Ljava/lang/Class;Ljava/lang/Class;I" \ - "Ljdk/internal/vm/vector/VectorSupport$Vector;IJ" \ - "Ljdk/internal/vm/vector/VectorSupport$VecInsertOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;") \ + do_signature(vector_insert_sig, "(Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "IJ" \ + "Ljdk/internal/vm/vector/VectorSupport$VecInsertOp;)" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;") \ do_name(vector_insert_name, "insert") \ \ do_intrinsic(_VectorBroadcastInt, jdk_internal_vm_vector_VectorSupport, vector_broadcast_int_name, vector_broadcast_int_sig, F_S) \ - do_signature(vector_broadcast_int_sig, "(ILjava/lang/Class;Ljava/lang/Class;I" \ - "Ljdk/internal/vm/vector/VectorSupport$Vector;I" \ - "Ljdk/internal/vm/vector/VectorSupport$VectorBroadcastIntOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;") \ + do_signature(vector_broadcast_int_sig, "(I" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorBroadcastIntOp;)" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;") \ do_name(vector_broadcast_int_name, "broadcastInt") \ \ do_intrinsic(_VectorConvert, jdk_internal_vm_vector_VectorSupport, vector_convert_name, vector_convert_sig, F_S) \ - do_signature(vector_convert_sig, "(ILjava/lang/Class;Ljava/lang/Class;I" \ - "Ljava/lang/Class;Ljava/lang/Class;I" \ + do_signature(vector_convert_sig, "(I" \ + "Ljava/lang/Class;" \ + 
"Ljava/lang/Class;" \ + "I" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ "Ljdk/internal/vm/vector/VectorSupport$VectorPayload;" \ "Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \ - "Ljdk/internal/vm/vector/VectorSupport$VectorConvertOp;)Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \ + "Ljdk/internal/vm/vector/VectorSupport$VectorConvertOp;)" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \ do_name(vector_convert_name, "convert") \ \ do_intrinsic(_VectorGatherOp, jdk_internal_vm_vector_VectorSupport, vector_gather_name, vector_gather_sig, F_S) \ - do_signature(vector_gather_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Class;" \ - "Ljava/lang/Object;J" \ + do_signature(vector_gather_sig, "(Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Object;" \ + "J" \ "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ - "Ljava/lang/Object;I[II" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ + "Ljava/lang/Object;" \ + "I[II" \ "Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \ "Ljdk/internal/vm/vector/VectorSupport$LoadVectorOperationWithMap;)" \ "Ljdk/internal/vm/vector/VectorSupport$Vector;") \ do_name(vector_gather_name, "loadWithMap") \ \ do_intrinsic(_VectorScatterOp, jdk_internal_vm_vector_VectorSupport, vector_scatter_name, vector_scatter_sig, F_S) \ - do_signature(vector_scatter_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Class;" \ - "Ljava/lang/Object;J" \ - "Ljdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$Vector;" \ - "Ljava/lang/Object;I[II" \ - "Ljdk/internal/vm/vector/VectorSupport$StoreVectorOperationWithMap;)V") \ + do_signature(vector_scatter_sig, "(Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Object;" \ + "J" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;Ljava/lang/Object;" \ + "I[II" \ + "Ljdk/internal/vm/vector/VectorSupport$StoreVectorOperationWithMap;)" \ + "V") \ do_name(vector_scatter_name, "storeWithMap") \ \ do_intrinsic(_VectorRebox, jdk_internal_vm_vector_VectorSupport, vector_rebox_name, vector_rebox_sig, F_S) \ - do_alias(vector_rebox_sig, object_object_signature) \ + do_signature(vector_rebox_sig, "(Ljdk/internal/vm/vector/VectorSupport$VectorPayload;)" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \ do_name(vector_rebox_name, "maybeRebox") \ \ do_intrinsic(_VectorMaskOp, jdk_internal_vm_vector_VectorSupport, vector_mask_oper_name, vector_mask_oper_sig, F_S) \ - do_signature(vector_mask_oper_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;" \ - "Ljdk/internal/vm/vector/VectorSupport$VectorMaskOp;)I") \ + do_signature(vector_mask_oper_sig, "(I" \ + "Ljava/lang/Class;" \ + "Ljava/lang/Class;" \ + "I" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ + "Ljdk/internal/vm/vector/VectorSupport$VectorMaskOp;)" \ + "J") \ do_name(vector_mask_oper_name, "maskReductionCoerced") \ \ /* (2) Bytecode intrinsics */ \ \ - do_intrinsic(_park, jdk_internal_misc_Unsafe, park_name, park_signature, F_R) \ + do_intrinsic(_park, jdk_internal_misc_Unsafe, park_name, park_signature, F_RN) \ do_name( park_name, "park") \ do_signature(park_signature, "(ZJ)V") \ - do_intrinsic(_unpark, jdk_internal_misc_Unsafe, unpark_name, unpark_signature, F_R) \ + do_intrinsic(_unpark, jdk_internal_misc_Unsafe, unpark_name, 
unpark_signature, F_RN) \ do_name( unpark_name, "unpark") \ do_alias( unpark_signature, /*(LObject;)V*/ object_void_signature) \ \ @@ -1090,9 +1247,9 @@ class vmIntrinsics : AllStatic { enum Flags { // AccessFlags syndromes relevant to intrinsics. F_none = 0, - F_R, // !static ?native !synchronized (R="regular") - F_S, // static ?native !synchronized - F_Y, // !static ?native synchronized + F_R, // !static !native !synchronized (R="regular") + F_S, // static !native !synchronized + F_Y, // !static !native synchronized F_RN, // !static native !synchronized F_SN, // static native !synchronized @@ -1102,6 +1259,50 @@ class vmIntrinsics : AllStatic { log2_FLAG_LIMIT = 3 // checked by an assert at start-up }; + static constexpr bool is_flag_static(Flags flags) { + switch (flags) { + case F_S: + case F_SN: + return true; + case F_R: + case F_Y: + case F_RN: + return false; + default: + ShouldNotReachHere(); + return false; + } + } + + static constexpr bool is_flag_synchronized(Flags flags) { + switch (flags) { + case F_Y: + return true; + case F_RN: + case F_SN: + case F_S: + case F_R: + return false; + default: + ShouldNotReachHere(); + return false; + } + } + + static constexpr bool is_flag_native(Flags flags) { + switch (flags) { + case F_RN: + case F_SN: + return true; + case F_S: + case F_R: + case F_Y: + return false; + default: + ShouldNotReachHere(); + return false; + } + } // Convert an arbitrary vmIntrinsicID to int (checks validity): // vmIntrinsicID x = ...; int n = vmIntrinsics::as_int(x); @@ -1153,9 +1354,15 @@ public: // ID _none does not hold the following asserts. if (id == _none) return id; #endif - assert( class_for(id) == holder, "correct id"); - assert( name_for(id) == name, "correct id"); - assert(signature_for(id) == sig, "correct id"); + assert( class_for(id) == holder, "correct class: %s", name_at(id)); + assert( name_for(id) == name, "correct name: %s", name_at(id)); + assert(signature_for(id) == sig, "correct signature: %s", name_at(id)); + assert( is_flag_static(flags_for(id)) == ((flags & JVM_ACC_STATIC) != 0), + "correct static flag: %s", name_at(id)); + assert(is_flag_synchronized(flags_for(id)) == ((flags & JVM_ACC_SYNCHRONIZED) != 0), + "correct synchronized flag: %s", name_at(id)); + assert( is_flag_native(flags_for(id)) == ((flags & JVM_ACC_NATIVE) != 0), + "correct native flag: %s", name_at(id)); return id; } @@ -1164,7 +1371,7 @@ public: static vmSymbolID class_for(ID id); static vmSymbolID name_for(ID id); static vmSymbolID signature_for(ID id); - static Flags flags_for(ID id); + static Flags flags_for(ID id); #endif static bool class_has_intrinsics(vmSymbolID holder); diff --git a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp index f695f242231209c1c07af21130863cad5ad8a591..543892bc434aa2d2072073579486ea7550e0dbc9 100644 --- a/src/hotspot/share/code/nmethod.cpp +++ b/src/hotspot/share/code/nmethod.cpp @@ -924,7 +924,7 @@ void nmethod::print_on(outputStream* st, const char* msg) const { CompileTask::print(st, this, msg, /*short_form:*/ true); st->print_cr(" (" INTPTR_FORMAT ")", p2i(this)); } else { - CompileTask::print(st, this, msg, /*short_form:*/ false, /* cr */ true, /* timestamp */ false); + CompileTask::print(st, this, msg, /*short_form:*/ false); } } } diff --git a/src/hotspot/share/compiler/compileBroker.cpp b/src/hotspot/share/compiler/compileBroker.cpp index 810706ff1dd2d5e5a5434a978054461ce019f33d..fbb295b4c4d9e423c8f4c34f6644a6e0e6615708 100644 --- a/src/hotspot/share/compiler/compileBroker.cpp +++ 
b/src/hotspot/share/compiler/compileBroker.cpp @@ -2200,7 +2200,7 @@ void CompileBroker::invoke_compiler_on_method(CompileTask* task) { } // Allocate a new set of JNI handles. - push_jni_handle_block(); + JNIHandleMark jhm(thread); Method* target_handle = task->method(); int compilable = ciEnv::MethodCompilable; const char* failure_reason = NULL; @@ -2319,9 +2319,6 @@ void CompileBroker::invoke_compiler_on_method(CompileTask* task) { post_compilation_event(event, task); } } - // Remove the JNI handle block after the ciEnv destructor has run in - // the previous block. - pop_jni_handle_block(); if (failure_reason != NULL) { task->set_failure_reason(failure_reason, failure_reason_on_C_heap); @@ -2482,38 +2479,6 @@ void CompileBroker::update_compile_perf_data(CompilerThread* thread, const metho counters->set_compile_type((jlong) last_compile_type); } -// ------------------------------------------------------------------ -// CompileBroker::push_jni_handle_block -// -// Push on a new block of JNI handles. -void CompileBroker::push_jni_handle_block() { - JavaThread* thread = JavaThread::current(); - - // Allocate a new block for JNI handles. - // Inlined code from jni_PushLocalFrame() - JNIHandleBlock* java_handles = thread->active_handles(); - JNIHandleBlock* compile_handles = JNIHandleBlock::allocate_block(thread); - assert(compile_handles != NULL && java_handles != NULL, "should not be NULL"); - compile_handles->set_pop_frame_link(java_handles); // make sure java handles get gc'd. - thread->set_active_handles(compile_handles); -} - - -// ------------------------------------------------------------------ -// CompileBroker::pop_jni_handle_block -// -// Pop off the current block of JNI handles. -void CompileBroker::pop_jni_handle_block() { - JavaThread* thread = JavaThread::current(); - - // Release our JNI handle block - JNIHandleBlock* compile_handles = thread->active_handles(); - JNIHandleBlock* java_handles = compile_handles->pop_frame_link(); - thread->set_active_handles(java_handles); - compile_handles->set_pop_frame_link(NULL); - JNIHandleBlock::release_block(compile_handles, thread); // may block -} - // ------------------------------------------------------------------ // CompileBroker::collect_statistics // diff --git a/src/hotspot/share/compiler/compileBroker.hpp b/src/hotspot/share/compiler/compileBroker.hpp index a33b7ea0a780aad16af740111b09c52fbcc24665..33fde0dace6cdd7480dd9f5143ebbeb3f13fac4c 100644 --- a/src/hotspot/share/compiler/compileBroker.hpp +++ b/src/hotspot/share/compiler/compileBroker.hpp @@ -259,8 +259,6 @@ class CompileBroker: AllStatic { int compilable, const char* failure_reason); static void update_compile_perf_data(CompilerThread *thread, const methodHandle& method, bool is_osr); - static void push_jni_handle_block(); - static void pop_jni_handle_block(); static void collect_statistics(CompilerThread* thread, elapsedTimer time, CompileTask* task); static void compile_method_base(const methodHandle& method, diff --git a/src/hotspot/share/compiler/compileTask.cpp b/src/hotspot/share/compiler/compileTask.cpp index b78806346c5db44ff38163c72172ef9efd23add8..d610d8bdcf814730579233cf65bd9649566b2d25 100644 --- a/src/hotspot/share/compiler/compileTask.cpp +++ b/src/hotspot/share/compiler/compileTask.cpp @@ -236,13 +236,11 @@ void CompileTask::print_tty() { // CompileTask::print_impl void CompileTask::print_impl(outputStream* st, Method* method, int compile_id, int comp_level, bool is_osr_method, int osr_bci, bool is_blocking, - const char* msg, bool short_form, bool cr, 
bool timestamp, + const char* msg, bool short_form, bool cr, jlong time_queued, jlong time_started) { if (!short_form) { - if (timestamp) { - // Print current time - st->print("%7d ", (int)tty->time_stamp().milliseconds()); - } + // Print current time + st->print("%7d ", (int)tty->time_stamp().milliseconds()); if (Verbose && time_queued != 0) { // Print time in queue and time being processed by compiler thread jlong now = os::elapsed_counter(); diff --git a/src/hotspot/share/compiler/compileTask.hpp b/src/hotspot/share/compiler/compileTask.hpp index a900bfd4f44b29812186dd5e3584817dce2e06b1..6281dea7a4a319907355ca13264a8684e819b57f 100644 --- a/src/hotspot/share/compiler/compileTask.hpp +++ b/src/hotspot/share/compiler/compileTask.hpp @@ -187,16 +187,16 @@ class CompileTask : public CHeapObj<mtCompiler> { private: static void print_impl(outputStream* st, Method* method, int compile_id, int comp_level, bool is_osr_method = false, int osr_bci = -1, bool is_blocking = false, - const char* msg = NULL, bool short_form = false, bool cr = true, bool timestamp = true, + const char* msg = NULL, bool short_form = false, bool cr = true, jlong time_queued = 0, jlong time_started = 0); public: void print(outputStream* st = tty, const char* msg = NULL, bool short_form = false, bool cr = true); void print_ul(const char* msg = NULL); - static void print(outputStream* st, const nmethod* nm, const char* msg = NULL, bool short_form = false, bool cr = true, bool timestamp = true) { + static void print(outputStream* st, const nmethod* nm, const char* msg = NULL, bool short_form = false, bool cr = true) { print_impl(st, nm->method(), nm->compile_id(), nm->comp_level(), nm->is_osr_method(), nm->is_osr_method() ? nm->osr_entry_bci() : -1, /*is_blocking*/ false, - msg, short_form, cr, timestamp); + msg, short_form, cr); } static void print_ul(const nmethod* nm, const char* msg = NULL); diff --git a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp index 60a213aec77344ad488e75eedce6de892a255181..fbed1921182d2d1b00a4b61d581791ecf5ffbe5d 100644 --- a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp +++ b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp @@ -107,7 +107,7 @@ void G1BarrierSetC1::pre_barrier(LIRAccess& access, LIR_Opr addr_opr, __ branch_destination(slow->continuation()); } -void G1BarrierSetC1::post_barrier(LIRAccess& access, LIR_OprDesc* addr, LIR_OprDesc* new_val) { +void G1BarrierSetC1::post_barrier(LIRAccess& access, LIR_Opr addr, LIR_Opr new_val) { LIRGenerator* gen = access.gen(); DecoratorSet decorators = access.decorators(); bool in_heap = (decorators & IN_HEAP) != 0; @@ -152,13 +152,13 @@ void G1BarrierSetC1::post_barrier(LIRAccess& access, LIR_OprDesc* addr, LIR_OprD __ unsigned_shift_right(xor_shift_res, LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes), xor_shift_res, - LIR_OprDesc::illegalOpr()); + LIR_Opr::illegalOpr()); } else { __ logical_xor(addr, new_val, xor_res); __ unsigned_shift_right(xor_res, LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes), xor_shift_res, - LIR_OprDesc::illegalOpr()); + LIR_Opr::illegalOpr()); } if (!new_val->is_register()) { diff --git a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.hpp b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.hpp index 5284dd952e0829c7322cbc1bc37a53319f788b70..f6d3c2ab79f9bac52cf6005b939aa02a00cc1e63 100644 --- a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.hpp +++ b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.hpp @@ -120,7 +120,7 @@ class G1BarrierSetC1 : public ModRefBarrierSetC1 { virtual void pre_barrier(LIRAccess&
access, LIR_Opr addr_opr, LIR_Opr pre_val, CodeEmitInfo* info); - virtual void post_barrier(LIRAccess& access, LIR_OprDesc* addr, LIR_OprDesc* new_val); + virtual void post_barrier(LIRAccess& access, LIR_Opr addr, LIR_Opr new_val); virtual void load_at_resolved(LIRAccess& access, LIR_Opr result); diff --git a/src/hotspot/share/gc/g1/g1BlockOffsetTable.cpp b/src/hotspot/share/gc/g1/g1BlockOffsetTable.cpp index fd7ea33ea9291dbc59a087b2f5d7c5d8e04a6e62..dfb5da1a7a477618c459e9c38049804039cc645e 100644 --- a/src/hotspot/share/gc/g1/g1BlockOffsetTable.cpp +++ b/src/hotspot/share/gc/g1/g1BlockOffsetTable.cpp @@ -213,42 +213,6 @@ void G1BlockOffsetTablePart::check_all_cards(size_t start_card, size_t end_card) } } -HeapWord* G1BlockOffsetTablePart::forward_to_block_containing_addr_slow(HeapWord* q, - HeapWord* n, - const void* addr) { - // We're not in the normal case. We need to handle an important subcase - // here: LAB allocation. An allocation previously recorded in the - // offset table was actually a lab allocation, and was divided into - // several objects subsequently. Fix this situation as we answer the - // query, by updating entries as we cross them. - - // If the fist object's end q is at the card boundary. Start refining - // with the corresponding card (the value of the entry will be basically - // set to 0). If the object crosses the boundary -- start from the next card. - size_t n_index = _bot->index_for(n); - size_t next_index = _bot->index_for(n) + !_bot->is_card_boundary(n); - // Calculate a consistent next boundary. If "n" is not at the boundary - // already, step to the boundary. - HeapWord* next_boundary = _bot->address_for_index(n_index) + - (n_index == next_index ? 0 : BOTConstants::N_words); - assert(next_boundary <= _bot->_reserved.end(), - "next_boundary is beyond the end of the covered region " - " next_boundary " PTR_FORMAT " _array->_end " PTR_FORMAT, - p2i(next_boundary), p2i(_bot->_reserved.end())); - while (next_boundary < addr) { - while (n <= next_boundary) { - q = n; - oop obj = cast_to_oop(q); - if (obj->klass_or_null_acquire() == NULL) return q; - n += block_size(q); - } - assert(q <= next_boundary && n > next_boundary, "Consequence of loop"); - // [q, n) is the block that crosses the boundary. - alloc_block_work(&next_boundary, q, n); - } - return forward_to_block_containing_addr_const(q, n, addr); -} - // // threshold_ // | _index_ diff --git a/src/hotspot/share/gc/g1/g1BlockOffsetTable.hpp b/src/hotspot/share/gc/g1/g1BlockOffsetTable.hpp index 89a91d5bce2036c8f3e5f5cc2aba301d56f6bdd6..128e9d105a2e6631e6218b4540de2e01ad73fba3 100644 --- a/src/hotspot/share/gc/g1/g1BlockOffsetTable.hpp +++ b/src/hotspot/share/gc/g1/g1BlockOffsetTable.hpp @@ -141,26 +141,11 @@ private: // Returns the address of a block whose start is at most "addr". inline HeapWord* block_at_or_preceding(const void* addr) const; + // Return the address of the beginning of the block that contains "addr". // "q" is a block boundary that is <= "addr"; "n" is the address of the - // next block (or the end of the space.) Return the address of the - // beginning of the block that contains "addr". Does so without side - // effects (see, e.g., spec of block_start.) - inline HeapWord* forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n, - const void* addr) const; - - // "q" is a block boundary that is <= "addr"; return the address of the - // beginning of the block that contains "addr". May have side effects - // on "this", by updating imprecise entries. 
- inline HeapWord* forward_to_block_containing_addr(HeapWord* q, - const void* addr); - - // "q" is a block boundary that is <= "addr"; "n" is the address of the - // next block (or the end of the space.) Return the address of the - // beginning of the block that contains "addr". May have side effects - // on "this", by updating imprecise entries. - HeapWord* forward_to_block_containing_addr_slow(HeapWord* q, - HeapWord* n, - const void* addr); + // next block (or the end of the space.) + inline HeapWord* forward_to_block_containing_addr(HeapWord* q, HeapWord* n, + const void* addr) const; // Requires that "*threshold_" be the first array entry boundary at or // above "blk_start". If the block starts at or crosses "*threshold_", records diff --git a/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp b/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp index 104200eeea2111eb4df3cb4769fc34304644d732..e4e24c219de14efb3aa744a730fa5889933b4d7f 100644 --- a/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp +++ b/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp @@ -46,16 +46,10 @@ inline HeapWord* G1BlockOffsetTablePart::threshold_for_addr(const void* addr) { } inline HeapWord* G1BlockOffsetTablePart::block_start(const void* addr) { - assert(addr >= _hr->bottom() && addr < _hr->top(), "invalid address"); - HeapWord* q = block_at_or_preceding(addr); - return forward_to_block_containing_addr(q, addr); -} - -inline HeapWord* G1BlockOffsetTablePart::block_start_const(const void* addr) const { assert(addr >= _hr->bottom() && addr < _hr->top(), "invalid address"); HeapWord* q = block_at_or_preceding(addr); HeapWord* n = q + block_size(q); - return forward_to_block_containing_addr_const(q, n, addr); + return forward_to_block_containing_addr(q, n, addr); } u_char G1BlockOffsetTable::offset_array(size_t index) const { @@ -141,9 +135,16 @@ inline HeapWord* G1BlockOffsetTablePart::block_at_or_preceding(const void* addr) return q; } -inline HeapWord* G1BlockOffsetTablePart::forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n, - const void* addr) const { +inline HeapWord* G1BlockOffsetTablePart::forward_to_block_containing_addr(HeapWord* q, HeapWord* n, + const void* addr) const { while (n <= addr) { + // When addr is not covered by the block starting at q we need to + // step forward until we find the correct block. With the BOT + // being precise, we should never have to step through more than + // a single card. + assert(_bot->index_for(n) == _bot->index_for(addr), + "BOT not precise. Index for n: " SIZE_FORMAT " must be equal to the index for addr: " SIZE_FORMAT, + _bot->index_for(n), _bot->index_for(addr)); q = n; oop obj = cast_to_oop(q); if (obj->klass_or_null_acquire() == NULL) { @@ -156,21 +157,4 @@ inline HeapWord* G1BlockOffsetTablePart::forward_to_block_containing_addr_const( return q; } -inline HeapWord* G1BlockOffsetTablePart::forward_to_block_containing_addr(HeapWord* q, - const void* addr) { - if (cast_to_oop(q)->klass_or_null_acquire() == NULL) { - return q; - } - HeapWord* n = q + block_size(q); - // In the normal case, where the query "addr" is a card boundary, and the - // offset table chunks are the same size as cards, the block starting at - // "q" will contain addr, so the test below will fail, and we'll fall - // through quickly. 
- if (n <= addr) { - q = forward_to_block_containing_addr_slow(q, n, addr); - } - assert(q <= addr, "wrong order for current and arg"); - return q; -} - #endif // SHARE_GC_G1_G1BLOCKOFFSETTABLE_INLINE_HPP diff --git a/src/hotspot/share/gc/g1/g1CardSet.cpp b/src/hotspot/share/gc/g1/g1CardSet.cpp index 4b91dc5409283684bf7fc7edc14421acc310ac20..e9c986ebc5315c42950308fcd5ef0e17028c5f35 100644 --- a/src/hotspot/share/gc/g1/g1CardSet.cpp +++ b/src/hotspot/share/gc/g1/g1CardSet.cpp @@ -44,20 +44,36 @@ G1CardSet::CardSetPtr G1CardSet::FullCardSet = (G1CardSet::CardSetPtr)-1; +static uint default_log2_card_region_per_region() { + uint log2_card_region_per_heap_region = 0; + + const uint card_container_limit = G1CardSetContainer::LogCardsPerRegionLimit; + if (card_container_limit < (uint)HeapRegion::LogCardsPerRegion) { + log2_card_region_per_heap_region = (uint)HeapRegion::LogCardsPerRegion - card_container_limit; + } + + return log2_card_region_per_heap_region; +} + G1CardSetConfiguration::G1CardSetConfiguration() : G1CardSetConfiguration(HeapRegion::LogCardsPerRegion, /* inline_ptr_bits_per_card */ G1RemSetArrayOfCardsEntries, /* num_cards_in_array */ (double)G1RemSetCoarsenHowlBitmapToHowlFullPercent / 100, /* cards_in_bitmap_threshold_percent */ G1RemSetHowlNumBuckets, /* num_buckets_in_howl */ (double)G1RemSetCoarsenHowlToFullPercent / 100, /* cards_in_howl_threshold_percent */ - (uint)HeapRegion::CardsPerRegion) /* max_cards_in_cardset */ - { } + (uint)HeapRegion::CardsPerRegion, /* max_cards_in_cardset */ + default_log2_card_region_per_region()) /* log2_card_region_per_region */ +{ + assert((_log2_card_region_per_heap_region + _log2_cards_per_card_region) == (uint)HeapRegion::LogCardsPerRegion, + "inconsistent heap region virtualization setup"); +} G1CardSetConfiguration::G1CardSetConfiguration(uint num_cards_in_array, double cards_in_bitmap_threshold_percent, uint max_buckets_in_howl, double cards_in_howl_threshold_percent, - uint max_cards_in_card_set) : + uint max_cards_in_card_set, + uint log2_card_region_per_region) : G1CardSetConfiguration(log2i_exact(max_cards_in_card_set), /* inline_ptr_bits_per_card */ num_cards_in_array, /* num_cards_in_array */ cards_in_bitmap_threshold_percent, /* cards_in_bitmap_threshold_percent */ @@ -65,15 +81,17 @@ G1CardSetConfiguration::G1CardSetConfiguration(uint num_cards_in_array, num_cards_in_array, max_buckets_in_howl), cards_in_howl_threshold_percent, /* cards_in_howl_threshold_percent */ - max_cards_in_card_set) /* max_cards_in_cardset */ - { } + max_cards_in_card_set, /* max_cards_in_cardset */ + log2_card_region_per_region) +{ } G1CardSetConfiguration::G1CardSetConfiguration(uint inline_ptr_bits_per_card, uint num_cards_in_array, double cards_in_bitmap_threshold_percent, uint num_buckets_in_howl, double cards_in_howl_threshold_percent, - uint max_cards_in_card_set) : + uint max_cards_in_card_set, + uint log2_card_region_per_heap_region) : _inline_ptr_bits_per_card(inline_ptr_bits_per_card), _num_cards_in_array(num_cards_in_array), _num_buckets_in_howl(num_buckets_in_howl), @@ -82,13 +100,14 @@ G1CardSetConfiguration::G1CardSetConfiguration(uint inline_ptr_bits_per_card, _num_cards_in_howl_bitmap(G1CardSetHowl::bitmap_size(_max_cards_in_card_set, _num_buckets_in_howl)), _cards_in_howl_bitmap_threshold(_num_cards_in_howl_bitmap * cards_in_bitmap_threshold_percent), _log2_num_cards_in_howl_bitmap(log2i_exact(_num_cards_in_howl_bitmap)), - _bitmap_hash_mask(~(~(0) << _log2_num_cards_in_howl_bitmap)) { + _bitmap_hash_mask(~(~(0) << 
_log2_num_cards_in_howl_bitmap)), + _log2_card_region_per_heap_region(log2_card_region_per_heap_region), + _log2_cards_per_card_region(log2i_exact(_max_cards_in_card_set) - _log2_card_region_per_heap_region) { assert(is_power_of_2(_max_cards_in_card_set), "max_cards_in_card_set must be a power of 2: %u", _max_cards_in_card_set); init_card_set_alloc_options(); - log_configuration(); } @@ -109,11 +128,14 @@ void G1CardSetConfiguration::log_configuration() { "InlinePtr #elems %u size %zu " "Array Of Cards #elems %u size %zu " "Howl #buckets %u coarsen threshold %u " - "Howl Bitmap #elems %u size %zu coarsen threshold %u", + "Howl Bitmap #elems %u size %zu coarsen threshold %u " + "Card regions per heap region %u cards per card region %u", num_cards_in_inline_ptr(), sizeof(void*), num_cards_in_array(), G1CardSetArray::size_in_bytes(num_cards_in_array()), num_buckets_in_howl(), cards_in_howl_threshold(), - num_cards_in_howl_bitmap(), G1CardSetBitMap::size_in_bytes(num_cards_in_howl_bitmap()), cards_in_howl_bitmap_threshold()); + num_cards_in_howl_bitmap(), G1CardSetBitMap::size_in_bytes(num_cards_in_howl_bitmap()), cards_in_howl_bitmap_threshold(), + (uint)1 << log2_card_region_per_heap_region(), + (uint)1 << log2_cards_per_card_region()); } uint G1CardSetConfiguration::num_cards_in_inline_ptr() const { @@ -209,9 +231,9 @@ class G1CardSetHashTable : public CHeapObj { }; class G1CardSetHashTableScan : public StackObj { - G1CardSet::G1CardSetPtrIterator* _scan_f; + G1CardSet::CardSetPtrClosure* _scan_f; public: - explicit G1CardSetHashTableScan(G1CardSet::G1CardSetPtrIterator* f) : _scan_f(f) { } + explicit G1CardSetHashTableScan(G1CardSet::CardSetPtrClosure* f) : _scan_f(f) { } bool operator()(G1CardSetHashTableValue* value) { _scan_f->do_cardsetptr(value->_region_idx, value->_num_occupied, value->_card_set); @@ -262,12 +284,12 @@ public: return found.value(); } - void iterate_safepoint(G1CardSet::G1CardSetPtrIterator* cl2) { + void iterate_safepoint(G1CardSet::CardSetPtrClosure* cl2) { G1CardSetHashTableScan cl(cl2); _table.do_safepoint_scan(cl); } - void iterate(G1CardSet::G1CardSetPtrIterator* cl2) { + void iterate(G1CardSet::CardSetPtrClosure* cl2) { G1CardSetHashTableScan cl(cl2); _table.do_scan(Thread::current(), cl); } @@ -778,7 +800,7 @@ void G1CardSet::print_info(outputStream* st, uint card_region, uint card_in_regi } template -void G1CardSet::iterate_cards_during_transfer(CardSetPtr const card_set, CardVisitor& found) { +void G1CardSet::iterate_cards_during_transfer(CardSetPtr const card_set, CardVisitor& cl) { uint type = card_set_type(card_set); assert(type == CardSetInlinePtr || type == CardSetArrayOfCards, "invalid card set type %d to transfer from", @@ -787,11 +809,11 @@ void G1CardSet::iterate_cards_during_transfer(CardSetPtr const card_set, CardVis switch (type) { case CardSetInlinePtr: { G1CardSetInlinePtr ptr(card_set); - ptr.iterate(found, _config->inline_ptr_bits_per_card()); + ptr.iterate(cl, _config->inline_ptr_bits_per_card()); return; } case CardSetArrayOfCards : { - card_set_ptr(card_set)->iterate(found); + card_set_ptr(card_set)->iterate(cl); return; } default: @@ -799,38 +821,57 @@ void G1CardSet::iterate_cards_during_transfer(CardSetPtr const card_set, CardVis } } -void G1CardSet::iterate_containers(G1CardSetPtrIterator* found, bool at_safepoint) { +void G1CardSet::iterate_containers(CardSetPtrClosure* cl, bool at_safepoint) { if (at_safepoint) { - _table->iterate_safepoint(found); + _table->iterate_safepoint(cl); } else { - _table->iterate(found); + 
_table->iterate(cl); } } +// Applied to all cards (or card ranges) of the containers. template <typename Closure> -class G1ContainerCards { - Closure& _iter; +class G1ContainerCardsClosure { + Closure& _cl; uint _region_idx; public: - G1ContainerCards(Closure& iter, uint region_idx) : _iter(iter), _region_idx(region_idx) { } + G1ContainerCardsClosure(Closure& cl, uint region_idx) : _cl(cl), _region_idx(region_idx) { } bool start_iterate(uint tag) { return true; } void operator()(uint card_idx) { - _iter.do_card(_region_idx, card_idx); + _cl.do_card(_region_idx, card_idx); } void operator()(uint card_idx, uint length) { for (uint i = 0; i < length; i++) { - _iter.do_card(_region_idx, card_idx); + _cl.do_card(_region_idx, card_idx); } } }; -void G1CardSet::iterate_cards(G1CardSetCardIterator& iter) { - G1CardSetMergeCardIterator<G1CardSetCardIterator, G1ContainerCards> cl(this, iter); - iterate_containers(&cl); +template <typename Closure, template <typename> class CardOrRanges> +class G1CardSetContainersClosure : public G1CardSet::CardSetPtrClosure { + G1CardSet* _card_set; + Closure& _cl; + +public: + + G1CardSetContainersClosure(G1CardSet* card_set, + Closure& cl) : + _card_set(card_set), + _cl(cl) { } + + void do_cardsetptr(uint region_idx, size_t num_occupied, G1CardSet::CardSetPtr card_set) override { + CardOrRanges<Closure> cl(_cl, region_idx); + _card_set->iterate_cards_or_ranges_in_container(card_set, cl); + } +}; + +void G1CardSet::iterate_cards(CardClosure& cl) { + G1CardSetContainersClosure<CardClosure, G1ContainerCardsClosure> cl2(this, cl); + iterate_containers(&cl2); } bool G1CardSet::occupancy_less_or_equal_to(size_t limit) const { @@ -846,11 +887,11 @@ size_t G1CardSet::occupied() const { } size_t G1CardSet::num_containers() { - class GetNumberOfContainers : public G1CardSetPtrIterator { + class GetNumberOfContainers : public CardSetPtrClosure { public: size_t _count; - GetNumberOfContainers() : G1CardSetPtrIterator(), _count(0) { } + GetNumberOfContainers() : CardSetPtrClosure(), _count(0) { } void do_cardsetptr(uint region_idx, size_t num_occupied, CardSetPtr card_set) override { _count++; diff --git a/src/hotspot/share/gc/g1/g1CardSet.hpp b/src/hotspot/share/gc/g1/g1CardSet.hpp index 3589ef4988e00cc8b5fd6a843a9c311628c553ef..c8e09cec1267871bdb94a693d169a9e5775786ca 100644 --- a/src/hotspot/share/gc/g1/g1CardSet.hpp +++ b/src/hotspot/share/gc/g1/g1CardSet.hpp @@ -58,6 +58,8 @@ class G1CardSetConfiguration { uint _cards_in_howl_bitmap_threshold; uint _log2_num_cards_in_howl_bitmap; size_t _bitmap_hash_mask; + uint _log2_card_region_per_heap_region; + uint _log2_cards_per_card_region; G1CardSetAllocOptions* _card_set_alloc_options; @@ -66,7 +68,8 @@ class G1CardSetConfiguration { double cards_in_bitmap_threshold_percent, uint num_buckets_in_howl, double cards_in_howl_threshold_percent, - uint max_cards_in_card_set); + uint max_cards_in_card_set, + uint log2_card_region_per_heap_region); void init_card_set_alloc_options(); void log_configuration(); @@ -75,12 +78,13 @@ public: // Initialize card set configuration from globals. G1CardSetConfiguration(); // Initialize card set configuration from parameters. - // Only for test + // Testing only. G1CardSetConfiguration(uint num_cards_in_array, double cards_in_bitmap_threshold_percent, uint max_buckets_in_howl, double cards_in_howl_threshold_percent, - uint max_cards_in_card_set); + uint max_cards_in_card_set, + uint log2_card_region_per_region); ~G1CardSetConfiguration(); @@ -115,6 +119,20 @@ public: // with more entries per region are coarsened to Full.
uint max_cards_in_region() const { return _max_cards_in_card_set; } + // Heap region virtualization: to save memory in the common case, there are limits on + // how many cards a single container can cover. Heap region virtualization + // allows using multiple entries in the G1CardSet hash table per area covered + // by the remembered set (e.g. heap region); each such entry is called a "card_region". + // + // The next two members describe how many card regions there are per area + // (heap region) and how many cards each card region has. + + // The log2 of the configured number of card regions per heap region. + uint log2_card_region_per_heap_region() const { return _log2_card_region_per_heap_region; } + // The log2 of the number of cards per card region. This is calculated from max_cards_in_region() + // and the value above. + uint log2_cards_per_card_region() const { return _log2_cards_per_card_region; } + // Memory object types configuration // Number of distinctly sized memory objects on the card set heap. // Currently contains CHT-Nodes, ArrayOfCards, BitMaps, Howl @@ -171,9 +189,6 @@ class G1CardSet : public CHeapObj<mtGCCardSet> { friend class G1CardSetTest; friend class G1CardSetMtTestTask; - template <typename Closure, template <typename> class CardorRanges> - friend class G1CardSetMergeCardIterator; - friend class G1TransferCard; friend class G1ReleaseCardsets; @@ -276,24 +291,7 @@ private: // // on the given class. template <class CardVisitor> - void iterate_cards_during_transfer(CardSetPtr const card_set, CardVisitor& found); - - // Iterate over the container, calling a method on every card or card range contained - // in the card container. - // For every container, first calls - // - // void start_iterate(uint tag, uint region_idx); - // - // Then for every card or card range it calls - // - // void do_card(uint card_idx); - // void do_card_range(uint card_idx, uint length); - // - // where card_idx is the card index within that region_idx passed before in - // start_iterate(). - // - template <class CardOrRangeVisitor> - void iterate_cards_or_ranges_in_container(CardSetPtr const card_set, CardOrRangeVisitor& found); + void iterate_cards_during_transfer(CardSetPtr const card_set, CardVisitor& cl); uint card_set_type_to_mem_object_type(uintptr_t type) const; uint8_t* allocate_mem_object(uintptr_t type); @@ -340,25 +338,36 @@ public: void print(outputStream* os); - // Various iterators - should be made inlineable somehow. - class G1CardSetPtrIterator { + // Iterate over the container, calling a method on every card or card range contained + // in the card container. + // For every container, first calls + // + // void start_iterate(uint tag, uint region_idx); + // + // Then for every card or card range it calls + // + // void do_card(uint card_idx); + // void do_card_range(uint card_idx, uint length); + // + // where card_idx is the card index within that region_idx passed before in + // start_iterate(). + // + template <class CardOrRangeVisitor> + void iterate_cards_or_ranges_in_container(CardSetPtr const card_set, CardOrRangeVisitor& cl); + + class CardSetPtrClosure { public: virtual void do_cardsetptr(uint region_idx, size_t num_occupied, CardSetPtr card_set) = 0; }; - void iterate_containers(G1CardSetPtrIterator* iter, bool safepoint = false); + void iterate_containers(CardSetPtrClosure* cl, bool safepoint = false); - class G1CardSetCardIterator { + class CardClosure { public: virtual void do_card(uint region_idx, uint card_idx) = 0; }; - void iterate_cards(G1CardSetCardIterator& iter); - - // Iterate all cards for card set merging. Must be a CardOrRangeVisitor as - // explained above.
- template <class CardOrRangeVisitor> - void iterate_for_merge(CardOrRangeVisitor& cl); + void iterate_cards(CardClosure& cl); }; class G1CardSetHashTableValue { diff --git a/src/hotspot/share/gc/g1/g1CardSet.inline.hpp b/src/hotspot/share/gc/g1/g1CardSet.inline.hpp index 879f6f1eaff5d4e070c98e9ec8d028c4c349d432..99938b4b74eb55313e244ecfebe5b07806dd9c55 100644 --- a/src/hotspot/share/gc/g1/g1CardSet.inline.hpp +++ b/src/hotspot/share/gc/g1/g1CardSet.inline.hpp @@ -42,18 +42,18 @@ inline G1CardSet::CardSetPtr G1CardSet::make_card_set_ptr(void* value, uintptr_t } template <class CardOrRangeVisitor> -inline void G1CardSet::iterate_cards_or_ranges_in_container(CardSetPtr const card_set, CardOrRangeVisitor& found) { +inline void G1CardSet::iterate_cards_or_ranges_in_container(CardSetPtr const card_set, CardOrRangeVisitor& cl) { switch (card_set_type(card_set)) { case CardSetInlinePtr: { - if (found.start_iterate(G1GCPhaseTimes::MergeRSMergedInline)) { + if (cl.start_iterate(G1GCPhaseTimes::MergeRSMergedInline)) { G1CardSetInlinePtr ptr(card_set); - ptr.iterate(found, _config->inline_ptr_bits_per_card()); + ptr.iterate(cl, _config->inline_ptr_bits_per_card()); } return; } case CardSetArrayOfCards : { - if (found.start_iterate(G1GCPhaseTimes::MergeRSMergedArrayOfCards)) { - card_set_ptr<G1CardSetArray>(card_set)->iterate(found); + if (cl.start_iterate(G1GCPhaseTimes::MergeRSMergedArrayOfCards)) { + card_set_ptr<G1CardSetArray>(card_set)->iterate(cl); } return; } @@ -65,13 +65,13 @@ inline void G1CardSet::iterate_cards_or_ranges_in_container(CardSetPtr const car case CardSetHowl: { assert(card_set_type(FullCardSet) == CardSetHowl, "Must be"); if (card_set == FullCardSet) { - if (found.start_iterate(G1GCPhaseTimes::MergeRSMergedFull)) { - found(0, _config->max_cards_in_region()); + if (cl.start_iterate(G1GCPhaseTimes::MergeRSMergedFull)) { + cl(0, _config->max_cards_in_region()); } return; } - if (found.start_iterate(G1GCPhaseTimes::MergeRSMergedHowl)) { - card_set_ptr<G1CardSetHowl>(card_set)->iterate(found, _config); + if (cl.start_iterate(G1GCPhaseTimes::MergeRSMergedHowl)) { + card_set_ptr<G1CardSetHowl>(card_set)->iterate(cl, _config); } return; } @@ -80,46 +80,4 @@ inline void G1CardSet::iterate_cards_or_ranges_in_container(CardSetPtr const car ShouldNotReachHere(); } -template <typename Closure> -class G1ContainerCardsOrRanges { - Closure& _iter; - uint _region_idx; - -public: - G1ContainerCardsOrRanges(Closure& iter, uint region_idx) : _iter(iter), _region_idx(region_idx) { } - - bool start_iterate(uint tag) { - return _iter.start_iterate(tag, _region_idx); - } - - void operator()(uint card_idx) { - _iter.do_card(card_idx); - } - - void operator()(uint card_idx, uint length) { - _iter.do_card_range(card_idx, length); - } -}; - -template <typename Closure, template <typename> class CardOrRanges> -class G1CardSetMergeCardIterator : public G1CardSet::G1CardSetPtrIterator { - G1CardSet* _card_set; - Closure& _iter; - -public: - - G1CardSetMergeCardIterator(G1CardSet* card_set, Closure& iter) : _card_set(card_set), _iter(iter) { } - - void do_cardsetptr(uint region_idx, size_t num_occupied, G1CardSet::CardSetPtr card_set) override { - CardOrRanges<Closure> cl(_iter, region_idx); - _card_set->iterate_cards_or_ranges_in_container(card_set, cl); - } -}; - -template <class CardOrRangeVisitor> -inline void G1CardSet::iterate_for_merge(CardOrRangeVisitor& cl) { - G1CardSetMergeCardIterator<CardOrRangeVisitor, G1ContainerCardsOrRanges> cl2(this, cl); - iterate_containers(&cl2, true /* at_safepoint */); -} - #endif // SHARE_GC_G1_G1CARDSET_INLINE_HPP diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp index 13fce2dd1264bc821a192e5d0bf1da2cc906635e..d27ac1243ef3373a0a61a677bb35cd9e6c2a866a 100644 ---
a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp @@ -1828,7 +1828,6 @@ void G1CollectedHeap::ref_processing_init() { ParallelGCThreads, // degree of mt processing // We discover with the gc worker threads during Remark, so both // thread counts must be considered for discovery. - (ParallelGCThreads > 1) || (ConcGCThreads > 1), // mt discovery MAX2(ParallelGCThreads, ConcGCThreads), // degree of mt discovery true, // Reference discovery is concurrent &_is_alive_closure_cm); // is alive closure @@ -1837,7 +1836,6 @@ void G1CollectedHeap::ref_processing_init() { _ref_processor_stw = new ReferenceProcessor(&_is_subject_to_discovery_stw, ParallelGCThreads, // degree of mt processing - (ParallelGCThreads > 1), // mt discovery ParallelGCThreads, // degree of mt discovery false, // Reference discovery is not concurrent &_is_alive_closure_stw); // is alive closure diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp index 2d6cdb45940178c1faae244bea08664b5bad6df3..2178dfc9d0fbe06caa09df7857f83feb4c3addda 100644 --- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp @@ -100,21 +100,21 @@ class G1STWIsAliveClosure : public BoolObjectClosure { G1CollectedHeap* _g1h; public: G1STWIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {} - bool do_object_b(oop p); + bool do_object_b(oop p) override; }; class G1STWSubjectToDiscoveryClosure : public BoolObjectClosure { G1CollectedHeap* _g1h; public: G1STWSubjectToDiscoveryClosure(G1CollectedHeap* g1h) : _g1h(g1h) {} - bool do_object_b(oop p); + bool do_object_b(oop p) override; }; class G1RegionMappingChangedListener : public G1MappingChangedListener { private: void reset_from_card_cache(uint start_idx, size_t num_regions); public: - virtual void on_commit(uint start_idx, size_t num_regions, bool zero_filled); + void on_commit(uint start_idx, size_t num_regions, bool zero_filled) override; }; class G1CollectedHeap : public CollectedHeap { @@ -235,7 +235,7 @@ private: // roots or the young generation. class HumongousReclaimCandidates : public G1BiasedMappedArray { protected: - bool default_value() const { return false; } + bool default_value() const override { return false; } public: void clear() { G1BiasedMappedArray::clear(); } void set_candidate(uint region, bool value) { @@ -306,7 +306,7 @@ private: size_t size, size_t translation_factor); - void trace_heap(GCWhen::Type when, const GCTracer* tracer); + void trace_heap(GCWhen::Type when, const GCTracer* tracer) override; // These are macros so that, if the assert fires, we get the correct // line number, file, etc. @@ -439,12 +439,12 @@ private: // humongous allocation requests should go to mem_allocate() which // will satisfy them with a special path. - virtual HeapWord* allocate_new_tlab(size_t min_size, - size_t requested_size, - size_t* actual_size); + HeapWord* allocate_new_tlab(size_t min_size, + size_t requested_size, + size_t* actual_size) override; - virtual HeapWord* mem_allocate(size_t word_size, - bool* gc_overhead_limit_was_exceeded); + HeapWord* mem_allocate(size_t word_size, + bool* gc_overhead_limit_was_exceeded) override; // First-level mutator allocation attempt: try to allocate out of // the mutator alloc region without taking the Heap_lock. This @@ -495,7 +495,7 @@ private: bool do_maximum_compaction); // Callback from VM_G1CollectFull operation, or collect_as_vm_thread. 
- virtual void do_full_collection(bool clear_all_soft_refs); + void do_full_collection(bool clear_all_soft_refs) override; // Helper to do a full collection that clears soft references. bool upgrade_to_full_collection(); @@ -777,8 +777,6 @@ public: // Start a concurrent cycle. void start_concurrent_cycle(bool concurrent_operation_is_full_mark); - void wait_for_root_region_scanning(); - void prepare_tlabs_for_mutator(); void retire_tlabs(); @@ -891,26 +889,26 @@ public: // Initialize the G1CollectedHeap to have the initial and // maximum sizes and remembered and barrier sets // specified by the policy object. - jint initialize(); + jint initialize() override; // Returns whether concurrent mark threads (and the VM) are about to terminate. bool concurrent_mark_is_terminating() const; - virtual void stop(); - virtual void safepoint_synchronize_begin(); - virtual void safepoint_synchronize_end(); + void stop() override; + void safepoint_synchronize_begin() override; + void safepoint_synchronize_end() override; // Does operations required after initialization has been done. - void post_initialize(); + void post_initialize() override; // Initialize weak reference processing. void ref_processing_init(); - virtual Name kind() const { + Name kind() const override { return CollectedHeap::G1; } - virtual const char* name() const { + const char* name() const override { return "G1"; } @@ -927,17 +925,14 @@ public: const G1CollectionSet* collection_set() const { return &_collection_set; } G1CollectionSet* collection_set() { return &_collection_set; } - virtual SoftRefPolicy* soft_ref_policy(); - - virtual void initialize_serviceability(); - virtual MemoryUsage memory_usage(); - virtual GrowableArray memory_managers(); - virtual GrowableArray memory_pools(); + SoftRefPolicy* soft_ref_policy() override; - virtual void fill_with_dummy_object(HeapWord* start, HeapWord* end, bool zap); + void initialize_serviceability() override; + MemoryUsage memory_usage() override; + GrowableArray memory_managers() override; + GrowableArray memory_pools() override; - // Try to minimize the remembered set. - void scrub_rem_set(); + void fill_with_dummy_object(HeapWord* start, HeapWord* end, bool zap) override; // Apply the given closure on all cards in the Hot Card Cache, emptying it. void iterate_hcc_closure(G1CardTableEntryClosure* cl, uint worker_id); @@ -958,8 +953,8 @@ public: size_t unused_committed_regions_in_bytes() const; - virtual size_t capacity() const; - virtual size_t used() const; + size_t capacity() const override; + size_t used() const override; // This should be called when we're not holding the heap lock. The // result might be a bit inaccurate. size_t used_unlocked() const; @@ -971,7 +966,7 @@ public: // end fields defining the extent of the contiguous allocation region.) // But G1CollectedHeap doesn't yet support this. - virtual bool is_maximal_no_gc() const { + bool is_maximal_no_gc() const override { return _hrm.available() == 0; } @@ -1026,7 +1021,7 @@ public: // Perform a collection of the heap; intended for use in implementing // "System.gc". This probably implies as full a collection as the // "CollectedHeap" supports. - virtual void collect(GCCause::Cause cause); + void collect(GCCause::Cause cause) override; // Perform a collection of the heap with the given cause. // Returns whether this collection actually executed. 
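Note on the pattern in this header: converting `virtual` declarations to `override` is not purely cosmetic. `override` asks the compiler to verify that each method still matches a virtual in CollectedHeap, so later signature drift in the base class becomes a compile error instead of silently introducing an unrelated new virtual. A minimal sketch of the failure mode this guards against, using hypothetical class names rather than anything from this patch:

#include <cstddef>

struct BaseHeap {
  virtual size_t capacity() const { return 0; }
  virtual ~BaseHeap() = default;
};

struct MyHeap : BaseHeap {
  // Matches BaseHeap::capacity() exactly, so this compiles.
  size_t capacity() const override { return 42; }

  // Dropping 'const' would match no base virtual; with 'override'
  // the compiler rejects it instead of silently hiding the base:
  // size_t capacity() override { return 42; }   // error
};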
@@ -1040,7 +1035,7 @@ public: void prepend_to_freelist(FreeRegionList* list); void decrement_summary_bytes(size_t bytes); - virtual bool is_in(const void* p) const; + bool is_in(const void* p) const override; // Return "TRUE" iff the given object address is within the collection // set. Assumes that the reference points into the heap. @@ -1080,12 +1075,12 @@ public: void object_iterate_parallel(ObjectClosure* cl, uint worker_id, HeapRegionClaimer* claimer); // Iterate over all objects, calling "cl.do_object" on each. - virtual void object_iterate(ObjectClosure* cl); + void object_iterate(ObjectClosure* cl) override; - virtual ParallelObjectIterator* parallel_object_iterator(uint thread_num); + ParallelObjectIterator* parallel_object_iterator(uint thread_num) override; // Keep alive an object that was loaded with AS_NO_KEEPALIVE. - virtual void keep_alive(oop obj); + void keep_alive(oop obj) override; // Iterate over heap regions, in address order, terminating the // iteration early if the "do_heap_region" method returns "true". @@ -1176,10 +1171,10 @@ public: // Section on thread-local allocation buffers (TLABs) // See CollectedHeap for semantics. - size_t tlab_capacity(Thread* ignored) const; - size_t tlab_used(Thread* ignored) const; - size_t max_tlab_size() const; - size_t unsafe_max_tlab_alloc(Thread* ignored) const; + size_t tlab_capacity(Thread* ignored) const override; + size_t tlab_used(Thread* ignored) const override; + size_t max_tlab_size() const override; + size_t unsafe_max_tlab_alloc(Thread* ignored) const override; inline bool is_in_young(const oop obj); @@ -1203,7 +1198,7 @@ public: static size_t humongous_obj_size_in_regions(size_t word_size); // Print the maximum heap capacity. - virtual size_t max_capacity() const; + size_t max_capacity() const override; Tickspan time_since_last_collection() const { return Ticks::now() - _collection_pause_end; } @@ -1266,16 +1261,16 @@ public: // Optimized nmethod scanning support routines // Register the given nmethod with the G1 heap. - virtual void register_nmethod(nmethod* nm); + void register_nmethod(nmethod* nm) override; // Unregister the given nmethod from the G1 heap. - virtual void unregister_nmethod(nmethod* nm); + void unregister_nmethod(nmethod* nm) override; // No nmethod flushing needed. - virtual void flush_nmethod(nmethod* nm) {} + void flush_nmethod(nmethod* nm) override {} // No nmethod verification implemented. - virtual void verify_nmethod(nmethod* nm) {} + void verify_nmethod(nmethod* nm) override {} // Recalculate amount of used memory after GC. Must be called after all allocation // has finished. @@ -1297,7 +1292,7 @@ public: // Verification // Perform any cleanup actions necessary before allowing a verification. - virtual void prepare_for_verify(); + void prepare_for_verify() override; // Perform verification. @@ -1314,14 +1309,14 @@ public: // Currently there is only one place where this is called with // vo == UseFullMarking, which is to verify the marking during a // full GC. - void verify(VerifyOption vo); + void verify(VerifyOption vo) override; // WhiteBox testing support. 
- virtual bool supports_concurrent_gc_breakpoints() const; + bool supports_concurrent_gc_breakpoints() const override; - virtual WorkerThreads* safepoint_workers() { return _workers; } + WorkerThreads* safepoint_workers() override { return _workers; } - virtual bool is_archived_object(oop object) const; + bool is_archived_object(oop object) const override; // The methods below are here for convenience and dispatch the // appropriate method depending on value of the given VerifyOption @@ -1344,21 +1339,21 @@ private: void print_regions_on(outputStream* st) const; public: - virtual void print_on(outputStream* st) const; - virtual void print_extended_on(outputStream* st) const; - virtual void print_on_error(outputStream* st) const; + void print_on(outputStream* st) const override; + void print_extended_on(outputStream* st) const override; + void print_on_error(outputStream* st) const override; - virtual void gc_threads_do(ThreadClosure* tc) const; + void gc_threads_do(ThreadClosure* tc) const override; // Override - void print_tracing_info() const; + void print_tracing_info() const override; // The following two methods are helpful for debugging RSet issues. void print_cset_rsets() PRODUCT_RETURN; void print_all_rsets() PRODUCT_RETURN; // Used to print information about locations in the hs_err file. - virtual bool print_location(outputStream* st, void* addr) const; + bool print_location(outputStream* st, void* addr) const override; }; // Scoped object that performs common pre- and post-gc heap printing operations. diff --git a/src/hotspot/share/gc/g1/g1CommittedRegionMap.inline.hpp b/src/hotspot/share/gc/g1/g1CommittedRegionMap.inline.hpp index f00cc9a1fbacaa582ca46c0e740f7398e3844538..d5e3eb0679f078d694f219fa605e9179c35897cc 100644 --- a/src/hotspot/share/gc/g1/g1CommittedRegionMap.inline.hpp +++ b/src/hotspot/share/gc/g1/g1CommittedRegionMap.inline.hpp @@ -30,7 +30,7 @@ #include "utilities/bitMap.inline.hpp" inline bool G1CommittedRegionMap::active(uint index) const { - return _active.at(index); + return _active.par_at(index); } inline bool G1CommittedRegionMap::inactive(uint index) const { diff --git a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp index 7f23d6734e531ab1d0af365024c3234569cdb898..82239973d26bdc5c0d64b32e7f03a0590631c6f7 100644 --- a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp +++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp @@ -1618,10 +1618,6 @@ void G1ConcurrentMark::weak_refs_work() { uint active_workers = (ParallelRefProcEnabled ? _g1h->workers()->active_workers() : 1U); active_workers = clamp(active_workers, 1u, _max_num_tasks); - // Set the concurrency level. The phase was already set prior to - // executing the remark task. - set_concurrency(active_workers); - // Set the degree of MT processing here. If the discovery was done MT, // the number of threads involved during discovery could differ from // the number of active workers. 
This is OK as long as the discovered diff --git a/src/hotspot/share/gc/g1/g1EvacFailureObjectsSet.cpp b/src/hotspot/share/gc/g1/g1EvacFailureObjectsSet.cpp index 20bae37c6be724b25cd84b3abe33762c7685d9de..b14e63d5669864484b4d84ac1937edc1eba60e06 100644 --- a/src/hotspot/share/gc/g1/g1EvacFailureObjectsSet.cpp +++ b/src/hotspot/share/gc/g1/g1EvacFailureObjectsSet.cpp @@ -26,7 +26,6 @@ #include "gc/g1/g1EvacFailureObjectsSet.hpp" #include "gc/g1/g1CollectedHeap.hpp" #include "gc/g1/g1SegmentedArray.inline.hpp" -#include "gc/g1/heapRegion.hpp" #include "gc/g1/heapRegion.inline.hpp" #include "gc/shared/taskqueue.inline.hpp" #include "utilities/globalDefinitions.hpp" diff --git a/src/hotspot/share/gc/g1/g1EvacFailureObjectsSet.inline.hpp b/src/hotspot/share/gc/g1/g1EvacFailureObjectsSet.inline.hpp new file mode 100644 index 0000000000000000000000000000000000000000..86ed1adada28d3d33c7758afb8fd5c205fcb542a --- /dev/null +++ b/src/hotspot/share/gc/g1/g1EvacFailureObjectsSet.inline.hpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021, Huawei and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_GC_G1_G1EVACFAILUREOBJECTSSET_INLINE_HPP +#define SHARE_GC_G1_G1EVACFAILUREOBJECTSSET_INLINE_HPP + +#include "gc/g1/g1EvacFailureObjectsSet.hpp" +#include "gc/g1/g1CollectedHeap.hpp" +#include "gc/g1/g1SegmentedArray.inline.hpp" +#include "gc/g1/heapRegion.inline.hpp" + +inline void G1EvacFailureObjectsSet::record(oop obj) { + assert(obj != NULL, "must be"); + assert(_region_idx == G1CollectedHeap::heap()->heap_region_containing(obj)->hrm_index(), "must be"); + OffsetInRegion* e = _offsets.allocate(); + *e = to_offset(obj); +} + +#endif // SHARE_GC_G1_G1EVACFAILUREOBJECTSSET_INLINE_HPP diff --git a/src/hotspot/share/gc/g1/g1FullGCCompactTask.cpp b/src/hotspot/share/gc/g1/g1FullGCCompactTask.cpp index d0dc0593d3bbc90bb1fd88923d2d08d8f600422b..2ccda820fb0245891870200e0d4b8739206a0f44 100644 --- a/src/hotspot/share/gc/g1/g1FullGCCompactTask.cpp +++ b/src/hotspot/share/gc/g1/g1FullGCCompactTask.cpp @@ -60,12 +60,13 @@ public: size_t G1FullGCCompactTask::G1CompactRegionClosure::apply(oop obj) { size_t size = obj->size(); - HeapWord* destination = cast_from_oop<HeapWord*>(obj->forwardee()); - if (destination == NULL) { + if (!obj->is_forwarded()) { // Object not moving return size; } + HeapWord* destination = cast_from_oop<HeapWord*>(obj->forwardee()); + // copy object and reinit its mark HeapWord* obj_addr = cast_from_oop<HeapWord*>(obj); assert(obj_addr != destination, "everything in this pass should be moving"); diff --git a/src/hotspot/share/gc/g1/g1FullGCCompactionPoint.cpp b/src/hotspot/share/gc/g1/g1FullGCCompactionPoint.cpp index e14f0ea2569603ec6c4cd1cb60012d02b0e4672b..25ff38da77fd653e29e89197466d59b6b839e8d8 100644 --- a/src/hotspot/share/gc/g1/g1FullGCCompactionPoint.cpp +++ b/src/hotspot/share/gc/g1/g1FullGCCompactionPoint.cpp @@ -103,21 +103,9 @@ void G1FullGCCompactionPoint::forward(oop object, size_t size) { // Store a forwarding pointer if the object should be moved. if (cast_from_oop<HeapWord*>(object) != _compaction_top) { object->forward_to(cast_to_oop(_compaction_top)); + assert(object->is_forwarded(), "must be forwarded"); } else { - if (object->forwardee() != NULL) { - // Object should not move but mark-word is used so it looks like the - // object is forwarded. Need to clear the mark and it's no problem - // since it will be restored by preserved marks. - object->init_mark(); - } else { - // Make sure object has the correct mark-word set or that it will be - // fixed when restoring the preserved marks. - assert(object->mark() == markWord::prototype() || // Correct mark - object->mark_must_be_preserved(), // Will be restored by PreservedMarksSet - "should have correct prototype obj: " PTR_FORMAT " mark: " PTR_FORMAT " prototype: " PTR_FORMAT, - p2i(object), object->mark().value(), markWord::prototype().value()); - } - assert(object->forwardee() == NULL, "should be forwarded to NULL"); + assert(!object->is_forwarded(), "must not be forwarded"); } // Update compaction values. diff --git a/src/hotspot/share/gc/g1/g1FullGCOopClosures.inline.hpp b/src/hotspot/share/gc/g1/g1FullGCOopClosures.inline.hpp index d9a1759b029578ecb40607ca118f0e3845af18ad..f4b9d9e11768364b5309b3d847f7a0a32fcdf60b 100644 --- a/src/hotspot/share/gc/g1/g1FullGCOopClosures.inline.hpp +++ b/src/hotspot/share/gc/g1/g1FullGCOopClosures.inline.hpp @@ -77,19 +77,13 @@ template <typename T> inline void G1AdjustClosure::adjust_pointer(T* p) { return; } - oop forwardee = obj->forwardee(); - if (forwardee == NULL) { - // Not forwarded, return current reference.
- assert(obj->mark() == markWord::prototype() || // Correct mark - obj->mark_must_be_preserved(), // Will be restored by PreservedMarksSet - "Must have correct prototype or be preserved, obj: " PTR_FORMAT ", mark: " PTR_FORMAT ", prototype: " PTR_FORMAT, - p2i(obj), obj->mark().value(), markWord::prototype().value()); - return; + if (obj->is_forwarded()) { + oop forwardee = obj->forwardee(); + // Forwarded, just update. + assert(G1CollectedHeap::heap()->is_in_reserved(forwardee), "should be in object space"); + RawAccess<IS_NOT_NULL>::oop_store(p, forwardee); } - // Forwarded, just update. - assert(G1CollectedHeap::heap()->is_in_reserved(forwardee), "should be in object space"); - RawAccess<IS_NOT_NULL>::oop_store(p, forwardee); } inline void G1AdjustClosure::do_oop(oop* p) { do_oop_work(p); } diff --git a/src/hotspot/share/gc/g1/g1FullGCPrepareTask.cpp b/src/hotspot/share/gc/g1/g1FullGCPrepareTask.cpp index 817966a273bbf31b77a86b7b3671451eb833dbcb..ea7f94880c429fa27c668e73d25f1a97bccb058d 100644 --- a/src/hotspot/share/gc/g1/g1FullGCPrepareTask.cpp +++ b/src/hotspot/share/gc/g1/g1FullGCPrepareTask.cpp @@ -168,8 +168,7 @@ size_t G1FullGCPrepareTask::G1PrepareCompactLiveClosure::apply(oop object) { size_t G1FullGCPrepareTask::G1RePrepareClosure::apply(oop obj) { // We only re-prepare objects forwarded within the current region, so // skip objects that are already forwarded to another region. - oop forwarded_to = obj->forwardee(); - if (forwarded_to != NULL && !_current->is_in(forwarded_to)) { + if (obj->is_forwarded() && !_current->is_in(obj->forwardee())) { return obj->size(); } diff --git a/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp b/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp index 74b5c19ff45823ac89499457411199a1c8e1bf5a..ac8c370bfd082720419efc55a2d60c3c0b115379 100644 --- a/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp +++ b/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/gc/g1/g1RemSet.cpp b/src/hotspot/share/gc/g1/g1RemSet.cpp index 123de75f91660ec0711e39e8786acbfb3d032b31..b689e6497625564f9e1d0d3f8eda2a221fc0e6fa 100644 --- a/src/hotspot/share/gc/g1/g1RemSet.cpp +++ b/src/hotspot/share/gc/g1/g1RemSet.cpp @@ -106,14 +106,14 @@ class G1RemSetScanState : public CHeapObj<mtGC> { // within a region to claim. Dependent on the region size as proxy for the heap // size, we limit the total number of chunks to limit memory usage and maintenance // effort of that table vs. granularity of distributing scanning work. - // Testing showed that 8 for 1M/2M region, 16 for 4M/8M regions, 32 for 16/32M regions - // seems to be such a good trade-off. + // Testing showed that 8 chunks for 1M/2M regions, 16 for 4M/8M regions, 32 for 16/32M + // regions, and so on is a good trade-off. static uint get_chunks_per_region(uint log_region_size) { // Limit the expected input values to current known possible values of the // (log) region size. Adjust as necessary after testing if changing the permissible // values for region size.
- assert(log_region_size >= 20 && log_region_size <= 25, - "expected value in [20,25], but got %u", log_region_size); + assert(log_region_size >= 20 && log_region_size <= 29, + "expected value in [20,29], but got %u", log_region_size); return 1u << (log_region_size / 2 - 7); } diff --git a/src/hotspot/share/gc/g1/g1YoungCollector.cpp b/src/hotspot/share/gc/g1/g1YoungCollector.cpp index c900c1ce2488ca7cad6878e108ab9c23a40eb6c5..51fbfbb20dd7c4b97655b32446a379dbf8ab3aac 100644 --- a/src/hotspot/share/gc/g1/g1YoungCollector.cpp +++ b/src/hotspot/share/gc/g1/g1YoungCollector.cpp @@ -856,7 +856,7 @@ public: return; } if (region_attr.is_in_cset()) { - assert( obj->is_forwarded(), "invariant" ); + assert(obj->is_forwarded(), "invariant" ); *p = obj->forwardee(); } else { assert(!obj->is_forwarded(), "invariant" ); diff --git a/src/hotspot/share/gc/g1/g1_globals.hpp b/src/hotspot/share/gc/g1/g1_globals.hpp index 34dbabe4fecc5d6992d1b625af79d374df6c4baa..ed71b6d45b2802d8ef89105de103cb3d474cc812 100644 --- a/src/hotspot/share/gc/g1/g1_globals.hpp +++ b/src/hotspot/share/gc/g1/g1_globals.hpp @@ -257,7 +257,7 @@ \ product(size_t, G1HeapRegionSize, 0, \ "Size of the G1 regions.") \ - range(0, 32*M) \ + range(0, NOT_LP64(32*M) LP64_ONLY(512*M)) \ constraint(G1HeapRegionSizeConstraintFunc,AfterMemoryInit) \ \ product(uint, G1ConcRefinementThreads, 0, \ diff --git a/src/hotspot/share/gc/g1/heapRegion.cpp b/src/hotspot/share/gc/g1/heapRegion.cpp index ee092949f5ffa890ee3077baa2adfb63495e3d92..bb2a9c407daa92dcb690c0773fee82ccaba204e0 100644 --- a/src/hotspot/share/gc/g1/heapRegion.cpp +++ b/src/hotspot/share/gc/g1/heapRegion.cpp @@ -65,8 +65,9 @@ void HeapRegion::setup_heap_region_size(size_t max_heap_size) { size_t region_size = G1HeapRegionSize; // G1HeapRegionSize = 0 means decide ergonomically. if (region_size == 0) { - region_size = MAX2(max_heap_size / HeapRegionBounds::target_number(), - HeapRegionBounds::min_size()); + region_size = clamp(max_heap_size / HeapRegionBounds::target_number(), + HeapRegionBounds::min_size(), + HeapRegionBounds::max_ergonomics_size()); } // Make sure region size is a power of 2. Rounding up since this diff --git a/src/hotspot/share/gc/g1/heapRegion.inline.hpp b/src/hotspot/share/gc/g1/heapRegion.inline.hpp index f0f1b3aabeea384e221fa25f9c4b3fb1135d251d..8a0c4496781ccba083bc618eb1bf8281204b7869 100644 --- a/src/hotspot/share/gc/g1/heapRegion.inline.hpp +++ b/src/hotspot/share/gc/g1/heapRegion.inline.hpp @@ -30,6 +30,7 @@ #include "gc/g1/g1BlockOffsetTable.inline.hpp" #include "gc/g1/g1CollectedHeap.inline.hpp" #include "gc/g1/g1ConcurrentMarkBitMap.inline.hpp" +#include "gc/g1/g1EvacFailureObjectsSet.inline.hpp" #include "gc/g1/g1Predictions.hpp" #include "gc/g1/g1SegmentedArray.inline.hpp" #include "oops/oop.inline.hpp" @@ -83,10 +84,6 @@ inline HeapWord* HeapRegion::block_start(const void* p) { return _bot_part.block_start(p); } -inline HeapWord* HeapRegion::block_start_const(const void* p) const { - return _bot_part.block_start_const(p); -} - inline bool HeapRegion::is_obj_dead_with_size(const oop obj, const G1CMBitMap* const prev_bitmap, size_t* size) const { HeapWord* addr = cast_from_oop(obj); @@ -355,8 +352,6 @@ HeapWord* HeapRegion::oops_on_memregion_seq_iterate_careful(MemRegion mr, HeapWord* const end = mr.end(); // Find the obj that extends onto mr.start(). - // Update BOT as needed while finding start of (possibly dead) - // object containing the start of the region. 
HeapWord* cur = block_start(start); #ifdef ASSERT diff --git a/src/hotspot/share/gc/g1/heapRegionBounds.hpp b/src/hotspot/share/gc/g1/heapRegionBounds.hpp index 5a3f269749f2e6800f5879c4e9c4ee440c586ab3..5e748de59286b814d0c9fdc6aca41535f8b42144 100644 --- a/src/hotspot/share/gc/g1/heapRegionBounds.hpp +++ b/src/hotspot/share/gc/g1/heapRegionBounds.hpp @@ -34,12 +34,14 @@ private: // heaps a bit more efficiently. static const size_t MIN_REGION_SIZE = 1024 * 1024; + // Maximum region size determined ergonomically. + static const size_t MAX_ERGONOMICS_SIZE = 32 * 1024 * 1024; // Maximum region size; we don't go higher than that. There's a good // reason for having an upper bound. We don't want regions to get too // large, otherwise cleanup's effectiveness would decrease as there // will be fewer opportunities to find totally empty regions after // marking. - static const size_t MAX_REGION_SIZE = 32 * 1024 * 1024; + static const size_t MAX_REGION_SIZE = 512 * 1024 * 1024; // The automatic region size calculation will try to have around this // many regions in the heap. @@ -47,6 +49,7 @@ private: public: static inline size_t min_size(); + static inline size_t max_ergonomics_size(); static inline size_t max_size(); static inline size_t target_number(); }; diff --git a/src/hotspot/share/gc/g1/heapRegionBounds.inline.hpp b/src/hotspot/share/gc/g1/heapRegionBounds.inline.hpp index 20d81f6a62e1e32f763d4d68712ba400b15f5631..73a44bcf1794f10816f6f035e16c3ca0760fd56e 100644 --- a/src/hotspot/share/gc/g1/heapRegionBounds.inline.hpp +++ b/src/hotspot/share/gc/g1/heapRegionBounds.inline.hpp @@ -31,6 +31,10 @@ size_t HeapRegionBounds::min_size() { return MIN_REGION_SIZE; } +size_t HeapRegionBounds::max_ergonomics_size() { + return MAX_ERGONOMICS_SIZE; +} + size_t HeapRegionBounds::max_size() { return MAX_REGION_SIZE; } diff --git a/src/hotspot/share/gc/g1/heapRegionRemSet.cpp b/src/hotspot/share/gc/g1/heapRegionRemSet.cpp index c7633e772351c392423ec149123083e816487957..a0e740ad26790864477f942051f232bb20df36d6 100644 --- a/src/hotspot/share/gc/g1/heapRegionRemSet.cpp +++ b/src/hotspot/share/gc/g1/heapRegionRemSet.cpp @@ -26,6 +26,7 @@ #include "precompiled.hpp" #include "gc/g1/g1BlockOffsetTable.inline.hpp" +#include "gc/g1/g1CardSetContainers.inline.hpp" #include "gc/g1/g1CollectedHeap.inline.hpp" #include "gc/g1/g1ConcurrentRefine.hpp" #include "gc/g1/heapRegionManager.inline.hpp" diff --git a/src/hotspot/share/gc/g1/heapRegionRemSet.inline.hpp b/src/hotspot/share/gc/g1/heapRegionRemSet.inline.hpp index b6c4ee9c1bbd11ba694625fee29adcf704fa907c..fc7da0994e48dfa41fb30f20cfef0cdaca8a78fe 100644 --- a/src/hotspot/share/gc/g1/heapRegionRemSet.inline.hpp +++ b/src/hotspot/share/gc/g1/heapRegionRemSet.inline.hpp @@ -55,10 +55,64 @@ void HeapRegionRemSet::set_state_complete() { _state = Complete; } +template <typename Closure> +class G1ContainerCardsOrRanges { + Closure& _cl; + uint _region_idx; + uint _offset; + +public: + G1ContainerCardsOrRanges(Closure& cl, uint region_idx, uint offset) : _cl(cl), _region_idx(region_idx), _offset(offset) { } + + bool start_iterate(uint tag) { + return _cl.start_iterate(tag, _region_idx); + } + + void operator()(uint card_idx) { + _cl.do_card(card_idx + _offset); + } + + void operator()(uint card_idx, uint length) { + _cl.do_card_range(card_idx + _offset, length); + } +}; + +template <typename Closure, template <typename> class CardOrRanges> +class G1HeapRegionRemSetMergeCardClosure : public G1CardSet::CardSetPtrClosure { + G1CardSet* _card_set; + Closure& _cl; + uint _log_card_regions_per_region; + uint _card_regions_per_region_mask;
+ uint _log_card_region_size; + +public: + + G1HeapRegionRemSetMergeCardClosure(G1CardSet* card_set, + Closure& cl, + uint log_card_regions_per_region, + uint log_card_region_size) : + _card_set(card_set), + _cl(cl), + _log_card_regions_per_region(log_card_regions_per_region), + _card_regions_per_region_mask((1 << log_card_regions_per_region) - 1), + _log_card_region_size(log_card_region_size) { + } + + void do_cardsetptr(uint card_region_idx, size_t num_occupied, G1CardSet::CardSetPtr card_set) override { + CardOrRanges<Closure> cl(_cl, + card_region_idx >> _log_card_regions_per_region, + (card_region_idx & _card_regions_per_region_mask) << _log_card_region_size); + _card_set->iterate_cards_or_ranges_in_container(card_set, cl); + } +}; template <class CardOrRangeVisitor> inline void HeapRegionRemSet::iterate_for_merge(CardOrRangeVisitor& cl) { - _card_set.iterate_for_merge(cl); + G1HeapRegionRemSetMergeCardClosure<CardOrRangeVisitor, G1ContainerCardsOrRanges> cl2(&_card_set, + cl, + _card_set.config()->log2_card_region_per_heap_region(), + _card_set.config()->log2_cards_per_card_region()); + _card_set.iterate_containers(&cl2, true /* at_safepoint */); } void HeapRegionRemSet::split_card(OopOrNarrowOopStar from, uint& card_region, uint& card_within_region) const { diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.cpp b/src/hotspot/share/gc/parallel/psParallelCompact.cpp index 212bd424e63ce0e07347d3dbf1895fb771e39ac1..2a3d95192b6df19b9bfe7d65ac60efa75194db89 100644 --- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp +++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp @@ -852,7 +852,6 @@ public: BoolObjectClosure* is_alive_non_header) : ReferenceProcessor(is_subject_to_discovery, ParallelGCThreads, // mt processing degree - true, // mt discovery ParallelGCThreads, // mt discovery degree true, // atomic_discovery is_alive_non_header) { diff --git a/src/hotspot/share/gc/parallel/psScavenge.cpp b/src/hotspot/share/gc/parallel/psScavenge.cpp index f4a2fce76ec8b6cd950fdac02672c49bca3089a2..e3b3e76ca470569f7815ba58da4d365627e37d3d 100644 --- a/src/hotspot/share/gc/parallel/psScavenge.cpp +++ b/src/hotspot/share/gc/parallel/psScavenge.cpp @@ -799,7 +799,6 @@ void PSScavenge::initialize() { _ref_processor = new ReferenceProcessor(&_span_based_discoverer, ParallelGCThreads, // mt processing degree - true, // mt discovery ParallelGCThreads, // mt discovery degree false, // concurrent_discovery NULL); // header provides liveness info diff --git a/src/hotspot/share/gc/serial/markSweep.inline.hpp b/src/hotspot/share/gc/serial/markSweep.inline.hpp index ee0ef1e0b1bb1284d4e1e4dcd8be561d908c1dce..27fb8bb2f29d76b44e23b369970e8773b059e2df 100644 --- a/src/hotspot/share/gc/serial/markSweep.inline.hpp +++ b/src/hotspot/share/gc/serial/markSweep.inline.hpp @@ -88,13 +88,8 @@ template <class T> inline void MarkSweep::adjust_pointer(T* p) { oop obj = CompressedOops::decode_not_null(heap_oop); assert(Universe::heap()->is_in(obj), "should be in heap"); - oop new_obj = cast_to_oop(obj->mark().decode_pointer()); - - assert(new_obj != NULL || // is forwarding ptr? - obj->mark() == markWord::prototype(), // not gc marked?
- "should be forwarded"); - - if (new_obj != NULL) { + if (obj->is_forwarded()) { + oop new_obj = obj->forwardee(); assert(is_object_aligned(new_obj), "oop must be aligned"); RawAccess::oop_store(p, new_obj); } diff --git a/src/hotspot/share/gc/shared/c1/barrierSetC1.hpp b/src/hotspot/share/gc/shared/c1/barrierSetC1.hpp index e1fc0f410c1689be1eae509565b7d6a619767a15..ac19a2c4d3757f19c094143c7bdc4db6368eacbe 100644 --- a/src/hotspot/share/gc/shared/c1/barrierSetC1.hpp +++ b/src/hotspot/share/gc/shared/c1/barrierSetC1.hpp @@ -41,7 +41,7 @@ class LIRAddressOpr: public StackObj { LIRItem* _item; LIR_Opr _opr; public: - LIRAddressOpr(LIRItem& item) : _item(&item), _opr(NULL) {} + LIRAddressOpr(LIRItem& item) : _item(&item), _opr() {} LIRAddressOpr(LIR_Opr opr) : _item(NULL), _opr(opr) {} LIRAddressOpr(const LIRAddressOpr& other) : _item(other._item), _opr(other._opr) {} @@ -80,7 +80,7 @@ public: _base(base), _offset(offset), _type(type), - _resolved_addr(NULL), + _resolved_addr(), _patch_emit_info(patch_emit_info), _access_emit_info(access_emit_info) {} diff --git a/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp b/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp index c39d8ae0c96958d69bf6641240e2ad627344cd3f..b66e94836ed43bf661ccd6ff87425f5c2692d7a5 100644 --- a/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp +++ b/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp @@ -35,7 +35,7 @@ #define __ gen->lir()-> #endif -void CardTableBarrierSetC1::post_barrier(LIRAccess& access, LIR_OprDesc* addr, LIR_OprDesc* new_val) { +void CardTableBarrierSetC1::post_barrier(LIRAccess& access, LIR_Opr addr, LIR_Opr new_val) { DecoratorSet decorators = access.decorators(); LIRGenerator* gen = access.gen(); bool in_heap = (decorators & IN_HEAP) != 0; diff --git a/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.hpp b/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.hpp index e213ead0ab67a548d980e7fb0a433a98c8c8a6e1..2b1629575b34e8c2959e53be9b1d28610c300a70 100644 --- a/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.hpp +++ b/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.hpp @@ -29,7 +29,7 @@ class CardTableBarrierSetC1 : public ModRefBarrierSetC1 { protected: - virtual void post_barrier(LIRAccess& access, LIR_OprDesc* addr, LIR_OprDesc* new_val); + virtual void post_barrier(LIRAccess& access, LIR_Opr addr, LIR_Opr new_val); }; #endif // SHARE_GC_SHARED_C1_CARDTABLEBARRIERSETC1_HPP diff --git a/src/hotspot/share/gc/shared/c1/modRefBarrierSetC1.hpp b/src/hotspot/share/gc/shared/c1/modRefBarrierSetC1.hpp index fb65cca83ecb668dc00cdd40bbb2b4f0521fb34a..2bd547c41b129c1996c5d5495a773368ba2a52d3 100644 --- a/src/hotspot/share/gc/shared/c1/modRefBarrierSetC1.hpp +++ b/src/hotspot/share/gc/shared/c1/modRefBarrierSetC1.hpp @@ -35,8 +35,8 @@ class ModRefBarrierSetC1 : public BarrierSetC1 { protected: virtual void pre_barrier(LIRAccess& access, LIR_Opr addr_opr, LIR_Opr pre_val, CodeEmitInfo* info) {} - virtual void post_barrier(LIRAccess& access, LIR_OprDesc* addr, - LIR_OprDesc* new_val) {} + virtual void post_barrier(LIRAccess& access, LIR_Opr addr, + LIR_Opr new_val) {} virtual LIR_Opr resolve_address(LIRAccess& access, bool resolve_in_register); diff --git a/src/hotspot/share/gc/shared/collectedHeap.hpp b/src/hotspot/share/gc/shared/collectedHeap.hpp index efac5e76c86ac617af01a2382d4fd04cbb3be249..4418a9d31ef5a58fabbb7e5495a94caf176f91a0 100644 --- a/src/hotspot/share/gc/shared/collectedHeap.hpp +++ b/src/hotspot/share/gc/shared/collectedHeap.hpp @@ -461,14 +461,15 @@ 
class CollectedHeap : public CHeapObj { // this collector. The default implementation returns false. virtual bool supports_concurrent_gc_breakpoints() const; - // Provides a thread pool to SafepointSynchronize to use - // for parallel safepoint cleanup. - // GCs that use a GC worker thread pool may want to share - // it for use during safepoint cleanup. This is only possible - // if the GC can pause and resume concurrent work (e.g. G1 - // concurrent marking) for an intermittent non-GC safepoint. - // If this method returns NULL, SafepointSynchronize will - // perform cleanup tasks serially in the VMThread. + // Workers used in non-GC safepoints for parallel safepoint cleanup. If this + // method returns NULL, cleanup tasks are done serially in the VMThread. See + // `SafepointSynchronize::do_cleanup_tasks` for details. + // GCs using a GC worker thread pool inside GC safepoints may opt to share + // that pool with non-GC safepoints, avoiding creating extraneous threads. + // Such sharing is safe, because GC safepoints and non-GC safepoints never + // overlap. For example, `G1CollectedHeap::workers()` (for GC safepoints) and + // `G1CollectedHeap::safepoint_workers()` (for non-GC safepoints) return the + // same thread-pool. virtual WorkerThreads* safepoint_workers() { return NULL; } // Support for object pinning. This is used by JNI Get*Critical() diff --git a/src/hotspot/share/gc/shared/concurrentGCThread.cpp b/src/hotspot/share/gc/shared/concurrentGCThread.cpp index 4988f505ded9431917469144a6e244247395c1d5..04cf571749fe4cceab19f2edd2eab59180237244 100644 --- a/src/hotspot/share/gc/shared/concurrentGCThread.cpp +++ b/src/hotspot/share/gc/shared/concurrentGCThread.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -42,9 +42,6 @@ void ConcurrentGCThread::create_and_start(ThreadPriority prio) { } void ConcurrentGCThread::run() { - // Setup handle area - set_active_handles(JNIHandleBlock::allocate_block()); - // Wait for initialization to complete wait_init_completed(); diff --git a/src/hotspot/share/gc/shared/gcTimer.cpp b/src/hotspot/share/gc/shared/gcTimer.cpp index d7669604d82944898b578ca24c5f85ca7d03de8a..00a712c6a9767f8d9001867e7ab1e0f9138f2809 100644 --- a/src/hotspot/share/gc/shared/gcTimer.cpp +++ b/src/hotspot/share/gc/shared/gcTimer.cpp @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "gc/shared/gcTimer.hpp" +#include "gc/shared/gc_globals.hpp" #include "utilities/growableArray.hpp" // the "time" parameter for most functions @@ -130,7 +131,7 @@ void TimePartitions::clear() { } void TimePartitions::report_gc_phase_start(const char* name, const Ticks& time, GCPhase::PhaseType type) { - assert(_phases->length() <= 1000, "Too many recorded phases? (count: %d)", _phases->length()); + assert(UseZGC || _phases->length() <= 1000, "Too many recorded phases? 
(count: %d)", _phases->length()); int level = _active_phases.count(); diff --git a/src/hotspot/share/gc/shared/referenceProcessor.cpp b/src/hotspot/share/gc/shared/referenceProcessor.cpp index b1b411e411999b8dd57d9199cec17722ceb398bf..e5fbc4df3ea3e067e1c174969d7e0a0a913b1d7c 100644 --- a/src/hotspot/share/gc/shared/referenceProcessor.cpp +++ b/src/hotspot/share/gc/shared/referenceProcessor.cpp @@ -87,7 +87,6 @@ void ReferenceProcessor::enable_discovery(bool check_no_refs) { ReferenceProcessor::ReferenceProcessor(BoolObjectClosure* is_subject_to_discovery, uint mt_processing_degree, - bool mt_discovery, uint mt_discovery_degree, bool concurrent_discovery, BoolObjectClosure* is_alive_non_header) : @@ -99,7 +98,7 @@ ReferenceProcessor::ReferenceProcessor(BoolObjectClosure* is_subject_to_discover assert(is_subject_to_discovery != NULL, "must be set"); _discovery_is_concurrent = concurrent_discovery; - _discovery_is_mt = mt_discovery; + _discovery_is_mt = (mt_discovery_degree > 1); _num_queues = MAX2(1U, mt_processing_degree); _max_num_queues = MAX2(_num_queues, mt_discovery_degree); _discovered_refs = NEW_C_HEAP_ARRAY(DiscoveredList, diff --git a/src/hotspot/share/gc/shared/referenceProcessor.hpp b/src/hotspot/share/gc/shared/referenceProcessor.hpp index 2ab68ab29fec4ffce8a9a779fbfc11e54f5fae1a..120a7afe2cf160b009c6e8eef7158a192c976922 100644 --- a/src/hotspot/share/gc/shared/referenceProcessor.hpp +++ b/src/hotspot/share/gc/shared/referenceProcessor.hpp @@ -374,7 +374,7 @@ public: // Default parameters give you a vanilla reference processor. ReferenceProcessor(BoolObjectClosure* is_subject_to_discovery, uint mt_processing_degree = 1, - bool mt_discovery = false, uint mt_discovery_degree = 1, + uint mt_discovery_degree = 1, bool concurrent_discovery = false, BoolObjectClosure* is_alive_non_header = NULL); diff --git a/src/hotspot/share/gc/shared/space.cpp b/src/hotspot/share/gc/shared/space.cpp index 1638d994e407e4b5a02c2774c30368df3cd4815c..560225f88b933a82d784ff5489c7609c6827ff9e 100644 --- a/src/hotspot/share/gc/shared/space.cpp +++ b/src/hotspot/share/gc/shared/space.cpp @@ -376,7 +376,7 @@ HeapWord* CompactibleSpace::forward(oop q, size_t size, // if the object isn't moving we can just set the mark to the default // mark and handle it specially later on. q->init_mark(); - assert(q->forwardee() == NULL, "should be forwarded to NULL"); + assert(!q->is_forwarded(), "should not be forwarded"); } compact_top += size; @@ -536,7 +536,7 @@ void CompactibleSpace::compact() { debug_only(HeapWord* prev_obj = NULL); while (cur_obj < end_of_live) { - if (!cast_to_oop(cur_obj)->is_gc_marked()) { + if (!cast_to_oop(cur_obj)->is_forwarded()) { debug_only(prev_obj = cur_obj); // The first word of the dead object contains a pointer to the next live object or end of space. 
cur_obj = *(HeapWord**)cur_obj; diff --git a/src/hotspot/share/gc/shared/taskqueue.hpp b/src/hotspot/share/gc/shared/taskqueue.hpp index 325ab3529175f5404af98d5888c6b6985aef3490..e2c21f1dc6deb812f26f6019a4ae142133e31e63 100644 --- a/src/hotspot/share/gc/shared/taskqueue.hpp +++ b/src/hotspot/share/gc/shared/taskqueue.hpp @@ -483,6 +483,7 @@ GenericTaskQueueSet::register_queue(uint i, T* q) { template T* GenericTaskQueueSet::queue(uint i) { + assert(i < _n, "index out of range."); return _queues[i]; } diff --git a/src/hotspot/share/gc/shared/taskqueue.inline.hpp b/src/hotspot/share/gc/shared/taskqueue.inline.hpp index b6e8e7a4095c965a13caf9a0508b3fa3a8e96ac0..c0ea64ba165b9f2794541e4d318e387f5896bd7e 100644 --- a/src/hotspot/share/gc/shared/taskqueue.inline.hpp +++ b/src/hotspot/share/gc/shared/taskqueue.inline.hpp @@ -320,6 +320,7 @@ GenericTaskQueueSet::steal_best_of_2(uint queue_num, E& t) { template bool GenericTaskQueueSet::steal(uint queue_num, E& t) { + assert(queue_num < _n, "index out of range."); for (uint i = 0; i < 2 * _n; i++) { TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal_attempt()); if (steal_best_of_2(queue_num, t)) { diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp index 624f004e3ccd35ec771f9237e9cb9dee57abe033..c7e0c9b0cd9d707baf76c4ef55cfbc0d7edb3450 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp @@ -35,7 +35,7 @@ #include "utilities/defaultStream.hpp" void ShenandoahArguments::initialize() { -#if !(defined AARCH64 || defined AMD64 || defined IA32) +#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined PPC64) vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); #endif diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCodeRoots.cpp b/src/hotspot/share/gc/shenandoah/shenandoahCodeRoots.cpp index 197eeec58565fa3c61c41d1f6eafe5ae9c07b0c0..4d97c7c0e7ef6d7fc7fda4e88df8c3654563c911 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahCodeRoots.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahCodeRoots.cpp @@ -111,7 +111,7 @@ void ShenandoahCodeRoots::initialize() { } void ShenandoahCodeRoots::register_nmethod(nmethod* nm) { - assert_locked_or_safepoint(CodeCache_lock); + assert(CodeCache_lock->owned_by_self(), "Must have CodeCache_lock held"); _nmethod_table->register_nmethod(nm); } @@ -121,7 +121,7 @@ void ShenandoahCodeRoots::unregister_nmethod(nmethod* nm) { } void ShenandoahCodeRoots::flush_nmethod(nmethod* nm) { - assert_locked_or_safepoint(CodeCache_lock); + assert(CodeCache_lock->owned_by_self(), "Must have CodeCache_lock held"); _nmethod_table->flush_nmethod(nm); } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahNMethod.cpp b/src/hotspot/share/gc/shenandoah/shenandoahNMethod.cpp index 755e29ce57e53215930224c1d47b53cdf14b629e..e0f5b9ee71ba4ab08fc87627b8ec644d307d6f80 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahNMethod.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahNMethod.cpp @@ -271,13 +271,17 @@ void ShenandoahNMethodTable::register_nmethod(nmethod* nm) { assert(_index >= 0 && _index <= _list->size(), "Sanity"); ShenandoahNMethod* data = ShenandoahNMethod::gc_data(nm); - ShenandoahReentrantLocker data_locker(data != NULL ? 
data->lock() : NULL); if (data != NULL) { assert(contain(nm), "Must have been registered"); assert(nm == data->nm(), "Must be same nmethod"); + // Prevent updating a nmethod while concurrent iteration is in progress. + wait_until_concurrent_iteration_done(); + ShenandoahReentrantLocker data_locker(data->lock()); data->update(); } else { + // For a new nmethod, we can safely append it to the list, because + // concurrent iteration will not touch it. data = ShenandoahNMethod::for_nmethod(nm); assert(data != NULL, "Sanity"); ShenandoahNMethod::attach_gc_data(nm, data); @@ -382,11 +386,13 @@ void ShenandoahNMethodTable::rebuild(int size) { } ShenandoahNMethodTableSnapshot* ShenandoahNMethodTable::snapshot_for_iteration() { + assert(CodeCache_lock->owned_by_self(), "Must have CodeCache_lock held"); _itr_cnt++; return new ShenandoahNMethodTableSnapshot(this); } void ShenandoahNMethodTable::finish_iteration(ShenandoahNMethodTableSnapshot* snapshot) { + assert(CodeCache_lock->owned_by_self(), "Must have CodeCache_lock held"); assert(iteration_in_progress(), "Why we here?"); assert(snapshot != NULL, "No snapshot"); _itr_cnt--; diff --git a/src/hotspot/share/gc/z/zCollectedHeap.cpp b/src/hotspot/share/gc/z/zCollectedHeap.cpp index e07ec3e80c775a43db7bcc9398d396b6604300db..1d099122315750b6c563f7f237cadfee02f07c2d 100644 --- a/src/hotspot/share/gc/z/zCollectedHeap.cpp +++ b/src/hotspot/share/gc/z/zCollectedHeap.cpp @@ -38,6 +38,7 @@ #include "gc/z/zUtils.inline.hpp" #include "memory/classLoaderMetaspace.hpp" #include "memory/iterator.hpp" +#include "memory/metaspaceCriticalAllocation.hpp" #include "memory/universe.hpp" #include "utilities/align.hpp" @@ -153,34 +154,17 @@ HeapWord* ZCollectedHeap::mem_allocate(size_t size, bool* gc_overhead_limit_was_ MetaWord* ZCollectedHeap::satisfy_failed_metadata_allocation(ClassLoaderData* loader_data, size_t size, Metaspace::MetadataType mdtype) { - MetaWord* result; - // Start asynchronous GC collect(GCCause::_metadata_GC_threshold); // Expand and retry allocation - result = loader_data->metaspace_non_null()->expand_and_allocate(size, mdtype); - if (result != NULL) { - return result; - } - - // Start synchronous GC - collect(GCCause::_metadata_GC_clear_soft_refs); - - // Retry allocation - result = loader_data->metaspace_non_null()->allocate(size, mdtype); - if (result != NULL) { - return result; - } - - // Expand and retry allocation - result = loader_data->metaspace_non_null()->expand_and_allocate(size, mdtype); + MetaWord* const result = loader_data->metaspace_non_null()->expand_and_allocate(size, mdtype); if (result != NULL) { return result; } - // Out of memory - return NULL; + // As a last resort, try a critical allocation, riding on a synchronous full GC + return MetaspaceCriticalAllocation::allocate(loader_data, size, mdtype); } void ZCollectedHeap::collect(GCCause::Cause cause) { diff --git a/src/hotspot/share/include/jmm.h b/src/hotspot/share/include/jmm.h index d7788e7a4e841adff698d8701178e38075f9fa70..ee1c77e504a424252d4414be3d4cc075e946caf3 100644 --- a/src/hotspot/share/include/jmm.h +++ b/src/hotspot/share/include/jmm.h @@ -333,7 +333,8 @@ typedef struct jmmInterface_1_ { void (JNICALL *GetDiagnosticCommandArgumentsInfo) (JNIEnv *env, jstring commandName, - dcmdArgInfo *infoArray); + dcmdArgInfo *infoArray, + jint count); jstring (JNICALL *ExecuteDiagnosticCommand) (JNIEnv *env, jstring command); diff --git a/src/hotspot/share/interpreter/zero/bytecodeInterpreter.cpp b/src/hotspot/share/interpreter/zero/bytecodeInterpreter.cpp index 
d8fac577d5927625c30f71f48a76b39c4dde929a..f1bfa3d190d5a8941da7a90a767f350df24243b2 100644 --- a/src/hotspot/share/interpreter/zero/bytecodeInterpreter.cpp +++ b/src/hotspot/share/interpreter/zero/bytecodeInterpreter.cpp @@ -293,6 +293,8 @@ istate->set_bcp(pc+opsize); \ return; +#define REWRITE_AT_PC(val) \ + *pc = val; #define METHOD istate->method() #define GET_METHOD_COUNTERS(res) @@ -389,6 +391,81 @@ if (THREAD->has_pending_exception()) goto label; \ } +#define MAYBE_POST_FIELD_ACCESS(obj) { \ + if (JVMTI_ENABLED) { \ + int* count_addr; \ + /* Check to see if a field modification watch has been set */ \ + /* before we take the time to call into the VM. */ \ + count_addr = (int*)JvmtiExport::get_field_access_count_addr(); \ + if (*count_addr > 0) { \ + oop target; \ + if ((Bytecodes::Code)opcode == Bytecodes::_getstatic) { \ + target = NULL; \ + } else { \ + target = obj; \ + } \ + CALL_VM(InterpreterRuntime::post_field_access(THREAD, \ + target, cache), \ + handle_exception); \ + } \ + } \ +} + +#define MAYBE_POST_FIELD_MODIFICATION(obj) { \ + if (JVMTI_ENABLED) { \ + int* count_addr; \ + /* Check to see if a field modification watch has been set */ \ + /* before we take the time to call into the VM. */ \ + count_addr = (int*)JvmtiExport::get_field_modification_count_addr(); \ + if (*count_addr > 0) { \ + oop target; \ + if ((Bytecodes::Code)opcode == Bytecodes::_putstatic) { \ + target = NULL; \ + } else { \ + target = obj; \ + } \ + CALL_VM(InterpreterRuntime::post_field_modification(THREAD, \ + target, cache, \ + (jvalue*)STACK_SLOT(-1)), \ + handle_exception); \ + } \ + } \ +} + +static inline int fast_get_type(TosState tos) { + switch (tos) { + case ztos: + case btos: return Bytecodes::_fast_bgetfield; + case ctos: return Bytecodes::_fast_cgetfield; + case stos: return Bytecodes::_fast_sgetfield; + case itos: return Bytecodes::_fast_igetfield; + case ltos: return Bytecodes::_fast_lgetfield; + case ftos: return Bytecodes::_fast_fgetfield; + case dtos: return Bytecodes::_fast_dgetfield; + case atos: return Bytecodes::_fast_agetfield; + default: + ShouldNotReachHere(); + return -1; + } +} + +static inline int fast_put_type(TosState tos) { + switch (tos) { + case ztos: return Bytecodes::_fast_zputfield; + case btos: return Bytecodes::_fast_bputfield; + case ctos: return Bytecodes::_fast_cputfield; + case stos: return Bytecodes::_fast_sputfield; + case itos: return Bytecodes::_fast_iputfield; + case ltos: return Bytecodes::_fast_lputfield; + case ftos: return Bytecodes::_fast_fputfield; + case dtos: return Bytecodes::_fast_dputfield; + case atos: return Bytecodes::_fast_aputfield; + default: + ShouldNotReachHere(); + return -1; + } +} + /* * BytecodeInterpreter::run(interpreterState istate) * @@ -397,11 +474,13 @@ * the method passed in. */ -// Instantiate two variants of the method for future linking. -template void BytecodeInterpreter::run(interpreterState istate); -template void BytecodeInterpreter::run(interpreterState istate); +// Instantiate variants of the method for future linking. 
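run() now takes a second bool template parameter so that bytecode rewriting support can be compiled in or out, and all four combinations are instantiated explicitly, as listed just below. A toy rendition of that compile-time dispatch pattern (hypothetical names, not the HotSpot signatures):

    #include <cstdio>

    // Compile-time flags let the compiler drop dead branches entirely,
    // the same idea as run<jvmti_enabled, rewrite_bytecodes>(istate).
    template <bool JVMTI_ENABLED, bool REWRITE_BYTECODES>
    void run_once(int bytecode) {
      if (REWRITE_BYTECODES) {
        std::printf("would rewrite bytecode %d to a _fast_ variant\n", bytecode);
      }
      if (JVMTI_ENABLED) {
        std::printf("would post JVMTI field events for %d\n", bytecode);
      }
    }

    // Instantiate all four variants up front, as the patch does
    // "for future linking".
    template void run_once<false, false>(int);
    template void run_once<false, true>(int);
    template void run_once<true,  false>(int);
    template void run_once<true,  true>(int);

    int main() {
      run_once<false, true>(0x15);  // e.g. iload
      return 0;
    }
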
+template void BytecodeInterpreter::run(interpreterState istate); +template void BytecodeInterpreter::run(interpreterState istate); +template void BytecodeInterpreter::run< true, false>(interpreterState istate); +template void BytecodeInterpreter::run< true, true>(interpreterState istate); -template +template void BytecodeInterpreter::run(interpreterState istate) { intptr_t* topOfStack = (intptr_t *)istate->stack(); /* access with STACK macros */ address pc = istate->bcp(); @@ -497,15 +576,15 @@ void BytecodeInterpreter::run(interpreterState istate) { /* 0xC0 */ &&opc_checkcast, &&opc_instanceof, &&opc_monitorenter, &&opc_monitorexit, /* 0xC4 */ &&opc_wide, &&opc_multianewarray, &&opc_ifnull, &&opc_ifnonnull, -/* 0xC8 */ &&opc_goto_w, &&opc_jsr_w, &&opc_breakpoint, &&opc_default, -/* 0xCC */ &&opc_default, &&opc_default, &&opc_default, &&opc_default, +/* 0xC8 */ &&opc_goto_w, &&opc_jsr_w, &&opc_breakpoint, &&opc_fast_agetfield, +/* 0xCC */ &&opc_fast_bgetfield,&&opc_fast_cgetfield, &&opc_fast_dgetfield, &&opc_fast_fgetfield, -/* 0xD0 */ &&opc_default, &&opc_default, &&opc_default, &&opc_default, -/* 0xD4 */ &&opc_default, &&opc_default, &&opc_default, &&opc_default, -/* 0xD8 */ &&opc_default, &&opc_default, &&opc_default, &&opc_default, -/* 0xDC */ &&opc_default, &&opc_default, &&opc_default, &&opc_default, +/* 0xD0 */ &&opc_fast_igetfield,&&opc_fast_lgetfield, &&opc_fast_sgetfield, &&opc_fast_aputfield, +/* 0xD4 */ &&opc_fast_bputfield,&&opc_fast_zputfield, &&opc_fast_cputfield, &&opc_fast_dputfield, +/* 0xD8 */ &&opc_fast_fputfield,&&opc_fast_iputfield, &&opc_fast_lputfield, &&opc_fast_sputfield, +/* 0xDC */ &&opc_fast_aload_0, &&opc_fast_iaccess_0, &&opc_fast_aaccess_0, &&opc_fast_faccess_0, -/* 0xE0 */ &&opc_default, &&opc_default, &&opc_default, &&opc_default, +/* 0xE0 */ &&opc_fast_iload, &&opc_fast_iload2, &&opc_fast_icaload, &&opc_fast_invokevfinal, /* 0xE4 */ &&opc_default, &&opc_default, &&opc_fast_aldc, &&opc_fast_aldc_w, /* 0xE8 */ &&opc_return_register_finalizer, &&opc_invokehandle, &&opc_default, &&opc_default, @@ -747,10 +826,41 @@ run: UPDATE_PC_AND_TOS_AND_CONTINUE(2, 1); CASE(_iload): + { + if (REWRITE_BYTECODES) { + // Attempt to rewrite iload, iload -> fast_iload2 + // iload, caload -> fast_icaload + // Normal iloads will be rewritten to fast_iload to avoid checking again. + switch (*(pc + 2)) { + case Bytecodes::_fast_iload: + REWRITE_AT_PC(Bytecodes::_fast_iload2); + break; + case Bytecodes::_caload: + REWRITE_AT_PC(Bytecodes::_fast_icaload); + break; + case Bytecodes::_iload: + // Wait until rewritten to _fast_iload. + break; + default: + // Last iload in a (potential) series, don't check again. + REWRITE_AT_PC(Bytecodes::_fast_iload); + } + } + // Normal iload handling. 
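Beyond single-opcode quickening, the iload case just above fuses adjacent bytecode pairs into superinstructions: iload,iload becomes _fast_iload2 and iload,caload becomes _fast_icaload, while the last iload in a run degenerates to plain _fast_iload. A compact model of that peephole decision (toy opcode values; the real ones come from the dispatch table):

    #include <cassert>
    #include <cstdint>

    enum : uint8_t {
      ILOAD = 0x15, CALOAD = 0x34,
      FAST_ILOAD = 0xE0, FAST_ILOAD2 = 0xE1, FAST_ICALOAD = 0xE2,
    };

    // Look past this iload's operand byte at the next opcode and pick a
    // fused form; an iload that is itself not yet rewritten is left alone
    // until it quickens, exactly as the interpreter code waits.
    static void maybe_fuse_iload(uint8_t* pc) {
      switch (pc[2]) {
        case FAST_ILOAD: *pc = FAST_ILOAD2;  break;  // iload, iload pair
        case CALOAD:     *pc = FAST_ICALOAD; break;  // iload, caload pair
        case ILOAD:      /* wait for the next one to quicken first */ break;
        default:         *pc = FAST_ILOAD;   break;  // last load in a run
      }
    }

    int main() {
      uint8_t a[] = { ILOAD, 0, CALOAD, 0 };
      maybe_fuse_iload(a);
      assert(a[0] == FAST_ICALOAD);

      uint8_t b[] = { ILOAD, 1, FAST_ILOAD, 2 };
      maybe_fuse_iload(b);
      assert(b[0] == FAST_ILOAD2);
      return 0;
    }
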
+ SET_STACK_SLOT(LOCALS_SLOT(pc[1]), 0); + UPDATE_PC_AND_TOS_AND_CONTINUE(2, 1); + } + + CASE(_fast_iload): CASE(_fload): SET_STACK_SLOT(LOCALS_SLOT(pc[1]), 0); UPDATE_PC_AND_TOS_AND_CONTINUE(2, 1); + CASE(_fast_iload2): + SET_STACK_SLOT(LOCALS_SLOT(pc[1]), 0); + SET_STACK_SLOT(LOCALS_SLOT(pc[3]), 1); + UPDATE_PC_AND_TOS_AND_CONTINUE(4, 2); + CASE(_lload): SET_STACK_LONG_FROM_ADDR(LOCALS_LONG_AT(pc[1]), 1); UPDATE_PC_AND_TOS_AND_CONTINUE(2, 2); @@ -761,11 +871,6 @@ run: #undef OPC_LOAD_n #define OPC_LOAD_n(num) \ - CASE(_aload_##num): \ - VERIFY_OOP(LOCALS_OBJECT(num)); \ - SET_STACK_OBJECT(LOCALS_OBJECT(num), 0); \ - UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1); \ - \ CASE(_iload_##num): \ CASE(_fload_##num): \ SET_STACK_SLOT(LOCALS_SLOT(num), 0); \ @@ -778,10 +883,53 @@ run: SET_STACK_DOUBLE_FROM_ADDR(LOCALS_DOUBLE_AT(num), 1); \ UPDATE_PC_AND_TOS_AND_CONTINUE(1, 2); - OPC_LOAD_n(0); - OPC_LOAD_n(1); - OPC_LOAD_n(2); - OPC_LOAD_n(3); + OPC_LOAD_n(0); + OPC_LOAD_n(1); + OPC_LOAD_n(2); + OPC_LOAD_n(3); + +#undef OPC_ALOAD_n +#define OPC_ALOAD_n(num) \ + CASE(_aload_##num): { \ + oop obj = LOCALS_OBJECT(num); \ + VERIFY_OOP(obj); \ + SET_STACK_OBJECT(obj, 0); \ + UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1); \ + } + + CASE(_aload_0): + { + /* Maybe rewrite if following bytecode is one of the supported _fast_Xgetfield bytecodes. */ + if (REWRITE_BYTECODES) { + switch (*(pc + 1)) { + case Bytecodes::_fast_agetfield: + REWRITE_AT_PC(Bytecodes::_fast_aaccess_0); + break; + case Bytecodes::_fast_fgetfield: + REWRITE_AT_PC(Bytecodes::_fast_faccess_0); + break; + case Bytecodes::_fast_igetfield: + REWRITE_AT_PC(Bytecodes::_fast_iaccess_0); + break; + case Bytecodes::_getfield: { + /* Otherwise, do nothing here, wait until it gets rewritten to _fast_Xgetfield. + * Unfortunately, this punishes volatile field access, because it never gets + * rewritten. */ + break; + } + default: + REWRITE_AT_PC(Bytecodes::_fast_aload_0); + break; + } + } + VERIFY_OOP(LOCALS_OBJECT(0)); + SET_STACK_OBJECT(LOCALS_OBJECT(0), 0); + UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1); + } + + OPC_ALOAD_n(1); + OPC_ALOAD_n(2); + OPC_ALOAD_n(3); /* store to a local variable */ @@ -1313,11 +1461,7 @@ run: /* Array access byte-codes */ - /* Every array access byte-code starts out like this */ -// arrayOopDesc* arrObj = (arrayOopDesc*)STACK_OBJECT(arrayOff); -#define ARRAY_INTRO(arrayOff) \ - arrayOop arrObj = (arrayOop)STACK_OBJECT(arrayOff); \ - jint index = STACK_INT(arrayOff + 1); \ +#define ARRAY_INDEX_CHECK(arrObj, index) \ /* Two integers, the additional message, and the null-terminator */ \ char message[2 * jintAsStringSize + 33]; \ CHECK_NULL(arrObj); \ @@ -1329,6 +1473,13 @@ run: message); \ } + /* Every array access byte-code starts out like this */ +// arrayOopDesc* arrObj = (arrayOopDesc*)STACK_OBJECT(arrayOff); +#define ARRAY_INTRO(arrayOff) \ + arrayOop arrObj = (arrayOop)STACK_OBJECT(arrayOff); \ + jint index = STACK_INT(arrayOff + 1); \ + ARRAY_INDEX_CHECK(arrObj, index) + /* 32-bit loads. These handle conversion from < 32-bit types */ #define ARRAY_LOADTO32(T, T2, format, stackRes, extra) \ { \ @@ -1368,6 +1519,15 @@ run: CASE(_daload): ARRAY_LOADTO64(T_DOUBLE, jdouble, STACK_DOUBLE, 0); + CASE(_fast_icaload): { + // Custom fast access for iload,caload pair. + arrayOop arrObj = (arrayOop) STACK_OBJECT(-1); + jint index = LOCALS_INT(pc[1]); + ARRAY_INDEX_CHECK(arrObj, index); + SET_STACK_INT(*(jchar *)(((address) arrObj->base(T_CHAR)) + index * sizeof(jchar)), -1); + UPDATE_PC_AND_TOS_AND_CONTINUE(3, 0); + } + /* 32-bit stores. 
These handle conversion to < 32-bit types */ #define ARRAY_STOREFROM32(T, T2, format, stackSrc, extra) \ { \ @@ -1537,26 +1697,6 @@ run: cache = cp->entry_at(index); } - if (JVMTI_ENABLED) { - int *count_addr; - oop obj; - // Check to see if a field modification watch has been set - // before we take the time to call into the VM. - count_addr = (int *)JvmtiExport::get_field_access_count_addr(); - if ( *count_addr > 0 ) { - if ((Bytecodes::Code)opcode == Bytecodes::_getstatic) { - obj = NULL; - } else { - obj = STACK_OBJECT(-1); - VERIFY_OOP(obj); - } - CALL_VM(InterpreterRuntime::post_field_access(THREAD, - obj, - cache), - handle_exception); - } - } - oop obj; if ((Bytecodes::Code)opcode == Bytecodes::_getstatic) { Klass* k = cache->f1_as_klass(); @@ -1565,8 +1705,15 @@ run: } else { obj = STACK_OBJECT(-1); CHECK_NULL(obj); + // Check if we can rewrite non-volatile _getfield to one of the _fast_Xgetfield. + if (REWRITE_BYTECODES && !cache->is_volatile()) { + // Rewrite current BC to _fast_Xgetfield. + REWRITE_AT_PC(fast_get_type(cache->flag_state())); + } } + MAYBE_POST_FIELD_ACCESS(obj); + // // Now store the result on the stack // @@ -1661,33 +1808,6 @@ run: cache = cp->entry_at(index); } - if (JVMTI_ENABLED) { - int *count_addr; - oop obj; - // Check to see if a field modification watch has been set - // before we take the time to call into the VM. - count_addr = (int *)JvmtiExport::get_field_modification_count_addr(); - if ( *count_addr > 0 ) { - if ((Bytecodes::Code)opcode == Bytecodes::_putstatic) { - obj = NULL; - } - else { - if (cache->is_long() || cache->is_double()) { - obj = STACK_OBJECT(-3); - } else { - obj = STACK_OBJECT(-2); - } - VERIFY_OOP(obj); - } - - CALL_VM(InterpreterRuntime::post_field_modification(THREAD, - obj, - cache, - (jvalue *)STACK_SLOT(-1)), - handle_exception); - } - } - // QQQ Need to make this as inlined as possible. Probably need to split all the bytecode cases // out so c++ compiler has a chance for constant prop to fold everything possible away. @@ -1706,8 +1826,16 @@ run: --count; obj = STACK_OBJECT(count); CHECK_NULL(obj); + + // Check if we can rewrite non-volatile _putfield to one of the _fast_Xputfield. + if (REWRITE_BYTECODES && !cache->is_volatile()) { + // Rewrite current BC to _fast_Xputfield. + REWRITE_AT_PC(fast_put_type(cache->flag_state())); + } } + MAYBE_POST_FIELD_MODIFICATION(obj); + // // Now store the result // @@ -2266,6 +2394,10 @@ run: CHECK_NULL(STACK_OBJECT(-(cache->parameter_size()))); if (cache->is_vfinal()) { callee = cache->f2_as_vfinal_method(); + if (REWRITE_BYTECODES) { + // Rewrite to _fast_invokevfinal. 
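The same rewriting machinery quickens field accesses and final virtual calls: the first execution resolves through the constant pool cache and, for a non-volatile field, overwrites the opcode with the matching _fast_ variant from fast_get_type()/fast_put_type(). Reduced to its essence (illustrative opcode numbers, not the real encoding):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Toy opcodes: a slow, resolving form and its quickened twin.
    enum Opcode : uint8_t { OP_GETFIELD = 0xB4, OP_FAST_IGETFIELD = 0xD0 };

    // First execution resolves the field through the constant pool cache,
    // then overwrites the opcode byte in place (the REWRITE_AT_PC idea),
    // so every later pass over this pc skips resolution entirely.
    static void execute(uint8_t* pc, bool rewrite_bytecodes) {
      if (*pc == OP_GETFIELD) {
        // ... resolve the field, fill the cache entry ...
        if (rewrite_bytecodes) {
          *pc = OP_FAST_IGETFIELD;  // quicken; volatile fields are skipped
        }
      }
      // *pc == OP_FAST_IGETFIELD: use the cached offset directly
    }

    int main() {
      uint8_t code[] = { OP_GETFIELD, 0x00, 0x01 };
      execute(code, /*rewrite_bytecodes=*/true);
      assert(code[0] == OP_FAST_IGETFIELD);
      std::printf("quickened to 0x%02X\n", (unsigned)code[0]);
      return 0;
    }
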
+ REWRITE_AT_PC(Bytecodes::_fast_invokevfinal); + } } else { // get receiver int parms = cache->parameter_size(); @@ -2400,6 +2532,329 @@ run: goto opcode_switch; } + CASE(_fast_agetfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + int field_offset = cache->f2_as_index(); + + oop obj = STACK_OBJECT(-1); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_ACCESS(obj); + + VERIFY_OOP(obj->obj_field(field_offset)); + SET_STACK_OBJECT(obj->obj_field(field_offset), -1); + UPDATE_PC_AND_CONTINUE(3); + } + + CASE(_fast_bgetfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + int field_offset = cache->f2_as_index(); + + oop obj = STACK_OBJECT(-1); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_ACCESS(obj); + + SET_STACK_INT(obj->byte_field(field_offset), -1); + UPDATE_PC_AND_CONTINUE(3); + } + + CASE(_fast_cgetfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + int field_offset = cache->f2_as_index(); + + oop obj = STACK_OBJECT(-1); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_ACCESS(obj); + + SET_STACK_INT(obj->char_field(field_offset), -1); + UPDATE_PC_AND_CONTINUE(3); + } + + CASE(_fast_dgetfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + int field_offset = cache->f2_as_index(); + + oop obj = STACK_OBJECT(-1); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_ACCESS(obj); + + SET_STACK_DOUBLE(obj->double_field(field_offset), 0); + MORE_STACK(1); + UPDATE_PC_AND_CONTINUE(3); + } + + CASE(_fast_fgetfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + int field_offset = cache->f2_as_index(); + + oop obj = STACK_OBJECT(-1); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_ACCESS(obj); + + SET_STACK_FLOAT(obj->float_field(field_offset), -1); + UPDATE_PC_AND_CONTINUE(3); + } + + CASE(_fast_igetfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + int field_offset = cache->f2_as_index(); + + oop obj = STACK_OBJECT(-1); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_ACCESS(obj); + + SET_STACK_INT(obj->int_field(field_offset), -1); + UPDATE_PC_AND_CONTINUE(3); + } + + CASE(_fast_lgetfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + int field_offset = cache->f2_as_index(); + + oop obj = STACK_OBJECT(-1); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_ACCESS(obj); + + SET_STACK_LONG(obj->long_field(field_offset), 0); + MORE_STACK(1); + UPDATE_PC_AND_CONTINUE(3); + } + + CASE(_fast_sgetfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + int field_offset = cache->f2_as_index(); + + oop obj = STACK_OBJECT(-1); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_ACCESS(obj); + + SET_STACK_INT(obj->short_field(field_offset), -1); + UPDATE_PC_AND_CONTINUE(3); + } + + CASE(_fast_aputfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + + oop obj = STACK_OBJECT(-2); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_MODIFICATION(obj); + + int field_offset = cache->f2_as_index(); + obj->obj_field_put(field_offset, STACK_OBJECT(-1)); + + UPDATE_PC_AND_TOS_AND_CONTINUE(3, -2); + } + + CASE(_fast_bputfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + + oop obj = STACK_OBJECT(-2); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_MODIFICATION(obj); + + 
int field_offset = cache->f2_as_index(); + obj->byte_field_put(field_offset, STACK_INT(-1)); + + UPDATE_PC_AND_TOS_AND_CONTINUE(3, -2); + } + + CASE(_fast_zputfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + + oop obj = STACK_OBJECT(-2); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_MODIFICATION(obj); + + int field_offset = cache->f2_as_index(); + obj->byte_field_put(field_offset, (STACK_INT(-1) & 1)); // only store LSB + + UPDATE_PC_AND_TOS_AND_CONTINUE(3, -2); + } + + CASE(_fast_cputfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + + oop obj = STACK_OBJECT(-2); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_MODIFICATION(obj); + + int field_offset = cache->f2_as_index(); + obj->char_field_put(field_offset, STACK_INT(-1)); + + UPDATE_PC_AND_TOS_AND_CONTINUE(3, -2); + } + + CASE(_fast_dputfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + + oop obj = STACK_OBJECT(-3); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_MODIFICATION(obj); + + int field_offset = cache->f2_as_index(); + obj->double_field_put(field_offset, STACK_DOUBLE(-1)); + + UPDATE_PC_AND_TOS_AND_CONTINUE(3, -3); + } + + CASE(_fast_fputfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + + oop obj = STACK_OBJECT(-2); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_MODIFICATION(obj); + + int field_offset = cache->f2_as_index(); + obj->float_field_put(field_offset, STACK_FLOAT(-1)); + + UPDATE_PC_AND_TOS_AND_CONTINUE(3, -2); + } + + CASE(_fast_iputfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + + oop obj = STACK_OBJECT(-2); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_MODIFICATION(obj); + + int field_offset = cache->f2_as_index(); + obj->int_field_put(field_offset, STACK_INT(-1)); + + UPDATE_PC_AND_TOS_AND_CONTINUE(3, -2); + } + + CASE(_fast_lputfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + + oop obj = STACK_OBJECT(-3); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_MODIFICATION(obj); + + int field_offset = cache->f2_as_index(); + obj->long_field_put(field_offset, STACK_LONG(-1)); + + UPDATE_PC_AND_TOS_AND_CONTINUE(3, -3); + } + + CASE(_fast_sputfield): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + + oop obj = STACK_OBJECT(-2); + CHECK_NULL(obj); + + MAYBE_POST_FIELD_MODIFICATION(obj); + + int field_offset = cache->f2_as_index(); + obj->short_field_put(field_offset, STACK_INT(-1)); + + UPDATE_PC_AND_TOS_AND_CONTINUE(3, -2); + } + + CASE(_fast_aload_0): { + oop obj = LOCALS_OBJECT(0); + VERIFY_OOP(obj); + SET_STACK_OBJECT(obj, 0); + UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1); + } + + CASE(_fast_aaccess_0): { + u2 index = Bytes::get_native_u2(pc+2); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + int field_offset = cache->f2_as_index(); + + oop obj = LOCALS_OBJECT(0); + CHECK_NULL(obj); + VERIFY_OOP(obj); + + MAYBE_POST_FIELD_ACCESS(obj); + + VERIFY_OOP(obj->obj_field(field_offset)); + SET_STACK_OBJECT(obj->obj_field(field_offset), 0); + UPDATE_PC_AND_TOS_AND_CONTINUE(4, 1); + } + + CASE(_fast_faccess_0): { + u2 index = Bytes::get_native_u2(pc+2); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + int field_offset = cache->f2_as_index(); + + oop obj = LOCALS_OBJECT(0); + CHECK_NULL(obj); + VERIFY_OOP(obj); + + MAYBE_POST_FIELD_ACCESS(obj); + + 
SET_STACK_INT(obj->int_field(field_offset), 0); + UPDATE_PC_AND_TOS_AND_CONTINUE(4, 1); + } + + CASE(_fast_iaccess_0): { + u2 index = Bytes::get_native_u2(pc+2); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + int field_offset = cache->f2_as_index(); + + oop obj = LOCALS_OBJECT(0); + CHECK_NULL(obj); + VERIFY_OOP(obj); + + MAYBE_POST_FIELD_ACCESS(obj); + + SET_STACK_FLOAT(obj->float_field(field_offset), 0); + UPDATE_PC_AND_TOS_AND_CONTINUE(4, 1); + } + + CASE(_fast_invokevfinal): { + u2 index = Bytes::get_native_u2(pc+1); + ConstantPoolCacheEntry* cache = cp->entry_at(index); + + assert(cache->is_resolved(Bytecodes::_invokevirtual), "Should be resolved before rewriting"); + + istate->set_msg(call_method); + + CHECK_NULL(STACK_OBJECT(-(cache->parameter_size()))); + Method* callee = cache->f2_as_vfinal_method(); + istate->set_callee(callee); + if (JVMTI_ENABLED && THREAD->is_interp_only_mode()) { + istate->set_callee_entry_point(callee->interpreter_entry()); + } else { + istate->set_callee_entry_point(callee->from_interpreted_entry()); + } + istate->set_bcp_advance(3); + UPDATE_PC_AND_RETURN(0); + } + DEFAULT: fatal("Unimplemented opcode %d = %s", opcode, Bytecodes::name((Bytecodes::Code)opcode)); diff --git a/src/hotspot/share/interpreter/zero/bytecodeInterpreter.hpp b/src/hotspot/share/interpreter/zero/bytecodeInterpreter.hpp index e4a09d492bd6852a40528f31c4aa6ae17fff43b8..9941055bfd41838fe6bb9a54a1f42d5fb74d60f7 100644 --- a/src/hotspot/share/interpreter/zero/bytecodeInterpreter.hpp +++ b/src/hotspot/share/interpreter/zero/bytecodeInterpreter.hpp @@ -503,7 +503,7 @@ static void dup2_x1(intptr_t *tos); /* insert top 2 slots three down */ static void dup2_x2(intptr_t *tos); /* insert top 2 slots four down */ static void swap(intptr_t *tos); /* swap top two elements */ -template +template static void run(interpreterState istate); static void astore(intptr_t* topOfStack, int stack_offset, diff --git a/src/hotspot/share/jfr/dcmd/jfrDcmds.cpp b/src/hotspot/share/jfr/dcmd/jfrDcmds.cpp index 875e7f8e475a36f3269d2c2ae0df3aa2108af3e3..9763ed62b76cc3c3e06bc2890b6fc405b9a4c248 100644 --- a/src/hotspot/share/jfr/dcmd/jfrDcmds.cpp +++ b/src/hotspot/share/jfr/dcmd/jfrDcmds.cpp @@ -55,53 +55,6 @@ bool register_jfr_dcmds() { return true; } -// JNIHandle management - -// ------------------------------------------------------------------ -// push_jni_handle_block -// -// Push on a new block of JNI handles. -static void push_jni_handle_block(JavaThread* const thread) { - DEBUG_ONLY(JfrJavaSupport::check_java_thread_in_vm(thread)); - - // Allocate a new block for JNI handles. - // Inlined code from jni_PushLocalFrame() - JNIHandleBlock* prev_handles = thread->active_handles(); - JNIHandleBlock* entry_handles = JNIHandleBlock::allocate_block(thread); - assert(entry_handles != NULL && prev_handles != NULL, "should not be NULL"); - entry_handles->set_pop_frame_link(prev_handles); // make sure prev handles get gc'd. - thread->set_active_handles(entry_handles); -} - -// ------------------------------------------------------------------ -// pop_jni_handle_block -// -// Pop off the current block of JNI handles. 
-static void pop_jni_handle_block(JavaThread* const thread) { - DEBUG_ONLY(JfrJavaSupport::check_java_thread_in_vm(thread)); - - // Release our JNI handle block - JNIHandleBlock* entry_handles = thread->active_handles(); - JNIHandleBlock* prev_handles = entry_handles->pop_frame_link(); - // restore - thread->set_active_handles(prev_handles); - entry_handles->set_pop_frame_link(NULL); - JNIHandleBlock::release_block(entry_handles, thread); // may block -} - -class JNIHandleBlockManager : public StackObj { - private: - JavaThread* const _thread; - public: - JNIHandleBlockManager(JavaThread* thread) : _thread(thread) { - push_jni_handle_block(_thread); - } - - ~JNIHandleBlockManager() { - pop_jni_handle_block(_thread); - } -}; - static bool is_module_available(outputStream* output, TRAPS) { return JfrJavaSupport::is_jdk_jfr_module_available(output, THREAD); } @@ -223,7 +176,7 @@ void JfrDCmd::invoke(JfrJavaArguments& method, TRAPS) const { constructor_args.set_klass(javaClass(), CHECK); HandleMark hm(THREAD); - JNIHandleBlockManager jni_handle_management(THREAD); + JNIHandleMark jni_handle_management(THREAD); const oop dcmd = construct_dcmd_instance(&constructor_args, CHECK); @@ -494,7 +447,7 @@ void JfrConfigureFlightRecorderDCmd::execute(DCmdSource source, TRAPS) { } HandleMark hm(THREAD); - JNIHandleBlockManager jni_handle_management(THREAD); + JNIHandleMark jni_handle_management(THREAD); JavaValue result(T_OBJECT); JfrJavaArguments constructor_args(&result); diff --git a/src/hotspot/share/jfr/jni/jfrJavaSupport.cpp b/src/hotspot/share/jfr/jni/jfrJavaSupport.cpp index e24cc6209388dda47f51fa65144133b3a5e55548..5b7891ab45c8ed91077497a656e2cbdd09b6f523 100644 --- a/src/hotspot/share/jfr/jni/jfrJavaSupport.cpp +++ b/src/hotspot/share/jfr/jni/jfrJavaSupport.cpp @@ -71,7 +71,7 @@ static void check_new_unstarted_java_thread(JavaThread* t) { */ jobject JfrJavaSupport::local_jni_handle(const oop obj, JavaThread* t) { DEBUG_ONLY(check_java_thread_in_vm(t)); - return t->active_handles()->allocate_handle(obj); + return t->active_handles()->allocate_handle(t, obj); } jobject JfrJavaSupport::local_jni_handle(const jobject handle, JavaThread* t) { diff --git a/src/hotspot/share/jvmci/jvmciCodeInstaller.cpp b/src/hotspot/share/jvmci/jvmciCodeInstaller.cpp index 1eeb48a9a5fde04102f2d5c8ed672d7426212ae2..cd46aa5c523c3f641560604be4086f3183f17f75 100644 --- a/src/hotspot/share/jvmci/jvmciCodeInstaller.cpp +++ b/src/hotspot/share/jvmci/jvmciCodeInstaller.cpp @@ -899,7 +899,7 @@ GrowableArray* CodeInstaller::record_virtual_objects(JVMCIObject de } Klass* klass = jvmci_env()->asKlass(type); oop javaMirror = klass->java_mirror(); - ScopeValue *klass_sv = new ConstantOopWriteValue(JNIHandles::make_local(Thread::current(), javaMirror)); + ScopeValue *klass_sv = new ConstantOopWriteValue(JNIHandles::make_local(javaMirror)); ObjectValue* sv = is_auto_box ? 
new AutoBoxObjectValue(id, klass_sv) : new ObjectValue(id, klass_sv); if (id < 0 || id >= objects->length()) { JVMCI_ERROR_NULL("virtual object id %d out of bounds", id); diff --git a/src/hotspot/share/jvmci/jvmciCompilerToVM.cpp b/src/hotspot/share/jvmci/jvmciCompilerToVM.cpp index 4a5e726ffe3216839788f406e23dec951ec23825..4368e1fb5363f7efff94ad11fd647435e375b727 100644 --- a/src/hotspot/share/jvmci/jvmciCompilerToVM.cpp +++ b/src/hotspot/share/jvmci/jvmciCompilerToVM.cpp @@ -86,29 +86,6 @@ static void requireInHotSpot(const char* caller, JVMCI_TRAPS) { } } -void JNIHandleMark::push_jni_handle_block(JavaThread* thread) { - if (thread != NULL) { - // Allocate a new block for JNI handles. - // Inlined code from jni_PushLocalFrame() - JNIHandleBlock* java_handles = thread->active_handles(); - JNIHandleBlock* compile_handles = JNIHandleBlock::allocate_block(thread); - assert(compile_handles != NULL && java_handles != NULL, "should not be NULL"); - compile_handles->set_pop_frame_link(java_handles); - thread->set_active_handles(compile_handles); - } -} - -void JNIHandleMark::pop_jni_handle_block(JavaThread* thread) { - if (thread != NULL) { - // Release our JNI handle block - JNIHandleBlock* compile_handles = thread->active_handles(); - JNIHandleBlock* java_handles = compile_handles->pop_frame_link(); - thread->set_active_handles(java_handles); - compile_handles->set_pop_frame_link(NULL); - JNIHandleBlock::release_block(compile_handles, thread); // may block - } -} - class JVMCITraceMark : public StackObj { const char* _msg; public: diff --git a/src/hotspot/share/jvmci/jvmciCompilerToVM.hpp b/src/hotspot/share/jvmci/jvmciCompilerToVM.hpp index 04d4748105b1835a0f6d2f8a7dcf1769e30162f2..1a0799566a97b8491b767157596274ba456b92a3 100644 --- a/src/hotspot/share/jvmci/jvmciCompilerToVM.hpp +++ b/src/hotspot/share/jvmci/jvmciCompilerToVM.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -173,15 +173,4 @@ class JavaArgumentUnboxer : public SignatureIterator { } }; -class JNIHandleMark : public StackObj { - JavaThread* _thread; - public: - JNIHandleMark(JavaThread* thread) : _thread(thread) { push_jni_handle_block(thread); } - ~JNIHandleMark() { pop_jni_handle_block(_thread); } - - private: - static void push_jni_handle_block(JavaThread* thread); - static void pop_jni_handle_block(JavaThread* thread); -}; - #endif // SHARE_JVMCI_JVMCICOMPILERTOVM_HPP diff --git a/src/hotspot/share/memory/metaspace.cpp b/src/hotspot/share/memory/metaspace.cpp index 74b5d1a0b30c15baa44ab36ce65465ed7abd048a..d2fadba3a09b1ac70658211110fdd06deda0ce20 100644 --- a/src/hotspot/share/memory/metaspace.cpp +++ b/src/hotspot/share/memory/metaspace.cpp @@ -31,6 +31,7 @@ #include "logging/logStream.hpp" #include "memory/classLoaderMetaspace.hpp" #include "memory/metaspace.hpp" +#include "memory/metaspaceCriticalAllocation.hpp" #include "memory/metaspace/chunkHeaderPool.hpp" #include "memory/metaspace/chunkManager.hpp" #include "memory/metaspace/commitLimiter.hpp" @@ -749,10 +750,6 @@ void Metaspace::global_initialize() { // If any of the archived space fails to map, UseSharedSpaces // is reset to false. 
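Both the JFR and JVMCI copies of this push/pop pair are deleted in favor of one shared JNIHandleMark. The shape of such a guard, as a generic RAII sketch (hypothetical Block/Thread types, not the real JNIHandleBlock API):

    #include <cstdio>

    // Stand-in for a thread's handle-block stack.
    struct Block { Block* prev; };
    struct Thread {
      Block* active = nullptr;
      void push() { active = new Block{active}; }
      void pop()  { Block* b = active; active = b->prev; delete b; }
    };

    // RAII guard: constructor pushes a fresh block, destructor restores the
    // previous one, exactly the pairing the deleted helpers implemented twice.
    class HandleMark {
      Thread& _t;
    public:
      explicit HandleMark(Thread& t) : _t(t) { _t.push(); }
      ~HandleMark() { _t.pop(); }
      HandleMark(const HandleMark&) = delete;
      HandleMark& operator=(const HandleMark&) = delete;
    };

    int main() {
      Thread t;
      {
        HandleMark hm(t);           // new block active inside this scope
        std::printf("block pushed\n");
      }                             // destructor pops, even on early exit
      std::printf("restored: %s\n", t.active == nullptr ? "yes" : "no");
      return 0;
    }
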
} - - if (DynamicDumpSharedSpaces && !UseSharedSpaces) { - vm_exit_during_initialization("DynamicDumpSharedSpaces is unsupported when base CDS archive is not loaded", NULL); - } #endif // INCLUDE_CDS #ifdef _LP64 @@ -881,6 +878,9 @@ MetaWord* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size, assert(loader_data != NULL, "Should never pass around a NULL loader_data. " "ClassLoaderData::the_null_class_loader_data() should have been used."); + // Deal with concurrent unloading failed allocation starvation + MetaspaceCriticalAllocation::block_if_concurrent_purge(); + MetadataType mdtype = (type == MetaspaceObj::ClassType) ? ClassType : NonClassType; // Try to allocate metadata. @@ -996,6 +996,10 @@ const char* Metaspace::metadata_type_name(Metaspace::MetadataType mdtype) { } void Metaspace::purge() { + // The MetaspaceCritical_lock is used by a concurrent GC to block out concurrent metaspace + // allocations, that would starve critical metaspace allocations, that are about to throw + // OOM if they fail; they need precedence for correctness. + MutexLocker ml(MetaspaceCritical_lock, Mutex::_no_safepoint_check_flag); ChunkManager* cm = ChunkManager::chunkmanager_nonclass(); if (cm != NULL) { cm->purge(); @@ -1006,6 +1010,8 @@ void Metaspace::purge() { cm->purge(); } } + + MetaspaceCriticalAllocation::satisfy(); } bool Metaspace::contains(const void* ptr) { diff --git a/src/hotspot/share/memory/metaspaceCriticalAllocation.cpp b/src/hotspot/share/memory/metaspaceCriticalAllocation.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1edb67bf7f42db1efd8d2470b1baddd516ad8c5c --- /dev/null +++ b/src/hotspot/share/memory/metaspaceCriticalAllocation.cpp @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "classfile/classLoaderData.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "logging/log.hpp" +#include "memory/classLoaderMetaspace.hpp" +#include "memory/metaspaceCriticalAllocation.hpp" +#include "memory/universe.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/mutexLocker.hpp" + +class MetadataAllocationRequest { + ClassLoaderData* _loader_data; + size_t _word_size; + Metaspace::MetadataType _type; + MetadataAllocationRequest* _next; + MetaWord* _result; + bool _has_result; + +public: + MetadataAllocationRequest(ClassLoaderData* loader_data, + size_t word_size, + Metaspace::MetadataType type) + : _loader_data(loader_data), + _word_size(word_size), + _type(type), + _next(NULL), + _result(NULL), + _has_result(false) { + MetaspaceCriticalAllocation::add(this); + } + + ~MetadataAllocationRequest() { + MetaspaceCriticalAllocation::remove(this); + } + + ClassLoaderData* loader_data() const { return _loader_data; } + size_t word_size() const { return _word_size; } + Metaspace::MetadataType type() const { return _type; } + MetadataAllocationRequest* next() const { return _next; } + MetaWord* result() const { return _result; } + bool has_result() const { return _has_result; } + + void set_next(MetadataAllocationRequest* next) { _next = next; } + void set_result(MetaWord* result) { + _result = result; + _has_result = true; + } +}; + +volatile bool MetaspaceCriticalAllocation::_has_critical_allocation = false; +MetadataAllocationRequest* MetaspaceCriticalAllocation::_requests_head = NULL; +MetadataAllocationRequest* MetaspaceCriticalAllocation::_requests_tail = NULL; + +void MetaspaceCriticalAllocation::add(MetadataAllocationRequest* request) { + MutexLocker ml(MetaspaceCritical_lock, Mutex::_no_safepoint_check_flag); + log_info(metaspace)("Requesting critical metaspace allocation; almost out of memory"); + Atomic::store(&_has_critical_allocation, true); + if (_requests_head == NULL) { + _requests_head = _requests_tail = request; + } else { + _requests_tail->set_next(request); + _requests_tail = request; + } +} + +void MetaspaceCriticalAllocation::unlink(MetadataAllocationRequest* curr, MetadataAllocationRequest* prev) { + if (_requests_head == curr) { + _requests_head = curr->next(); + } + if (_requests_tail == curr) { + _requests_tail = prev; + } + if (prev != NULL) { + prev->set_next(curr->next()); + } +} + +void MetaspaceCriticalAllocation::remove(MetadataAllocationRequest* request) { + MutexLocker ml(MetaspaceCritical_lock, Mutex::_no_safepoint_check_flag); + MetadataAllocationRequest* prev = NULL; + for (MetadataAllocationRequest* curr = _requests_head; curr != NULL; curr = curr->next()) { + if (curr == request) { + unlink(curr, prev); + break; + } else { + prev = curr; + } + } +} + +bool MetaspaceCriticalAllocation::try_allocate_critical(MetadataAllocationRequest* request) { + MutexLocker ml(MetaspaceCritical_lock, Mutex::_no_safepoint_check_flag); + if (_requests_head == request) { + // The first request can't opportunistically ride on a previous GC + return false; + } + // Try to ride on a previous GC and hope for early satisfaction + wait_for_purge(request); + return request->result() != NULL; +} + +void MetaspaceCriticalAllocation::wait_for_purge(MetadataAllocationRequest* request) { + while (!request->has_result()) { + ThreadBlockInVM tbivm(JavaThread::current()); + MetaspaceCritical_lock->wait_without_safepoint_check(); + } +} + +void MetaspaceCriticalAllocation::block_if_concurrent_purge() { + if 
(Atomic::load(&_has_critical_allocation)) { + // If there is a concurrent Metaspace::purge() operation, we will block here, + // to make sure critical allocations get precedence and don't get starved. + MutexLocker ml(MetaspaceCritical_lock, Mutex::_no_safepoint_check_flag); + } +} + +void MetaspaceCriticalAllocation::satisfy() { + assert_lock_strong(MetaspaceCritical_lock); + bool all_satisfied = true; + for (MetadataAllocationRequest* curr = _requests_head; curr != NULL; curr = curr->next()) { + if (curr->result() != NULL) { + // Don't satisfy twice + continue; + } + // Try to allocate metadata. + MetaWord* result = curr->loader_data()->metaspace_non_null()->allocate(curr->word_size(), curr->type()); + if (result == NULL) { + result = curr->loader_data()->metaspace_non_null()->expand_and_allocate(curr->word_size(), curr->type()); + } + if (result == NULL) { + all_satisfied = false; + } + curr->set_result(result); + } + if (all_satisfied) { + Atomic::store(&_has_critical_allocation, false); + } + MetaspaceCritical_lock->notify_all(); +} + +MetaWord* MetaspaceCriticalAllocation::allocate(ClassLoaderData* loader_data, size_t word_size, Metaspace::MetadataType type) { + MetadataAllocationRequest request(loader_data, word_size, type); + + if (try_allocate_critical(&request)) { + // Try to allocate on a previous concurrent GC if there was one, and return if successful + return request.result(); + } + + // Always perform a synchronous full GC before bailing + Universe::heap()->collect(GCCause::_metadata_GC_clear_soft_refs); + + // Return the result, be that success or failure + return request.result(); +} diff --git a/src/hotspot/share/memory/metaspaceCriticalAllocation.hpp b/src/hotspot/share/memory/metaspaceCriticalAllocation.hpp new file mode 100644 index 0000000000000000000000000000000000000000..71e8819aef338a52d01d07dc14b7cb03e476c3ce --- /dev/null +++ b/src/hotspot/share/memory/metaspaceCriticalAllocation.hpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_MEMORY_METASPACECRITICALALLOCATION_HPP +#define SHARE_MEMORY_METASPACECRITICALALLOCATION_HPP + +#include "memory/allocation.hpp" +#include "memory/metaspace.hpp" + +class MetadataAllocationRequest; +class ClassLoaderData; + +// == Critical allocation support == +// +// The critical allocation support has the purpose of preventing starvation of failed +// metadata allocations that need a GC, in particular for concurrent GCs. 
+// A "critical" allocation request is registered, then a concurrent full GC is executed. +// When there is any critical allocation present in the system, allocations compete for +// a global lock, so that allocations can be shut out from the concurrent purge() call, +// which takes the same lock. The reasoning is that we gather all the critical allocations +// that are one more failure away from throwing metaspace OOM, in a queue before the GC, +// then free up metaspace due to class unloading in the purge() operation of that GC, +// and satisfy the registered critical allocations. This allows the critical allocations +// to get precedence over normal metaspace allocations, so that the critical allocations +// that are about to throw, do not get starved by other metaspace allocations that have +// not gone through the same dance. +// +// The solution has an intended accuracy of not one allocation, but one per thread. What +// I mean by that, is that the allocations are allowed to throw if they got starved by +// one metaspace allocation per thread, even though a more complicated dance could have +// survived that situation in theory. The motivation is that we are at this point so close +// to being out of memory, and the VM is not having a good time, so the user really ought +// to increase the amount of available metaspace anyway, instead of GC:ing around more +// to satisfy a very small number of additional allocations. But it does solve pathological +// unbounded starvation scenarios where OOM can get thrown even though most of metaspace +// is full of dead metadata. +// +// The contract for this to work for a given GC is that GCCause::_metadata_GC_clear_soft_refs +// yields a full synchronous GC that unloads metaspace. And it is only intended to be used +// by GCs with concurrent class unloading. + +class MetaspaceCriticalAllocation : public AllStatic { + friend class MetadataAllocationRequest; + + static volatile bool _has_critical_allocation; + static MetadataAllocationRequest* _requests_head; + static MetadataAllocationRequest* _requests_tail; + + static void unlink(MetadataAllocationRequest* curr, MetadataAllocationRequest* prev); + + static void add(MetadataAllocationRequest* request); + static void remove(MetadataAllocationRequest* request); + + static bool try_allocate_critical(MetadataAllocationRequest* request); + static void wait_for_purge(MetadataAllocationRequest* request); + +public: + static void block_if_concurrent_purge(); + static void satisfy(); + static MetaWord* allocate(ClassLoaderData* loader_data, size_t word_size, Metaspace::MetadataType type); +}; + +#endif // SHARE_MEMORY_METASPACECRITICALALLOCATION_HPP diff --git a/src/hotspot/share/memory/universe.cpp b/src/hotspot/share/memory/universe.cpp index 6c7f5a2fab682c0aadcd5c820b5b5f7bc5cba00e..ffe92cf65b16b07336e423f1649fb1dd78fc66c6 100644 --- a/src/hotspot/share/memory/universe.cpp +++ b/src/hotspot/share/memory/universe.cpp @@ -23,6 +23,7 @@ */ #include "precompiled.hpp" +#include "cds/dynamicArchive.hpp" #include "cds/heapShared.hpp" #include "cds/metaspaceShared.hpp" #include "classfile/classLoader.hpp" @@ -765,6 +766,7 @@ jint universe_init() { Universe::_do_stack_walk_cache = new LatestMethodCache(); #if INCLUDE_CDS + DynamicArchive::check_for_dynamic_dump(); if (UseSharedSpaces) { // Read the data structures supporting the shared spaces (shared // system dictionary, symbol table, etc.).
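The queue-and-satisfy protocol that comment block describes can be modeled with portable primitives. A minimal sketch, assuming one mutex standing in for MetaspaceCritical_lock and operator new standing in for the metaspace allocator; the real code additionally cooperates with safepoints and first tries to ride on an in-flight GC:

    #include <condition_variable>
    #include <cstdio>
    #include <list>
    #include <mutex>
    #include <optional>
    #include <thread>

    static std::mutex crit_lock;      // stands in for MetaspaceCritical_lock
    static std::condition_variable crit_cv;

    struct Request {
      size_t word_size;
      std::optional<void*> result;    // set once a purge has tried to satisfy us
    };
    static std::list<Request*> requests;  // FIFO: oldest failure is served first

    // A thread whose normal allocation failed registers a request and blocks
    // until the next purge has had a chance to satisfy it.
    static void* allocate_critical(size_t word_size) {
      Request req{word_size, std::nullopt};
      std::unique_lock<std::mutex> ml(crit_lock);
      requests.push_back(&req);
      crit_cv.wait(ml, [&] { return req.result.has_value(); });
      requests.remove(&req);
      return *req.result;  // may still be nullptr -> caller reports OOM
    }

    // Stands in for the end of Metaspace::purge(): walk the queue under the
    // same lock that shuts out competing ordinary allocations, and hand
    // freed memory to the waiters in registration order.
    static void satisfy() {
      std::lock_guard<std::mutex> ml(crit_lock);
      for (Request* r : requests) {
        if (!r->result.has_value()) {
          r->result = ::operator new(r->word_size * sizeof(void*));
        }
      }
      crit_cv.notify_all();
    }

    int main() {
      std::thread waiter([] {
        void* p = allocate_critical(16);
        std::printf("critical allocation satisfied: %p\n", p);
        ::operator delete(p);
      });
      std::thread gc([] {
        for (;;) {  // wait until a request is registered, then "purge"
          std::lock_guard<std::mutex> ml(crit_lock);
          if (!requests.empty()) break;
        }
        satisfy();
      });
      waiter.join();
      gc.join();
      return 0;
    }
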
After that, access to diff --git a/src/hotspot/share/oops/instanceKlass.cpp b/src/hotspot/share/oops/instanceKlass.cpp index 6c83b0bd7dadbe33ab4a9e3c1c5fe70209b546c3..1e237de66ec4c831c1947750d2ebb93a7a29ef98 100644 --- a/src/hotspot/share/oops/instanceKlass.cpp +++ b/src/hotspot/share/oops/instanceKlass.cpp @@ -728,7 +728,6 @@ oop InstanceKlass::protection_domain() const { return java_lang_Class::protection_domain(java_mirror()); } -// To remove these from requires an incompatible change and CSR review. objArrayOop InstanceKlass::signers() const { // return the signers from the mirror return java_lang_Class::signers(java_mirror()); @@ -2599,6 +2598,11 @@ void InstanceKlass::restore_unshareable_info(ClassLoaderData* loader_data, Handl // retrieved during dump time. // Verification of archived old classes will be performed during run time. bool InstanceKlass::can_be_verified_at_dumptime() const { + if (MetaspaceShared::is_in_shared_metaspace(this)) { + // This is a class that was dumped into the base archive, so we know + // it was verified at dump time. + return true; + } if (major_version() < 50 /*JAVA_6_VERSION*/) { return false; } diff --git a/src/hotspot/share/oops/instanceKlass.hpp b/src/hotspot/share/oops/instanceKlass.hpp index a3e8da4c8a48d2d33c33c8755014189194cfc59e..d55b021b37026f2e7e03e88b1ec34f62d7825ae7 100644 --- a/src/hotspot/share/oops/instanceKlass.hpp +++ b/src/hotspot/share/oops/instanceKlass.hpp @@ -254,8 +254,7 @@ class InstanceKlass: public Klass { _misc_is_shared_platform_class = 1 << 11, // defining class loader is platform class loader _misc_is_shared_app_class = 1 << 12, // defining class loader is app class loader _misc_has_resolved_methods = 1 << 13, // resolved methods table entries added for this class - _misc_is_being_redefined = 1 << 14, // used for locking redefinition - _misc_has_contended_annotations = 1 << 15 // has @Contended annotation + _misc_has_contended_annotations = 1 << 14 // has @Contended annotation }; u2 shared_loader_type_bits() const { return _misc_is_shared_boot_class|_misc_is_shared_platform_class|_misc_is_shared_app_class; @@ -733,14 +732,16 @@ public: #if INCLUDE_JVMTI // Redefinition locking. Class can only be redefined by one thread at a time. + // The flag is in access_flags so that it can be set and reset using atomic + // operations, and not be reset by other misc_flag settings. bool is_being_redefined() const { - return ((_misc_flags & _misc_is_being_redefined) != 0); + return _access_flags.is_being_redefined(); } void set_is_being_redefined(bool value) { if (value) { - _misc_flags |= _misc_is_being_redefined; + _access_flags.set_is_being_redefined(); } else { - _misc_flags &= ~_misc_is_being_redefined; + _access_flags.clear_is_being_redefined(); } } diff --git a/src/hotspot/share/oops/oop.inline.hpp b/src/hotspot/share/oops/oop.inline.hpp index c161e479deb4f8e5c3239f43f45a9e1fa1f137f5..371e97824f769635b1a822c83b62dc2a115aa89f 100644 --- a/src/hotspot/share/oops/oop.inline.hpp +++ b/src/hotspot/share/oops/oop.inline.hpp @@ -289,12 +289,13 @@ oop oopDesc::forward_to_atomic(oop p, markWord compare, atomic_memory_order orde // The forwardee is used when copying during scavenge and mark-sweep. // It does need to clear the low two locking- and GC-related bits. oop oopDesc::forwardee() const { + assert(is_forwarded(), "only decode when actually forwarded"); return cast_to_oop(mark().decode_pointer()); } // The following method needs to be MT safe. 
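The instanceKlass.hpp hunk above moves the is_being_redefined bit out of the plain u2 _misc_flags and into the access flags precisely so it can be set and cleared atomically: a non-atomic |= or &= on a word shared with other writers can silently lose concurrent updates. A small model of the fixed behavior using std::atomic (illustrative bit position, not HotSpot's actual flag value):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    enum : uint32_t { JVM_ACC_IS_BEING_REDEFINED = 1u << 24 };  // illustrative

    // Atomic set/clear: safe against other threads flipping unrelated bits
    // in the same word, which is what plain _misc_flags |= ... was not.
    struct AccessFlags {
      std::atomic<uint32_t> flags{0};

      void set_is_being_redefined() {
        flags.fetch_or(JVM_ACC_IS_BEING_REDEFINED, std::memory_order_relaxed);
      }
      void clear_is_being_redefined() {
        flags.fetch_and(~JVM_ACC_IS_BEING_REDEFINED, std::memory_order_relaxed);
      }
      bool is_being_redefined() const {
        return flags.load(std::memory_order_relaxed) & JVM_ACC_IS_BEING_REDEFINED;
      }
    };

    int main() {
      AccessFlags af;
      af.set_is_being_redefined();
      assert(af.is_being_redefined());
      af.clear_is_being_redefined();
      assert(!af.is_being_redefined());
      return 0;
    }
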
uint oopDesc::age() const { - assert(!is_forwarded(), "Attempt to read age from forwarded mark"); + assert(!mark().is_marked(), "Attempt to read age from forwarded mark"); if (has_displaced_mark()) { return displaced_mark().age(); } else { @@ -303,7 +304,7 @@ uint oopDesc::age() const { } void oopDesc::incr_age() { - assert(!is_forwarded(), "Attempt to increment age of forwarded mark"); + assert(!mark().is_marked(), "Attempt to increment age of forwarded mark"); if (has_displaced_mark()) { set_displaced_mark(displaced_mark().incr_age()); } else { diff --git a/src/hotspot/share/oops/symbol.hpp b/src/hotspot/share/oops/symbol.hpp index 8653811ce159bd3f5e34092ee1c225f549ee95a1..703797a88fb3cb32ed34a2e8ca55aff27b97c4e6 100644 --- a/src/hotspot/share/oops/symbol.hpp +++ b/src/hotspot/share/oops/symbol.hpp @@ -155,7 +155,7 @@ class Symbol : public MetaspaceObj { // Returns the largest size symbol we can safely hold. static int max_length() { return max_symbol_length; } unsigned identity_hash() const { - unsigned addr_bits = (unsigned)((uintptr_t)this >> (LogBytesPerWord + 3)); + unsigned addr_bits = (unsigned)((uintptr_t)this >> LogBytesPerWord); return ((unsigned)extract_hash(_hash_and_refcount) & 0xffff) | ((addr_bits ^ (length() << 8) ^ (( _body[0] << 8) | _body[1])) << 16); } diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp index acb36f05658b714d07ab6f47a952ec5ec585606b..56b6c13176b1dec733cec03cff0c5c7b7b056d6a 100644 --- a/src/hotspot/share/opto/c2compiler.cpp +++ b/src/hotspot/share/opto/c2compiler.cpp @@ -694,7 +694,9 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt case vmIntrinsics::_VectorShuffleIota: case vmIntrinsics::_VectorShuffleToVector: case vmIntrinsics::_VectorLoadOp: + case vmIntrinsics::_VectorLoadMaskedOp: case vmIntrinsics::_VectorStoreOp: + case vmIntrinsics::_VectorStoreMaskedOp: case vmIntrinsics::_VectorGatherOp: case vmIntrinsics::_VectorScatterOp: case vmIntrinsics::_VectorReductionCoerced: diff --git a/src/hotspot/share/opto/callGenerator.cpp b/src/hotspot/share/opto/callGenerator.cpp index a0746c763b2c3b0bdbb76b478bfcbc1cdf625cd4..e9c376e42891edcfc9318260a10a83413b4bdb98 100644 --- a/src/hotspot/share/opto/callGenerator.cpp +++ b/src/hotspot/share/opto/callGenerator.cpp @@ -675,12 +675,13 @@ void CallGenerator::do_late_inline_helper() { bool result_not_used = false; if (is_pure_call()) { - if (is_boxing_late_inline() && callprojs.resproj != nullptr) { - // replace box node to scalar node only in case it is directly referenced by debug info - assert(call->as_CallStaticJava()->is_boxing_method(), "sanity"); - if (!has_non_debug_usages(callprojs.resproj) && is_box_cache_valid(call)) { - scalarize_debug_usages(call, callprojs.resproj); - } + // Disabled due to JDK-8276112 + if (false && is_boxing_late_inline() && callprojs.resproj != nullptr) { + // replace box node to scalar node only in case it is directly referenced by debug info + assert(call->as_CallStaticJava()->is_boxing_method(), "sanity"); + if (!has_non_debug_usages(callprojs.resproj) && is_box_cache_valid(call)) { + scalarize_debug_usages(call, callprojs.resproj); + } } // The call is marked as pure (no important side effects), but result isn't used. 
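The Symbol::identity_hash tweak above only adjusts how many low address bits are discarded before mixing: Symbols are word-aligned, so shifting by LogBytesPerWord alone removes just the always-zero alignment bits and keeps more distinguishing address entropy than the old LogBytesPerWord + 3 shift. A rough stand-alone rendition of the mixing expression (64-bit word assumed, sample inputs):

    #include <cstdint>
    #include <cstdio>

    const int LogBytesPerWord = 3;  // 64-bit word

    // Mix: low 16 bits from the stored hash, high 16 bits from address
    // bits, the length, and the first two body bytes (mirrors the
    // expression in symbol.hpp).
    static unsigned identity_hash(const void* self, unsigned stored_hash,
                                  int length, uint8_t b0, uint8_t b1) {
      unsigned addr_bits = (unsigned)((uintptr_t)self >> LogBytesPerWord);
      return (stored_hash & 0xffff) |
             ((addr_bits ^ (length << 8) ^ ((b0 << 8) | b1)) << 16);
    }

    int main() {
      alignas(8) static const char sym[] = "java/lang/Object";
      std::printf("hash=0x%08x\n",
                  identity_hash(sym, 0x1234u, 16, 'j', 'a'));
      return 0;
    }
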
diff --git a/src/hotspot/share/opto/cfgnode.cpp b/src/hotspot/share/opto/cfgnode.cpp index cad5e835321ea9e1fbe0ae1f351696d4662d62de..21526a6246ad42479ab21a57c7c45aa2ed67a3da 100644 --- a/src/hotspot/share/opto/cfgnode.cpp +++ b/src/hotspot/share/opto/cfgnode.cpp @@ -2398,47 +2398,103 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) { #endif // Phi (VB ... VB) => VB (Phi ...) (Phi ...) - if (EnableVectorReboxing && can_reshape && progress == NULL) { - PhaseIterGVN* igvn = phase->is_IterGVN(); - - bool all_inputs_are_equiv_vboxes = true; - for (uint i = 1; i < req(); ++i) { - Node* n = in(i); - if (in(i)->Opcode() != Op_VectorBox) { - all_inputs_are_equiv_vboxes = false; - break; - } - // Check that vector type of vboxes is equivalent - if (i != 1) { - if (Type::cmp(in(i-0)->in(VectorBoxNode::Value)->bottom_type(), - in(i-1)->in(VectorBoxNode::Value)->bottom_type()) != 0) { - all_inputs_are_equiv_vboxes = false; - break; - } - if (Type::cmp(in(i-0)->in(VectorBoxNode::Box)->bottom_type(), - in(i-1)->in(VectorBoxNode::Box)->bottom_type()) != 0) { - all_inputs_are_equiv_vboxes = false; - break; + if (EnableVectorReboxing && can_reshape && progress == NULL && type()->isa_oopptr()) { + progress = merge_through_phi(this, phase->is_IterGVN()); + } + + return progress; // Return any progress +} + +Node* PhiNode::clone_through_phi(Node* root_phi, const Type* t, uint c, PhaseIterGVN* igvn) { + Node_Stack stack(1); + VectorSet visited; + Node_List node_map; + + stack.push(root_phi, 1); // ignore control + visited.set(root_phi->_idx); + + Node* new_phi = new PhiNode(root_phi->in(0), t); + node_map.map(root_phi->_idx, new_phi); + + while (stack.is_nonempty()) { + Node* n = stack.node(); + uint idx = stack.index(); + assert(n->is_Phi(), "not a phi"); + if (idx < n->req()) { + stack.set_index(idx + 1); + Node* def = n->in(idx); + if (def == NULL) { + continue; // ignore dead path + } else if (def->is_Phi()) { // inner node + Node* new_phi = node_map[n->_idx]; + if (!visited.test_set(def->_idx)) { // not visited yet + node_map.map(def->_idx, new PhiNode(def->in(0), t)); + stack.push(def, 1); // ignore control } + Node* new_in = node_map[def->_idx]; + new_phi->set_req(idx, new_in); + } else if (def->Opcode() == Op_VectorBox) { // leaf + assert(n->is_Phi(), "not a phi"); + Node* new_phi = node_map[n->_idx]; + new_phi->set_req(idx, def->in(c)); + } else { + assert(false, "not optimizeable"); + return NULL; } + } else { + Node* new_phi = node_map[n->_idx]; + igvn->register_new_node_with_optimizer(new_phi, n); + stack.pop(); } + } + return new_phi; +} - if (all_inputs_are_equiv_vboxes) { - VectorBoxNode* vbox = static_cast(in(1)); - PhiNode* new_vbox_phi = new PhiNode(r, vbox->box_type()); - PhiNode* new_vect_phi = new PhiNode(r, vbox->vec_type()); - for (uint i = 1; i < req(); ++i) { - VectorBoxNode* old_vbox = static_cast(in(i)); - new_vbox_phi->set_req(i, old_vbox->in(VectorBoxNode::Box)); - new_vect_phi->set_req(i, old_vbox->in(VectorBoxNode::Value)); +Node* PhiNode::merge_through_phi(Node* root_phi, PhaseIterGVN* igvn) { + Node_Stack stack(1); + VectorSet visited; + + stack.push(root_phi, 1); // ignore control + visited.set(root_phi->_idx); + + VectorBoxNode* cached_vbox = NULL; + while (stack.is_nonempty()) { + Node* n = stack.node(); + uint idx = stack.index(); + if (idx < n->req()) { + stack.set_index(idx + 1); + Node* in = n->in(idx); + if (in == NULL) { + continue; // ignore dead path + } else if (in->isa_Phi()) { + if (!visited.test_set(in->_idx)) { + stack.push(in, 1); // ignore control + } 
+ } else if (in->Opcode() == Op_VectorBox) { + VectorBoxNode* vbox = static_cast(in); + if (cached_vbox == NULL) { + cached_vbox = vbox; + } else if (vbox->vec_type() != cached_vbox->vec_type()) { + // TODO: vector type mismatch can be handled with additional reinterpret casts + assert(Type::cmp(vbox->vec_type(), cached_vbox->vec_type()) != 0, "inconsistent"); + return NULL; // not optimizable: vector type mismatch + } else if (vbox->box_type() != cached_vbox->box_type()) { + assert(Type::cmp(vbox->box_type(), cached_vbox->box_type()) != 0, "inconsistent"); + return NULL; // not optimizable: box type mismatch + } + } else { + return NULL; // not optimizable: neither Phi nor VectorBox } - igvn->register_new_node_with_optimizer(new_vbox_phi, this); - igvn->register_new_node_with_optimizer(new_vect_phi, this); - progress = new VectorBoxNode(igvn->C, new_vbox_phi, new_vect_phi, vbox->box_type(), vbox->vec_type()); + } else { + stack.pop(); } } - - return progress; // Return any progress + assert(cached_vbox != NULL, "sanity"); + const TypeInstPtr* btype = cached_vbox->box_type(); + const TypeVect* vtype = cached_vbox->vec_type(); + Node* new_vbox_phi = clone_through_phi(root_phi, btype, VectorBoxNode::Box, igvn); + Node* new_vect_phi = clone_through_phi(root_phi, vtype, VectorBoxNode::Value, igvn); + return new VectorBoxNode(igvn->C, new_vbox_phi, new_vect_phi, btype, vtype); } bool PhiNode::is_data_loop(RegionNode* r, Node* uin, const PhaseGVN* phase) { diff --git a/src/hotspot/share/opto/cfgnode.hpp b/src/hotspot/share/opto/cfgnode.hpp index d0a22b78b6e54fc80fcafa23395201461139d945..3672eaa2ed3a4722b9cc88738ca765ec177ab3d7 100644 --- a/src/hotspot/share/opto/cfgnode.hpp +++ b/src/hotspot/share/opto/cfgnode.hpp @@ -143,6 +143,9 @@ class PhiNode : public TypeNode { bool wait_for_region_igvn(PhaseGVN* phase); bool is_data_loop(RegionNode* r, Node* uin, const PhaseGVN* phase); + static Node* clone_through_phi(Node* root_phi, const Type* t, uint c, PhaseIterGVN* igvn); + static Node* merge_through_phi(Node* root_phi, PhaseIterGVN* igvn); + public: // Node layout (parallels RegionNode): enum { Region, // Control input is the Phi's region. diff --git a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp index 41917c2b7b19ba35edd29c1aa497cb404f458148..fe3c7983c6334bb22c286dfa260dafd6dbd451e1 100644 --- a/src/hotspot/share/opto/chaitin.cpp +++ b/src/hotspot/share/opto/chaitin.cpp @@ -77,6 +77,7 @@ void LRG::dump() const { if( _is_oop ) tty->print("Oop "); if( _is_float ) tty->print("Float "); if( _is_vector ) tty->print("Vector "); + if( _is_predicate ) tty->print("Predicate "); if( _is_scalable ) tty->print("Scalable "); if( _was_spilled1 ) tty->print("Spilled "); if( _was_spilled2 ) tty->print("Spilled2 "); @@ -638,7 +639,8 @@ void PhaseChaitin::Register_Allocate() { LRG &lrg = lrgs(_lrg_map.live_range_id(i)); if (!lrg.alive()) { set_bad(i); - } else if (lrg.num_regs() == 1) { + } else if ((lrg.num_regs() == 1 && !lrg.is_scalable()) || + (lrg.is_scalable() && lrg.scalable_reg_slots() == 1)) { set1(i, lrg.reg()); } else { // Must be a register-set if (!lrg._fat_proj) { // Must be aligned adjacent register set @@ -653,15 +655,19 @@ void PhaseChaitin::Register_Allocate() { // num_regs, which reflects the physical length of scalable registers. num_regs = lrg.scalable_reg_slots(); } - OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo - // We have to use pair [lo,lo+1] even for wide vectors because - // the rest of code generation works only with pairs. 
It is safe - // since for registers encoding only 'lo' is used. - // Second reg from pair is used in ScheduleAndBundle on SPARC where - // vector max size is 8 which corresponds to registers pair. - // It is also used in BuildOopMaps but oop operations are not - // vectorized. - set2(i, lo); + if (num_regs == 1) { + set1(i, hi); + } else { + OptoReg::Name lo = OptoReg::add(hi, (1 - num_regs)); // Find lo + // We have to use pair [lo,lo+1] even for wide vectors/vmasks because + // the rest of code generation works only with pairs. It is safe + // since for registers encoding only 'lo' is used. + // Second reg from pair is used in ScheduleAndBundle with vector max + // size 8 which corresponds to registers pair. + // It is also used in BuildOopMaps but oop operations are not + // vectorized. + set2(i, lo); + } } else { // Misaligned; extract 2 bits OptoReg::Name hi = lrg.reg(); // Get hi register lrg.Remove(hi); // Yank from mask @@ -824,8 +830,22 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT)); } } + + if (ireg == Op_RegVectMask) { + assert(Matcher::has_predicated_vectors(), "predicated vector should be supported"); + lrg._is_predicate = 1; + if (Matcher::supports_scalable_vector()) { + lrg._is_scalable = 1; + // For scalable predicate, when it is allocated in physical register, + // num_regs is RegMask::SlotsPerRegVectMask for reg mask, + // which may not be the actual physical register size. + // If it is allocated in stack, we need to get the actual + // physical length of scalable predicate register. + lrg.set_scalable_reg_slots(Matcher::scalable_predicate_reg_slots()); + } + } assert(n_type->isa_vect() == NULL || lrg._is_vector || - ireg == Op_RegD || ireg == Op_RegL || ireg == Op_RegVectMask, + ireg == Op_RegD || ireg == Op_RegL || ireg == Op_RegVectMask, "vector must be in vector registers"); // Check for bound register masks @@ -919,6 +939,8 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { } break; case Op_RegVectMask: + assert(Matcher::has_predicated_vectors(), "sanity"); + assert(RegMask::num_registers(Op_RegVectMask) == RegMask::SlotsPerRegVectMask, "sanity"); lrg.set_num_regs(RegMask::SlotsPerRegVectMask); lrg.set_reg_pressure(1); break; @@ -1371,6 +1393,11 @@ static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) { } } return OptoReg::Bad; // will cause chunk change, and retry next chunk + } else if (lrg._is_predicate) { + assert(num_regs == RegMask::SlotsPerRegVectMask, "scalable predicate register"); + num_regs = lrg.scalable_reg_slots(); + mask.clear_to_sets(num_regs); + return mask.find_first_set(lrg, num_regs); } } @@ -1417,7 +1444,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { } // If no bias info exists, just go with the register selection ordering - if (lrg._is_vector || lrg.num_regs() == 2) { + if (lrg._is_vector || lrg.num_regs() == 2 || lrg.is_scalable()) { // Find an aligned set return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk); } diff --git a/src/hotspot/share/opto/chaitin.hpp b/src/hotspot/share/opto/chaitin.hpp index 842d5c652d47561a0afc444bdf8edb0362a8056c..2e9213b4bab9db8cd630d8e6d024206e4b4a5fdb 100644 --- a/src/hotspot/share/opto/chaitin.hpp +++ b/src/hotspot/share/opto/chaitin.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -163,8 +163,8 @@ public: bool is_scalable() { #ifdef ASSERT if (_is_scalable) { - // Should only be a vector for now, but it could also be a RegVectMask in future. - assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg"); + assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA) || + _is_predicate && (_num_regs == RegMask::SlotsPerRegVectMask), "unexpected scalable reg"); } #endif return Matcher::implements_scalable_vector && _is_scalable; @@ -195,6 +195,7 @@ public: uint _is_oop:1, // Live-range holds an oop _is_float:1, // True if in float registers _is_vector:1, // True if in vector registers + _is_predicate:1, // True if in mask/predicate registers _is_scalable:1, // True if register size is scalable // e.g. Arm SVE vector/predicate registers. _was_spilled1:1, // True if prior spilling on def diff --git a/src/hotspot/share/opto/classes.hpp b/src/hotspot/share/opto/classes.hpp index 3b9697ecb4beb88678658220460a09a684ce1fef..da5faa5d4a1f029685ab4971ad40e9834ae69351 100644 --- a/src/hotspot/share/opto/classes.hpp +++ b/src/hotspot/share/opto/classes.hpp @@ -415,8 +415,10 @@ macro(MinReductionV) macro(MaxReductionV) macro(LoadVector) macro(LoadVectorGather) +macro(LoadVectorGatherMasked) macro(StoreVector) macro(StoreVectorScatter) +macro(StoreVectorScatterMasked) macro(LoadVectorMasked) macro(StoreVectorMasked) macro(VectorCmpMasked) @@ -425,6 +427,7 @@ macro(VectorMaskOp) macro(VectorMaskTrueCount) macro(VectorMaskFirstTrue) macro(VectorMaskLastTrue) +macro(VectorMaskToLong) macro(Pack) macro(PackB) macro(PackS) @@ -475,3 +478,7 @@ macro(VectorCastL2X) macro(VectorCastF2X) macro(VectorCastD2X) macro(VectorInsert) +macro(MaskAll) +macro(AndVMask) +macro(OrVMask) +macro(XorVMask) diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp index c5affe1a2b813648330f5dde8d8a04ea3949609b..b2f5dd557881934e94ebc6894a09b74586cb0d9f 100644 --- a/src/hotspot/share/opto/compile.cpp +++ b/src/hotspot/share/opto/compile.cpp @@ -2134,7 +2134,8 @@ void Compile::Optimize() { if (!failing() && RenumberLiveNodes && live_nodes() + NodeLimitFudgeFactor < unique()) { Compile::TracePhase tp("", &timers[_t_renumberLive]); initial_gvn()->replace_with(&igvn); - for_igvn()->clear(); + Unique_Node_List* old_worklist = for_igvn(); + old_worklist->clear(); Unique_Node_List new_worklist(C->comp_arena()); { ResourceMark rm; @@ -2144,7 +2145,7 @@ void Compile::Optimize() { set_for_igvn(&new_worklist); igvn = PhaseIterGVN(initial_gvn()); igvn.optimize(); - set_for_igvn(save_for_igvn); + set_for_igvn(old_worklist); // new_worklist is dead beyond this point } // Now that all inlining is over and no PhaseRemoveUseless will run, cut edge from root to loop @@ -2358,6 +2359,7 @@ bool Compile::has_vbox_nodes() { static bool is_vector_unary_bitwise_op(Node* n) { return n->Opcode() == Op_XorV && + n->req() == 2 && VectorNode::is_vector_bitwise_not_pattern(n); } @@ -2365,7 +2367,7 @@ static bool is_vector_binary_bitwise_op(Node* n) { switch (n->Opcode()) { case Op_AndV: case Op_OrV: - return true; + return n->req() == 2; case Op_XorV: return !is_vector_unary_bitwise_op(n); @@ -3424,6 +3426,8 @@ void Compile::final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& f case Op_StoreVector: case Op_LoadVectorGather: case Op_StoreVectorScatter: + case Op_LoadVectorGatherMasked: + case Op_StoreVectorScatterMasked: case 
Op_VectorCmpMasked: case Op_VectorMaskGen: case Op_LoadVectorMasked: diff --git a/src/hotspot/share/opto/ifnode.cpp b/src/hotspot/share/opto/ifnode.cpp index 38b40a68b1f80f649ae9eb4097d4bfaa259ecafb..03f3a9e3728ea310c583f5dba42d36348cafb1c9 100644 --- a/src/hotspot/share/opto/ifnode.cpp +++ b/src/hotspot/share/opto/ifnode.cpp @@ -1721,6 +1721,16 @@ Node* IfProjNode::Identity(PhaseGVN* phase) { // will cause this node to be reprocessed once the dead branch is killed. in(0)->outcnt() == 1))) { // IfNode control + if (in(0)->is_BaseCountedLoopEnd()) { + // CountedLoopEndNode may be eliminated by if subsuming, replace CountedLoopNode with LoopNode to + // avoid mismatching between CountedLoopNode and CountedLoopEndNode in the following optimization. + Node* head = unique_ctrl_out(); + if (head != NULL && head->is_BaseCountedLoop() && head->in(LoopNode::LoopBackControl) == this) { + Node* new_head = new LoopNode(head->in(LoopNode::EntryControl), this); + phase->is_IterGVN()->register_new_node_with_optimizer(new_head); + phase->is_IterGVN()->replace_node(head, new_head); + } + } return in(0)->in(0); } // no progress diff --git a/src/hotspot/share/opto/lcm.cpp b/src/hotspot/share/opto/lcm.cpp index 7197f1f3c2230fcba258d96b7c85c07d9ce81fe0..fe799dd98475dc4549cbf8e0b9fba58faeea839f 100644 --- a/src/hotspot/share/opto/lcm.cpp +++ b/src/hotspot/share/opto/lcm.cpp @@ -702,8 +702,9 @@ void PhaseCFG::adjust_register_pressure(Node* n, Block* block, intptr_t* recalc_ case Op_StoreP: case Op_StoreN: case Op_StoreVector: - case Op_StoreVectorScatter: case Op_StoreVectorMasked: + case Op_StoreVectorScatter: + case Op_StoreVectorScatterMasked: case Op_StoreNKlass: for (uint k = 1; k < m->req(); k++) { Node *in = m->in(k); diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp index da9e357ac47bfe173dc4856b9e571825bdbd2f62..0270b1ac01aa804e6ced8fc523879b2ff301c599 100644 --- a/src/hotspot/share/opto/library_call.cpp +++ b/src/hotspot/share/opto/library_call.cpp @@ -662,8 +662,12 @@ bool LibraryCallKit::try_to_inline(int predicate) { return inline_vector_shuffle_to_vector(); case vmIntrinsics::_VectorLoadOp: return inline_vector_mem_operation(/*is_store=*/false); + case vmIntrinsics::_VectorLoadMaskedOp: + return inline_vector_mem_masked_operation(/*is_store*/false); case vmIntrinsics::_VectorStoreOp: return inline_vector_mem_operation(/*is_store=*/true); + case vmIntrinsics::_VectorStoreMaskedOp: + return inline_vector_mem_masked_operation(/*is_store=*/true); case vmIntrinsics::_VectorGatherOp: return inline_vector_gather_scatter(/*is_scatter*/ false); case vmIntrinsics::_VectorScatterOp: diff --git a/src/hotspot/share/opto/library_call.hpp b/src/hotspot/share/opto/library_call.hpp index dfbad65a7b2a241dd99c78c1e55a1137aadb94fe..197f5d80db8a46691b8ab60f1247753eeec0b5d9 100644 --- a/src/hotspot/share/opto/library_call.hpp +++ b/src/hotspot/share/opto/library_call.hpp @@ -319,6 +319,7 @@ class LibraryCallKit : public GraphKit { bool inline_vector_shuffle_iota(); bool inline_vector_mask_operation(); bool inline_vector_mem_operation(bool is_store); + bool inline_vector_mem_masked_operation(bool is_store); bool inline_vector_gather_scatter(bool is_scatter); bool inline_vector_reduction(); bool inline_vector_test(); @@ -332,14 +333,15 @@ class LibraryCallKit : public GraphKit { Node* gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2); enum VectorMaskUseType { - VecMaskUseLoad, - VecMaskUseStore, - VecMaskUseAll, - VecMaskNotUsed + 
VecMaskUseLoad = 1 << 0, + VecMaskUseStore = 1 << 1, + VecMaskUseAll = VecMaskUseLoad | VecMaskUseStore, + VecMaskUsePred = 1 << 2, + VecMaskNotUsed = 1 << 3 }; bool arch_supports_vector(int op, int num_elem, BasicType type, VectorMaskUseType mask_use_type, bool has_scalar_args = false); - bool arch_supports_vector_rotate(int opc, int num_elem, BasicType elem_bt, bool has_scalar_args = false); + bool arch_supports_vector_rotate(int opc, int num_elem, BasicType elem_bt, VectorMaskUseType mask_use_type, bool has_scalar_args = false); void clear_upper_avx() { #ifdef X86 diff --git a/src/hotspot/share/opto/loopPredicate.cpp b/src/hotspot/share/opto/loopPredicate.cpp index ad79fbf3b5bb864cb416f66d93aa115c66b19432..1378f80331f4d594e0caa7992a3fd50504d7ea6a 100644 --- a/src/hotspot/share/opto/loopPredicate.cpp +++ b/src/hotspot/share/opto/loopPredicate.cpp @@ -802,7 +802,7 @@ bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invari BoolNode* PhaseIdealLoop::rc_predicate(IdealLoopTree *loop, Node* ctrl, int scale, Node* offset, Node* init, Node* limit, jint stride, - Node* range, bool upper, bool &overflow) { + Node* range, bool upper, bool &overflow, bool negate) { jint con_limit = (limit != NULL && limit->is_Con()) ? limit->get_int() : 0; jint con_init = init->is_Con() ? init->get_int() : 0; jint con_offset = offset->is_Con() ? offset->get_int() : 0; @@ -928,7 +928,7 @@ BoolNode* PhaseIdealLoop::rc_predicate(IdealLoopTree *loop, Node* ctrl, cmp = new CmpUNode(max_idx_expr, range); } register_new_node(cmp, ctrl); - BoolNode* bol = new BoolNode(cmp, BoolTest::lt); + BoolNode* bol = new BoolNode(cmp, negate ? BoolTest::ge : BoolTest::lt); register_new_node(bol, ctrl); if (TraceLoopPredicate) { @@ -1295,36 +1295,26 @@ bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree *loop, ProjNode* } // If predicate expressions may overflow in the integer range, longs are used. bool overflow = false; + bool negate = (proj->_con != predicate_proj->_con); // Test the lower bound - BoolNode* lower_bound_bol = rc_predicate(loop, ctrl, scale, offset, init, limit, stride, rng, false, overflow); - // Negate test if necessary - bool negated = false; - if (proj->_con != predicate_proj->_con) { - lower_bound_bol = new BoolNode(lower_bound_bol->in(1), lower_bound_bol->_test.negate()); - register_new_node(lower_bound_bol, ctrl); - negated = true; - } + BoolNode* lower_bound_bol = rc_predicate(loop, ctrl, scale, offset, init, limit, stride, rng, false, overflow, negate); + ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, reason, overflow ? Op_If : iff->Opcode()); IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If(); _igvn.hash_delete(lower_bound_iff); lower_bound_iff->set_req(1, lower_bound_bol); - if (TraceLoopPredicate) tty->print_cr("lower bound check if: %s %d ", negated ? " negated" : "", lower_bound_iff->_idx); + if (TraceLoopPredicate) tty->print_cr("lower bound check if: %s %d ", negate ? 
" negated" : "", lower_bound_iff->_idx); // Test the upper bound - BoolNode* upper_bound_bol = rc_predicate(loop, lower_bound_proj, scale, offset, init, limit, stride, rng, true, overflow); - negated = false; - if (proj->_con != predicate_proj->_con) { - upper_bound_bol = new BoolNode(upper_bound_bol->in(1), upper_bound_bol->_test.negate()); - register_new_node(upper_bound_bol, ctrl); - negated = true; - } + BoolNode* upper_bound_bol = rc_predicate(loop, lower_bound_proj, scale, offset, init, limit, stride, rng, true, overflow, negate); + ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, reason, overflow ? Op_If : iff->Opcode()); assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate"); IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If(); _igvn.hash_delete(upper_bound_iff); upper_bound_iff->set_req(1, upper_bound_bol); - if (TraceLoopPredicate) tty->print_cr("upper bound check if: %s %d ", negated ? " negated" : "", lower_bound_iff->_idx); + if (TraceLoopPredicate) tty->print_cr("upper bound check if: %s %d ", negate ? " negated" : "", lower_bound_iff->_idx); // Fall through into rest of the clean up code which will move // any dependent nodes onto the upper bound test. @@ -1370,10 +1360,10 @@ ProjNode* PhaseIdealLoop::insert_initial_skeleton_predicate(IfNode* iff, IdealLo Node* rng, bool &overflow, Deoptimization::DeoptReason reason) { // First predicate for the initial value on first loop iteration - assert(proj->_con && predicate_proj->_con, "not a range check?"); Node* opaque_init = new OpaqueLoopInitNode(C, init); register_new_node(opaque_init, upper_bound_proj); - BoolNode* bol = rc_predicate(loop, upper_bound_proj, scale, offset, opaque_init, limit, stride, rng, (stride > 0) != (scale > 0), overflow); + bool negate = (proj->_con != predicate_proj->_con); + BoolNode* bol = rc_predicate(loop, upper_bound_proj, scale, offset, opaque_init, limit, stride, rng, (stride > 0) != (scale > 0), overflow, negate); Node* opaque_bol = new Opaque4Node(C, bol, _igvn.intcon(1)); // This will go away once loop opts are over C->add_skeleton_predicate_opaq(opaque_bol); register_new_node(opaque_bol, upper_bound_proj); @@ -1391,7 +1381,7 @@ ProjNode* PhaseIdealLoop::insert_initial_skeleton_predicate(IfNode* iff, IdealLo register_new_node(max_value, new_proj); max_value = new AddINode(opaque_init, max_value); register_new_node(max_value, new_proj); - bol = rc_predicate(loop, new_proj, scale, offset, max_value, limit, stride, rng, (stride > 0) != (scale > 0), overflow); + bol = rc_predicate(loop, new_proj, scale, offset, max_value, limit, stride, rng, (stride > 0) != (scale > 0), overflow, negate); opaque_bol = new Opaque4Node(C, bol, _igvn.intcon(1)); C->add_skeleton_predicate_opaq(opaque_bol); register_new_node(opaque_bol, new_proj); diff --git a/src/hotspot/share/opto/loopTransform.cpp b/src/hotspot/share/opto/loopTransform.cpp index fa97be59d7e3645875ff16e32a9cf7c0ee847e65..bbfb5b12d08eb46dd47b92f85022b839b6e3bd3b 100644 --- a/src/hotspot/share/opto/loopTransform.cpp +++ b/src/hotspot/share/opto/loopTransform.cpp @@ -2589,7 +2589,7 @@ Node* PhaseIdealLoop::add_range_check_predicate(IdealLoopTree* loop, CountedLoop Node* predicate_proj, int scale_con, Node* offset, Node* limit, jint stride_con, Node* value) { bool overflow = false; - BoolNode* bol = rc_predicate(loop, predicate_proj, scale_con, offset, value, NULL, stride_con, limit, (stride_con > 0) != (scale_con > 0), overflow); + BoolNode* bol = rc_predicate(loop, predicate_proj, 
scale_con, offset, value, NULL, stride_con, limit, (stride_con > 0) != (scale_con > 0), overflow, false); Node* opaque_bol = new Opaque4Node(C, bol, _igvn.intcon(1)); register_new_node(opaque_bol, predicate_proj); IfNode* new_iff = NULL; diff --git a/src/hotspot/share/opto/loopnode.hpp b/src/hotspot/share/opto/loopnode.hpp index c851d1d9f4728f27af4b7d0254dc1516b3982395..35db18e0b6eaea0e211845a6327c7b2f2c597c18 100644 --- a/src/hotspot/share/opto/loopnode.hpp +++ b/src/hotspot/share/opto/loopnode.hpp @@ -1316,7 +1316,8 @@ public: BoolNode* rc_predicate(IdealLoopTree *loop, Node* ctrl, int scale, Node* offset, Node* init, Node* limit, jint stride, - Node* range, bool upper, bool &overflow); + Node* range, bool upper, bool &overflow, + bool negate); // Implementation of the loop predication to promote checks outside the loop bool loop_predication_impl(IdealLoopTree *loop); diff --git a/src/hotspot/share/opto/macro.hpp b/src/hotspot/share/opto/macro.hpp index 163952e69c36a5d78171ec7c46c30cbe1ec4a93c..b65b97fec8775599ca79cf95688cf78906422880 100644 --- a/src/hotspot/share/opto/macro.hpp +++ b/src/hotspot/share/opto/macro.hpp @@ -186,7 +186,6 @@ private: int replace_input(Node *use, Node *oldref, Node *newref); void migrate_outs(Node *old, Node *target); - void copy_call_debug_info(CallNode *oldcall, CallNode * newcall); Node* opt_bits_test(Node* ctrl, Node* region, int edge, Node* word, int mask, int bits, bool return_fast_path = false); void copy_predefined_input_for_runtime_call(Node * ctrl, CallNode* oldcall, CallNode* call); CallNode* make_slow_call(CallNode *oldcall, const TypeFunc* slow_call_type, address slow_call, diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp index 5b8fcb8ab607a0c76d1ac2a01094d12c72094552..fadfd11f2bfbbad92fa75e68a727b920bcf7a1b1 100644 --- a/src/hotspot/share/opto/matcher.cpp +++ b/src/hotspot/share/opto/matcher.cpp @@ -434,6 +434,24 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) { return rms; } +const int Matcher::scalable_predicate_reg_slots() { + assert(Matcher::has_predicated_vectors() && Matcher::supports_scalable_vector(), + "scalable predicate vector should be supported"); + int vector_reg_bit_size = Matcher::scalable_vector_reg_size(T_BYTE) << LogBitsPerByte; + // We assume each predicate register is one-eighth of the size of + // scalable vector register, one mask bit per vector byte. + int predicate_reg_bit_size = vector_reg_bit_size >> 3; + // Compute number of slots which is required when scalable predicate + // register is spilled. E.g. if scalable vector register is 640 bits, + // predicate register is 80 bits, which is 2.5 * slots. + // We will round up the slot number to power of 2, which is required + // by find_first_set(). + int slots = predicate_reg_bit_size & (BitsPerInt - 1) + ? (predicate_reg_bit_size >> LogBitsPerInt) + 1 + : predicate_reg_bit_size >> LogBitsPerInt; + return round_up_power_of_2(slots); +} + #define NOF_STACK_MASKS (3*13) // Create the initial stack mask used by values spilling to the stack. 
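
The slot computation in Matcher::scalable_predicate_reg_slots() above is easy to sanity-check by hand. Below is a worked standalone version, with round_up_power_of_2() spelled out; the numbers follow the comment's own 640-bit example, and the rounding exists so that find_first_set() can search power-of-two-aligned slot sets:

    #include <cstdio>

    // The real code uses round_up_power_of_2() from HotSpot utilities.
    static int round_up_pow2(int x) {
        int p = 1;
        while (p < x) p <<= 1;
        return p;
    }

    static int predicate_reg_slots(int vector_reg_bits) {
        int predicate_bits = vector_reg_bits >> 3;   // one mask bit per vector byte
        int slots = (predicate_bits + 31) / 32;      // 32-bit stack slots, rounded up
        return round_up_pow2(slots);                 // alignment for find_first_set()
    }

    int main() {
        // 640-bit SVE vector -> 80-bit predicate -> 2.5 slots -> 3 -> 4
        std::printf("%d\n", predicate_reg_slots(640));  // prints 4
        // 512-bit vector -> 64-bit predicate -> exactly 2 slots
        std::printf("%d\n", predicate_reg_slots(512));  // prints 2
    }
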
@@ -542,6 +560,8 @@ void Matcher::init_first_stack_mask() { if (Matcher::has_predicated_vectors()) { *idealreg2spillmask[Op_RegVectMask] = *idealreg2regmask[Op_RegVectMask]; idealreg2spillmask[Op_RegVectMask]->OR(aligned_stack_mask); + } else { + *idealreg2spillmask[Op_RegVectMask] = RegMask::Empty; } if (Matcher::vector_size_supported(T_BYTE,4)) { @@ -614,6 +634,19 @@ void Matcher::init_first_stack_mask() { if (Matcher::supports_scalable_vector()) { int k = 1; OptoReg::Name in = OptoReg::add(_in_arg_limit, -1); + // Exclude last input arg stack slots to avoid spilling vector register there, + // otherwise RegVectMask spills could stomp over stack slots in caller frame. + for (; (in >= init_in) && (k < scalable_predicate_reg_slots()); k++) { + scalable_stack_mask.Remove(in); + in = OptoReg::add(in, -1); + } + + // For RegVectMask + scalable_stack_mask.clear_to_sets(scalable_predicate_reg_slots()); + assert(scalable_stack_mask.is_AllStack(), "should be infinite stack"); + *idealreg2spillmask[Op_RegVectMask] = *idealreg2regmask[Op_RegVectMask]; + idealreg2spillmask[Op_RegVectMask]->OR(scalable_stack_mask); + // Exclude last input arg stack slots to avoid spilling vector register there, // otherwise vector spills could stomp over stack slots in caller frame. for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) { @@ -2228,6 +2261,7 @@ bool Matcher::find_shared_visit(MStack& mstack, Node* n, uint opcode, bool& mem_ case Op_MacroLogicV: case Op_LoadVectorMasked: case Op_VectorCmpMasked: + case Op_VectorLoadMask: set_shared(n); // Force result into register (it will be anyways) break; case Op_ConP: { // Convert pointers above the centerline to NUL @@ -2273,6 +2307,21 @@ bool Matcher::find_shared_visit(MStack& mstack, Node* n, uint opcode, bool& mem_ } void Matcher::find_shared_post_visit(Node* n, uint opcode) { + if (n->is_predicated_vector()) { + // Restructure into binary trees for Matching. + if (n->req() == 4) { + n->set_req(1, new BinaryNode(n->in(1), n->in(2))); + n->set_req(2, n->in(3)); + n->del_req(3); + } else if (n->req() == 5) { + n->set_req(1, new BinaryNode(n->in(1), n->in(2))); + n->set_req(2, new BinaryNode(n->in(3), n->in(4))); + n->del_req(4); + n->del_req(3); + } + return; + } + switch(opcode) { // Handle some opcodes special case Op_StorePConditional: case Op_StoreIConditional: @@ -2412,12 +2461,22 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) { n->del_req(3); break; } + case Op_LoadVectorGatherMasked: case Op_StoreVectorScatter: { Node* pair = new BinaryNode(n->in(MemNode::ValueIn), n->in(MemNode::ValueIn+1)); n->set_req(MemNode::ValueIn, pair); n->del_req(MemNode::ValueIn+1); break; } + case Op_StoreVectorScatterMasked: { + Node* pair = new BinaryNode(n->in(MemNode::ValueIn+1), n->in(MemNode::ValueIn+2)); + n->set_req(MemNode::ValueIn+1, pair); + n->del_req(MemNode::ValueIn+2); + pair = new BinaryNode(n->in(MemNode::ValueIn), n->in(MemNode::ValueIn+1)); + n->set_req(MemNode::ValueIn, pair); + n->del_req(MemNode::ValueIn+1); + break; + } case Op_VectorMaskCmp: { n->set_req(1, new BinaryNode(n->in(1), n->in(2))); n->set_req(2, n->in(3)); diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp index cae441a0a4d42526db6de6f51dada931cd1c0ef6..446fb259ee57ae920e17737da4df799ea8a679f4 100644 --- a/src/hotspot/share/opto/matcher.hpp +++ b/src/hotspot/share/opto/matcher.hpp @@ -329,6 +329,8 @@ public: // e.g. 
Op_ vector nodes and other intrinsics while guarding with vlen static const bool match_rule_supported_vector(int opcode, int vlen, BasicType bt); + static const bool match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt); + static const RegMask* predicate_reg_mask(void); static const TypeVect* predicate_reg_type(const Type* elemTy, int length); @@ -345,6 +347,8 @@ public: // Actual max scalable vector register length. static const int scalable_vector_reg_size(const BasicType bt); + // Actual max scalable predicate register length. + static const int scalable_predicate_reg_slots(); // Vector ideal reg static const uint vector_ideal_reg(int len); diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index f0f78bd9946a551f258570b16ce4494a6e671831..8577d1f6a9475e3e5042ae3bbf89dc05df3efa83 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -1136,7 +1136,7 @@ Node* MemNode::can_see_stored_value(Node* st, PhaseTransform* phase) const { return NULL; } // LoadVector/StoreVector needs additional check to ensure the types match. - if (store_Opcode() == Op_StoreVector) { + if (st->is_StoreVector()) { const TypeVect* in_vt = st->as_StoreVector()->vect_type(); const TypeVect* out_vt = as_LoadVector()->vect_type(); if (in_vt != out_vt) { diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp index 078cbd03423c95ba576dd4b0d9eb34b1873e67c9..1c226cbf2a47b3cf5530b5df36e512a59639ed43 100644 --- a/src/hotspot/share/opto/node.hpp +++ b/src/hotspot/share/opto/node.hpp @@ -172,7 +172,9 @@ class LoadVectorGatherNode; class StoreVectorNode; class StoreVectorScatterNode; class VectorMaskCmpNode; +class VectorUnboxNode; class VectorSet; +class VectorReinterpretNode; // The type of all node counts and indexes. // It must hold at least 16 bits, but must also be fast to load and store. @@ -707,6 +709,8 @@ public: DEFINE_CLASS_ID(EncodePKlass, EncodeNarrowPtr, 1) DEFINE_CLASS_ID(Vector, Type, 7) DEFINE_CLASS_ID(VectorMaskCmp, Vector, 0) + DEFINE_CLASS_ID(VectorUnbox, Vector, 1) + DEFINE_CLASS_ID(VectorReinterpret, Vector, 2) DEFINE_CLASS_ID(Proj, Node, 3) DEFINE_CLASS_ID(CatchProj, Proj, 0) @@ -778,7 +782,8 @@ public: Flag_is_scheduled = 1 << 12, Flag_has_vector_mask_set = 1 << 13, Flag_is_expensive = 1 << 14, - Flag_for_post_loop_opts_igvn = 1 << 15, + Flag_is_predicated_vector = 1 << 15, + Flag_for_post_loop_opts_igvn = 1 << 16, _last_flag = Flag_for_post_loop_opts_igvn }; @@ -933,11 +938,13 @@ public: DEFINE_CLASS_QUERY(SubTypeCheck) DEFINE_CLASS_QUERY(Type) DEFINE_CLASS_QUERY(Vector) + DEFINE_CLASS_QUERY(VectorMaskCmp) + DEFINE_CLASS_QUERY(VectorUnbox) + DEFINE_CLASS_QUERY(VectorReinterpret); DEFINE_CLASS_QUERY(LoadVector) DEFINE_CLASS_QUERY(LoadVectorGather) DEFINE_CLASS_QUERY(StoreVector) DEFINE_CLASS_QUERY(StoreVectorScatter) - DEFINE_CLASS_QUERY(VectorMaskCmp) DEFINE_CLASS_QUERY(Unlock) #undef DEFINE_CLASS_QUERY @@ -988,6 +995,8 @@ public: // It must have the loop's phi as input and provide a def to the phi. 
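
On the node.hpp change above: Flag_is_predicated_vector is inserted at 1 << 15, shifting Flag_for_post_loop_opts_igvn to 1 << 16. Renumbering is safe because the constants are only ever used symbolically. A toy model of the flag-word scheme, with names copied from the diff but an illustrative struct in place of Node:

    #include <cstdio>

    enum NodeFlags : unsigned {
        Flag_is_expensive            = 1u << 14,
        Flag_is_predicated_vector    = 1u << 15,
        Flag_for_post_loop_opts_igvn = 1u << 16,
    };

    struct ToyNode {
        unsigned flags = 0;
        void add_flag(unsigned f) { flags |= f; }
        bool is_predicated_vector() const { return (flags & Flag_is_predicated_vector) != 0; }
    };

    int main() {
        ToyNode n;
        n.add_flag(Flag_is_predicated_vector);         // as done when a mask edge is appended
        std::printf("%d\n", n.is_predicated_vector()); // prints 1
    }
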
bool is_reduction() const { return (_flags & Flag_is_reduction) != 0; } + bool is_predicated_vector() const { return (_flags & Flag_is_predicated_vector) != 0; } + // The node is a CountedLoopEnd with a mask annotation so as to emit a restore context bool has_vector_mask_set() const { return (_flags & Flag_has_vector_mask_set) != 0; } diff --git a/src/hotspot/share/opto/postaloc.cpp b/src/hotspot/share/opto/postaloc.cpp index aa9ae37a78a12574dcc56511564b20c933bc3a92..96c30a122bb50cd7e3ba6eee210e9177ee882721 100644 --- a/src/hotspot/share/opto/postaloc.cpp +++ b/src/hotspot/share/opto/postaloc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -309,17 +309,16 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v // "val_reg" and "reg". For example, when "val" resides in register // but "reg" is located in stack. if (lrgs(val_idx).is_scalable()) { - assert(val->ideal_reg() == Op_VecA, "scalable vector register"); + assert(val->ideal_reg() == Op_VecA || val->ideal_reg() == Op_RegVectMask, "scalable register"); if (OptoReg::is_stack(reg)) { n_regs = lrgs(val_idx).scalable_reg_slots(); } else { - n_regs = RegMask::SlotsPerVecA; + n_regs = lrgs(val_idx)._is_predicate ? RegMask::SlotsPerRegVectMask : RegMask::SlotsPerVecA; } } if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set uint last; - if (lrgs(val_idx).is_scalable()) { - assert(val->ideal_reg() == Op_VecA, "scalable vector register"); + if (lrgs(val_idx).is_scalable() && val->ideal_reg() == Op_VecA) { // For scalable vector register, regmask is always SlotsPerVecA bits aligned last = RegMask::SlotsPerVecA - 1; } else { diff --git a/src/hotspot/share/opto/regmask.cpp b/src/hotspot/share/opto/regmask.cpp index df7f8f4db42aa1be230d5e3cad32b2150d0ce2ea..a1f1904bfa490ba3731b7f46ffc856bf7bab9917 100644 --- a/src/hotspot/share/opto/regmask.cpp +++ b/src/hotspot/share/opto/regmask.cpp @@ -237,7 +237,7 @@ bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const { // HIGHEST register number in the set, or BAD if no sets. // Works also for size 1. OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const { - if (lrg.is_scalable()) { + if (lrg.is_scalable() && lrg._is_vector) { // For scalable vector register, regmask is SlotsPerVecA bits aligned. assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets"); } else { diff --git a/src/hotspot/share/opto/runtime.cpp b/src/hotspot/share/opto/runtime.cpp index 7e87da8a46f7ca60eece459b720bf74edb5efad7..a80d9559dd8e504617856328007dc9a0315525f6 100644 --- a/src/hotspot/share/opto/runtime.cpp +++ b/src/hotspot/share/opto/runtime.cpp @@ -1280,6 +1280,11 @@ static void trace_exception(outputStream* st, oop exception_oop, address excepti // directly from compiled code. Compiled code will call the C++ method following. // We can't allow async exception to be installed during exception processing. JRT_ENTRY_NO_ASYNC(address, OptoRuntime::handle_exception_C_helper(JavaThread* current, nmethod* &nm)) + // The frame we rethrow the exception to might not have been processed by the GC yet. + // The stack watermark barrier takes care of detecting that and ensuring the frame + // has updated oops. 
+ StackWatermarkSet::after_unwind(current); + // Do not confuse exception_oop with pending_exception. The exception_oop // is only used to pass arguments into the method. Not for general // exception handling. DO NOT CHANGE IT to use pending_exception, since @@ -1422,7 +1427,7 @@ address OptoRuntime::handle_exception_C(JavaThread* current) { // deoptimized frame if (nm != NULL) { - RegisterMap map(current, false); + RegisterMap map(current, false /* update_map */, false /* process_frames */); frame caller = current->last_frame().sender(&map); #ifdef ASSERT assert(caller.is_compiled_frame(), "must be"); @@ -1461,11 +1466,6 @@ address OptoRuntime::rethrow_C(oopDesc* exception, JavaThread* thread, address r // Enable WXWrite: the function called directly by compiled code. MACOS_AARCH64_ONLY(ThreadWXEnable wx(WXWrite, thread)); - // The frame we rethrow the exception to might not have been processed by the GC yet. - // The stack watermark barrier takes care of detecting that and ensuring the frame - // has updated oops. - StackWatermarkSet::after_unwind(thread); - #ifndef PRODUCT SharedRuntime::_rethrow_ctr++; // count rethrows #endif diff --git a/src/hotspot/share/opto/subnode.cpp b/src/hotspot/share/opto/subnode.cpp index f721566c415909b49949899d29537cfff9ab3fd2..5956b1738f1ee6427f349eda080ba1b1754193b4 100644 --- a/src/hotspot/share/opto/subnode.cpp +++ b/src/hotspot/share/opto/subnode.cpp @@ -1480,13 +1480,15 @@ Node *BoolNode::Ideal(PhaseGVN *phase, bool can_reshape) { return NULL; } + const int cmp1_op = cmp1->Opcode(); + const int cmp2_op = cmp2->Opcode(); + // Constant on left? Node *con = cmp1; - uint op2 = cmp2->Opcode(); // Move constants to the right of compare's to canonicalize. // Do not muck with Opaque1 nodes, as this indicates a loop // guard that cannot change shape. - if( con->is_Con() && !cmp2->is_Con() && op2 != Op_Opaque1 && + if( con->is_Con() && !cmp2->is_Con() && cmp2_op != Op_Opaque1 && // Because of NaN's, CmpD and CmpF are not commutative cop != Op_CmpD && cop != Op_CmpF && // Protect against swapping inputs to a compare when it is used by a @@ -1504,7 +1506,7 @@ Node *BoolNode::Ideal(PhaseGVN *phase, bool can_reshape) { // Change "bool eq/ne (cmp (and X 16) 16)" into "bool ne/eq (cmp (and X 16) 0)". if (cop == Op_CmpI && (_test._test == BoolTest::eq || _test._test == BoolTest::ne) && - cmp1->Opcode() == Op_AndI && cmp2->Opcode() == Op_ConI && + cmp1_op == Op_AndI && cmp2_op == Op_ConI && cmp1->in(2)->Opcode() == Op_ConI) { const TypeInt *t12 = phase->type(cmp2)->isa_int(); const TypeInt *t112 = phase->type(cmp1->in(2))->isa_int(); @@ -1518,7 +1520,7 @@ Node *BoolNode::Ideal(PhaseGVN *phase, bool can_reshape) { // Same for long type: change "bool eq/ne (cmp (and X 16) 16)" into "bool ne/eq (cmp (and X 16) 0)". 
if (cop == Op_CmpL && (_test._test == BoolTest::eq || _test._test == BoolTest::ne) && - cmp1->Opcode() == Op_AndL && cmp2->Opcode() == Op_ConL && + cmp1_op == Op_AndL && cmp2_op == Op_ConL && cmp1->in(2)->Opcode() == Op_ConL) { const TypeLong *t12 = phase->type(cmp2)->isa_long(); const TypeLong *t112 = phase->type(cmp1->in(2))->isa_long(); @@ -1529,10 +1531,41 @@ Node *BoolNode::Ideal(PhaseGVN *phase, bool can_reshape) { } } + // Change "cmp (add X min_jint) (add Y min_jint)" into "cmpu X Y" + // and "cmp (add X min_jint) c" into "cmpu X (c + min_jint)" + if (cop == Op_CmpI && + cmp1_op == Op_AddI && + phase->type(cmp1->in(2)) == TypeInt::MIN) { + if (cmp2_op == Op_ConI) { + Node* ncmp2 = phase->intcon(java_add(cmp2->get_int(), min_jint)); + Node* ncmp = phase->transform(new CmpUNode(cmp1->in(1), ncmp2)); + return new BoolNode(ncmp, _test._test); + } else if (cmp2_op == Op_AddI && + phase->type(cmp2->in(2)) == TypeInt::MIN) { + Node* ncmp = phase->transform(new CmpUNode(cmp1->in(1), cmp2->in(1))); + return new BoolNode(ncmp, _test._test); + } + } + + // Change "cmp (add X min_jlong) (add Y min_jlong)" into "cmpu X Y" + // and "cmp (add X min_jlong) c" into "cmpu X (c + min_jlong)" + if (cop == Op_CmpL && + cmp1_op == Op_AddL && + phase->type(cmp1->in(2)) == TypeLong::MIN) { + if (cmp2_op == Op_ConL) { + Node* ncmp2 = phase->longcon(java_add(cmp2->get_long(), min_jlong)); + Node* ncmp = phase->transform(new CmpULNode(cmp1->in(1), ncmp2)); + return new BoolNode(ncmp, _test._test); + } else if (cmp2_op == Op_AddL && + phase->type(cmp2->in(2)) == TypeLong::MIN) { + Node* ncmp = phase->transform(new CmpULNode(cmp1->in(1), cmp2->in(1))); + return new BoolNode(ncmp, _test._test); + } + } + // Change "bool eq/ne (cmp (xor X 1) 0)" into "bool ne/eq (cmp X 0)". // The XOR-1 is an idiom used to flip the sense of a bool. We flip the // test instead. - int cmp1_op = cmp1->Opcode(); const TypeInt* cmp2_type = phase->type(cmp2)->isa_int(); if (cmp2_type == NULL) return NULL; Node* j_xor = cmp1; diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index 7850e19f16d4ae439eca3b2b05d0e9516de1ded3..d488d0eba5f42fc4cef15c424fad1639164c2c10 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -2558,6 +2558,14 @@ void SuperWord::output() { Node* in = vector_opd(p, 1); vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n)); vlen_in_bytes = vn->as_Vector()->length_in_bytes(); + } else if (opc == Op_ConvI2F || opc == Op_ConvL2D || + opc == Op_ConvF2I || opc == Op_ConvD2L) { + assert(n->req() == 2, "only one input expected"); + BasicType bt = velt_basic_type(n); + int vopc = VectorNode::opcode(opc, bt); + Node* in = vector_opd(p, 1); + vn = VectorCastNode::make(vopc, in, bt, vlen); + vlen_in_bytes = vn->as_Vector()->length_in_bytes(); } else if (is_cmov_pack(p)) { if (can_process_post_loop) { // do not refactor of flow in post loop context diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp index 459cbef03b751c2be448cc34a3c5c1e987176dcd..2cd8925002a67a3e69775b41f763033b42795380 100644 --- a/src/hotspot/share/opto/type.cpp +++ b/src/hotspot/share/opto/type.cpp @@ -997,7 +997,6 @@ const Type *Type::filter_helper(const Type *kills, bool include_speculative) con } //------------------------------xdual------------------------------------------ - const Type *Type::xdual() const { // Note: the base() accessor asserts the sanity of _base. 
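
The new BoolNode::Ideal() rewrite above rests on a standard identity: biasing both operands by min_jint flips the sign bit and turns a signed comparison into the corresponding unsigned one, so "cmp (add X min_jint) (add Y min_jint)" can become "cmpu X Y" (and likewise with min_jlong for longs). A standalone check of the 32-bit identity on sample values:

    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint32_t min_jint = 0x80000000u;
        for (uint32_t x : {0u, 1u, 0x7fffffffu, 0x80000000u, 0xffffffffu}) {
            for (uint32_t y : {0u, 5u, 0x80000001u, 0xffffffffu}) {
                // Signed compare of the biased values ...
                bool signed_cmp = (int32_t)(x + min_jint) < (int32_t)(y + min_jint);
                // ... must agree with the unsigned compare of the originals.
                bool unsigned_cmp = x < y;
                if (signed_cmp != unsigned_cmp) { std::puts("mismatch"); return 1; }
            }
        }
        std::puts("identity holds on samples"); // expected output
    }
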
assert(_type_info[base()].dual_type != Bad, "implement with v-call"); @@ -2359,7 +2358,10 @@ const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors const TypeVect *TypeVect::VECTMASK = NULL; // predicate/mask vector //------------------------------make------------------------------------------- -const TypeVect* TypeVect::make(const Type *elem, uint length) { +const TypeVect* TypeVect::make(const Type *elem, uint length, bool is_mask) { + if (is_mask) { + return makemask(elem, length); + } BasicType elem_bt = elem->array_element_basic_type(); assert(is_java_primitive(elem_bt), "only primitive types in vector"); assert(Matcher::vector_size_supported(elem_bt, length), "length in range"); @@ -2385,7 +2387,9 @@ const TypeVect* TypeVect::make(const Type *elem, uint length) { } const TypeVect *TypeVect::makemask(const Type* elem, uint length) { - if (Matcher::has_predicated_vectors()) { + BasicType elem_bt = elem->array_element_basic_type(); + if (Matcher::has_predicated_vectors() && + Matcher::match_rule_supported_vector_masked(Op_VectorLoadMask, length, elem_bt)) { const TypeVect* mtype = Matcher::predicate_reg_type(elem, length); return (TypeVect*)(const_cast<TypeVect*>(mtype))->hashcons(); } else { diff --git a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp index dc9a5d8156bd76f9727d5ed9c5f5a7b691f7caf6..6a927ee49a784bf64a2ac9ddb5bac205a8859e92 100644 --- a/src/hotspot/share/opto/type.hpp +++ b/src/hotspot/share/opto/type.hpp @@ -804,12 +804,12 @@ public: virtual bool singleton(void) const; // TRUE if type is a singleton virtual bool empty(void) const; // TRUE if type is vacuous - static const TypeVect *make(const BasicType elem_bt, uint length) { + static const TypeVect *make(const BasicType elem_bt, uint length, bool is_mask = false) { // Use bottom primitive type. - return make(get_const_basic_type(elem_bt), length); + return make(get_const_basic_type(elem_bt), length, is_mask); } // Used directly by Replicate nodes to construct singleton vector. - static const TypeVect *make(const Type* elem, uint length); + static const TypeVect *make(const Type* elem, uint length, bool is_mask = false); static const TypeVect *makemask(const BasicType elem_bt, uint length) { // Use bottom primitive type. diff --git a/src/hotspot/share/opto/vector.cpp b/src/hotspot/share/opto/vector.cpp index 65f3217886562eda24fb7d2ef7af8ea73aa5fdb0..a1e324363db46da35b49bfb348d4e1c2c8ea387d 100644 --- a/src/hotspot/share/opto/vector.cpp +++ b/src/hotspot/share/opto/vector.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -353,7 +353,10 @@ Node* PhaseVector::expand_vbox_alloc_node(VectorBoxAllocateNode* vbox_alloc, int num_elem = vect_type->length(); bool is_mask = is_vector_mask(box_klass); - if (is_mask && bt != T_BOOLEAN) { + // If boxed mask value is present in a predicate register, it must be + // spilled to a vector through a VectorStoreMaskOperation before actual StoreVector + // operation to vector payload field. + if (is_mask && (value->bottom_type()->isa_vectmask() || bt != T_BOOLEAN)) { value = gvn.transform(VectorStoreMaskNode::make(gvn, value, bt, num_elem)); // Although type of mask depends on its definition, in terms of storage everything is stored in boolean array.
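
On the vector.cpp change just above: when a boxed mask lives in a predicate register, VectorStoreMask re-expands it before the payload store because the Java-side mask payload is a boolean array, one byte per lane. A scalar model of that expansion (toy function; one predicate bit in, one boolean byte out per lane):

    #include <cstdint>
    #include <cstdio>

    // Conceptual VectorStoreMask: predicate bits -> boolean-array layout.
    void store_mask(uint64_t predicate_bits, int num_elem, uint8_t* out) {
        for (int lane = 0; lane < num_elem; ++lane)
            out[lane] = (predicate_bits >> lane) & 1;   // 0 or 1 per lane
    }

    int main() {
        uint8_t payload[8];
        store_mask(0b10110101, 8, payload);
        for (int i = 0; i < 8; ++i) std::printf("%d", payload[i]); // prints 10101101
    }
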
bt = T_BOOLEAN; @@ -469,7 +472,7 @@ void PhaseVector::expand_vunbox_node(VectorUnboxNode* vec_unbox) { C->set_max_vector_size(MAX2(C->max_vector_size(), vt->length_in_bytes())); if (is_vector_mask(from_kls)) { - vec_val_load = gvn.transform(new VectorLoadMaskNode(vec_val_load, TypeVect::make(masktype, num_elem))); + vec_val_load = gvn.transform(new VectorLoadMaskNode(vec_val_load, TypeVect::makemask(masktype, num_elem))); } else if (is_vector_shuffle(from_kls) && !vec_unbox->is_shuffle_to_vector()) { assert(vec_unbox->bottom_type()->is_vect()->element_basic_type() == masktype, "expect shuffle type consistency"); vec_val_load = gvn.transform(new VectorLoadShuffleNode(vec_val_load, TypeVect::make(masktype, num_elem))); diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp index ae700ad731417673481f631ecdbcdbdb7bc7fba0..95ca0ae6d2bdc3d82aee77d360d4939731610a26 100644 --- a/src/hotspot/share/opto/vectorIntrinsics.cpp +++ b/src/hotspot/share/opto/vectorIntrinsics.cpp @@ -59,46 +59,84 @@ static bool check_vbox(const TypeInstPtr* vbox_type) { } #endif -bool LibraryCallKit::arch_supports_vector_rotate(int opc, int num_elem, BasicType elem_bt, bool has_scalar_args) { - bool is_supported = true; - // has_scalar_args flag is true only for non-constant scalar shift count, - // since in this case shift needs to be broadcasted. - if (!Matcher::match_rule_supported_vector(opc, num_elem, elem_bt) || - (has_scalar_args && - !arch_supports_vector(VectorNode::replicate_opcode(elem_bt), num_elem, elem_bt, VecMaskNotUsed))) { - is_supported = false; - } - - int lshiftopc, rshiftopc; - switch(elem_bt) { - case T_BYTE: - lshiftopc = Op_LShiftI; - rshiftopc = Op_URShiftB; - break; - case T_SHORT: - lshiftopc = Op_LShiftI; - rshiftopc = Op_URShiftS; - break; - case T_INT: - lshiftopc = Op_LShiftI; - rshiftopc = Op_URShiftI; - break; - case T_LONG: - lshiftopc = Op_LShiftL; - rshiftopc = Op_URShiftL; - break; - default: - assert(false, "Unexpected type"); - } - int lshiftvopc = VectorNode::opcode(lshiftopc, elem_bt); - int rshiftvopc = VectorNode::opcode(rshiftopc, elem_bt); - if (!is_supported && - arch_supports_vector(lshiftvopc, num_elem, elem_bt, VecMaskNotUsed, has_scalar_args) && - arch_supports_vector(rshiftvopc, num_elem, elem_bt, VecMaskNotUsed, has_scalar_args) && - arch_supports_vector(Op_OrV, num_elem, elem_bt, VecMaskNotUsed)) { - is_supported = true; - } - return is_supported; +static bool is_vector_mask(ciKlass* klass) { + return klass->is_subclass_of(ciEnv::current()->vector_VectorMask_klass()); +} + +static bool is_vector_shuffle(ciKlass* klass) { + return klass->is_subclass_of(ciEnv::current()->vector_VectorShuffle_klass()); +} + +bool LibraryCallKit::arch_supports_vector_rotate(int opc, int num_elem, BasicType elem_bt, + VectorMaskUseType mask_use_type, bool has_scalar_args) { + bool is_supported = true; + + // has_scalar_args flag is true only for non-constant scalar shift count, + // since in this case shift needs to be broadcasted. + if (!Matcher::match_rule_supported_vector(opc, num_elem, elem_bt) || + (has_scalar_args && + !arch_supports_vector(VectorNode::replicate_opcode(elem_bt), num_elem, elem_bt, VecMaskNotUsed))) { + is_supported = false; + } + + if (is_supported) { + // Check whether mask unboxing is supported. 
+ if ((mask_use_type & VecMaskUseLoad) != 0) { + if (!Matcher::match_rule_supported_vector(Op_VectorLoadMask, num_elem, elem_bt)) { + #ifndef PRODUCT + if (C->print_intrinsics()) { + tty->print_cr(" ** Rejected vector mask loading (%s,%s,%d) because architecture does not support it", + NodeClassNames[Op_VectorLoadMask], type2name(elem_bt), num_elem); + } + #endif + return false; + } + } + + if ((mask_use_type & VecMaskUsePred) != 0) { + if (!Matcher::has_predicated_vectors() || + !Matcher::match_rule_supported_vector_masked(opc, num_elem, elem_bt)) { + #ifndef PRODUCT + if (C->print_intrinsics()) { + tty->print_cr("Rejected vector mask predicate using (%s,%s,%d) because architecture does not support it", + NodeClassNames[opc], type2name(elem_bt), num_elem); + } + #endif + return false; + } + } + } + + int lshiftopc, rshiftopc; + switch(elem_bt) { + case T_BYTE: + lshiftopc = Op_LShiftI; + rshiftopc = Op_URShiftB; + break; + case T_SHORT: + lshiftopc = Op_LShiftI; + rshiftopc = Op_URShiftS; + break; + case T_INT: + lshiftopc = Op_LShiftI; + rshiftopc = Op_URShiftI; + break; + case T_LONG: + lshiftopc = Op_LShiftL; + rshiftopc = Op_URShiftL; + break; + default: + assert(false, "Unexpected type"); + } + int lshiftvopc = VectorNode::opcode(lshiftopc, elem_bt); + int rshiftvopc = VectorNode::opcode(rshiftopc, elem_bt); + if (!is_supported && + arch_supports_vector(lshiftvopc, num_elem, elem_bt, VecMaskNotUsed, has_scalar_args) && + arch_supports_vector(rshiftvopc, num_elem, elem_bt, VecMaskNotUsed, has_scalar_args) && + arch_supports_vector(Op_OrV, num_elem, elem_bt, VecMaskNotUsed)) { + is_supported = true; + } + return is_supported; } Node* GraphKit::box_vector(Node* vector, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem, bool deoptimize_on_exception) { @@ -115,7 +153,7 @@ Node* GraphKit::box_vector(Node* vector, const TypeInstPtr* vbox_type, BasicType Node* ret = gvn().transform(new ProjNode(alloc, TypeFunc::Parms)); assert(check_vbox(vbox_type), ""); - const TypeVect* vt = TypeVect::make(elem_bt, num_elem); + const TypeVect* vt = TypeVect::make(elem_bt, num_elem, is_vector_mask(vbox_type->klass())); VectorBoxNode* vbox = new VectorBoxNode(C, ret, vector, vbox_type, vt); return gvn().transform(vbox); } @@ -130,7 +168,7 @@ Node* GraphKit::unbox_vector(Node* v, const TypeInstPtr* vbox_type, BasicType el return NULL; // no nulls are allowed } assert(check_vbox(vbox_type), ""); - const TypeVect* vt = TypeVect::make(elem_bt, num_elem); + const TypeVect* vt = TypeVect::make(elem_bt, num_elem, is_vector_mask(vbox_type->klass())); Node* unbox = gvn().transform(new VectorUnboxNode(C, vt, v, merged_memory(), shuffle_to_vector)); return unbox; } @@ -155,7 +193,7 @@ bool LibraryCallKit::arch_supports_vector(int sopc, int num_elem, BasicType type } if (VectorNode::is_vector_rotate(sopc)) { - if(!arch_supports_vector_rotate(sopc, num_elem, type, has_scalar_args)) { + if(!arch_supports_vector_rotate(sopc, num_elem, type, mask_use_type, has_scalar_args)) { #ifndef PRODUCT if (C->print_intrinsics()) { tty->print_cr(" ** Rejected vector op (%s,%s,%d) because architecture does not support variable vector shifts", @@ -213,7 +251,7 @@ bool LibraryCallKit::arch_supports_vector(int sopc, int num_elem, BasicType type } // Check whether mask unboxing is supported. 
- if (mask_use_type == VecMaskUseAll || mask_use_type == VecMaskUseLoad) { + if ((mask_use_type & VecMaskUseLoad) != 0) { if (!Matcher::match_rule_supported_vector(Op_VectorLoadMask, num_elem, type)) { #ifndef PRODUCT if (C->print_intrinsics()) { @@ -226,7 +264,7 @@ bool LibraryCallKit::arch_supports_vector(int sopc, int num_elem, BasicType type } // Check whether mask boxing is supported. - if (mask_use_type == VecMaskUseAll || mask_use_type == VecMaskUseStore) { + if ((mask_use_type & VecMaskUseStore) != 0) { if (!Matcher::match_rule_supported_vector(Op_VectorStoreMask, num_elem, type)) { #ifndef PRODUCT if (C->print_intrinsics()) { @@ -238,15 +276,20 @@ bool LibraryCallKit::arch_supports_vector(int sopc, int num_elem, BasicType type } } - return true; -} - -static bool is_vector_mask(ciKlass* klass) { - return klass->is_subclass_of(ciEnv::current()->vector_VectorMask_klass()); -} + if ((mask_use_type & VecMaskUsePred) != 0) { + if (!Matcher::has_predicated_vectors() || + !Matcher::match_rule_supported_vector_masked(sopc, num_elem, type)) { + #ifndef PRODUCT + if (C->print_intrinsics()) { + tty->print_cr("Rejected vector mask predicate using (%s,%s,%d) because architecture does not support it", + NodeClassNames[sopc], type2name(type), num_elem); + } + #endif + return false; + } + } -static bool is_vector_shuffle(ciKlass* klass) { - return klass->is_subclass_of(ciEnv::current()->vector_VectorShuffle_klass()); + return true; } static bool is_klass_initialized(const TypeInstPtr* vec_klass) { @@ -259,28 +302,35 @@ static bool is_klass_initialized(const TypeInstPtr* vec_klass) { } // public static -// -// VM unaryOp(int oprId, Class vmClass, Class elementType, int length, -// VM vm, -// Function defaultImpl) { +// , +// M extends VectorMask, +// E> +// V unaryOp(int oprId, Class vmClass, Class maskClass, Class elementType, +// int length, V v, M m, +// UnaryOperation defaultImpl) // // public static -// -// VM binaryOp(int oprId, Class vmClass, Class elementType, int length, -// VM vm1, VM vm2, -// BiFunction defaultImpl) { +// , +// E> +// V binaryOp(int oprId, Class vmClass, Class maskClass, Class elementType, +// int length, V v1, V v2, M m, +// BinaryOperation defaultImpl) // // public static -// -// VM ternaryOp(int oprId, Class vmClass, Class elementType, int length, -// VM vm1, VM vm2, VM vm3, -// TernaryOperation defaultImpl) { +// , +// M extends VectorMask, +// E> +// V ternaryOp(int oprId, Class vmClass, Class maskClass, Class elementType, +// int length, V v1, V v2, V v3, M m, +// TernaryOperation defaultImpl) // bool LibraryCallKit::inline_vector_nary_operation(int n) { const TypeInt* opr = gvn().type(argument(0))->isa_int(); const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr(); - const TypeInstPtr* elem_klass = gvn().type(argument(2))->isa_instptr(); - const TypeInt* vlen = gvn().type(argument(3))->isa_int(); + const TypeInstPtr* mask_klass = gvn().type(argument(2))->isa_instptr(); + const TypeInstPtr* elem_klass = gvn().type(argument(3))->isa_instptr(); + const TypeInt* vlen = gvn().type(argument(4))->isa_int(); if (opr == NULL || vector_klass == NULL || elem_klass == NULL || vlen == NULL || !opr->is_con() || vector_klass->const_oop() == NULL || elem_klass->const_oop() == NULL || !vlen->is_con()) { @@ -288,11 +338,12 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) { tty->print_cr(" ** missing constant: opr=%s vclass=%s etype=%s vlen=%s", NodeClassNames[argument(0)->Opcode()], NodeClassNames[argument(1)->Opcode()], - 
NodeClassNames[argument(2)->Opcode()], - NodeClassNames[argument(3)->Opcode()]); + NodeClassNames[argument(3)->Opcode()], + NodeClassNames[argument(4)->Opcode()]); } return false; // not enough info for intrinsification } + ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type(); if (!elem_type->is_primitive_type()) { if (C->print_intrinsics()) { @@ -306,6 +357,34 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) { } return false; } + + // "argument(n + 5)" should be the mask object. We assume it is "null" when no mask + // is used to control this operation. + const Type* vmask_type = gvn().type(argument(n + 5)); + bool is_masked_op = vmask_type != TypePtr::NULL_PTR; + if (is_masked_op) { + if (mask_klass == NULL || mask_klass->const_oop() == NULL) { + if (C->print_intrinsics()) { + tty->print_cr(" ** missing constant: maskclass=%s", NodeClassNames[argument(2)->Opcode()]); + } + return false; // not enough info for intrinsification + } + + if (!is_klass_initialized(mask_klass)) { + if (C->print_intrinsics()) { + tty->print_cr(" ** mask klass argument not initialized"); + } + return false; + } + + if (vmask_type->maybe_null()) { + if (C->print_intrinsics()) { + tty->print_cr(" ** null mask values are not allowed for masked op"); + } + return false; + } + } + BasicType elem_bt = elem_type->basic_type(); int num_elem = vlen->get_con(); int opc = VectorSupport::vop2ideal(opr->get_con(), elem_bt); @@ -328,6 +407,10 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) { ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass(); const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass); + if (is_vector_mask(vbox_klass)) { + assert(!is_masked_op, "mask operations do not need mask to control"); + } + if (opc == Op_CallLeafVector) { if (!UseVectorStubs) { if (C->print_intrinsics()) { @@ -350,47 +433,58 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) { } } - // TODO When mask usage is supported, VecMaskNotUsed needs to be VecMaskUseLoad. - if ((sopc != 0) && - !arch_supports_vector(sopc, num_elem, elem_bt, is_vector_mask(vbox_klass) ? VecMaskUseAll : VecMaskNotUsed)) { + // When using mask, mask use type needs to be VecMaskUseLoad. + VectorMaskUseType mask_use_type = is_vector_mask(vbox_klass) ? VecMaskUseAll + : is_masked_op ? VecMaskUseLoad : VecMaskNotUsed; + if ((sopc != 0) && !arch_supports_vector(sopc, num_elem, elem_bt, mask_use_type)) { if (C->print_intrinsics()) { - tty->print_cr(" ** not supported: arity=%d opc=%d vlen=%d etype=%s ismask=%d", + tty->print_cr(" ** not supported: arity=%d opc=%d vlen=%d etype=%s ismask=%d is_masked_op=%d", n, sopc, num_elem, type2name(elem_bt), - is_vector_mask(vbox_klass) ? 1 : 0); + is_vector_mask(vbox_klass) ? 1 : 0, is_masked_op ? 1 : 0); } return false; // not supported } + // Return true if current platform has implemented the masked operation with predicate feature. 
+ bool use_predicate = is_masked_op && sopc != 0 && arch_supports_vector(sopc, num_elem, elem_bt, VecMaskUsePred); + if (is_masked_op && !use_predicate && !arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad)) { + if (C->print_intrinsics()) { + tty->print_cr(" ** not supported: arity=%d opc=%d vlen=%d etype=%s ismask=0 is_masked_op=1", + n, sopc, num_elem, type2name(elem_bt)); + } + return false; + } + Node* opd1 = NULL; Node* opd2 = NULL; Node* opd3 = NULL; switch (n) { case 3: { - opd3 = unbox_vector(argument(6), vbox_type, elem_bt, num_elem); + opd3 = unbox_vector(argument(7), vbox_type, elem_bt, num_elem); if (opd3 == NULL) { if (C->print_intrinsics()) { tty->print_cr(" ** unbox failed v3=%s", - NodeClassNames[argument(6)->Opcode()]); + NodeClassNames[argument(7)->Opcode()]); } return false; } // fall-through } case 2: { - opd2 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem); + opd2 = unbox_vector(argument(6), vbox_type, elem_bt, num_elem); if (opd2 == NULL) { if (C->print_intrinsics()) { tty->print_cr(" ** unbox failed v2=%s", - NodeClassNames[argument(5)->Opcode()]); + NodeClassNames[argument(6)->Opcode()]); } return false; } // fall-through } case 1: { - opd1 = unbox_vector(argument(4), vbox_type, elem_bt, num_elem); + opd1 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem); if (opd1 == NULL) { if (C->print_intrinsics()) { tty->print_cr(" ** unbox failed v1=%s", - NodeClassNames[argument(4)->Opcode()]); + NodeClassNames[argument(5)->Opcode()]); } return false; } @@ -399,6 +493,21 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) { default: fatal("unsupported arity: %d", n); } + Node* mask = NULL; + if (is_masked_op) { + ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass(); + assert(is_vector_mask(mbox_klass), "argument(2) should be a mask class"); + const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass); + mask = unbox_vector(argument(n + 5), mbox_type, elem_bt, num_elem); + if (mask == NULL) { + if (C->print_intrinsics()) { + tty->print_cr(" ** unbox failed mask=%s", + NodeClassNames[argument(n + 5)->Opcode()]); + } + return false; + } + } + Node* operation = NULL; if (opc == Op_CallLeafVector) { assert(UseVectorStubs, "sanity"); @@ -413,20 +522,32 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) { return false; } } else { - const TypeVect* vt = TypeVect::make(elem_bt, num_elem); + const TypeVect* vt = TypeVect::make(elem_bt, num_elem, is_vector_mask(vbox_klass)); switch (n) { case 1: case 2: { - operation = gvn().transform(VectorNode::make(sopc, opd1, opd2, vt)); + operation = VectorNode::make(sopc, opd1, opd2, vt, is_vector_mask(vbox_klass)); break; } case 3: { - operation = gvn().transform(VectorNode::make(sopc, opd1, opd2, opd3, vt)); + operation = VectorNode::make(sopc, opd1, opd2, opd3, vt); break; } default: fatal("unsupported arity: %d", n); } } + + if (is_masked_op && mask != NULL) { + if (use_predicate) { + operation->add_req(mask); + operation->add_flag(Node::Flag_is_predicated_vector); + } else { + operation = gvn().transform(operation); + operation = new VectorBlendNode(opd1, operation, mask); + } + } + operation = gvn().transform(operation); + // Wrap it up in VectorBox to keep object type information. 
Node* vbox = box_vector(operation, vbox_type, elem_bt, num_elem); set_result(vbox); @@ -435,7 +556,7 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) { } // , E> -// Sh ShuffleIota(Class E, Class ShuffleClass, Vector.Species s, int length, +// Sh ShuffleIota(Class E, Class shuffleClass, Vector.Species s, int length, // int start, int step, int wrap, ShuffleIotaOperation defaultImpl) bool LibraryCallKit::inline_vector_shuffle_iota() { const TypeInstPtr* shuffle_klass = gvn().type(argument(1))->isa_instptr(); @@ -509,10 +630,11 @@ bool LibraryCallKit::inline_vector_shuffle_iota() { // Wrap the indices greater than lane count. res = gvn().transform(VectorNode::make(Op_AndI, res, bcast_mod, num_elem, elem_bt)); } else { - ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(1)); + ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(BoolTest::ge)); Node * lane_cnt = gvn().makecon(TypeInt::make(num_elem)); Node * bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, type_bt)); - Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ge, bcast_lane_cnt, res, pred_node, vt)); + const TypeVect* vmask_type = TypeVect::makemask(elem_bt, num_elem); + Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ge, bcast_lane_cnt, res, pred_node, vmask_type)); // Make the indices greater than lane count as -ve values. This matches the java side implementation. res = gvn().transform(VectorNode::make(Op_AndI, res, bcast_mod, num_elem, elem_bt)); @@ -531,7 +653,7 @@ bool LibraryCallKit::inline_vector_shuffle_iota() { } // -// int maskReductionCoerced(int oper, Class maskClass, Class elemClass, +// long maskReductionCoerced(int oper, Class maskClass, Class elemClass, // int length, M m, VectorMaskOp defaultImpl) bool LibraryCallKit::inline_vector_mask_operation() { const TypeInt* oper = gvn().type(argument(0))->isa_int(); @@ -576,17 +698,35 @@ bool LibraryCallKit::inline_vector_mask_operation() { ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass(); const TypeInstPtr* mask_box_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass); Node* mask_vec = unbox_vector(mask, mask_box_type, elem_bt, num_elem, true); - Node* store_mask = gvn().transform(VectorStoreMaskNode::make(gvn(), mask_vec, elem_bt, num_elem)); - Node* maskoper = gvn().transform(VectorMaskOpNode::make(store_mask, TypeInt::INT, mopc)); + if (mask_vec == NULL) { + if (C->print_intrinsics()) { + tty->print_cr(" ** unbox failed mask=%s", + NodeClassNames[argument(4)->Opcode()]); + } + return false; + } + + if (mask_vec->bottom_type()->isa_vectmask() == NULL) { + mask_vec = gvn().transform(VectorStoreMaskNode::make(gvn(), mask_vec, elem_bt, num_elem)); + } + const Type* maskoper_ty = mopc == Op_VectorMaskToLong ? 
 
-// <VM ,Sh extends VectorShuffle<E>, E>
-// VM shuffleToVector(Class<VM> VecClass, Class<?>E , Class<?> ShuffleClass, Sh s, int length,
-//                    ShuffleToVectorOperation<VM,Sh,E> defaultImpl)
+// public static
+// <V,
+//  Sh extends VectorShuffle<E>,
+//  E>
+// V shuffleToVector(Class<? extends Vector<E>> vclass, Class<E> elementType,
+//                   Class<? extends Sh> shuffleClass, Sh s, int length,
+//                   ShuffleToVectorOperation<V, Sh, E> defaultImpl)
 bool LibraryCallKit::inline_vector_shuffle_to_vector() {
   const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
   const TypeInstPtr* elem_klass = gvn().type(argument(1))->isa_instptr();
@@ -645,10 +785,13 @@ bool LibraryCallKit::inline_vector_shuffle_to_vector() {
   return true;
 }
 
-// <V extends Vector<?,?>>
-// V broadcastCoerced(Class<?> vectorClass, Class<?> elementType, int vlen,
-//                    long bits,
-//                    LongFunction<V> defaultImpl)
+// public static
+// <M,
+//  S extends VectorSpecies<E>,
+//  E>
+// M broadcastCoerced(Class<? extends M> vmClass, Class<E> elementType, int length,
+//                    long bits, S s,
+//                    BroadcastOperation<M, S> defaultImpl)
 bool LibraryCallKit::inline_vector_broadcast_coerced() {
   const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
   const TypeInstPtr* elem_klass = gvn().type(argument(1))->isa_instptr();
@@ -695,7 +838,6 @@ bool LibraryCallKit::inline_vector_broadcast_coerced() {
   }
 
   Node* bits = argument(3); // long
-  Node* elem = NULL;
   switch (elem_bt) {
     case T_BOOLEAN: // fall-through
@@ -722,7 +864,7 @@ bool LibraryCallKit::inline_vector_broadcast_coerced() {
     default: fatal("%s", type2name(elem_bt));
   }
 
-  Node* broadcast = VectorNode::scalar2vector(elem, num_elem, Type::get_const_basic_type(elem_bt));
+  Node* broadcast = VectorNode::scalar2vector(elem, num_elem, Type::get_const_basic_type(elem_bt), is_vector_mask(vbox_klass));
   broadcast = gvn().transform(broadcast);
 
   Node* box = box_vector(broadcast, vbox_type, elem_bt, num_elem);
@@ -747,19 +889,24 @@ static bool elem_consistent_with_arr(BasicType elem_bt, const TypeAryPtr* arr_ty
   }
 }
 
-// <C, V extends Vector<?,?>>
-// V load(Class<?> vectorClass, Class<?> elementType, int vlen,
-//        Object base, long offset,
-//        /* Vector.Mask<E,S> m*/
-//        Object container, int index,
-//        LoadOperation<C, VM> defaultImpl) {
+// public static
+// <C,
+//  VM,
+//  E,
+//  S extends VectorSpecies<E>>
+// VM load(Class<? extends VM> vmClass, Class<E> elementType, int length,
+//         Object base, long offset,            // Unsafe addressing
+//         C container, int index, S s,         // Arguments for default implementation
+//         LoadOperation<C, VM, E, S> defaultImpl)
 //
-// <C, V extends Vector<?,?>>
-// void store(Class<?> vectorClass, Class<?> elementType, int vlen,
-//            Object base, long offset,
-//            V v, /*Vector.Mask<E,S> m*/
-//            Object container, int index,
-//            StoreVectorOperation<C, V> defaultImpl) {
+// public static
+// <C,
+//  V extends VectorPayload>
+// void store(Class<?> vectorClass, Class<?> elementType, int length,
+//            Object base, long offset,        // Unsafe addressing
+//            V v,
+//            C container, int index,          // Arguments for default implementation
+//            StoreVectorOperation<C, V> defaultImpl)
 
 bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
   const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
@@ -814,12 +961,23 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
   SafePointNode* old_map = clone_map();
 
   Node* addr = make_unsafe_address(base, offset, (is_mask ? T_BOOLEAN : elem_bt), true);
-  // Can base be NULL? Otherwise, always on-heap access.
-  bool can_access_non_heap = TypePtr::NULL_PTR->higher_equal(gvn().type(base));
+
+  // The memory barrier checks are based on the ones for unsafe access.
+  // This is not a 1-1 implementation.
+  const Type* const base_type = gvn().type(base);
 
   const TypePtr* addr_type = gvn().type(addr)->isa_ptr();
   const TypeAryPtr* arr_type = addr_type->isa_aryptr();
 
+  const bool in_native = TypePtr::NULL_PTR == base_type; // base always null
+  const bool in_heap   = !TypePtr::NULL_PTR->higher_equal(base_type); // base never null
+
+  const bool is_mixed_access = !in_heap && !in_native;
+
+  const bool is_mismatched_access = in_heap && (addr_type->isa_aryptr() == NULL);
+
+  const bool needs_cpu_membar = is_mixed_access || is_mismatched_access;
+
   // Now handle special case where load/store happens from/to byte array but element type is not byte.
   bool using_byte_array = arr_type != NULL && arr_type->elem()->array_element_basic_type() == T_BYTE && elem_bt != T_BYTE;
   // Handle loading masks.
@@ -877,7 +1035,7 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
 
   const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
 
-  if (can_access_non_heap) {
+  if (needs_cpu_membar) {
     insert_mem_bar(Op_MemBarCPUOrder);
   }
 
@@ -912,8 +1070,7 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
     // Special handle for masks
     if (is_mask) {
       vload = gvn().transform(LoadVectorNode::make(0, control(), memory(addr), addr, addr_type, num_elem, T_BOOLEAN));
-      const TypeVect* to_vect_type = TypeVect::make(elem_bt, num_elem);
-      vload = gvn().transform(new VectorLoadMaskNode(vload, to_vect_type));
+      vload = gvn().transform(new VectorLoadMaskNode(vload, TypeVect::makemask(elem_bt, num_elem)));
     } else {
       vload = gvn().transform(LoadVectorNode::make(0, control(), memory(addr), addr, addr_type, num_elem, elem_bt));
     }
@@ -924,6 +1081,243 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
 
   old_map->destruct(&_gvn);
 
+  if (needs_cpu_membar) {
+    insert_mem_bar(Op_MemBarCPUOrder);
+  }
+
+  C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
+  return true;
+}
+
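The classification above mirrors C2's unsafe-access rules: a statically null base means an off-heap (native) access, a provably non-null base means a heap access, and anything in between is a mixed access which, like a mismatched heap access, is bracketed with MemBarCPUOrder. The two clear-cut cases look roughly like this at the Java level (a sketch; fromByteBuffer is, I believe, the JDK 16-18 spelling of the off-heap load):

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;
    import jdk.incubator.vector.*;

    class LoadKinds {
        static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

        static IntVector heapLoad(int[] a) {
            return IntVector.fromArray(SPECIES, a, 0);  // base is the array: in_heap
        }

        static IntVector nativeLoad(ByteBuffer direct) { // direct buffer: base is null, in_native
            return IntVector.fromByteBuffer(SPECIES, direct, 0, ByteOrder.nativeOrder());
        }
    }
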
+// public static
+// <C,
+//  V extends Vector<?>,
+//  E,
+//  S extends VectorSpecies<E>,
+//  M extends VectorMask<E>>
+// V loadMasked(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType,
+//              int length, Object base, long offset, M m,
+//              C container, int index, S s,  // Arguments for default implementation
+//              LoadVectorMaskedOperation<C, V, S, M> defaultImpl) {
+//
+// public static
+// <C,
+//  V extends Vector<E>,
+//  M extends VectorMask<E>,
+//  E>
+// void storeMasked(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType,
+//                  int length, Object base, long offset,
+//                  V v, M m,
+//                  C container, int index,  // Arguments for default implementation
+//                  StoreVectorMaskedOperation<C, V, M, E> defaultImpl) {
+//
+bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
+  const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
+  const TypeInstPtr* mask_klass = gvn().type(argument(1))->isa_instptr();
+  const TypeInstPtr* elem_klass = gvn().type(argument(2))->isa_instptr();
+  const TypeInt* vlen = gvn().type(argument(3))->isa_int();
+
+  if (vector_klass == NULL || mask_klass == NULL || elem_klass == NULL || vlen == NULL ||
+      vector_klass->const_oop() == NULL || mask_klass->const_oop() == NULL ||
+      elem_klass->const_oop() == NULL || !vlen->is_con()) {
+    if (C->print_intrinsics()) {
+      tty->print_cr("  ** missing constant: vclass=%s mclass=%s etype=%s vlen=%s",
+                    NodeClassNames[argument(0)->Opcode()],
+                    NodeClassNames[argument(1)->Opcode()],
+                    NodeClassNames[argument(2)->Opcode()],
+                    NodeClassNames[argument(3)->Opcode()]);
+    }
+    return false; // not enough info for intrinsification
+  }
+  if (!is_klass_initialized(vector_klass)) {
+    if (C->print_intrinsics()) {
+      tty->print_cr("  ** klass argument not initialized");
+    }
+    return false;
+  }
+
+  if (!is_klass_initialized(mask_klass)) {
+    if (C->print_intrinsics()) {
+      tty->print_cr("  ** mask klass argument not initialized");
+    }
+    return false;
+  }
+
+  ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
+  if (!elem_type->is_primitive_type()) {
+    if (C->print_intrinsics()) {
+      tty->print_cr("  ** not a primitive bt=%d", elem_type->basic_type());
+    }
+    return false; // should be primitive type
+  }
+
+  BasicType elem_bt = elem_type->basic_type();
+  int num_elem = vlen->get_con();
+
+  Node* base = argument(4);
+  Node* offset = ConvL2X(argument(5));
+
+  // Save state and restore on bailout
+  uint old_sp = sp();
+  SafePointNode* old_map = clone_map();
+
+  Node* addr = make_unsafe_address(base, offset, elem_bt, true);
+  const TypePtr* addr_type = gvn().type(addr)->isa_ptr();
+  const TypeAryPtr* arr_type = addr_type->isa_aryptr();
+
+  // Now handle special case where load/store happens from/to byte array but element type is not byte.
+  bool using_byte_array = arr_type != NULL && arr_type->elem()->array_element_basic_type() == T_BYTE && elem_bt != T_BYTE;
+  // If there is no consistency between array and vector element types, it must be the special byte array case
+  if (arr_type != NULL && !using_byte_array && !elem_consistent_with_arr(elem_bt, arr_type)) {
+    if (C->print_intrinsics()) {
+      tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s atype=%s",
+                    is_store, is_store ? "storeMasked" : "loadMasked",
+                    num_elem, type2name(elem_bt), type2name(arr_type->elem()->array_element_basic_type()));
+    }
+    set_map(old_map);
+    set_sp(old_sp);
+    return false;
+  }
+
+  int mem_num_elem = using_byte_array ? num_elem * type2aelembytes(elem_bt) : num_elem;
+  BasicType mem_elem_bt = using_byte_array ? T_BYTE : elem_bt;
+  bool use_predicate = arch_supports_vector(is_store ? Op_StoreVectorMasked : Op_LoadVectorMasked,
+                                            mem_num_elem, mem_elem_bt,
+                                            (VectorMaskUseType) (VecMaskUseLoad | VecMaskUsePred));
+  // A masked vector store needs the architecture's predicate feature, so check
+  // whether the predicated vector operation is supported by the backend.
+  if (is_store && !use_predicate) {
+    if (C->print_intrinsics()) {
+      tty->print_cr("  ** not supported: op=storeMasked vlen=%d etype=%s using_byte_array=%d",
+                    num_elem, type2name(elem_bt), using_byte_array ? 1 : 0);
+    }
+    set_map(old_map);
+    set_sp(old_sp);
+    return false;
+  }
+
+  // This only happens for a masked vector load. If predicates are not supported, check whether
+  // the normal vector load and blend operations are supported by the backend.
+  if (!use_predicate && (!arch_supports_vector(Op_LoadVector, mem_num_elem, mem_elem_bt, VecMaskNotUsed) ||
+      !arch_supports_vector(Op_VectorBlend, mem_num_elem, mem_elem_bt, VecMaskUseLoad))) {
+    if (C->print_intrinsics()) {
+      tty->print_cr("  ** not supported: op=loadMasked vlen=%d etype=%s using_byte_array=%d",
+                    num_elem, type2name(elem_bt), using_byte_array ? 1 : 0);
+    }
+    set_map(old_map);
+    set_sp(old_sp);
+    return false;
+  }
+
+  // Since we are using a byte array, we need to double check that the vector reinterpret operation
+  // with byte type is supported by the backend.
+  if (using_byte_array) {
+    if (!arch_supports_vector(Op_VectorReinterpret, mem_num_elem, T_BYTE, VecMaskNotUsed)) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s using_byte_array=1",
+                      is_store, is_store ? "storeMasked" : "loadMasked",
+                      num_elem, type2name(elem_bt));
+      }
+      set_map(old_map);
+      set_sp(old_sp);
+      return false;
+    }
+  }
+
+  // Since it needs to unbox the mask, we need to double check that the related load operations
+  // for the mask are supported by the backend.
+  if (!arch_supports_vector(Op_LoadVector, num_elem, elem_bt, VecMaskUseLoad)) {
+    if (C->print_intrinsics()) {
+      tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s",
+                    is_store, is_store ? "storeMasked" : "loadMasked",
+                    num_elem, type2name(elem_bt));
+    }
+    set_map(old_map);
+    set_sp(old_sp);
+    return false;
+  }
+
+  // Can base be NULL? Otherwise, always on-heap access.
+  bool can_access_non_heap = TypePtr::NULL_PTR->higher_equal(gvn().type(base));
+  if (can_access_non_heap) {
+    insert_mem_bar(Op_MemBarCPUOrder);
+  }
+
+  ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
+  ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
+  assert(!is_vector_mask(vbox_klass) && is_vector_mask(mbox_klass), "Invalid class type");
+  const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
+  const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
+
+  Node* mask = unbox_vector(is_store ? argument(8) : argument(7), mbox_type, elem_bt, num_elem);
+  if (mask == NULL) {
+    if (C->print_intrinsics()) {
+      tty->print_cr("  ** unbox failed mask=%s",
+                    is_store ? NodeClassNames[argument(8)->Opcode()]
+                             : NodeClassNames[argument(7)->Opcode()]);
+    }
+    set_map(old_map);
+    set_sp(old_sp);
+    return false;
+  }
+
+  if (is_store) {
+    Node* val = unbox_vector(argument(7), vbox_type, elem_bt, num_elem);
+    if (val == NULL) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** unbox failed vector=%s",
+                      NodeClassNames[argument(7)->Opcode()]);
+      }
+      set_map(old_map);
+      set_sp(old_sp);
+      return false; // operand unboxing failed
+    }
+    set_all_memory(reset_memory());
+
+    if (using_byte_array) {
+      // Reinterpret the incoming vector to byte vector.
+      const TypeVect* to_vect_type = TypeVect::make(mem_elem_bt, mem_num_elem);
+      val = gvn().transform(new VectorReinterpretNode(val, val->bottom_type()->is_vect(), to_vect_type));
+      // Reinterpret the vector mask to byte type.
+      const TypeVect* from_mask_type = TypeVect::makemask(elem_bt, num_elem);
+      const TypeVect* to_mask_type = TypeVect::makemask(mem_elem_bt, mem_num_elem);
+      mask = gvn().transform(new VectorReinterpretNode(mask, from_mask_type, to_mask_type));
+    }
+    Node* vstore = gvn().transform(new StoreVectorMaskedNode(control(), memory(addr), addr, val, addr_type, mask));
+    set_memory(vstore, addr_type);
+  } else {
+    Node* vload = NULL;
+
+    if (using_byte_array) {
+      // Reinterpret the vector mask to byte type.
+      const TypeVect* from_mask_type = TypeVect::makemask(elem_bt, num_elem);
+      const TypeVect* to_mask_type = TypeVect::makemask(mem_elem_bt, mem_num_elem);
+      mask = gvn().transform(new VectorReinterpretNode(mask, from_mask_type, to_mask_type));
+    }
+
+    if (use_predicate) {
+      // Generate the masked load vector node if the predicate feature is supported.
+      const TypeVect* vt = TypeVect::make(mem_elem_bt, mem_num_elem);
+      vload = gvn().transform(new LoadVectorMaskedNode(control(), memory(addr), addr, addr_type, vt, mask));
+    } else {
+      // Use the vector blend to implement the masked load vector. The biased elements are zeros.
+      Node* zero = gvn().transform(gvn().zerocon(mem_elem_bt));
+      zero = gvn().transform(VectorNode::scalar2vector(zero, mem_num_elem, Type::get_const_basic_type(mem_elem_bt)));
+      vload = gvn().transform(LoadVectorNode::make(0, control(), memory(addr), addr, addr_type, mem_num_elem, mem_elem_bt));
+      vload = gvn().transform(new VectorBlendNode(zero, vload, mask));
+    }
+
+    if (using_byte_array) {
+      const TypeVect* to_vect_type = TypeVect::make(elem_bt, num_elem);
+      vload = gvn().transform(new VectorReinterpretNode(vload, vload->bottom_type()->is_vect(), to_vect_type));
+    }
+
+    Node* box = box_vector(vload, vbox_type, elem_bt, num_elem);
+    set_result(box);
+  }
+
+  old_map->destruct(&_gvn);
+
   if (can_access_non_heap) {
     insert_mem_bar(Op_MemBarCPUOrder);
   }
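
At the Java level the blend fallback is observable only through the zeroing rule: a masked load yields zero in unset lanes, which is exactly what blending the loaded vector into a zero vector produces. A small sketch (assuming the JDK 17+ API):

    import jdk.incubator.vector.*;

    class MaskedLoad {
        static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

        static IntVector loadPrefix(int[] a, int n) {
            VectorMask<Integer> vm = SPECIES.indexInRange(0, n); // lanes 0..n-1 set (n <= lane count)
            return IntVector.fromArray(SPECIES, a, 0, vm);       // unset lanes read as 0
        }
    }
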
@@ -932,34 +1326,45 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
   return true;
 }
 
-// <C, V extends Vector<?>, W extends IntVector, E, S extends VectorSpecies<E>>
-// void loadWithMap(Class<?> vectorClass, Class<E> E, int length, Class<?> vectorIndexClass,
-//                  Object base, long offset, // Unsafe addressing
-//                  W index_vector,
-//                  C container, int index, int[] indexMap, int indexM, S s, // Arguments for default implementation
-//                  LoadVectorOperationWithMap<C, V, E, S> defaultImpl)
+// <C,
+//  V extends Vector<?>,
+//  W extends Vector<Integer>,
+//  S extends VectorSpecies<E>,
+//  M extends VectorMask<E>,
+//  E>
+// V loadWithMap(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType, int length,
+//               Class<? extends Vector<Integer>> vectorIndexClass,
+//               Object base, long offset, // Unsafe addressing
+//               W index_vector, M m,
+//               C container, int index, int[] indexMap, int indexM, S s, // Arguments for default implementation
+//               LoadVectorOperationWithMap<C, V, E, S, M> defaultImpl)
 //
-// <C, V extends Vector<?>, W extends IntVector>
-// void storeWithMap(Class<?> vectorClass, Class<?> elementType, int length, Class<?> vectorIndexClass,
-//                   Object base, long offset,    // Unsafe addressing
-//                   W index_vector, V v,
-//                   C container, int index, int[] indexMap, int indexM, // Arguments for default implementation
-//                   StoreVectorOperationWithMap<C, V> defaultImpl) {
+// <C,
+//  V extends Vector<E>,
+//  W extends Vector<Integer>,
+//  M extends VectorMask<E>,
+//  E>
+// void storeWithMap(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType,
+//                   int length, Class<? extends Vector<Integer>> vectorIndexClass, Object base, long offset,    // Unsafe addressing
+//                   W index_vector, V v, M m,
+//                   C container, int index, int[] indexMap, int indexM, // Arguments for default implementation
+//                   StoreVectorOperationWithMap<C, V, M, E> defaultImpl)
 //
 bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) {
   const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
-  const TypeInstPtr* elem_klass = gvn().type(argument(1))->isa_instptr();
-  const TypeInt*     vlen = gvn().type(argument(2))->isa_int();
-  const TypeInstPtr* vector_idx_klass = gvn().type(argument(3))->isa_instptr();
+  const TypeInstPtr* mask_klass = gvn().type(argument(1))->isa_instptr();
+  const TypeInstPtr* elem_klass = gvn().type(argument(2))->isa_instptr();
+  const TypeInt*     vlen = gvn().type(argument(3))->isa_int();
+  const TypeInstPtr* vector_idx_klass = gvn().type(argument(4))->isa_instptr();
 
   if (vector_klass == NULL || elem_klass == NULL || vector_idx_klass == NULL || vlen == NULL ||
       vector_klass->const_oop() == NULL || elem_klass->const_oop() == NULL ||
       vector_idx_klass->const_oop() == NULL || !vlen->is_con()) {
     if (C->print_intrinsics()) {
       tty->print_cr("  ** missing constant: vclass=%s etype=%s vlen=%s viclass=%s",
                     NodeClassNames[argument(0)->Opcode()],
-                    NodeClassNames[argument(1)->Opcode()],
                     NodeClassNames[argument(2)->Opcode()],
-                    NodeClassNames[argument(3)->Opcode()]);
+                    NodeClassNames[argument(3)->Opcode()],
+                    NodeClassNames[argument(4)->Opcode()]);
     }
     return false; // not enough info for intrinsification
   }
@@ -970,6 +1375,7 @@ bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) {
     }
     return false;
   }
+
   ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
   if (!elem_type->is_primitive_type()) {
     if (C->print_intrinsics()) {
@@ -977,30 +1383,68 @@ bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) {
     }
     return false; // should be primitive type
   }
+
   BasicType elem_bt = elem_type->basic_type();
   int num_elem = vlen->get_con();
 
-  if (!arch_supports_vector(is_scatter ? Op_StoreVectorScatter : Op_LoadVectorGather, num_elem, elem_bt, VecMaskNotUsed)) {
-    if (C->print_intrinsics()) {
-      tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s ismask=no",
-                    is_scatter, is_scatter ? "scatter" : "gather",
-                    num_elem, type2name(elem_bt));
+  const Type* vmask_type = gvn().type(is_scatter ? argument(10) : argument(9));
+  bool is_masked_op = vmask_type != TypePtr::NULL_PTR;
+  if (is_masked_op) {
+    if (mask_klass == NULL || mask_klass->const_oop() == NULL) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** missing constant: maskclass=%s", NodeClassNames[argument(1)->Opcode()]);
+      }
+      return false; // not enough info for intrinsification
+    }
+
+    if (!is_klass_initialized(mask_klass)) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** mask klass argument not initialized");
+      }
+      return false;
+    }
+
+    if (vmask_type->maybe_null()) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** null mask values are not allowed for masked op");
+      }
+      return false;
+    }
+
+    // Check whether the predicated gather/scatter node is supported by the architecture.
+    if (!arch_supports_vector(is_scatter ? Op_StoreVectorScatterMasked : Op_LoadVectorGatherMasked, num_elem, elem_bt,
+                              (VectorMaskUseType) (VecMaskUseLoad | VecMaskUsePred))) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s is_masked_op=1",
+                      is_scatter, is_scatter ? "scatterMasked" : "gatherMasked",
+                      num_elem, type2name(elem_bt));
+      }
+      return false; // not supported
+    }
+  } else {
+    // Check whether the normal gather/scatter node is supported for non-masked operation.
+    if (!arch_supports_vector(is_scatter ? Op_StoreVectorScatter : Op_LoadVectorGather, num_elem, elem_bt, VecMaskNotUsed)) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s is_masked_op=0",
+                      is_scatter, is_scatter ? "scatter" : "gather",
+                      num_elem, type2name(elem_bt));
+      }
+      return false; // not supported
     }
-    return false; // not supported
   }
 
   // Check that the vector holding indices is supported by architecture
   if (!arch_supports_vector(Op_LoadVector, num_elem, T_INT, VecMaskNotUsed)) {
     if (C->print_intrinsics()) {
-      tty->print_cr("  ** not supported: arity=%d op=%s/loadindex vlen=%d etype=int ismask=no",
+      tty->print_cr("  ** not supported: arity=%d op=%s/loadindex vlen=%d etype=int is_masked_op=%d",
                     is_scatter, is_scatter ? "scatter" : "gather",
-                    num_elem);
+                    num_elem, is_masked_op ? 1 : 0);
     }
     return false; // not supported
-    }
+  }
 
-  Node* base = argument(4);
-  Node* offset = ConvL2X(argument(5));
+  Node* base = argument(5);
+  Node* offset = ConvL2X(argument(6));
 
   // Save state and restore on bailout
   uint old_sp = sp();
@@ -1022,11 +1466,10 @@ bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) {
     set_sp(old_sp);
     return false;
   }
+
   ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
   const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
-
   ciKlass* vbox_idx_klass = vector_idx_klass->const_oop()->as_instance()->java_lang_Class_klass();
-
   if (vbox_idx_klass == NULL) {
     set_map(old_map);
     set_sp(old_sp);
@@ -1034,16 +1477,33 @@ bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) {
   }
 
   const TypeInstPtr* vbox_idx_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_idx_klass);
-
-  Node* index_vect = unbox_vector(argument(7), vbox_idx_type, T_INT, num_elem);
+  Node* index_vect = unbox_vector(argument(8), vbox_idx_type, T_INT, num_elem);
   if (index_vect == NULL) {
     set_map(old_map);
     set_sp(old_sp);
     return false;
   }
+
+  Node* mask = NULL;
+  if (is_masked_op) {
+    ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
+    const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
+    mask = unbox_vector(is_scatter ? argument(10) : argument(9), mbox_type, elem_bt, num_elem);
+    if (mask == NULL) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** unbox failed mask=%s",
+                      is_scatter ? NodeClassNames[argument(10)->Opcode()]
+                                 : NodeClassNames[argument(9)->Opcode()]);
+      }
+      set_map(old_map);
+      set_sp(old_sp);
+      return false;
+    }
+  }
+
   const TypeVect* vector_type = TypeVect::make(elem_bt, num_elem);
   if (is_scatter) {
-    Node* val = unbox_vector(argument(8), vbox_type, elem_bt, num_elem);
+    Node* val = unbox_vector(argument(9), vbox_type, elem_bt, num_elem);
     if (val == NULL) {
       set_map(old_map);
       set_sp(old_sp);
@@ -1051,11 +1511,20 @@ bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) {
     }
     set_all_memory(reset_memory());
 
-    Node* vstore = gvn().transform(new StoreVectorScatterNode(control(), memory(addr), addr, addr_type, val, index_vect));
+    Node* vstore = NULL;
+    if (mask != NULL) {
+      vstore = gvn().transform(new StoreVectorScatterMaskedNode(control(), memory(addr), addr, addr_type, val, index_vect, mask));
+    } else {
+      vstore = gvn().transform(new StoreVectorScatterNode(control(), memory(addr), addr, addr_type, val, index_vect));
    }
    set_memory(vstore, addr_type);
  } else {
-    Node* vload = gvn().transform(new LoadVectorGatherNode(control(), memory(addr), addr, addr_type, vector_type, index_vect));
-
+    Node* vload = NULL;
+    if (mask != NULL) {
+      vload = gvn().transform(new LoadVectorGatherMaskedNode(control(), memory(addr), addr, addr_type, vector_type, index_vect, mask));
+    } else {
+      vload = gvn().transform(new LoadVectorGatherNode(control(), memory(addr), addr, addr_type, vector_type, index_vect));
+    }
     Node* box = box_vector(vload, vbox_type, elem_bt, num_elem);
     set_result(box);
   }
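
These nodes back the index-mapped (gather/scatter) array accessors when a mask is supplied; note that, unlike the contiguous case above, the masked forms require true predicate support in the backend and have no blend fallback. A usage sketch (assuming the JDK 18 API):

    import jdk.incubator.vector.*;

    class GatherScatter {
        static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

        static void copySelected(int[] src, int[] dst, int[] indexMap, VectorMask<Integer> vm) {
            IntVector g = IntVector.fromArray(SPECIES, src, 0, indexMap, 0, vm); // LoadVectorGatherMasked
            g.intoArray(dst, 0, indexMap, 0, vm);                                // StoreVectorScatterMasked
        }
    }
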
@@ -1066,16 +1535,19 @@ bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) {
   return true;
 }
 
-// <V extends Vector<?,?>>
-// long reductionCoerced(int oprId, Class<?> vectorClass, Class<?> elementType, int vlen,
-//                       V v,
-//                       Function<V,Long> defaultImpl)
-
+// public static
+// <V extends Vector<E>,
+//  M extends VectorMask<E>,
+//  E>
+// long reductionCoerced(int oprId, Class<? extends V> vectorClass, Class<? extends M> maskClass,
+//                       Class<E> elementType, int length, V v, M m,
+//                       ReductionOperation<V, M> defaultImpl)
 bool LibraryCallKit::inline_vector_reduction() {
   const TypeInt* opr = gvn().type(argument(0))->isa_int();
   const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr();
-  const TypeInstPtr* elem_klass = gvn().type(argument(2))->isa_instptr();
-  const TypeInt* vlen = gvn().type(argument(3))->isa_int();
+  const TypeInstPtr* mask_klass = gvn().type(argument(2))->isa_instptr();
+  const TypeInstPtr* elem_klass = gvn().type(argument(3))->isa_instptr();
+  const TypeInt* vlen = gvn().type(argument(4))->isa_int();
 
   if (opr == NULL || vector_klass == NULL || elem_klass == NULL || vlen == NULL ||
       !opr->is_con() || vector_klass->const_oop() == NULL || elem_klass->const_oop() == NULL || !vlen->is_con()) {
@@ -1083,8 +1555,8 @@ bool LibraryCallKit::inline_vector_reduction() {
       tty->print_cr("  ** missing constant: opr=%s vclass=%s etype=%s vlen=%s",
                     NodeClassNames[argument(0)->Opcode()],
                     NodeClassNames[argument(1)->Opcode()],
-                    NodeClassNames[argument(2)->Opcode()],
-                    NodeClassNames[argument(3)->Opcode()]);
+                    NodeClassNames[argument(3)->Opcode()],
+                    NodeClassNames[argument(4)->Opcode()]);
     }
     return false; // not enough info for intrinsification
   }
@@ -1101,16 +1573,51 @@ bool LibraryCallKit::inline_vector_reduction() {
    }
    return false; // should be primitive type
  }
+
+  const Type* vmask_type = gvn().type(argument(6));
+  bool is_masked_op = vmask_type != TypePtr::NULL_PTR;
+  if (is_masked_op) {
+    if (mask_klass == NULL || mask_klass->const_oop() == NULL) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** missing constant: maskclass=%s", NodeClassNames[argument(2)->Opcode()]);
+      }
+      return false; // not enough info for intrinsification
+    }
+
+    if (!is_klass_initialized(mask_klass)) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** mask klass argument not initialized");
+      }
+      return false;
+    }
+
+    if (vmask_type->maybe_null()) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** null mask values are not allowed for masked op");
+      }
+      return false;
+    }
+  }
+
   BasicType elem_bt = elem_type->basic_type();
   int num_elem = vlen->get_con();
-
   int opc  = VectorSupport::vop2ideal(opr->get_con(), elem_bt);
   int sopc = ReductionNode::opcode(opc, elem_bt);
 
-  // TODO When mask usage is supported, VecMaskNotUsed needs to be VecMaskUseLoad.
-  if (!arch_supports_vector(sopc, num_elem, elem_bt, VecMaskNotUsed)) {
+  // When using a mask, the mask use type needs to be VecMaskUseLoad.
+  if (!arch_supports_vector(sopc, num_elem, elem_bt, is_masked_op ? VecMaskUseLoad : VecMaskNotUsed)) {
     if (C->print_intrinsics()) {
-      tty->print_cr("  ** not supported: arity=1 op=%d/reduce vlen=%d etype=%s ismask=no",
+      tty->print_cr("  ** not supported: arity=1 op=%d/reduce vlen=%d etype=%s is_masked_op=%d",
+                    sopc, num_elem, type2name(elem_bt), is_masked_op ? 1 : 0);
+    }
+    return false;
+  }
+
+  // True if the current platform implements the masked operation with the predicate feature.
+  bool use_predicate = is_masked_op && arch_supports_vector(sopc, num_elem, elem_bt, VecMaskUsePred);
+  if (is_masked_op && !use_predicate && !arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad)) {
+    if (C->print_intrinsics()) {
+      tty->print_cr("  ** not supported: arity=1 op=%d/reduce vlen=%d etype=%s is_masked_op=1",
                     sopc, num_elem, type2name(elem_bt));
     }
     return false;
@@ -1119,33 +1626,63 @@ bool LibraryCallKit::inline_vector_reduction() {
   ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
   const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
 
-  Node* opd = unbox_vector(argument(4), vbox_type, elem_bt, num_elem);
+  Node* opd = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
   if (opd == NULL) {
     return false; // operand unboxing failed
   }
 
+  Node* mask = NULL;
+  if (is_masked_op) {
+    ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
+    assert(is_vector_mask(mbox_klass), "argument(2) should be a mask class");
+    const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
+    mask = unbox_vector(argument(6), mbox_type, elem_bt, num_elem);
+    if (mask == NULL) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** unbox failed mask=%s",
+                      NodeClassNames[argument(6)->Opcode()]);
+      }
+      return false;
+    }
+  }
+
   Node* init = ReductionNode::make_reduction_input(gvn(), opc, elem_bt);
-  Node* rn = gvn().transform(ReductionNode::make(opc, NULL, init, opd, elem_bt));
+  Node* value = NULL;
+  if (mask == NULL) {
+    assert(!is_masked_op, "Masked op needs the mask value never null");
+    value = ReductionNode::make(opc, NULL, init, opd, elem_bt);
+  } else {
+    if (use_predicate) {
+      value = ReductionNode::make(opc, NULL, init, opd, elem_bt);
+      value->add_req(mask);
+      value->add_flag(Node::Flag_is_predicated_vector);
+    } else {
+      Node* reduce_identity = gvn().transform(VectorNode::scalar2vector(init, num_elem, Type::get_const_basic_type(elem_bt)));
+      value = gvn().transform(new VectorBlendNode(reduce_identity, opd, mask));
+      value = ReductionNode::make(opc, NULL, init, value, elem_bt);
+    }
+  }
+  value = gvn().transform(value);
 
   Node* bits = NULL;
   switch (elem_bt) {
     case T_BYTE:
     case T_SHORT:
     case T_INT: {
-      bits = gvn().transform(new ConvI2LNode(rn));
+      bits = gvn().transform(new ConvI2LNode(value));
       break;
     }
     case T_FLOAT: {
-      rn   = gvn().transform(new MoveF2INode(rn));
-      bits = gvn().transform(new ConvI2LNode(rn));
+      value = gvn().transform(new MoveF2INode(value));
+      bits  = gvn().transform(new ConvI2LNode(value));
      break;
    }
    case T_DOUBLE: {
-      bits = gvn().transform(new MoveD2LNode(rn));
+      bits = gvn().transform(new MoveD2LNode(value));
      break;
    }
    case T_LONG: {
-      bits = rn; // no conversion needed
+      bits = value; // no conversion needed
      break;
    }
    default: fatal("%s", type2name(elem_bt));
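
The blend fallback relies on the reduction identity: unset lanes are replaced by make_reduction_input's identity element, so they cannot affect the result. The observable Java behavior, sketched (JDK 17+ API):

    import jdk.incubator.vector.*;

    class MaskedReduce {
        static void reduce(IntVector va, VectorMask<Integer> vm) {
            int sum = va.reduceLanes(VectorOperators.ADD, vm); // unset lanes contribute 0
            int min = va.reduceLanes(VectorOperators.MIN, vm); // unset lanes contribute Integer.MAX_VALUE
        }
    }
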
@@ -1157,7 +1694,7 @@ bool LibraryCallKit::inline_vector_reduction() {
 
 // public static boolean test(int cond, Class<?> vectorClass, Class<?> elementType, int vlen,
 //                            V v1, V v2,
-//                            BiFunction<V, V, Boolean> defaultImpl) {
+//                            BiFunction<V, V, Boolean> defaultImpl)
 //
 bool LibraryCallKit::inline_vector_test() {
   const TypeInt* cond = gvn().type(argument(0))->isa_int();
@@ -1218,11 +1755,12 @@ bool LibraryCallKit::inline_vector_test() {
 }
 
 // public static
-// <V extends Vector<E,S>, M extends Vector.Mask<E,S>, S extends Vector.Shape, E>
-// V blend(Class<V> vectorClass, Class<M> maskClass, Class<?> elementType, int vlen,
+// <V extends Vector<E>,
+//  M extends VectorMask<E>,
+//  E>
+// V blend(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType, int vlen,
 //         V v1, V v2, M m,
-//         VectorBlendOp<V, M, S, E> defaultImpl) { ...
-//
+//         VectorBlendOp<V, M, E> defaultImpl)
 bool LibraryCallKit::inline_vector_blend() {
   const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
   const TypeInstPtr* mask_klass = gvn().type(argument(1))->isa_instptr();
@@ -1289,13 +1827,13 @@ bool LibraryCallKit::inline_vector_blend() {
   return true;
 }
 
-// public static <V extends Vector<E,S>,
-//                M extends Vector.Mask<E,S>,
-//                S extends Vector.Shape, E>
-// M compare(int cond, Class<V> vectorClass, Class<M> maskClass, Class<?> elementType, int vlen,
-//           V v1, V v2,
-//           VectorCompareOp<V,M> defaultImpl) { ...
-//
+// public static
+// <V extends Vector<E>,
+//  M extends VectorMask<E>,
+//  E>
+// M compare(int cond, Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType, int vlen,
+//           V v1, V v2, M m,
+//           VectorCompareOp<V, M> defaultImpl)
 bool LibraryCallKit::inline_vector_compare() {
   const TypeInt* cond = gvn().type(argument(0))->isa_int();
   const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr();
@@ -1363,14 +1901,45 @@ bool LibraryCallKit::inline_vector_compare() {
   Node* v1 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
   Node* v2 = unbox_vector(argument(6), vbox_type, elem_bt, num_elem);
 
+  bool is_masked_op = argument(7)->bottom_type() != TypePtr::NULL_PTR;
+  Node* mask = is_masked_op ? unbox_vector(argument(7), mbox_type, elem_bt, num_elem) : NULL;
+  if (is_masked_op && mask == NULL) {
+    if (C->print_intrinsics()) {
+      tty->print_cr("  ** not supported: mask = null arity=2 op=comp/%d vlen=%d etype=%s ismask=usestore is_masked_op=1",
+                    cond->get_con(), num_elem, type2name(elem_bt));
+    }
+    return false;
+  }
+
+  bool use_predicate = is_masked_op && arch_supports_vector(Op_VectorMaskCmp, num_elem, elem_bt, VecMaskUsePred);
+  if (is_masked_op && !use_predicate && !arch_supports_vector(Op_AndV, num_elem, elem_bt, VecMaskUseLoad)) {
+    if (C->print_intrinsics()) {
+      tty->print_cr("  ** not supported: arity=2 op=comp/%d vlen=%d etype=%s ismask=usestore is_masked_op=1",
+                    cond->get_con(), num_elem, type2name(elem_bt));
+    }
+    return false;
+  }
+
   if (v1 == NULL || v2 == NULL) {
     return false; // operand unboxing failed
   }
   BoolTest::mask pred = (BoolTest::mask)cond->get_con();
   ConINode* pred_node = (ConINode*)gvn().makecon(cond);
-  const TypeVect* vt = TypeVect::make(mask_bt, num_elem);
-  Node* operation = gvn().transform(new VectorMaskCmpNode(pred, v1, v2, pred_node, vt));
+  const TypeVect* vmask_type = TypeVect::makemask(mask_bt, num_elem);
+  Node* operation = new VectorMaskCmpNode(pred, v1, v2, pred_node, vmask_type);
+
+  if (is_masked_op) {
+    if (use_predicate) {
+      operation->add_req(mask);
+      operation->add_flag(Node::Flag_is_predicated_vector);
+    } else {
+      operation = gvn().transform(operation);
+      operation = VectorNode::make(Op_AndV, operation, mask, vmask_type);
+    }
+  }
+
+  operation = gvn().transform(operation);
+
   Node* box = box_vector(operation, mbox_type, mask_bt, num_elem);
   set_result(box);
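
When no predicated VectorMaskCmp is available, a masked compare is simply the unmasked compare ANDed with the input mask, which is legal because a mask-false lane must come out false either way. In Java terms (the three-argument compare is, I believe, the JDK 18 addition this hunk supports):

    import jdk.incubator.vector.*;

    class MaskedCompare {
        static VectorMask<Integer> lessThan(IntVector va, IntVector vb, VectorMask<Integer> vm) {
            return va.compare(VectorOperators.LT, vb, vm); // == va.compare(LT, vb).and(vm)
        }
    }
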
- +// , +// Sh extends VectorShuffle, +// M extends VectorMask, +// E> +// V rearrangeOp(Class vectorClass, Class shuffleClass, Class maskClass, Class elementType, int vlen, +// V v1, Sh sh, M m, +// VectorRearrangeOp defaultImpl) bool LibraryCallKit::inline_vector_rearrange() { const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr(); const TypeInstPtr* shuffle_klass = gvn().type(argument(1))->isa_instptr(); - const TypeInstPtr* elem_klass = gvn().type(argument(2))->isa_instptr(); - const TypeInt* vlen = gvn().type(argument(3))->isa_int(); + const TypeInstPtr* mask_klass = gvn().type(argument(2))->isa_instptr(); + const TypeInstPtr* elem_klass = gvn().type(argument(3))->isa_instptr(); + const TypeInt* vlen = gvn().type(argument(4))->isa_int(); - if (vector_klass == NULL || shuffle_klass == NULL || elem_klass == NULL || vlen == NULL) { + if (vector_klass == NULL || shuffle_klass == NULL || elem_klass == NULL || vlen == NULL) { return false; // dead code } - if (shuffle_klass->const_oop() == NULL || vector_klass->const_oop() == NULL || - elem_klass->const_oop() == NULL || !vlen->is_con()) { + if (shuffle_klass->const_oop() == NULL || + vector_klass->const_oop() == NULL || + elem_klass->const_oop() == NULL || + !vlen->is_con()) { if (C->print_intrinsics()) { tty->print_cr(" ** missing constant: vclass=%s sclass=%s etype=%s vlen=%s", NodeClassNames[argument(0)->Opcode()], NodeClassNames[argument(1)->Opcode()], - NodeClassNames[argument(2)->Opcode()], - NodeClassNames[argument(3)->Opcode()]); + NodeClassNames[argument(3)->Opcode()], + NodeClassNames[argument(4)->Opcode()]); } return false; // not enough info for intrinsification } - if (!is_klass_initialized(vector_klass) || !is_klass_initialized(shuffle_klass)) { + if (!is_klass_initialized(vector_klass) || + !is_klass_initialized(shuffle_klass)) { if (C->print_intrinsics()) { tty->print_cr(" ** klass argument not initialized"); } @@ -1428,12 +2003,30 @@ bool LibraryCallKit::inline_vector_rearrange() { } return false; // not supported } - if (!arch_supports_vector(Op_VectorRearrange, num_elem, elem_bt, VecMaskNotUsed)) { + + bool is_masked_op = argument(7)->bottom_type() != TypePtr::NULL_PTR; + bool use_predicate = is_masked_op; + if (is_masked_op && + (mask_klass == NULL || + mask_klass->const_oop() == NULL || + !is_klass_initialized(mask_klass))) { if (C->print_intrinsics()) { - tty->print_cr(" ** not supported: arity=2 op=shuffle/rearrange vlen=%d etype=%s ismask=no", - num_elem, type2name(elem_bt)); + tty->print_cr(" ** mask_klass argument not initialized"); + } + } + VectorMaskUseType checkFlags = (VectorMaskUseType)(is_masked_op ? 
+  if (!arch_supports_vector(Op_VectorRearrange, num_elem, elem_bt, checkFlags)) {
+    use_predicate = false;
+    if (!is_masked_op ||
+        (!arch_supports_vector(Op_VectorRearrange, num_elem, elem_bt, VecMaskNotUsed) ||
+         !arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad)     ||
+         !arch_supports_vector(VectorNode::replicate_opcode(elem_bt), num_elem, elem_bt, VecMaskNotUsed))) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** not supported: arity=2 op=shuffle/rearrange vlen=%d etype=%s ismask=no",
+                      num_elem, type2name(elem_bt));
+      }
+      return false; // not supported
     }
-    return false; // not supported
   }
   ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
   const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
@@ -1441,14 +2034,41 @@ bool LibraryCallKit::inline_vector_rearrange() {
   ciKlass* shbox_klass = shuffle_klass->const_oop()->as_instance()->java_lang_Class_klass();
   const TypeInstPtr* shbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, shbox_klass);
 
-  Node* v1 = unbox_vector(argument(4), vbox_type, elem_bt, num_elem);
-  Node* shuffle = unbox_vector(argument(5), shbox_type, shuffle_bt, num_elem);
+  Node* v1 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
+  Node* shuffle = unbox_vector(argument(6), shbox_type, shuffle_bt, num_elem);
 
   if (v1 == NULL || shuffle == NULL) {
     return false; // operand unboxing failed
   }
 
-  Node* rearrange = gvn().transform(new VectorRearrangeNode(v1, shuffle));
+  Node* mask = NULL;
+  if (is_masked_op) {
+    ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
+    const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
+    mask = unbox_vector(argument(7), mbox_type, elem_bt, num_elem);
+    if (mask == NULL) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** not supported: arity=3 op=shuffle/rearrange vlen=%d etype=%s ismask=useload is_masked_op=1",
+                      num_elem, type2name(elem_bt));
+      }
+      return false;
+    }
+  }
+
+  Node* rearrange = new VectorRearrangeNode(v1, shuffle);
+  if (is_masked_op) {
+    if (use_predicate) {
+      rearrange->add_req(mask);
+      rearrange->add_flag(Node::Flag_is_predicated_vector);
+    } else {
+      const TypeVect* vt = v1->bottom_type()->is_vect();
+      rearrange = gvn().transform(rearrange);
+      Node* zero = gvn().makecon(Type::get_zero_type(elem_bt));
+      Node* zerovec = gvn().transform(VectorNode::scalar2vector(zero, num_elem, Type::get_const_basic_type(elem_bt)));
+      rearrange = new VectorBlendNode(zerovec, rearrange, mask);
+    }
+  }
+  rearrange = gvn().transform(rearrange);
 
   Node* box = box_vector(rearrange, vbox_type, elem_bt, num_elem);
   set_result(box);
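
The zero-vector blend matches the documented semantics of the masked rearrange: lanes whose mask bit is unset become zero rather than keeping the original element. Sketch (JDK 17+ API):

    import jdk.incubator.vector.*;

    class MaskedRearrange {
        static IntVector permuteSome(IntVector va, VectorShuffle<Integer> sh, VectorMask<Integer> vm) {
            return va.rearrange(sh, vm); // unset lanes are zeroed, as in the blend above
        }
    }
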
@@ -1514,16 +2134,19 @@ Node* LibraryCallKit::gen_call_to_svml(int vector_api_op_id, BasicType bt, int n
 }
 
 // public static
-// <V extends Vector<?,?>>
-// V broadcastInt(int opr, Class<V> vectorClass, Class<?> elementType, int vlen,
-//                V v, int i,
-//                VectorBroadcastIntOp<V> defaultImpl) {
-//
+// <V extends Vector<E>,
+//  M extends VectorMask<E>,
+//  E>
+// V broadcastInt(int opr, Class<? extends V> vectorClass, Class<? extends M> maskClass,
+//                Class<E> elementType, int length,
+//                V v, int n, M m,
+//                VectorBroadcastIntOp<V, M> defaultImpl)
 bool LibraryCallKit::inline_vector_broadcast_int() {
   const TypeInt* opr = gvn().type(argument(0))->isa_int();
   const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr();
-  const TypeInstPtr* elem_klass = gvn().type(argument(2))->isa_instptr();
-  const TypeInt* vlen = gvn().type(argument(3))->isa_int();
+  const TypeInstPtr* mask_klass = gvn().type(argument(2))->isa_instptr();
+  const TypeInstPtr* elem_klass = gvn().type(argument(3))->isa_instptr();
+  const TypeInt* vlen = gvn().type(argument(4))->isa_int();
 
   if (opr == NULL || vector_klass == NULL || elem_klass == NULL || vlen == NULL) {
     return false; // dead code
@@ -1533,8 +2156,8 @@ bool LibraryCallKit::inline_vector_broadcast_int() {
       tty->print_cr("  ** missing constant: opr=%s vclass=%s etype=%s vlen=%s",
                     NodeClassNames[argument(0)->Opcode()],
                     NodeClassNames[argument(1)->Opcode()],
-                    NodeClassNames[argument(2)->Opcode()],
-                    NodeClassNames[argument(3)->Opcode()]);
+                    NodeClassNames[argument(3)->Opcode()],
+                    NodeClassNames[argument(4)->Opcode()]);
     }
     return false; // not enough info for intrinsification
   }
@@ -1544,6 +2167,32 @@ bool LibraryCallKit::inline_vector_broadcast_int() {
     }
     return false;
   }
+
+  const Type* vmask_type = gvn().type(argument(7));
+  bool is_masked_op = vmask_type != TypePtr::NULL_PTR;
+  if (is_masked_op) {
+    if (mask_klass == NULL || mask_klass->const_oop() == NULL) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** missing constant: maskclass=%s", NodeClassNames[argument(2)->Opcode()]);
+      }
+      return false; // not enough info for intrinsification
+    }
+
+    if (!is_klass_initialized(mask_klass)) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** mask klass argument not initialized");
+      }
+      return false;
+    }
+
+    if (vmask_type->maybe_null()) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** null mask values are not allowed for masked op");
+      }
+      return false;
+    }
+  }
+
   ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
   if (!elem_type->is_primitive_type()) {
     if (C->print_intrinsics()) {
@@ -1551,17 +2200,21 @@ bool LibraryCallKit::inline_vector_broadcast_int() {
     }
     return false; // should be primitive type
   }
-  BasicType elem_bt = elem_type->basic_type();
+
   int num_elem = vlen->get_con();
+  BasicType elem_bt = elem_type->basic_type();
   int opc = VectorSupport::vop2ideal(opr->get_con(), elem_bt);
+
   bool is_shift  = VectorNode::is_shift_opcode(opc);
   bool is_rotate = VectorNode::is_rotate_opcode(opc);
+
   if (opc == 0 || (!is_shift && !is_rotate)) {
     if (C->print_intrinsics()) {
       tty->print_cr("  ** operation not supported: op=%d bt=%s", opr->get_con(), type2name(elem_bt));
     }
     return false; // operation not supported
   }
+
   int sopc = VectorNode::opcode(opc, elem_bt);
   if (sopc == 0) {
     if (C->print_intrinsics()) {
@@ -1569,7 +2222,8 @@ bool LibraryCallKit::inline_vector_broadcast_int() {
     }
     return false; // operation not supported
   }
-  Node* cnt = argument(5);
+
+  Node* cnt = argument(6);
   ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
   const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
   const TypeInt* cnt_type = cnt->bottom_type()->isa_int();
@@ -1578,14 +2232,25 @@ bool LibraryCallKit::inline_vector_broadcast_int() {
   bool is_const_rotate = is_rotate && cnt_type && cnt_type->is_con() &&
                          Matcher::supports_vector_constant_rotates(cnt_type->get_con());
   bool has_scalar_args = is_rotate ? !is_const_rotate : true;
-  if (!arch_supports_vector(sopc, num_elem, elem_bt, VecMaskNotUsed, has_scalar_args)) {
-    if (C->print_intrinsics()) {
-      tty->print_cr("  ** not supported: arity=0 op=int/%d vlen=%d etype=%s ismask=no",
-                    sopc, num_elem, type2name(elem_bt));
+
+  VectorMaskUseType checkFlags = (VectorMaskUseType)(is_masked_op ? (VecMaskUseLoad | VecMaskUsePred) : VecMaskNotUsed);
+  bool use_predicate = is_masked_op;
+
+  if (!arch_supports_vector(sopc, num_elem, elem_bt, checkFlags, has_scalar_args)) {
+    use_predicate = false;
+    if (!is_masked_op ||
+        (!arch_supports_vector(sopc, num_elem, elem_bt, VecMaskNotUsed, has_scalar_args) ||
+         !arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad))) {
+
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** not supported: arity=0 op=int/%d vlen=%d etype=%s is_masked_op=%d",
+                      sopc, num_elem, type2name(elem_bt), is_masked_op ? 1 : 0);
+      }
+      return false; // not supported
     }
-    return false; // not supported
   }
-  Node* opd1 = unbox_vector(argument(4), vbox_type, elem_bt, num_elem);
+
+  Node* opd1 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
   Node* opd2 = NULL;
   if (is_shift) {
     opd2 = vector_shift_count(cnt, opc, elem_bt, num_elem);
@@ -1600,11 +2265,35 @@ bool LibraryCallKit::inline_vector_broadcast_int() {
       opd2 = cnt;
     }
   }
+
   if (opd1 == NULL || opd2 == NULL) {
     return false;
   }
-  Node* operation = gvn().transform(VectorNode::make(opc, opd1, opd2, num_elem, elem_bt));
 
+  Node* mask = NULL;
+  if (is_masked_op) {
+    ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
+    const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
+    mask = unbox_vector(argument(7), mbox_type, elem_bt, num_elem);
+    if (mask == NULL) {
+      if (C->print_intrinsics()) {
+        tty->print_cr("  ** unbox failed mask=%s", NodeClassNames[argument(7)->Opcode()]);
+      }
+      return false;
+    }
+  }
+
+  Node* operation = VectorNode::make(opc, opd1, opd2, num_elem, elem_bt);
+  if (is_masked_op && mask != NULL) {
+    if (use_predicate) {
+      operation->add_req(mask);
+      operation->add_flag(Node::Flag_is_predicated_vector);
+    } else {
+      operation = gvn().transform(operation);
+      operation = new VectorBlendNode(opd1, operation, mask);
+    }
+  }
+  operation = gvn().transform(operation);
   Node* vbox = box_vector(operation, vbox_type, elem_bt, num_elem);
   set_result(vbox);
   C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
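
broadcastInt covers the lanewise operations whose second operand is a scalar int count, i.e. shifts and rotates; the mask handling is the same predicate-or-blend scheme as for the vector-vector case. A sketch (ROL/ROR exist since JDK 17; constant rotate counts may match directly per supports_vector_constant_rotates):

    import jdk.incubator.vector.*;

    class MaskedShift {
        static IntVector shiftAndRotate(IntVector va, VectorMask<Integer> vm) {
            IntVector s = va.lanewise(VectorOperators.LSHL, 3, vm); // shift by scalar under mask
            return s.lanewise(VectorOperators.ROL, 1, vm);          // constant rotate under mask
        }
    }
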
@@ -1618,7 +2307,7 @@ bool LibraryCallKit::inline_vector_broadcast_int() {
 //                     Class<?> fromVectorClass, Class<?> fromElementType, int fromVLen,
 //                     Class<?> toVectorClass, Class<?> toElementType, int toVLen,
 //                     VIN v, S s,
-//                     VectorConvertOp<VOUT, VIN, S> defaultImpl) {
+//                     VectorConvertOp<VOUT, VIN, S> defaultImpl)
 //
 bool LibraryCallKit::inline_vector_convert() {
   const TypeInt* opr = gvn().type(argument(0))->isa_int();
@@ -1679,9 +2368,6 @@ bool LibraryCallKit::inline_vector_convert() {
     return false; // should be primitive type
   }
   BasicType elem_bt_to = elem_type_to->basic_type();
-  if (is_mask && (type2aelembytes(elem_bt_from) != type2aelembytes(elem_bt_to))) {
-    return false; // elem size mismatch
-  }
 
   int num_elem_from = vlen_from->get_con();
   int num_elem_to = vlen_to->get_con();
@@ -1727,13 +2413,28 @@ bool LibraryCallKit::inline_vector_convert() {
     return false;
   }
 
-  const TypeVect* src_type = TypeVect::make(elem_bt_from, num_elem_from);
-  const TypeVect* dst_type = TypeVect::make(elem_bt_to, num_elem_to);
+  const TypeVect* src_type = TypeVect::make(elem_bt_from, num_elem_from, is_mask);
+  const TypeVect* dst_type = TypeVect::make(elem_bt_to, num_elem_to, is_mask);
+
+  // Safety check to prevent a cast if the source mask is of vector type while
+  // the destination mask is of predicate-vector type, and vice versa.
+  // From the X86 standpoint, this case will only arise on the KNL target,
+  // where certain masks (depending on the species) are either propagated
+  // through a vector or a predicate register.
+  if (is_mask &&
+      ((src_type->isa_vectmask() == NULL && dst_type->isa_vectmask()) ||
+       (dst_type->isa_vectmask() == NULL && src_type->isa_vectmask()))) {
+    return false;
+  }
 
   Node* op = opd1;
   if (is_cast) {
-    assert(!is_mask, "masks cannot be casted");
-    int cast_vopc = VectorCastNode::opcode(elem_bt_from);
+    BasicType new_elem_bt_to = elem_bt_to;
+    BasicType new_elem_bt_from = elem_bt_from;
+    if (is_mask && is_floating_point_type(elem_bt_from)) {
+      new_elem_bt_from = elem_bt_from == T_FLOAT ? T_INT : T_LONG;
+    }
+    int cast_vopc = VectorCastNode::opcode(new_elem_bt_from);
     // Make sure that cast is implemented to particular type/size combination.
     if (!arch_supports_vector(cast_vopc, num_elem_to, elem_bt_to, VecMaskNotUsed)) {
       if (C->print_intrinsics()) {
@@ -1787,9 +2488,32 @@ bool LibraryCallKit::inline_vector_convert() {
                                                         num_elem_for_resize)));
       op = gvn().transform(VectorCastNode::make(cast_vopc, op, elem_bt_to, num_elem_to));
     } else {
-      // Since input and output number of elements match, and since we know this vector size is
-      // supported, simply do a cast with no resize needed.
-      op = gvn().transform(VectorCastNode::make(cast_vopc, op, elem_bt_to, num_elem_to));
+      if (is_mask) {
+        if ((dst_type->isa_vectmask() && src_type->isa_vectmask()) ||
+            (type2aelembytes(elem_bt_from) == type2aelembytes(elem_bt_to))) {
+          op = gvn().transform(new VectorMaskCastNode(op, dst_type));
+        } else {
+          // Special handling for casting operation involving floating point types.
+          // Case A) F -> X :=  F -> VectorMaskCast (F->I/L [NOP]) -> VectorCast[I/L]2X
+          // Case B) X -> F :=  X -> VectorCastX2[I/L] -> VectorMaskCast ([I/L]->F [NOP])
          // Case C) F -> F :=  VectorMaskCast (F->I/L [NOP]) -> VectorCast[I/L]2[L/I] -> VectorMaskCast (L/I->F [NOP])
+          if (is_floating_point_type(elem_bt_from)) {
+            const TypeVect* new_src_type = TypeVect::make(new_elem_bt_from, num_elem_to, is_mask);
+            op = gvn().transform(new VectorMaskCastNode(op, new_src_type));
+          }
+          if (is_floating_point_type(elem_bt_to)) {
+            new_elem_bt_to = elem_bt_to == T_FLOAT ? T_INT : T_LONG;
+          }
+          op = gvn().transform(VectorCastNode::make(cast_vopc, op, new_elem_bt_to, num_elem_to));
+          if (new_elem_bt_to != elem_bt_to) {
+            op = gvn().transform(new VectorMaskCastNode(op, dst_type));
+          }
+        }
+      } else {
+        // Since input and output number of elements match, and since we know this vector size is
        // supported, simply do a cast with no resize needed.
+        op = gvn().transform(VectorCastNode::make(cast_vopc, op, elem_bt_to, num_elem_to));
+      }
     }
   } else if (Type::cmp(src_type, dst_type) != 0) {
     assert(!is_cast, "must be reinterpret");
@@ -1804,11 +2528,11 @@ bool LibraryCallKit::inline_vector_convert() {
 }
 
 // public static
-// <V extends Vector<?,?>>
-// V insert(Class<?> vectorClass, Class<?> elementType, int vlen,
+// <V extends Vector<E>,
+//  E>
+// V insert(Class<? extends V> vectorClass, Class<E> elementType, int vlen,
 //          V vec, int ix, long val,
-//          VecInsertOp<V> defaultImpl) {
-//
+//          VecInsertOp<V> defaultImpl)
 bool LibraryCallKit::inline_vector_insert() {
   const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
   const TypeInstPtr* elem_klass = gvn().type(argument(1))->isa_instptr();
@@ -1897,11 +2621,11 @@ bool LibraryCallKit::inline_vector_insert() {
 }
 
 // public static
-// <V extends Vector<?,?>>
-// long extract(Class<?> vectorClass, Class<?> elementType, int vlen,
+// <V extends Vector<E>,
+//  E>
+// long extract(Class<? extends V> vectorClass, Class<E> elementType, int vlen,
 //              V vec, int ix,
-//              VecExtractOp<V> defaultImpl) {
-//
+//              VecExtractOp<V> defaultImpl)
 bool LibraryCallKit::inline_vector_extract() {
   const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
   const TypeInstPtr* elem_klass = gvn().type(argument(1))->isa_instptr();
diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp
index e9193d784adeb5398aadbf5722a7fab4783b19af..6d845271e4737b2ae9a0427ad72c5067bf835000 100644
--- a/src/hotspot/share/opto/vectornode.cpp
+++ b/src/hotspot/share/opto/vectornode.cpp
@@ -224,6 +224,14 @@ int VectorNode::opcode(int sopc, BasicType bt) {
     return Op_StoreVector;
   case Op_MulAddS2I:
     return Op_MulAddVS2VI;
+  case Op_ConvI2F:
+    return Op_VectorCastI2X;
+  case Op_ConvL2D:
+    return Op_VectorCastL2X;
+  case Op_ConvF2I:
+    return Op_VectorCastF2X;
+  case Op_ConvD2L:
+    return Op_VectorCastD2X;
 
   default:
     return 0; // Unimplemented
@@ -366,8 +374,8 @@ bool VectorNode::is_scalar_rotate(Node* n) {
   return false;
 }
 
-bool VectorNode::is_vshift_cnt(Node* n) {
-  switch (n->Opcode()) {
+bool VectorNode::is_vshift_cnt_opcode(int opc) {
+  switch (opc) {
   case Op_LShiftCntV:
  case Op_RShiftCntV:
    return true;
@@ -376,6 +384,10 @@ bool VectorNode::is_vshift_cnt(Node* n) {
   }
 }
 
+bool VectorNode::is_vshift_cnt(Node* n) {
+  return is_vshift_cnt_opcode(n->Opcode());
+}
+
 // Check if input is loop invariant vector.
 bool VectorNode::is_invariant_vector(Node* n) {
   // Only Replicate vector nodes are loop invariant for now.
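
Mapping the scalar ConvI2F/ConvL2D/ConvF2I/ConvD2L opcodes to vector cast nodes lets SuperWord vectorize plain conversion loops in addition to the Vector API paths. For instance, a loop of this shape can now use VectorCastI2X (a sketch; whether it actually vectorizes still depends on the target's arch_supports_vector answers):

    class ConvLoop {
        static void intToFloat(int[] a, float[] b) {
            for (int i = 0; i < a.length; i++) {
                b[i] = (float) a[i]; // scalar ConvI2F -> vector VectorCastI2X
            }
        }
    }
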
@@ -442,10 +454,40 @@ void VectorNode::vector_operands(Node* n, uint* start, uint* end) {
   }
 }
 
+VectorNode* VectorNode::make_mask_node(int vopc, Node* n1, Node* n2, uint vlen, BasicType bt) {
+  guarantee(vopc > 0, "vopc must be > 0");
+  const TypeVect* vmask_type = TypeVect::makemask(bt, vlen);
+  switch (vopc) {
+    case Op_AndV:
+      if (Matcher::match_rule_supported_vector_masked(Op_AndVMask, vlen, bt)) {
+        return new AndVMaskNode(n1, n2, vmask_type);
+      }
+      return new AndVNode(n1, n2, vmask_type);
+    case Op_OrV:
+      if (Matcher::match_rule_supported_vector_masked(Op_OrVMask, vlen, bt)) {
+        return new OrVMaskNode(n1, n2, vmask_type);
+      }
+      return new OrVNode(n1, n2, vmask_type);
+    case Op_XorV:
+      if (Matcher::match_rule_supported_vector_masked(Op_XorVMask, vlen, bt)) {
+        return new XorVMaskNode(n1, n2, vmask_type);
+      }
+      return new XorVNode(n1, n2, vmask_type);
+    default:
+      fatal("Unsupported mask vector creation for '%s'", NodeClassNames[vopc]);
+      return NULL;
+  }
+}
+
 // Make a vector node for binary operation
-VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt) {
+VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt, bool is_mask) {
   // This method should not be called for unimplemented vectors.
   guarantee(vopc > 0, "vopc must be > 0");
+
+  if (is_mask) {
+    return make_mask_node(vopc, n1, n2, vt->length(), vt->element_basic_type());
+  }
+
   switch (vopc) {
   case Op_AddVB: return new AddVBNode(n1, n2, vt);
   case Op_AddVS: return new AddVSNode(n1, n2, vt);
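
make_mask_node picks a predicate-register node (AndVMask/OrVMask/XorVMask) when the matcher supports it, and otherwise falls back to the plain vector logic node over a boolean-vector mask. This is what mask-to-mask logic in Java lowers to, sketched below (VectorMask.xor is not public API, so the XorV case is reached internally, e.g. via not()):

    import jdk.incubator.vector.*;

    class MaskLogic {
        static VectorMask<Integer> combine(VectorMask<Integer> m1, VectorMask<Integer> m2) {
            return m1.and(m2.or(m1.not())); // AndVMask/OrVMask-style nodes on predicated targets
        }
    }
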
@@ -552,10 +594,15 @@ VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, Node* n3, uint vlen, B
 }
 
 // Scalar promotion
-VectorNode* VectorNode::scalar2vector(Node* s, uint vlen, const Type* opd_t) {
+VectorNode* VectorNode::scalar2vector(Node* s, uint vlen, const Type* opd_t, bool is_mask) {
   BasicType bt = opd_t->array_element_basic_type();
-  const TypeVect* vt = opd_t->singleton() ? TypeVect::make(opd_t, vlen)
-                                          : TypeVect::make(bt, vlen);
+  const TypeVect* vt = opd_t->singleton() ? TypeVect::make(opd_t, vlen, is_mask)
+                                          : TypeVect::make(bt, vlen, is_mask);
+
+  if (is_mask && Matcher::match_rule_supported_vector(Op_MaskAll, vlen, bt)) {
+    return new MaskAllNode(s, vt);
+  }
+
   switch (bt) {
   case T_BOOLEAN:
   case T_BYTE:
@@ -1006,9 +1053,10 @@ ReductionNode* ReductionNode::make(int opc, Node *ctrl, Node* n1, Node* n2, Basi
 
 Node* VectorLoadMaskNode::Identity(PhaseGVN* phase) {
   BasicType out_bt = type()->is_vect()->element_basic_type();
-  if (out_bt == T_BOOLEAN) {
+  if (!Matcher::has_predicated_vectors() && out_bt == T_BOOLEAN) {
     return in(1); // redundant conversion
   }
+
   return this;
 }
 
@@ -1105,7 +1153,9 @@ Node* ReductionNode::make_reduction_input(PhaseGVN& gvn, int opc, BasicType bt)
     case Op_MinReductionV:
       switch (bt) {
         case T_BYTE:
+          return gvn.makecon(TypeInt::make(max_jbyte));
         case T_SHORT:
+          return gvn.makecon(TypeInt::make(max_jshort));
         case T_INT:
           return gvn.makecon(TypeInt::MAX);
         case T_LONG:
@@ -1120,7 +1170,9 @@ Node* ReductionNode::make_reduction_input(PhaseGVN& gvn, int opc, BasicType bt)
     case Op_MaxReductionV:
       switch (bt) {
        case T_BYTE:
+          return gvn.makecon(TypeInt::make(min_jbyte));
        case T_SHORT:
+          return gvn.makecon(TypeInt::make(min_jshort));
        case T_INT:
          return gvn.makecon(TypeInt::MIN);
        case T_LONG:
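
Giving byte and short their own MIN/MAX reduction identities matters once the identity becomes observable: the masked-reduction blend writes the identity into unset lanes, and Integer.MAX_VALUE is not representable in a byte lane. The Java-visible corner case, sketched (JDK 17+ API):

    import jdk.incubator.vector.*;

    class EmptyMin {
        static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_PREFERRED;

        static byte minOfNothing(ByteVector bv) {
            // An all-false mask selects no lanes, so the result is the MIN identity,
            // which must be the typed max_jbyte above, i.e. Byte.MAX_VALUE.
            return bv.reduceLanes(VectorOperators.MIN, SPECIES.maskAll(false));
        }
    }
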
@@ -1313,16 +1365,17 @@ Node* VectorUnboxNode::Ideal(PhaseGVN* phase, bool can_reshape) {
     bool is_vector_mask    = vbox_klass->is_subclass_of(ciEnv::current()->vector_VectorMask_klass());
     bool is_vector_shuffle = vbox_klass->is_subclass_of(ciEnv::current()->vector_VectorShuffle_klass());
     if (is_vector_mask) {
+      const TypeVect* vmask_type = TypeVect::makemask(out_vt->element_basic_type(), out_vt->length());
       if (in_vt->length_in_bytes() == out_vt->length_in_bytes() &&
           Matcher::match_rule_supported_vector(Op_VectorMaskCast, out_vt->length(), out_vt->element_basic_type())) {
         // Apply "VectorUnbox (VectorBox vmask) ==> VectorMaskCast (vmask)"
         // directly. This could avoid the transformation ordering issue from
         // "VectorStoreMask (VectorLoadMask vmask) => vmask".
-        return new VectorMaskCastNode(value, out_vt);
+        return new VectorMaskCastNode(value, vmask_type);
       }
       // VectorUnbox (VectorBox vmask) ==> VectorLoadMask (VectorStoreMask vmask)
       value = phase->transform(VectorStoreMaskNode::make(*phase, value, in_vt->element_basic_type(), in_vt->length()));
-      return new VectorLoadMaskNode(value, out_vt);
+      return new VectorLoadMaskNode(value, vmask_type);
     } else if (is_vector_shuffle) {
       if (!is_shuffle_to_vector()) {
         // VectorUnbox (VectorBox vshuffle) ==> VectorLoadShuffle vshuffle
@@ -1380,13 +1433,14 @@ Node* VectorMaskOpNode::make(Node* mask, const Type* ty, int mopc) {
       return new VectorMaskLastTrueNode(mask, ty);
     case Op_VectorMaskFirstTrue:
       return new VectorMaskFirstTrueNode(mask, ty);
+    case Op_VectorMaskToLong:
+      return new VectorMaskToLongNode(mask, ty);
     default:
       assert(false, "Unhandled operation");
   }
   return NULL;
 }
 
-
 #ifndef PRODUCT
 void VectorBoxAllocateNode::dump_spec(outputStream *st) const {
   CallStaticJavaNode::dump_spec(st);
diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp
index e3ba3dd91eff784485d6a61a90f47e35f4877d4b..0b04a053f999ee30ed5e4c3028cdc613a7b480c4 100644
--- a/src/hotspot/share/opto/vectornode.hpp
+++ b/src/hotspot/share/opto/vectornode.hpp
@@ -66,16 +66,22 @@ class VectorNode : public TypeNode {
 
   virtual int Opcode() const;
 
-  virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(vect_type()->length_in_bytes()); }
+  virtual uint ideal_reg() const {
+    return type()->ideal_reg();
+  }
 
-  static VectorNode* scalar2vector(Node* s, uint vlen, const Type* opd_t);
+  static VectorNode* scalar2vector(Node* s, uint vlen, const Type* opd_t, bool is_mask = false);
   static VectorNode* shift_count(int opc, Node* cnt, uint vlen, BasicType bt);
   static VectorNode* make(int opc, Node* n1, Node* n2, uint vlen, BasicType bt);
-  static VectorNode* make(int vopc, Node* n1, Node* n2, const TypeVect* vt);
+  static VectorNode* make(int vopc, Node* n1, Node* n2, const TypeVect* vt, bool is_mask = false);
   static VectorNode* make(int opc, Node* n1, Node* n2, Node* n3, uint vlen, BasicType bt);
   static VectorNode* make(int vopc, Node* n1, Node* n2, Node* n3, const TypeVect* vt);
+  static VectorNode* make_mask_node(int vopc, Node* n1, Node* n2, uint vlen, BasicType bt);
 
   static bool is_shift_opcode(int opc);
+
+  static bool is_vshift_cnt_opcode(int opc);
+
   static bool is_rotate_opcode(int opc);
 
   static int opcode(int opc, BasicType bt);
@@ -798,8 +804,8 @@ class StoreVectorNode : public StoreNode {
   }
   virtual int Opcode() const;
   virtual uint match_edge(uint idx) const { return idx == MemNode::Address ||
-                                            idx == MemNode::ValueIn ||
-                                            idx == MemNode::ValueIn + 1; }
+                                                   idx == MemNode::ValueIn ||
+                                                   idx == MemNode::ValueIn + 1; }
 };
 
 //------------------------------StoreVectorMaskedNode--------------------------------
@@ -808,7 +814,7 @@ class StoreVectorMaskedNode : public StoreVectorNode {
  public:
   StoreVectorMaskedNode(Node* c, Node* mem, Node* dst, Node* src, const TypePtr* at, Node* mask)
    : StoreVectorNode(c, mem, dst, at, src) {
-    assert(mask->bottom_type()->is_vectmask(), "sanity");
+    assert(mask->bottom_type()->isa_vectmask(), "sanity");
     init_class_id(Class_StoreVector);
     set_mismatched_access();
     add_req(mask);
@@ -828,7 +834,7 @@ class LoadVectorMaskedNode : public LoadVectorNode {
  public:
   LoadVectorMaskedNode(Node* c, Node* mem, Node* src, const TypePtr* at, const TypeVect* vt, Node* mask)
    : LoadVectorNode(c, mem, src, at, vt) {
-    assert(mask->bottom_type()->is_vectmask(), "sanity");
assert(mask->bottom_type()->isa_vectmask(), "sanity"); init_class_id(Class_LoadVector); set_mismatched_access(); add_req(mask); @@ -842,6 +848,45 @@ class LoadVectorMaskedNode : public LoadVectorNode { Node* Ideal(PhaseGVN* phase, bool can_reshape); }; +//-------------------------------LoadVectorGatherMaskedNode--------------------------------- +// Load Vector from memory via index map under the influence of a predicate register (mask). +class LoadVectorGatherMaskedNode : public LoadVectorNode { + public: + LoadVectorGatherMaskedNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt, Node* indices, Node* mask) + : LoadVectorNode(c, mem, adr, at, vt) { + init_class_id(Class_LoadVector); + assert(indices->bottom_type()->is_vect(), "indices must be in vector"); + assert(mask->bottom_type()->isa_vectmask(), "sanity"); + add_req(indices); + add_req(mask); + assert(req() == MemNode::ValueIn + 2, "match_edge expects that last input is in MemNode::ValueIn+1"); + } + + virtual int Opcode() const; + virtual uint match_edge(uint idx) const { return idx == MemNode::Address || + idx == MemNode::ValueIn || + idx == MemNode::ValueIn + 1; } +}; + +//------------------------------StoreVectorScatterMaskedNode-------------------------------- +// Store Vector into memory via index map under the influence of a predicate register (mask). +class StoreVectorScatterMaskedNode : public StoreVectorNode { + public: + StoreVectorScatterMaskedNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val, Node* indices, Node* mask) + : StoreVectorNode(c, mem, adr, at, val) { + init_class_id(Class_StoreVector); + assert(indices->bottom_type()->is_vect(), "indices must be in vector"); + assert(mask->bottom_type()->isa_vectmask(), "sanity"); + add_req(indices); + add_req(mask); + assert(req() == MemNode::ValueIn + 3, "match_edge expects that last input is in MemNode::ValueIn+2"); + } + virtual int Opcode() const; + virtual uint match_edge(uint idx) const { return idx == MemNode::Address || + idx == MemNode::ValueIn || + idx == MemNode::ValueIn + 1 || + idx == MemNode::ValueIn + 2; } +}; //------------------------------VectorCmpMaskedNode-------------------------------- // Vector Comparison under the influence of a predicate register (mask). 
@@ -856,7 +901,6 @@ class VectorCmpMaskedNode : public TypeNode { virtual int Opcode() const; }; - class VectorMaskGenNode : public TypeNode { public: VectorMaskGenNode(Node* length, const Type* ty, BasicType ety): TypeNode(ty, 2), _elemType(ety) { @@ -878,7 +922,7 @@ class VectorMaskOpNode : public TypeNode { public: VectorMaskOpNode(Node* mask, const Type* ty, int mopc): TypeNode(ty, 2), _mopc(mopc) { - assert(mask->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN, ""); + assert(Matcher::has_predicated_vectors() || mask->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN, ""); init_req(1, mask); } @@ -913,6 +957,42 @@ class VectorMaskLastTrueNode : public VectorMaskOpNode { virtual int Opcode() const; }; +class VectorMaskToLongNode : public VectorMaskOpNode { + public: + VectorMaskToLongNode(Node* mask, const Type* ty): + VectorMaskOpNode(mask, ty, Op_VectorMaskToLong) {} + virtual int Opcode() const; + virtual uint ideal_reg() const { return Op_RegL; } +}; + +//-------------------------- Vector mask broadcast ----------------------------------- +class MaskAllNode : public VectorNode { + public: + MaskAllNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {} + virtual int Opcode() const; +}; + +//--------------------------- Vector mask logical and -------------------------------- +class AndVMaskNode : public VectorNode { + public: + AndVMaskNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {} + virtual int Opcode() const; +}; + +//--------------------------- Vector mask logical or --------------------------------- +class OrVMaskNode : public VectorNode { + public: + OrVMaskNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {} + virtual int Opcode() const; +}; + +//--------------------------- Vector mask logical xor -------------------------------- +class XorVMaskNode : public VectorNode { + public: + XorVMaskNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {} + virtual int Opcode() const; +}; + //=========================Promote_Scalar_to_Vector============================ //------------------------------ReplicateBNode--------------------------------- @@ -1184,7 +1264,7 @@ class VectorMaskCmpNode : public VectorNode { BoolTest::mask _predicate; protected: - uint size_of() const { return sizeof(*this); } + virtual uint size_of() const { return sizeof(VectorMaskCmpNode); } public: VectorMaskCmpNode(BoolTest::mask predicate, Node* in1, Node* in2, ConINode* predicate_node, const TypeVect* vt) : @@ -1194,6 +1274,7 @@ class VectorMaskCmpNode : public VectorNode { "VectorMaskCmp inputs must have same type for elements"); assert(in1->bottom_type()->is_vect()->length() == in2->bottom_type()->is_vect()->length(), "VectorMaskCmp inputs must have same number of elements"); + assert((BoolTest::mask)predicate_node->get_int() == predicate, "Unmatched predicates"); init_class_id(Class_VectorMaskCmp); } @@ -1305,7 +1386,6 @@ class VectorMaskCastNode : public VectorNode { VectorMaskCastNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) { const TypeVect* in_vt = in->bottom_type()->is_vect(); assert(in_vt->length() == vt->length(), "vector length must match"); - assert(type2aelembytes(in_vt->element_basic_type()) == type2aelembytes(vt->element_basic_type()), "element size must match"); } virtual int Opcode() const; @@ -1315,12 +1395,19 @@ class VectorMaskCastNode : public VectorNode { class VectorReinterpretNode : public VectorNode { private: const TypeVect* _src_vt; + protected: - uint size_of() const { 
return sizeof(*this); } + uint size_of() const { return sizeof(VectorReinterpretNode); } public: VectorReinterpretNode(Node* in, const TypeVect* src_vt, const TypeVect* dst_vt) - : VectorNode(in, dst_vt), _src_vt(src_vt) { } + : VectorNode(in, dst_vt), _src_vt(src_vt) { + assert((!dst_vt->isa_vectmask() && !src_vt->isa_vectmask()) || + (type2aelembytes(src_vt->element_basic_type()) >= type2aelembytes(dst_vt->element_basic_type())), + "unsupported mask widening reinterpretation"); + init_class_id(Class_VectorReinterpret); + } + const TypeVect* src_type() { return _src_vt; } virtual uint hash() const { return VectorNode::hash() + _src_vt->hash(); } virtual bool cmp( const Node &n ) const { return VectorNode::cmp(n) && !Type::cmp(_src_vt,((VectorReinterpretNode&)n)._src_vt); @@ -1453,6 +1540,7 @@ class VectorUnboxNode : public VectorNode { VectorUnboxNode(Compile* C, const TypeVect* vec_type, Node* obj, Node* mem, bool shuffle_to_vector) : VectorNode(mem, obj, vec_type) { _shuffle_to_vector = shuffle_to_vector; + init_class_id(Class_VectorUnbox); init_flags(Flag_is_macro); C->add_macro_node(this); } @@ -1482,5 +1570,4 @@ public: virtual int Opcode() const; Node* Ideal(PhaseGVN* phase, bool can_reshape); }; - #endif // SHARE_OPTO_VECTORNODE_HPP diff --git a/src/hotspot/share/prims/jni.cpp b/src/hotspot/share/prims/jni.cpp index 745e472a81007d07b33611d21a016d471b1917d3..13a2ff9dc8718def636c3eec9c6c5475ab04472c 100644 --- a/src/hotspot/share/prims/jni.cpp +++ b/src/hotspot/share/prims/jni.cpp @@ -639,11 +639,8 @@ JNI_ENTRY(jint, jni_PushLocalFrame(JNIEnv *env, jint capacity)) HOTSPOT_JNI_PUSHLOCALFRAME_RETURN((uint32_t)JNI_ERR); return JNI_ERR; } - JNIHandleBlock* old_handles = thread->active_handles(); - JNIHandleBlock* new_handles = JNIHandleBlock::allocate_block(thread); - assert(new_handles != NULL, "should not be NULL"); - new_handles->set_pop_frame_link(old_handles); - thread->set_active_handles(new_handles); + + thread->push_jni_handle_block(); jint ret = JNI_OK; HOTSPOT_JNI_PUSHLOCALFRAME_RETURN(ret); return ret; diff --git a/src/hotspot/share/prims/jvm.cpp b/src/hotspot/share/prims/jvm.cpp index f44b34bf4e53be75f33cb1ef4f107457780f5aca..de4e35492c8ecf6dd8b1d2a98ec9f77fccf066da 100644 --- a/src/hotspot/share/prims/jvm.cpp +++ b/src/hotspot/share/prims/jvm.cpp @@ -426,8 +426,8 @@ extern volatile jint vm_created; JVM_ENTRY_NO_ENV(void, JVM_BeforeHalt()) #if INCLUDE_CDS // Link all classes for dynamic CDS dumping before vm exit. 
- if (DynamicDumpSharedSpaces) { - DynamicArchive::prepare_for_dynamic_dumping(); + if (DynamicArchive::should_dump_at_vm_exit()) { + DynamicArchive::prepare_for_dump_at_exit(); } #endif EventShutdown event; @@ -3706,7 +3706,7 @@ JVM_ENTRY(void, JVM_DumpDynamicArchive(JNIEnv *env, jstring archiveName)) ResourceMark rm(THREAD); Handle file_handle(THREAD, JNIHandles::resolve_non_null(archiveName)); char* archive_name = java_lang_String::as_utf8_string(file_handle()); - DynamicArchive::dump(archive_name, CHECK); + DynamicArchive::dump_for_jcmd(archive_name, CHECK); #endif // INCLUDE_CDS JVM_END diff --git a/src/hotspot/share/prims/jvmtiEnvBase.cpp b/src/hotspot/share/prims/jvmtiEnvBase.cpp index e6a9409b7ef9064c2efcc2e9e05840fa1f100398..29fb94226af44a5e3d7876f12f9ffe6735f9cf61 100644 --- a/src/hotspot/share/prims/jvmtiEnvBase.cpp +++ b/src/hotspot/share/prims/jvmtiEnvBase.cpp @@ -1510,7 +1510,7 @@ JvmtiModuleClosure::get_all_modules(JvmtiEnv* env, jint* module_count_ptr, jobje return JVMTI_ERROR_OUT_OF_MEMORY; } for (jint idx = 0; idx < len; idx++) { - array[idx] = JNIHandles::make_local(Thread::current(), _tbl->at(idx).resolve()); + array[idx] = JNIHandles::make_local(_tbl->at(idx).resolve()); } _tbl = NULL; *modules_ptr = array; diff --git a/src/hotspot/share/prims/jvmtiExport.cpp b/src/hotspot/share/prims/jvmtiExport.cpp index 7f484d1ebac6fcd93fcd7d4bd93e48bbd0a70656..685abb08ec7e8a2b3277bd6a55736666c7c93bae 100644 --- a/src/hotspot/share/prims/jvmtiExport.cpp +++ b/src/hotspot/share/prims/jvmtiExport.cpp @@ -141,20 +141,11 @@ private: JavaThread *_thread; JNIEnv* _jni_env; JvmtiThreadState::ExceptionState _saved_exception_state; -#if 0 - JNIHandleBlock* _hblock; -#endif public: JvmtiEventMark(JavaThread *thread) : _thread(thread), _jni_env(thread->jni_environment()), _saved_exception_state(JvmtiThreadState::ES_CLEARED) { -#if 0 - _hblock = thread->active_handles(); - _hblock->clear_thoroughly(); // so we can be safe -#else - // we want to use the code above - but that needs the JNIHandle changes - later... - // for now, steal JNI push local frame code JvmtiThreadState *state = thread->jvmti_thread_state(); // we are before an event. // Save current jvmti thread exception state. @@ -162,31 +153,13 @@ public: _saved_exception_state = state->get_exception_state(); } - JNIHandleBlock* old_handles = thread->active_handles(); - JNIHandleBlock* new_handles = JNIHandleBlock::allocate_block(thread); - assert(new_handles != NULL, "should not be NULL"); - new_handles->set_pop_frame_link(old_handles); - thread->set_active_handles(new_handles); -#endif + thread->push_jni_handle_block(); assert(thread == JavaThread::current(), "thread must be current!"); thread->frame_anchor()->make_walkable(thread); }; ~JvmtiEventMark() { -#if 0 - _hblock->clear(); // for consistency with future correct behavior -#else - // we want to use the code above - but that needs the JNIHandle changes - later... - // for now, steal JNI pop local frame code - JNIHandleBlock* old_handles = _thread->active_handles(); - JNIHandleBlock* new_handles = old_handles->pop_frame_link(); - assert(new_handles != NULL, "should not be NULL"); - _thread->set_active_handles(new_handles); - // Note that we set the pop_frame_link to NULL explicitly, otherwise - // the release_block call will release the blocks. 
- old_handles->set_pop_frame_link(NULL); - JNIHandleBlock::release_block(old_handles, _thread); // may block -#endif + _thread->pop_jni_handle_block(); JvmtiThreadState* state = _thread->jvmti_thread_state(); // we are continuing after an event. @@ -196,13 +169,7 @@ public: } } -#if 0 - jobject to_jobject(oop obj) { return obj == NULL? NULL : _hblock->allocate_handle_fast(obj); } -#else - // we want to use the code above - but that needs the JNIHandle changes - later... - // for now, use regular make_local jobject to_jobject(oop obj) { return JNIHandles::make_local(_thread,obj); } -#endif jclass to_jclass(Klass* klass) { return (klass == NULL ? NULL : (jclass)to_jobject(klass->java_mirror())); } diff --git a/src/hotspot/share/prims/jvmtiManageCapabilities.cpp b/src/hotspot/share/prims/jvmtiManageCapabilities.cpp index dee9aec8217cb89f67bc4db5f3a1ff05566cb843..e49ca2c776a0c05449ea0a111fde341a8d8bfa39 100644 --- a/src/hotspot/share/prims/jvmtiManageCapabilities.cpp +++ b/src/hotspot/share/prims/jvmtiManageCapabilities.cpp @@ -326,6 +326,12 @@ void JvmtiManageCapabilities::update() { || avail.can_generate_field_modification_events) { RewriteFrequentPairs = false; +#ifdef ZERO + // The BytecodeInterpreter is specialized only with RewriteBytecodes + // for simplicity. If we want to disable RewriteFrequentPairs, we + // need to disable RewriteBytecodes as well. + RewriteBytecodes = false; +#endif } // If can_redefine_classes is enabled in the onload phase then we know that the diff --git a/src/hotspot/share/prims/nativeLookup.cpp b/src/hotspot/share/prims/nativeLookup.cpp index 16a1041e8d727b6e017de79f9b7e6d67178fcbce..48b47d84542e433cbc91c09e294b97b59b64d359 100644 --- a/src/hotspot/share/prims/nativeLookup.cpp +++ b/src/hotspot/share/prims/nativeLookup.cpp @@ -182,24 +182,6 @@ char* NativeLookup::pure_jni_name(const methodHandle& method) { return st.as_string(); } - -char* NativeLookup::critical_jni_name(const methodHandle& method) { - stringStream st; - // Prefix - st.print("JavaCritical_"); - // Klass name - if (!map_escaped_name_on(&st, method->klass_name())) { - return NULL; - } - st.print("_"); - // Method name - if (!map_escaped_name_on(&st, method->name())) { - return NULL; - } - return st.as_string(); -} - - char* NativeLookup::long_jni_name(const methodHandle& method) { // Signatures ignore the wrapping parentheses and the trailing return type stringStream st; @@ -332,12 +314,6 @@ const char* NativeLookup::compute_complete_jni_name(const char* pure_name, const return st.as_string(); } -address NativeLookup::lookup_critical_style(void* dll, const char* pure_name, const char* long_name, int args_size, bool os_style) { - const char* jni_name = compute_complete_jni_name(pure_name, long_name, args_size, os_style); - assert(dll != NULL, "dll must be loaded"); - return (address)os::dll_lookup(dll, jni_name); -} - // Check all the formats of native implementation name to see if there is one // for the specified method. address NativeLookup::lookup_entry(const methodHandle& method, TRAPS) { @@ -381,53 +357,6 @@ address NativeLookup::lookup_entry(const methodHandle& method, TRAPS) { return entry; // NULL indicates not found } -// Check all the formats of native implementation name to see if there is one -// for the specified method. 
-address NativeLookup::lookup_critical_entry(const methodHandle& method) { - assert(CriticalJNINatives, "or should not be here"); - - if (method->is_synchronized() || - !method->is_static()) { - // Only static non-synchronized methods are allowed - return NULL; - } - - ResourceMark rm; - - Symbol* signature = method->signature(); - for (int end = 0; end < signature->utf8_length(); end++) { - if (signature->char_at(end) == 'L') { - // Don't allow object types - return NULL; - } - } - - // Compute argument size - int args_size = method->size_of_parameters(); - for (SignatureStream ss(signature); !ss.at_return_type(); ss.next()) { - if (ss.is_array()) { - args_size += T_INT_size; // array length parameter - } - } - - // dll handling requires I/O. Don't do that while in _thread_in_vm (safepoint may get requested). - ThreadToNativeFromVM thread_in_native(JavaThread::current()); - - void* dll = dll_load(method); - address entry = NULL; - - if (dll != NULL) { - entry = lookup_critical_style(dll, method, args_size); - // Close the handle to avoid keeping the library alive if the native method holder is unloaded. - // This is fine because the library is still kept alive by JNI (see JVM_LoadLibrary). As soon - // as the holder class and the library are unloaded (see JVM_UnloadLibrary), the native wrapper - // that calls 'critical_entry' becomes unreachable and is unloaded as well. - os::dll_unload(dll); - } - - return entry; // NULL indicates not found -} - void* NativeLookup::dll_load(const methodHandle& method) { if (method->has_native_function()) { @@ -446,44 +375,6 @@ void* NativeLookup::dll_load(const methodHandle& method) { return NULL; } -address NativeLookup::lookup_critical_style(void* dll, const methodHandle& method, int args_size) { - address entry = NULL; - const char* critical_name = critical_jni_name(method); - if (critical_name == NULL) { - // JNI name mapping rejected this method so return - // NULL to indicate UnsatisfiedLinkError should be thrown. - return NULL; - } - - // 1) Try JNI short style - entry = lookup_critical_style(dll, critical_name, "", args_size, true); - if (entry != NULL) { - return entry; - } - - const char* long_name = long_jni_name(method); - if (long_name == NULL) { - // JNI name mapping rejected this method so return - // NULL to indicate UnsatisfiedLinkError should be thrown. - return NULL; - } - - // 2) Try JNI long style - entry = lookup_critical_style(dll, critical_name, long_name, args_size, true); - if (entry != NULL) { - return entry; - } - - // 3) Try JNI short style without os prefix/suffix - entry = lookup_critical_style(dll, critical_name, "", args_size, false); - if (entry != NULL) { - return entry; - } - - // 4) Try JNI long style without os prefix/suffix - return lookup_critical_style(dll, critical_name, long_name, args_size, false); -} - // Check if there are any JVM TI prefixes which have been applied to the native method name. // If any are found, remove them before attempting the look up of the // native implementation again. 
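Note: with the JavaCritical_ lookup removed, NativeLookup only probes the two name forms from the JNI specification. A minimal sketch of those shapes, using an illustrative method that is not part of this change:

// Illustrative only -- not from the patch. For:
//   package p;  class C { static native int m(String s); }
// the probed symbol names are:
//   Short style (pure_jni_name):  Java_p_C_m
//   Long style  (long_jni_name):  Java_p_C_m__Ljava_lang_String_2
// The os-style variants additionally apply platform decoration (historically
// the stdcall "@<args_size>" suffix on 32-bit Windows).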
diff --git a/src/hotspot/share/prims/nativeLookup.hpp b/src/hotspot/share/prims/nativeLookup.hpp index 7166fb298f5a33c4cc643eb1faaeb823f8041629..a00b16c2fa446b77230915415a122c2ccf2b9181 100644 --- a/src/hotspot/share/prims/nativeLookup.hpp +++ b/src/hotspot/share/prims/nativeLookup.hpp @@ -35,8 +35,6 @@ class NativeLookup : AllStatic { private: // Style specific lookup static address lookup_style(const methodHandle& method, char* pure_name, const char* long_name, int args_size, bool os_style, TRAPS); - static address lookup_critical_style(void* dll, const char* pure_name, const char* long_name, int args_size, bool os_style); - static address lookup_critical_style(void* dll, const methodHandle& method, int args_size); static address lookup_base (const methodHandle& method, TRAPS); static address lookup_entry(const methodHandle& method, TRAPS); static address lookup_entry_prefixed(const methodHandle& method, TRAPS); @@ -47,11 +45,9 @@ class NativeLookup : AllStatic { // JNI name computation static char* pure_jni_name(const methodHandle& method); static char* long_jni_name(const methodHandle& method); - static char* critical_jni_name(const methodHandle& method); // Lookup native function. May throw UnsatisfiedLinkError. static address lookup(const methodHandle& method, TRAPS); - static address lookup_critical_entry(const methodHandle& method); }; #endif // SHARE_PRIMS_NATIVELOOKUP_HPP diff --git a/src/hotspot/share/prims/vectorSupport.cpp b/src/hotspot/share/prims/vectorSupport.cpp index 0b59f632408dedce3e8e9aa0dd03e7efd4e12653..cc5a37678a427c3a41c14747adb711a715d6b6b1 100644 --- a/src/hotspot/share/prims/vectorSupport.cpp +++ b/src/hotspot/share/prims/vectorSupport.cpp @@ -430,6 +430,18 @@ int VectorSupport::vop2ideal(jint id, BasicType bt) { } break; } + case VECTOR_OP_MASK_TOLONG: { + switch (bt) { + case T_BYTE: // fall-through + case T_SHORT: // fall-through + case T_INT: // fall-through + case T_LONG: // fall-through + case T_FLOAT: // fall-through + case T_DOUBLE: return Op_VectorMaskToLong; + default: fatal("MASK_TOLONG: %s", type2name(bt)); + } + break; + } case VECTOR_OP_TAN: case VECTOR_OP_TANH: case VECTOR_OP_SIN: diff --git a/src/hotspot/share/prims/vectorSupport.hpp b/src/hotspot/share/prims/vectorSupport.hpp index ac436a879767372b42968dcd4c618725d0cf4b35..ccebddb3d22647df5829f44c4417093dda0ccba3 100644 --- a/src/hotspot/share/prims/vectorSupport.hpp +++ b/src/hotspot/share/prims/vectorSupport.hpp @@ -82,10 +82,11 @@ class VectorSupport : AllStatic { VECTOR_OP_MASK_TRUECOUNT = 19, VECTOR_OP_MASK_FIRSTTRUE = 20, VECTOR_OP_MASK_LASTTRUE = 21, + VECTOR_OP_MASK_TOLONG = 22, // Rotate operations - VECTOR_OP_LROTATE = 22, - VECTOR_OP_RROTATE = 23, + VECTOR_OP_LROTATE = 23, + VECTOR_OP_RROTATE = 24, // Vector Math Library VECTOR_OP_TAN = 101, diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp index 7e154bdd3e7a32c2256392ec544b0984a1d9e53d..89352c1f95e58c50fcdae757e62fd9a8a3ab5a18 100644 --- a/src/hotspot/share/runtime/arguments.cpp +++ b/src/hotspot/share/runtime/arguments.cpp @@ -528,6 +528,10 @@ static SpecialFlag const special_jvm_flags[] = { { "FlightRecorder", JDK_Version::jdk(13), JDK_Version::undefined(), JDK_Version::undefined() }, { "FilterSpuriousWakeups", JDK_Version::jdk(18), JDK_Version::jdk(19), JDK_Version::jdk(20) }, { "MinInliningThreshold", JDK_Version::jdk(18), JDK_Version::jdk(19), JDK_Version::jdk(20) }, + { "DumpSharedSpaces", JDK_Version::jdk(18), JDK_Version::jdk(19), JDK_Version::undefined() }, + { 
"DynamicDumpSharedSpaces", JDK_Version::jdk(18), JDK_Version::jdk(19), JDK_Version::undefined() }, + { "RequireSharedSpaces", JDK_Version::jdk(18), JDK_Version::jdk(19), JDK_Version::undefined() }, + { "UseSharedSpaces", JDK_Version::jdk(18), JDK_Version::jdk(19), JDK_Version::undefined() }, // --- Deprecated alias flags (see also aliased_jvm_flags) - sorted by obsolete_in then expired_in: { "DefaultMaxRAMFraction", JDK_Version::jdk(8), JDK_Version::undefined(), JDK_Version::undefined() }, @@ -1439,6 +1443,8 @@ bool Arguments::check_unsupported_cds_runtime_properties() { if (get_property(unsupported_properties[i]) != NULL) { if (RequireSharedSpaces) { warning("CDS is disabled when the %s option is specified.", unsupported_options[i]); + } else { + log_info(cds)("CDS is disabled when the %s option is specified.", unsupported_options[i]); } return true; } @@ -3117,17 +3123,11 @@ jint Arguments::finalize_vm_init_args(bool patch_mod_javabase) { // TODO: revisit the following for the static archive case. set_mode_flags(_int); } - if (DumpSharedSpaces || ArchiveClassesAtExit != NULL) { - // Always verify non-system classes during CDS dump - if (!BytecodeVerificationRemote) { - BytecodeVerificationRemote = true; - log_info(cds)("All non-system classes will be verified (-Xverify:remote) during CDS dump time."); - } - } // RecordDynamicDumpInfo is not compatible with ArchiveClassesAtExit if (ArchiveClassesAtExit != NULL && RecordDynamicDumpInfo) { - log_info(cds)("RecordDynamicDumpInfo is for jcmd only, could not set with -XX:ArchiveClassesAtExit."); + jio_fprintf(defaultStream::output_stream(), + "-XX:+RecordDynamicDumpInfo cannot be used with -XX:ArchiveClassesAtExit.\n"); return JNI_ERR; } @@ -3143,6 +3143,14 @@ jint Arguments::finalize_vm_init_args(bool patch_mod_javabase) { if (UseSharedSpaces && !DumpSharedSpaces && check_unsupported_cds_runtime_properties()) { FLAG_SET_DEFAULT(UseSharedSpaces, false); } + + if (DumpSharedSpaces || DynamicDumpSharedSpaces) { + // Always verify non-system classes during CDS dump + if (!BytecodeVerificationRemote) { + BytecodeVerificationRemote = true; + log_info(cds)("All non-system classes will be verified (-Xverify:remote) during CDS dump time."); + } + } #endif #ifndef CAN_SHOW_REGISTERS_ON_ASSERT @@ -3422,9 +3430,7 @@ jint Arguments::set_shared_spaces_flags_and_archive_paths() { #if INCLUDE_CDS // Initialize shared archive paths which could include both base and dynamic archive paths // This must be after set_ergonomics_flags() called so flag UseCompressedOops is set properly. 
- if (!init_shared_archive_paths()) { - return JNI_ENOMEM; - } + init_shared_archive_paths(); #endif // INCLUDE_CDS return JNI_OK; } @@ -3487,45 +3493,45 @@ void Arguments::extract_shared_archive_paths(const char* archive_path, len = end_ptr - begin_ptr; cur_path = NEW_C_HEAP_ARRAY(char, len + 1, mtInternal); strncpy(cur_path, begin_ptr, len + 1); - //cur_path[len] = '\0'; FileMapInfo::check_archive((const char*)cur_path, false /*is_static*/); *top_archive_path = cur_path; } -bool Arguments::init_shared_archive_paths() { - if (ArchiveClassesAtExit != NULL) { +void Arguments::init_shared_archive_paths() { + if (ArchiveClassesAtExit != nullptr) { + assert(!RecordDynamicDumpInfo, "already checked"); if (DumpSharedSpaces) { vm_exit_during_initialization("-XX:ArchiveClassesAtExit cannot be used with -Xshare:dump"); } - if (FLAG_SET_CMDLINE(DynamicDumpSharedSpaces, true) != JVMFlag::SUCCESS) { - return false; - } check_unsupported_dumping_properties(); - SharedDynamicArchivePath = os::strdup_check_oom(ArchiveClassesAtExit, mtArguments); - } else { - if (SharedDynamicArchivePath != nullptr) { - os::free(SharedDynamicArchivePath); - SharedDynamicArchivePath = nullptr; - } } - if (SharedArchiveFile == NULL) { + + if (SharedArchiveFile == nullptr) { SharedArchivePath = get_default_shared_archive_path(); } else { int archives = num_archives(SharedArchiveFile); - if (is_dumping_archive()) { - if (archives > 1) { - vm_exit_during_initialization( - "Cannot have more than 1 archive file specified in -XX:SharedArchiveFile during CDS dumping"); - } - if (DynamicDumpSharedSpaces) { - if (os::same_files(SharedArchiveFile, ArchiveClassesAtExit)) { - vm_exit_during_initialization( - "Cannot have the same archive file specified for -XX:SharedArchiveFile and -XX:ArchiveClassesAtExit", - SharedArchiveFile); - } - } + assert(archives > 0, "must be"); + + if (is_dumping_archive() && archives > 1) { + vm_exit_during_initialization( + "Cannot have more than 1 archive file specified in -XX:SharedArchiveFile during CDS dumping"); } - if (!is_dumping_archive()){ + + if (DumpSharedSpaces) { + assert(archives == 1, "must be"); + // Static dump is simple: only one archive is allowed in SharedArchiveFile. This file + // will be overwritten regardless of its contents + SharedArchivePath = os::strdup_check_oom(SharedArchiveFile, mtArguments); + } else { + // SharedArchiveFile may specify one or two files. In case (c), the path for base.jsa + // is read from top.jsa + // (a) 1 file: -XX:SharedArchiveFile=base.jsa + // (b) 2 files: -XX:SharedArchiveFile=base.jsa:top.jsa + // (c) 2 files: -XX:SharedArchiveFile=top.jsa + // + // However, if either RecordDynamicDumpInfo or ArchiveClassesAtExit is used, we do not + // allow cases (b) and (c). Case (b) is already checked above. 
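+ // Case (c) is rejected further below, once the base and top paths have been extracted (see "Check for case (c)").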
+ if (archives > 2) { vm_exit_during_initialization( "Cannot have more than 2 archive files specified in the -XX:SharedArchiveFile option"); @@ -3543,11 +3549,26 @@ bool Arguments::init_shared_archive_paths() { extract_shared_archive_paths((const char*)SharedArchiveFile, &SharedArchivePath, &SharedDynamicArchivePath); } - } else { // CDS dumping - SharedArchivePath = os::strdup_check_oom(SharedArchiveFile, mtArguments); + + if (SharedDynamicArchivePath != nullptr) { + // Check for case (c) + if (RecordDynamicDumpInfo) { + vm_exit_during_initialization("-XX:+RecordDynamicDumpInfo is unsupported when a dynamic CDS archive is specified in -XX:SharedArchiveFile", + SharedArchiveFile); + } + if (ArchiveClassesAtExit != nullptr) { + vm_exit_during_initialization("-XX:ArchiveClassesAtExit is unsupported when a dynamic CDS archive is specified in -XX:SharedArchiveFile", + SharedArchiveFile); + } + } + + if (ArchiveClassesAtExit != nullptr && os::same_files(SharedArchiveFile, ArchiveClassesAtExit)) { + vm_exit_during_initialization( + "Cannot have the same archive file specified for -XX:SharedArchiveFile and -XX:ArchiveClassesAtExit", + SharedArchiveFile); + } } } - return (SharedArchivePath != NULL); } #endif // INCLUDE_CDS diff --git a/src/hotspot/share/runtime/arguments.hpp b/src/hotspot/share/runtime/arguments.hpp index dbd20d9257c07c01fe8f0a025b6563f1a365be93..0867f5c5c4d9c1935920baffd13460c1432afb78 100644 --- a/src/hotspot/share/runtime/arguments.hpp +++ b/src/hotspot/share/runtime/arguments.hpp @@ -618,7 +618,7 @@ class Arguments : AllStatic { static void fix_appclasspath(); static char* get_default_shared_archive_path() NOT_CDS_RETURN_(NULL); - static bool init_shared_archive_paths() NOT_CDS_RETURN_(false); + static void init_shared_archive_paths() NOT_CDS_RETURN; // Operation modi static Mode mode() { return _mode; } diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index 81bfbaaf033ef16230a5c8d7a04a0f79a8c6ae21..45f16aefa1f481336feb810671319351c0c6cefe 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -314,9 +314,6 @@ const intx ObjectAlignmentInBytes = 8; product(bool, InlineUnsafeOps, true, DIAGNOSTIC, \ "Inline memory ops (native methods) from Unsafe") \ \ - product(bool, CriticalJNINatives, false, \ - "(Deprecated) Check for critical JNI entry points") \ - \ product(bool, UseAESIntrinsics, false, DIAGNOSTIC, \ "Use intrinsics for AES versions of crypto") \ \ @@ -1804,21 +1801,21 @@ const intx ObjectAlignmentInBytes = 8; /* Shared spaces */ \ \ product(bool, UseSharedSpaces, true, \ - "Use shared spaces for metadata") \ + "(Deprecated) Use shared spaces for metadata") \ \ product(bool, VerifySharedSpaces, false, \ "Verify integrity of shared spaces") \ \ product(bool, RequireSharedSpaces, false, \ - "Require shared spaces for metadata") \ + "(Deprecated) Require shared spaces for metadata") \ \ product(bool, DumpSharedSpaces, false, \ - "Special mode: JVM reads a class list, loads classes, builds " \ - "shared spaces, and dumps the shared spaces to a file to be " \ - "used in future JVM runs") \ + "(Deprecated) Special mode: JVM reads a class list, loads " \ + "classes, builds shared spaces, and dumps the shared spaces to " \ + "a file to be used in future JVM runs") \ \ product(bool, DynamicDumpSharedSpaces, false, \ - "Dynamic archive") \ + "(Deprecated) Dynamic archive") \ \ product(bool, RecordDynamicDumpInfo, false, \ "Record class info for jcmd VM.cds dynamic_dump") \ diff --git 
a/src/hotspot/share/runtime/handshake.cpp b/src/hotspot/share/runtime/handshake.cpp index 6d3d4d55bddc1880d20f87f33fcea0d593070b4e..2605cee98fd17de8559e22022c63e06f7ef9f36e 100644 --- a/src/hotspot/share/runtime/handshake.cpp +++ b/src/hotspot/share/runtime/handshake.cpp @@ -355,7 +355,7 @@ void Handshake::execute(HandshakeClosure* hs_cl, ThreadsListHandle* tlh, JavaThr guarantee(target != nullptr, "must be"); if (tlh == nullptr) { - guarantee(Thread::is_JavaThread_protected(target, /* checkTLHOnly */ true), + guarantee(Thread::is_JavaThread_protected_by_TLH(target), "missing ThreadsListHandle in calling context."); target->handshake_state()->add_operation(&op); } else if (tlh->includes(target)) { @@ -412,7 +412,7 @@ void Handshake::execute(AsyncHandshakeClosure* hs_cl, JavaThread* target) { if (current != target) { // Another thread is handling the request and it must be protecting // the target. - guarantee(Thread::is_JavaThread_protected(target, /* checkTLHOnly */ true), + guarantee(Thread::is_JavaThread_protected_by_TLH(target), "missing ThreadsListHandle in calling context."); } // Implied else: diff --git a/src/hotspot/share/runtime/java.cpp b/src/hotspot/share/runtime/java.cpp index bb5b46af6c54283df10bf6448a18b762bff73a9c..ee7e6abb40acc4f0021497fc57b86df8c24804e5 100644 --- a/src/hotspot/share/runtime/java.cpp +++ b/src/hotspot/share/runtime/java.cpp @@ -503,9 +503,10 @@ void before_exit(JavaThread* thread) { os::terminate_signal_thread(); #if INCLUDE_CDS - if (DynamicDumpSharedSpaces) { + if (DynamicArchive::should_dump_at_vm_exit()) { + assert(ArchiveClassesAtExit != NULL, "Must be already set"); ExceptionMark em(thread); - DynamicArchive::dump(thread); + DynamicArchive::dump(ArchiveClassesAtExit, thread); if (thread->has_pending_exception()) { ResourceMark rm(thread); oop pending_exception = thread->pending_exception(); diff --git a/src/hotspot/share/runtime/jniHandles.cpp b/src/hotspot/share/runtime/jniHandles.cpp index 89c2427ff1c16360aa584377de3f00c279b2b0b6..772f4012996d2223ecebf69a98e4d3ab95612031 100644 --- a/src/hotspot/share/runtime/jniHandles.cpp +++ b/src/hotspot/share/runtime/jniHandles.cpp @@ -56,18 +56,17 @@ void jni_handles_init() { } jobject JNIHandles::make_local(oop obj) { - return make_local(Thread::current(), obj); + return make_local(JavaThread::current(), obj); } // Used by NewLocalRef which requires NULL on out-of-memory -jobject JNIHandles::make_local(Thread* thread, oop obj, AllocFailType alloc_failmode) { +jobject JNIHandles::make_local(JavaThread* thread, oop obj, AllocFailType alloc_failmode) { if (obj == NULL) { return NULL; // ignore null handles } else { assert(oopDesc::is_oop(obj), "not an oop"); - assert(thread->is_Java_thread(), "not a Java thread"); assert(!current_thread_in_native(), "must not be in native"); - return thread->active_handles()->allocate_handle(obj, alloc_failmode); + return thread->active_handles()->allocate_handle(thread, obj, alloc_failmode); } } @@ -187,7 +186,7 @@ inline bool is_storage_handle(const OopStorage* storage, const oop* ptr) { } -jobjectRefType JNIHandles::handle_type(Thread* thread, jobject handle) { +jobjectRefType JNIHandles::handle_type(JavaThread* thread, jobject handle) { assert(handle != NULL, "precondition"); jobjectRefType result = JNIInvalidRefType; if (is_jweak(handle)) { @@ -205,9 +204,7 @@ jobjectRefType JNIHandles::handle_type(Thread* thread, jobject handle) { case OopStorage::INVALID_ENTRY: // Not in global storage. Might be a local handle. 
- if (is_local_handle(thread, handle) || - (thread->is_Java_thread() && - is_frame_handle(JavaThread::cast(thread), handle))) { + if (is_local_handle(thread, handle) || is_frame_handle(thread, handle)) { result = JNILocalRefType; } break; @@ -220,7 +217,7 @@ jobjectRefType JNIHandles::handle_type(Thread* thread, jobject handle) { } -bool JNIHandles::is_local_handle(Thread* thread, jobject handle) { +bool JNIHandles::is_local_handle(JavaThread* thread, jobject handle) { assert(handle != NULL, "precondition"); JNIHandleBlock* block = thread->active_handles(); @@ -305,12 +302,7 @@ bool JNIHandles::current_thread_in_native() { JavaThread::cast(thread)->thread_state() == _thread_in_native); } - -int JNIHandleBlock::_blocks_allocated = 0; -JNIHandleBlock* JNIHandleBlock::_block_free_list = NULL; -#ifndef PRODUCT -JNIHandleBlock* JNIHandleBlock::_block_list = NULL; -#endif +int JNIHandleBlock::_blocks_allocated = 0; static inline bool is_tagged_free_list(uintptr_t value) { return (value & 1u) != 0; @@ -343,44 +335,28 @@ void JNIHandleBlock::zap() { } #endif // ASSERT -JNIHandleBlock* JNIHandleBlock::allocate_block(Thread* thread, AllocFailType alloc_failmode) { - assert(thread == NULL || thread == Thread::current(), "sanity check"); +JNIHandleBlock* JNIHandleBlock::allocate_block(JavaThread* thread, AllocFailType alloc_failmode) { + // The VM thread can allocate a handle block on behalf of another thread during a safepoint. + assert(thread == NULL || thread == Thread::current() || SafepointSynchronize::is_at_safepoint(), + "sanity check"); JNIHandleBlock* block; // Check the thread-local free list for a block so we don't // have to acquire a mutex. if (thread != NULL && thread->free_handle_block() != NULL) { block = thread->free_handle_block(); thread->set_free_handle_block(block->_next); - } - else { - // locking with safepoint checking introduces a potential deadlock: - // - we would hold JNIHandleBlockFreeList_lock and then Threads_lock - // - another would hold Threads_lock (jni_AttachCurrentThread) and then - // JNIHandleBlockFreeList_lock (JNIHandleBlock::allocate_block) - MutexLocker ml(JNIHandleBlockFreeList_lock, - Mutex::_no_safepoint_check_flag); - if (_block_free_list == NULL) { - // Allocate new block - if (alloc_failmode == AllocFailStrategy::RETURN_NULL) { - block = new (std::nothrow) JNIHandleBlock(); - if (block == NULL) { - return NULL; - } - } else { - block = new JNIHandleBlock(); + } else { + // Allocate new block + if (alloc_failmode == AllocFailStrategy::RETURN_NULL) { + block = new (std::nothrow) JNIHandleBlock(); + if (block == NULL) { + return NULL; } - _blocks_allocated++; - block->zap(); - #ifndef PRODUCT - // Link new block to list of all allocated blocks - block->_block_list_link = _block_list; - _block_list = block; - #endif } else { - // Get block from free list - block = _block_free_list; - _block_free_list = _block_free_list->_next; + block = new JNIHandleBlock(); } + Atomic::inc(&_blocks_allocated); + block->zap(); } block->_top = 0; block->_next = NULL; @@ -394,7 +370,7 @@ JNIHandleBlock* JNIHandleBlock::allocate_block(Thread* thread, AllocFailType all } -void JNIHandleBlock::release_block(JNIHandleBlock* block, Thread* thread) { +void JNIHandleBlock::release_block(JNIHandleBlock* block, JavaThread* thread) { assert(thread == NULL || thread == Thread::current(), "sanity check"); JNIHandleBlock* pop_frame_link = block->pop_frame_link(); // Put returned block at the beginning of the thread-local free list. 
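// Blocks that are not cached on the thread are now simply deleted instead of
// being returned to a global free list (see below).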
@@ -415,20 +391,8 @@ void JNIHandleBlock::release_block(JNIHandleBlock* block, Thread* thread) { block = NULL; } if (block != NULL) { - // Return blocks to free list - // locking with safepoint checking introduces a potential deadlock: - // - we would hold JNIHandleBlockFreeList_lock and then Threads_lock - // - another would hold Threads_lock (jni_AttachCurrentThread) and then - // JNIHandleBlockFreeList_lock (JNIHandleBlock::allocate_block) - MutexLocker ml(JNIHandleBlockFreeList_lock, - Mutex::_no_safepoint_check_flag); - while (block != NULL) { - block->zap(); - JNIHandleBlock* next = block->_next; - block->_next = _block_free_list; - _block_free_list = block; - block = next; - } + Atomic::dec(&_blocks_allocated); + delete block; } if (pop_frame_link != NULL) { // As a sanity check we release blocks pointed to by the pop_frame_link. @@ -468,7 +432,7 @@ void JNIHandleBlock::oops_do(OopClosure* f) { } -jobject JNIHandleBlock::allocate_handle(oop obj, AllocFailType alloc_failmode) { +jobject JNIHandleBlock::allocate_handle(JavaThread* caller, oop obj, AllocFailType alloc_failmode) { assert(Universe::heap()->is_in(obj), "sanity check"); if (_top == 0) { // This is the first allocation or the initial block got zapped when @@ -516,26 +480,21 @@ jobject JNIHandleBlock::allocate_handle(oop obj, AllocFailType alloc_failmode) { if (_last->_next != NULL) { // update last and retry _last = _last->_next; - return allocate_handle(obj, alloc_failmode); + return allocate_handle(caller, obj, alloc_failmode); } // No space available, we have to rebuild free list or expand if (_allocate_before_rebuild == 0) { rebuild_free_list(); // updates _allocate_before_rebuild counter } else { - // Append new block - Thread* thread = Thread::current(); - Handle obj_handle(thread, obj); - // This can block, so we need to preserve obj across call. 
- _last->_next = JNIHandleBlock::allocate_block(thread, alloc_failmode); + _last->_next = JNIHandleBlock::allocate_block(caller, alloc_failmode); if (_last->_next == NULL) { return NULL; } _last = _last->_next; _allocate_before_rebuild--; - obj = obj_handle(); } - return allocate_handle(obj, alloc_failmode); // retry + return allocate_handle(caller, obj, alloc_failmode); // retry } void JNIHandleBlock::rebuild_free_list() { @@ -612,46 +571,3 @@ const size_t JNIHandleBlock::get_number_of_live_handles() { size_t JNIHandleBlock::memory_usage() const { return length() * sizeof(JNIHandleBlock); } - - -#ifndef PRODUCT - -bool JNIHandles::is_local_handle(jobject handle) { - return JNIHandleBlock::any_contains(handle); -} - -bool JNIHandleBlock::any_contains(jobject handle) { - assert(handle != NULL, "precondition"); - for (JNIHandleBlock* current = _block_list; current != NULL; current = current->_block_list_link) { - if (current->contains(handle)) { - return true; - } - } - return false; -} - -void JNIHandleBlock::print_statistics() { - int used_blocks = 0; - int free_blocks = 0; - int used_handles = 0; - int free_handles = 0; - JNIHandleBlock* block = _block_list; - while (block != NULL) { - if (block->_top > 0) { - used_blocks++; - } else { - free_blocks++; - } - used_handles += block->_top; - free_handles += (block_size_in_oops - block->_top); - block = block->_block_list_link; - } - tty->print_cr("JNIHandleBlocks statistics"); - tty->print_cr("- blocks allocated: %d", used_blocks + free_blocks); - tty->print_cr("- blocks in use: %d", used_blocks); - tty->print_cr("- blocks free: %d", free_blocks); - tty->print_cr("- handles in use: %d", used_handles); - tty->print_cr("- handles free: %d", free_handles); -} - -#endif diff --git a/src/hotspot/share/runtime/jniHandles.hpp b/src/hotspot/share/runtime/jniHandles.hpp index d64caa58c605521ee27c15acec34450cc2dbf1ae..c36b761008ad3cee1e8807a3b7629d021263f67f 100644 --- a/src/hotspot/share/runtime/jniHandles.hpp +++ b/src/hotspot/share/runtime/jniHandles.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -84,7 +84,7 @@ class JNIHandles : AllStatic { // Local handles static jobject make_local(oop obj); - static jobject make_local(Thread* thread, oop obj, // Faster version when current thread is known + static jobject make_local(JavaThread* thread, oop obj, // Faster version when current thread is known AllocFailType alloc_failmode = AllocFailStrategy::EXIT_OOM); inline static void destroy_local(jobject handle); @@ -104,20 +104,15 @@ class JNIHandles : AllStatic { static void print(); static void verify(); // The category predicates all require handle != NULL. - static bool is_local_handle(Thread* thread, jobject handle); + static bool is_local_handle(JavaThread* thread, jobject handle); static bool is_frame_handle(JavaThread* thread, jobject handle); static bool is_global_handle(jobject handle); static bool is_weak_global_handle(jobject handle); static size_t global_handle_memory_usage(); static size_t weak_global_handle_memory_usage(); -#ifndef PRODUCT - // Is handle from any local block of any thread? - static bool is_local_handle(jobject handle); -#endif - // precondition: handle != NULL. 
- static jobjectRefType handle_type(Thread* thread, jobject handle); + static jobjectRefType handle_type(JavaThread* thread, jobject handle); // Garbage collection support(global handles only, local handles are traversed from thread) // Traversal of regular global handles @@ -145,6 +140,7 @@ class JNIHandleBlock : public CHeapObj { uintptr_t _handles[block_size_in_oops]; // The handles int _top; // Index of next unused handle + int _allocate_before_rebuild; // Number of blocks to allocate before rebuilding free list JNIHandleBlock* _next; // Link to next block // The following instance variables are only used by the first block in a chain. @@ -152,17 +148,9 @@ class JNIHandleBlock : public CHeapObj { JNIHandleBlock* _last; // Last block in use JNIHandleBlock* _pop_frame_link; // Block to restore on PopLocalFrame call uintptr_t* _free_list; // Handle free list - int _allocate_before_rebuild; // Number of blocks to allocate before rebuilding free list // Check JNI, "planned capacity" for current frame (or push/ensure) size_t _planned_capacity; - - #ifndef PRODUCT - JNIHandleBlock* _block_list_link; // Link for list below - static JNIHandleBlock* _block_list; // List of all allocated blocks (for debugging only) - #endif - - static JNIHandleBlock* _block_free_list; // Free list of currently unused blocks static int _blocks_allocated; // For debugging/printing // Fill block with bad_handle values @@ -176,11 +164,11 @@ class JNIHandleBlock : public CHeapObj { public: // Handle allocation - jobject allocate_handle(oop obj, AllocFailType alloc_failmode = AllocFailStrategy::EXIT_OOM); + jobject allocate_handle(JavaThread* caller, oop obj, AllocFailType alloc_failmode = AllocFailStrategy::EXIT_OOM); // Block allocation and block free list management - static JNIHandleBlock* allocate_block(Thread* thread = NULL, AllocFailType alloc_failmode = AllocFailStrategy::EXIT_OOM); - static void release_block(JNIHandleBlock* block, Thread* thread = NULL); + static JNIHandleBlock* allocate_block(JavaThread* thread = NULL, AllocFailType alloc_failmode = AllocFailStrategy::EXIT_OOM); + static void release_block(JNIHandleBlock* block, JavaThread* thread = NULL); // JNI PushLocalFrame/PopLocalFrame support JNIHandleBlock* pop_frame_link() const { return _pop_frame_link; } @@ -203,10 +191,6 @@ class JNIHandleBlock : public CHeapObj { bool contains(jobject handle) const; // Does this block contain handle size_t length() const; // Length of chain starting with this block size_t memory_usage() const; - #ifndef PRODUCT - static bool any_contains(jobject handle); // Does any block currently in use contain handle - static void print_statistics(); - #endif }; #endif // SHARE_RUNTIME_JNIHANDLES_HPP diff --git a/src/hotspot/share/runtime/mutexLocker.cpp b/src/hotspot/share/runtime/mutexLocker.cpp index 5f03979314ef093c58ec1f059fcb1d1409de1f0b..257f853fb92ad45e92c3175e1429825763ec1f28 100644 --- a/src/hotspot/share/runtime/mutexLocker.cpp +++ b/src/hotspot/share/runtime/mutexLocker.cpp @@ -48,7 +48,6 @@ Mutex* Module_lock = NULL; Mutex* CompiledIC_lock = NULL; Mutex* InlineCacheBuffer_lock = NULL; Mutex* VMStatistic_lock = NULL; -Mutex* JNIHandleBlockFreeList_lock = NULL; Mutex* JmethodIdCreation_lock = NULL; Mutex* JfieldIdCreation_lock = NULL; Monitor* JNICritical_lock = NULL; @@ -132,6 +131,7 @@ Mutex* UnsafeJlong_lock = NULL; Mutex* CodeHeapStateAnalytics_lock = NULL; Mutex* Metaspace_lock = NULL; +Monitor* MetaspaceCritical_lock = NULL; Mutex* ClassLoaderDataGraph_lock = NULL; Monitor* ThreadsSMRDelete_lock = NULL; 
Mutex* ThreadIdTableCreate_lock = NULL; @@ -244,6 +244,7 @@ void mutex_init() { def(RawMonitor_lock , PaddedMutex , nosafepoint-1); def(Metaspace_lock , PaddedMutex , nosafepoint-3); + def(MetaspaceCritical_lock , PaddedMonitor, nosafepoint-1); def(Patching_lock , PaddedMutex , nosafepoint); // used for safepointing and code patching. def(MonitorDeflation_lock , PaddedMonitor, nosafepoint); // used for monitor deflation thread operations @@ -259,7 +260,6 @@ void mutex_init() { def(SharedDictionary_lock , PaddedMutex , safepoint); def(VMStatistic_lock , PaddedMutex , safepoint); - def(JNIHandleBlockFreeList_lock , PaddedMutex , nosafepoint-1); // handles are used by VM thread def(SignatureHandlerLibrary_lock , PaddedMutex , safepoint); def(SymbolArena_lock , PaddedMutex , nosafepoint); def(ExceptionCache_lock , PaddedMutex , safepoint); diff --git a/src/hotspot/share/runtime/mutexLocker.hpp b/src/hotspot/share/runtime/mutexLocker.hpp index c4c7215006067fb8395fb7b0853d01e64c468c00..a40b0592cc44d6c1d291c6da32e57a077a367379 100644 --- a/src/hotspot/share/runtime/mutexLocker.hpp +++ b/src/hotspot/share/runtime/mutexLocker.hpp @@ -40,7 +40,6 @@ extern Mutex* Module_lock; // a lock on module and package extern Mutex* CompiledIC_lock; // a lock used to guard compiled IC patching and access extern Mutex* InlineCacheBuffer_lock; // a lock used to guard the InlineCacheBuffer extern Mutex* VMStatistic_lock; // a lock used to guard statistics count increment -extern Mutex* JNIHandleBlockFreeList_lock; // a lock on the JNI handle block free list extern Mutex* JmethodIdCreation_lock; // a lock on creating JNI method identifiers extern Mutex* JfieldIdCreation_lock; // a lock on creating JNI static field identifiers extern Monitor* JNICritical_lock; // a lock used while entering and exiting JNI critical regions, allows GC to sometimes get in @@ -140,7 +139,8 @@ extern Monitor* JfrThreadSampler_lock; // used to suspend/resume JFR t extern Mutex* UnsafeJlong_lock; // provides Unsafe atomic updates to jlongs on platforms that don't support cx8 #endif -extern Mutex* Metaspace_lock; // protects Metaspace virtualspace and chunk expansions +extern Mutex* Metaspace_lock; // protects Metaspace virtualspace and chunk expansions +extern Monitor* MetaspaceCritical_lock; // synchronizes failed metaspace allocations that risk throwing metaspace OOM extern Mutex* ClassLoaderDataGraph_lock; // protects CLDG list, needed for concurrent unloading diff --git a/src/hotspot/share/runtime/nonJavaThread.cpp b/src/hotspot/share/runtime/nonJavaThread.cpp index b6291c6eabebc8f119318308d1da964a06b1c1b8..e85866370eecbcb49f2b8a33776d02f9bbe7fcda 100644 --- a/src/hotspot/share/runtime/nonJavaThread.cpp +++ b/src/hotspot/share/runtime/nonJavaThread.cpp @@ -234,7 +234,6 @@ int WatcherThread::sleep() const { void WatcherThread::run() { assert(this == watcher_thread(), "just checking"); - this->set_active_handles(JNIHandleBlock::allocate_block()); while (true) { assert(watcher_thread() == Thread::current(), "thread consistency check"); assert(watcher_thread() == this, "thread consistency check"); diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp index 173b05859d702756c1c107c1a798f0a396f4dd60..52c91be8f25d7eea00be4d5dda2ef1db6467a29e 100644 --- a/src/hotspot/share/runtime/os.cpp +++ b/src/hotspot/share/runtime/os.cpp @@ -1169,13 +1169,6 @@ void os::print_location(outputStream* st, intptr_t x, bool verbose) { st->print_cr(INTPTR_FORMAT " is a weak global jni handle", p2i(addr)); return; } -#ifndef PRODUCT - // 
we don't keep the block list in product mode - if (JNIHandles::is_local_handle((jobject) addr)) { - st->print_cr(INTPTR_FORMAT " is a local jni handle", p2i(addr)); - return; - } -#endif } // Check if addr belongs to a Java thread. diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp index c4f95bad54ec499bd00e75c6b14c96513b4209fe..a5de65ea5ab94c9ae2e36be412c666c535e495f2 100644 --- a/src/hotspot/share/runtime/sharedRuntime.cpp +++ b/src/hotspot/share/runtime/sharedRuntime.cpp @@ -497,7 +497,7 @@ address SharedRuntime::raw_exception_handler_for_return_address(JavaThread* curr return SharedRuntime::deopt_blob()->unpack_with_exception(); } else { // The deferred StackWatermarkSet::after_unwind check will be performed in - // * OptoRuntime::rethrow_C for C2 code + // * OptoRuntime::handle_exception_C_helper for C2 code // * exception_handler_for_pc_helper via Runtime1::handle_exception_from_callee_id for C1 code return nm->exception_begin(); } @@ -3008,17 +3008,11 @@ bool AdapterHandlerEntry::compare_code(AdapterHandlerEntry* other) { void AdapterHandlerLibrary::create_native_wrapper(const methodHandle& method) { ResourceMark rm; nmethod* nm = NULL; - address critical_entry = NULL; assert(method->is_native(), "must be native"); assert(method->is_method_handle_intrinsic() || method->has_native_function(), "must have something valid to call!"); - if (CriticalJNINatives && !method->is_method_handle_intrinsic()) { - // We perform the I/O with transition to native before acquiring AdapterHandlerLibrary_lock. - critical_entry = NativeLookup::lookup_critical_entry(method); - } - { // Perform the work while holding the lock, but perform any printing outside the lock MutexLocker mu(AdapterHandlerLibrary_lock); @@ -3061,7 +3055,7 @@ void AdapterHandlerLibrary::create_native_wrapper(const methodHandle& method) { int comp_args_on_stack = SharedRuntime::java_calling_convention(sig_bt, regs, total_args_passed); // Generate the compiled-to-native wrapper code - nm = SharedRuntime::generate_native_wrapper(&_masm, method, compile_id, sig_bt, regs, ret_type, critical_entry); + nm = SharedRuntime::generate_native_wrapper(&_masm, method, compile_id, sig_bt, regs, ret_type); if (nm != NULL) { { diff --git a/src/hotspot/share/runtime/sharedRuntime.hpp b/src/hotspot/share/runtime/sharedRuntime.hpp index 423e52df6e72c0e978d7fdccfc37b5e557c5d84e..ec816928ac79c0cdfd9ebf374b6eb1e66b6333e1 100644 --- a/src/hotspot/share/runtime/sharedRuntime.hpp +++ b/src/hotspot/share/runtime/sharedRuntime.hpp @@ -474,15 +474,13 @@ class SharedRuntime: AllStatic { // returns. // // The wrapper may contain special-case code if the given method - // is a JNI critical method, or a compiled method handle adapter, - // such as _invokeBasic, _linkToVirtual, etc. + // is a compiled method handle adapter, such as _invokeBasic, _linkToVirtual, etc. static nmethod* generate_native_wrapper(MacroAssembler* masm, const methodHandle& method, int compile_id, BasicType* sig_bt, VMRegPair* regs, - BasicType ret_type, - address critical_entry); + BasicType ret_type); // A compiled caller has just called the interpreter, but compiled code // exists. Patch the caller so he no longer calls into the interpreter. 
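The thread.cpp changes below split the TLH check out of is_JavaThread_protected() into a dedicated is_JavaThread_protected_by_TLH(). The caller-side pattern these guarantees expect looks roughly like the following sketch (illustrative, not code from this patch; 'jthread' and 'closure' are assumed to be a jobject and a HandshakeClosure already in scope):

// Sketch: a ThreadsListHandle keeps 'target' from exiting while it is used.
ThreadsListHandle tlh;
JavaThread* target = nullptr;
bool is_alive = tlh.cv_internal_thread_to_JavaThread(jthread, &target, nullptr);
if (is_alive) {
  // 'target' is protected by 'tlh' here; passing tlh == nullptr instead would
  // require Thread::is_JavaThread_protected_by_TLH(target) to hold.
  Handshake::execute(&closure, &tlh, target);
}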
diff --git a/src/hotspot/share/runtime/thread.cpp b/src/hotspot/share/runtime/thread.cpp index f570509ecd1be1cf16d8faafa4ffef370c3eee1a..b4a9b0c59a81f03cebc0197f97d078efb7e334a7 100644 --- a/src/hotspot/share/runtime/thread.cpp +++ b/src/hotspot/share/runtime/thread.cpp @@ -220,8 +220,6 @@ Thread::Thread() { DEBUG_ONLY(_current_resource_mark = NULL;) set_handle_area(new (mtThread) HandleArea(NULL)); set_metadata_handles(new (ResourceObj::C_HEAP, mtClass) GrowableArray(30, mtClass)); - set_active_handles(NULL); - set_free_handle_block(NULL); set_last_handle_mark(NULL); DEBUG_ONLY(_missed_ic_stub_refill_verifier = NULL); @@ -436,60 +434,71 @@ void Thread::check_for_dangling_thread_pointer(Thread *thread) { #endif // Is the target JavaThread protected by the calling Thread or by some other -// mechanism? If checkTLHOnly is true (default is false), then we only check -// if the target JavaThread is protected by a ThreadsList (if any) associated -// with the calling Thread. +// mechanism? // -bool Thread::is_JavaThread_protected(const JavaThread* p, bool checkTLHOnly) { +bool Thread::is_JavaThread_protected(const JavaThread* target) { Thread* current_thread = Thread::current(); - if (!checkTLHOnly) { - // Do the simplest check first: - if (SafepointSynchronize::is_at_safepoint()) { - // The target is protected since JavaThreads cannot exit - // while we're at a safepoint. - return true; - } - // If the target hasn't been started yet then it is trivially - // "protected". We assume the caller is the thread that will do - // the starting. - if (p->osthread() == NULL || p->osthread()->get_state() <= INITIALIZED) { - return true; - } + // Do the simplest check first: + if (SafepointSynchronize::is_at_safepoint()) { + // The target is protected since JavaThreads cannot exit + // while we're at a safepoint. + return true; + } - // Now make the simple checks based on who the caller is: - if (current_thread == p || Threads_lock->owner() == current_thread) { - // Target JavaThread is self or calling thread owns the Threads_lock. - // Second check is the same as Threads_lock->owner_is_self(), - // but we already have the current thread so check directly. - return true; - } + // If the target hasn't been started yet then it is trivially + // "protected". We assume the caller is the thread that will do + // the starting. + if (target->osthread() == NULL || target->osthread()->get_state() <= INITIALIZED) { + return true; + } + + // Now make the simple checks based on who the caller is: + if (current_thread == target || Threads_lock->owner() == current_thread) { + // Target JavaThread is self or calling thread owns the Threads_lock. + // Second check is the same as Threads_lock->owner_is_self(), + // but we already have the current thread so check directly. + return true; } + // Check the ThreadsLists associated with the calling thread (if any) + // to see if one of them protects the target JavaThread: + if (is_JavaThread_protected_by_TLH(target)) { + return true; + } + + // Use this debug code with -XX:+UseNewCode to diagnose locations that + // are missing a ThreadsListHandle or other protection mechanism: + // guarantee(!UseNewCode, "current_thread=" INTPTR_FORMAT " is not protecting target=" + // INTPTR_FORMAT, p2i(current_thread), p2i(target)); + + // Note: Since 'target' isn't protected by a TLH, the call to + // target->is_handshake_safe_for() may crash, but we have debug bits so + // we'll be able to figure out what protection mechanism is missing. 
+ assert(target->is_handshake_safe_for(current_thread), "JavaThread=" INTPTR_FORMAT + " is not protected and not handshake safe.", p2i(target)); + + // The target JavaThread is not protected so it is not safe to query: + return false; +} + +// Is the target JavaThread protected by a ThreadsListHandle (TLH) associated +// with the calling Thread? +// +bool Thread::is_JavaThread_protected_by_TLH(const JavaThread* target) { + Thread* current_thread = Thread::current(); + // Check the ThreadsLists associated with the calling thread (if any) // to see if one of them protects the target JavaThread: for (SafeThreadsListPtr* stlp = current_thread->_threads_list_ptr; stlp != NULL; stlp = stlp->previous()) { - if (stlp->list()->includes(p)) { + if (stlp->list()->includes(target)) { // The target JavaThread is protected by this ThreadsList: return true; } } - if (!checkTLHOnly) { - // Use this debug code with -XX:+UseNewCode to diagnose locations that - // are missing a ThreadsListHandle or other protection mechanism: - // guarantee(!UseNewCode, "current_thread=" INTPTR_FORMAT " is not protecting p=" - // INTPTR_FORMAT, p2i(current_thread), p2i(p)); - - // Note: Since 'p' isn't protected by a TLH, the call to - // p->is_handshake_safe_for() may crash, but we have debug bits so - // we'll be able to figure out what protection mechanism is missing. - assert(p->is_handshake_safe_for(current_thread), "JavaThread=" INTPTR_FORMAT - " is not protected and not handshake safe.", p2i(p)); - } - - // The target JavaThread is not protected so it is not safe to query: + // The target JavaThread is not protected by a TLH so it is not safe to query: return false; } @@ -536,9 +545,6 @@ bool Thread::claim_par_threads_do(uintx claim_token) { } void Thread::oops_do_no_frames(OopClosure* f, CodeBlobClosure* cf) { - if (active_handles() != NULL) { - active_handles()->oops_do(f); - } // Do oop for ThreadShadow f->do_oop((oop*)&_pending_exception); handle_area()->oops_do(f); @@ -1003,6 +1009,8 @@ JavaThread::JavaThread() : _current_pending_monitor(NULL), _current_pending_monitor_is_from_java(true), _current_waiting_monitor(NULL), + _active_handles(NULL), + _free_handle_block(NULL), _Stalled(0), _monitor_chunks(nullptr), @@ -1750,13 +1758,13 @@ void JavaThread::send_thread_stop(oop java_throwable) { // - Target thread will not enter any new monitors. // bool JavaThread::java_suspend() { - guarantee(Thread::is_JavaThread_protected(this, /* checkTLHOnly */ true), + guarantee(Thread::is_JavaThread_protected_by_TLH(/* target */ this), "missing ThreadsListHandle in calling context."); return this->handshake_state()->suspend(); } bool JavaThread::java_resume() { - guarantee(Thread::is_JavaThread_protected(this, /* checkTLHOnly */ true), + guarantee(Thread::is_JavaThread_protected_by_TLH(/* target */ this), "missing ThreadsListHandle in calling context."); return this->handshake_state()->resume(); } @@ -1930,6 +1938,28 @@ void JavaThread::verify_frame_info() { } #endif +// Push on a new block of JNI handles. +void JavaThread::push_jni_handle_block() { + // Allocate a new block for JNI handles. + // Inlined code from jni_PushLocalFrame() + JNIHandleBlock* old_handles = active_handles(); + JNIHandleBlock* new_handles = JNIHandleBlock::allocate_block(this); + assert(old_handles != NULL && new_handles != NULL, "should not be NULL"); + new_handles->set_pop_frame_link(old_handles); // make sure java handles get gc'd. + set_active_handles(new_handles); +} + +// Pop off the current block of JNI handles. 
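+// Must balance a preceding push_jni_handle_block(): handles allocated in the
+// popped block become invalid, and the block itself is returned through
+// JNIHandleBlock::release_block().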
+void JavaThread::pop_jni_handle_block() { + // Release our JNI handle block + JNIHandleBlock* old_handles = active_handles(); + JNIHandleBlock* new_handles = old_handles->pop_frame_link(); + assert(new_handles != nullptr, "should never set active handles to null"); + set_active_handles(new_handles); + old_handles->set_pop_frame_link(NULL); + JNIHandleBlock::release_block(old_handles, this); +} + void JavaThread::oops_do_no_frames(OopClosure* f, CodeBlobClosure* cf) { // Verify that the deferred card marks have been flushed. assert(deferred_card_mark().is_empty(), "Should be empty during GC"); @@ -1937,6 +1967,10 @@ void JavaThread::oops_do_no_frames(OopClosure* f, CodeBlobClosure* cf) { // Traverse the GCHandles Thread::oops_do_no_frames(f, cf); + if (active_handles() != NULL) { + active_handles()->oops_do(f); + } + DEBUG_ONLY(verify_frame_info();) if (has_last_Java_frame()) { @@ -2137,7 +2171,7 @@ void JavaThread::verify() { // if vm exit occurs during initialization). These cases can all be accounted // for such that this method never returns NULL. const char* JavaThread::name() const { - if (Thread::is_JavaThread_protected(this)) { + if (Thread::is_JavaThread_protected(/* target */ this)) { // The target JavaThread is protected so get_thread_name_string() is safe: return get_thread_name_string(); } @@ -2830,7 +2864,7 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) { { TraceTime timer("Start VMThread", TRACETIME_LOG(Info, startuptime)); VMThread::create(); - Thread* vmthread = VMThread::vm_thread(); + VMThread* vmthread = VMThread::vm_thread(); if (!os::create_thread(vmthread, os::vm_thread)) { vm_exit_during_initialization("Cannot create VM thread. " @@ -2842,7 +2876,7 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) { { MonitorLocker ml(Notify_lock); os::start_thread(vmthread); - while (vmthread->active_handles() == NULL) { + while (!vmthread->is_running()) { ml.wait(); } } @@ -3269,8 +3303,8 @@ void JavaThread::invoke_shutdown_hooks() { // Link all classes for dynamic CDS dumping before vm exit. // Same operation is being done in JVM_BeforeHalt for handling the // case where the application calls System.exit(). - if (DynamicDumpSharedSpaces) { - DynamicArchive::prepare_for_dynamic_dumping(); + if (DynamicArchive::should_dump_at_vm_exit()) { + DynamicArchive::prepare_for_dump_at_exit(); } #endif diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp index 247943ed8da2aa96f3d3ea166509d7056bfe9245..74916b982759d81b8c2e0ad17320696cbf356262 100644 --- a/src/hotspot/share/runtime/thread.hpp +++ b/src/hotspot/share/runtime/thread.hpp @@ -200,10 +200,11 @@ class Thread: public ThreadShadow { public: // Is the target JavaThread protected by the calling Thread or by some other - // mechanism? If checkTLHOnly is true (default is false), then we only check - // if the target JavaThread is protected by a ThreadsList (if any) associated - // with the calling Thread. - static bool is_JavaThread_protected(const JavaThread* p, bool checkTLHOnly = false); + // mechanism? + static bool is_JavaThread_protected(const JavaThread* target); + // Is the target JavaThread protected by a ThreadsListHandle (TLH) associated + // with the calling Thread? 
+ static bool is_JavaThread_protected_by_TLH(const JavaThread* target); void* operator new(size_t size) throw() { return allocate(size, true); } void* operator new(size_t size, const std::nothrow_t& nothrow_constant) throw() { @@ -238,12 +239,6 @@ class Thread: public ThreadShadow { #endif private: - // Active_handles points to a block of handles - JNIHandleBlock* _active_handles; - - // One-element thread local free list - JNIHandleBlock* _free_handle_block; - // Point to the last handle mark HandleMark* _last_handle_mark; @@ -416,12 +411,6 @@ class Thread: public ThreadShadow { OSThread* osthread() const { return _osthread; } void set_osthread(OSThread* thread) { _osthread = thread; } - // JNI handle support - JNIHandleBlock* active_handles() const { return _active_handles; } - void set_active_handles(JNIHandleBlock* block) { _active_handles = block; } - JNIHandleBlock* free_handle_block() const { return _free_handle_block; } - void set_free_handle_block(JNIHandleBlock* block) { _free_handle_block = block; } - // Internal handle support HandleArea* handle_area() const { return _handle_area; } void set_handle_area(HandleArea* area) { _handle_area = area; } @@ -607,7 +596,6 @@ protected: // Code generation static ByteSize exception_file_offset() { return byte_offset_of(Thread, _exception_file); } static ByteSize exception_line_offset() { return byte_offset_of(Thread, _exception_line); } - static ByteSize active_handles_offset() { return byte_offset_of(Thread, _active_handles); } static ByteSize stack_base_offset() { return byte_offset_of(Thread, _stack_base); } static ByteSize stack_size_offset() { return byte_offset_of(Thread, _stack_size); } @@ -746,6 +734,13 @@ class JavaThread: public Thread { ObjectMonitor* volatile _current_pending_monitor; // ObjectMonitor this thread is waiting to lock bool _current_pending_monitor_is_from_java; // locking is from Java code ObjectMonitor* volatile _current_waiting_monitor; // ObjectMonitor on which this thread called Object.wait() + + // Active_handles points to a block of handles + JNIHandleBlock* _active_handles; + + // One-element thread local free list + JNIHandleBlock* _free_handle_block; + public: volatile intptr_t _Stalled; @@ -773,6 +768,15 @@ class JavaThread: public Thread { Atomic::store(&_current_waiting_monitor, monitor); } + // JNI handle support + JNIHandleBlock* active_handles() const { return _active_handles; } + void set_active_handles(JNIHandleBlock* block) { _active_handles = block; } + JNIHandleBlock* free_handle_block() const { return _free_handle_block; } + void set_free_handle_block(JNIHandleBlock* block) { _free_handle_block = block; } + + void push_jni_handle_block(); + void pop_jni_handle_block(); + private: MonitorChunk* _monitor_chunks; // Contains the off stack monitors // allocated during deoptimization @@ -1283,6 +1287,8 @@ class JavaThread: public Thread { static ByteSize exception_handler_pc_offset() { return byte_offset_of(JavaThread, _exception_handler_pc); } static ByteSize is_method_handle_return_offset() { return byte_offset_of(JavaThread, _is_method_handle_return); } + static ByteSize active_handles_offset() { return byte_offset_of(JavaThread, _active_handles); } + // StackOverflow offsets static ByteSize stack_overflow_limit_offset() { return byte_offset_of(JavaThread, _stack_overflow_state._stack_overflow_limit); @@ -1745,4 +1751,13 @@ class UnlockFlagSaver { } }; +class JNIHandleMark : public StackObj { + JavaThread* _thread; + public: + JNIHandleMark(JavaThread* thread) : _thread(thread) { + 
thread->push_jni_handle_block(); + } + ~JNIHandleMark() { _thread->pop_jni_handle_block(); } +}; + #endif // SHARE_RUNTIME_THREAD_HPP diff --git a/src/hotspot/share/runtime/vmStructs.cpp b/src/hotspot/share/runtime/vmStructs.cpp index 32f09d22d4101f8dd4e80a147c33d51e059807e1..89abff4a0a8d530e1d9935daa89405f5509fa641 100644 --- a/src/hotspot/share/runtime/vmStructs.cpp +++ b/src/hotspot/share/runtime/vmStructs.cpp @@ -704,7 +704,6 @@ nonstatic_field(ThreadShadow, _pending_exception, oop) \ nonstatic_field(ThreadShadow, _exception_file, const char*) \ nonstatic_field(ThreadShadow, _exception_line, int) \ - nonstatic_field(Thread, _active_handles, JNIHandleBlock*) \ nonstatic_field(Thread, _tlab, ThreadLocalAllocBuffer) \ nonstatic_field(Thread, _allocated_bytes, jlong) \ nonstatic_field(NamedThread, _name, char*) \ @@ -728,6 +727,7 @@ nonstatic_field(JavaThread, _stack_size, size_t) \ nonstatic_field(JavaThread, _vframe_array_head, vframeArray*) \ nonstatic_field(JavaThread, _vframe_array_last, vframeArray*) \ + nonstatic_field(JavaThread, _active_handles, JNIHandleBlock*) \ volatile_nonstatic_field(JavaThread, _terminated, JavaThread::TerminatedTypes) \ nonstatic_field(Thread, _resource_area, ResourceArea*) \ nonstatic_field(CompilerThread, _env, ciEnv*) \ @@ -824,7 +824,6 @@ nonstatic_field(ciMethodData, _arg_local, intx) \ nonstatic_field(ciMethodData, _arg_stack, intx) \ nonstatic_field(ciMethodData, _arg_returned, intx) \ - nonstatic_field(ciMethodData, _current_mileage, int) \ nonstatic_field(ciMethodData, _orig, MethodData::CompilerCounters) \ \ nonstatic_field(ciField, _holder, ciInstanceKlass*) \ @@ -1850,6 +1849,10 @@ declare_c2_type(VectorUnboxNode, VectorNode) \ declare_c2_type(VectorReinterpretNode, VectorNode) \ declare_c2_type(VectorMaskCastNode, VectorNode) \ + declare_c2_type(MaskAllNode, VectorNode) \ + declare_c2_type(AndVMaskNode, VectorNode) \ + declare_c2_type(OrVMaskNode, VectorNode) \ + declare_c2_type(XorVMaskNode, VectorNode) \ declare_c2_type(VectorBoxNode, Node) \ declare_c2_type(VectorBoxAllocateNode, CallStaticJavaNode) \ declare_c2_type(VectorTestNode, Node) \ diff --git a/src/hotspot/share/runtime/vmThread.cpp b/src/hotspot/share/runtime/vmThread.cpp index 08d58de97fd6f7449b9d6feeb992b6b57455a95b..2e601a11f60d2c64d0e6505f678a488d925c4248 100644 --- a/src/hotspot/share/runtime/vmThread.cpp +++ b/src/hotspot/share/runtime/vmThread.cpp @@ -139,7 +139,7 @@ void VMThread::create() { } } -VMThread::VMThread() : NamedThread() { +VMThread::VMThread() : NamedThread(), _is_running(false) { set_name("VM Thread"); } @@ -152,10 +152,10 @@ static VM_None halt_op("Halt"); void VMThread::run() { assert(this == vm_thread(), "check"); - // Notify_lock wait checks on active_handles() to rewait in + // Notify_lock wait checks on is_running() to rewait in // case of spurious wakeup, it should wait on the last // value set prior to the notify - this->set_active_handles(JNIHandleBlock::allocate_block()); + Atomic::store(&_is_running, true); { MutexLocker ml(Notify_lock); diff --git a/src/hotspot/share/runtime/vmThread.hpp b/src/hotspot/share/runtime/vmThread.hpp index dd1965271e60734319a3d2654d57450375c6a559..23384c61257b6afbd3d8a6127a0976aab245452d 100644 --- a/src/hotspot/share/runtime/vmThread.hpp +++ b/src/hotspot/share/runtime/vmThread.hpp @@ -25,6 +25,7 @@ #ifndef SHARE_RUNTIME_VMTHREAD_HPP #define SHARE_RUNTIME_VMTHREAD_HPP +#include "runtime/atomic.hpp" #include "runtime/perfDataTypes.hpp" #include "runtime/nonJavaThread.hpp" #include "runtime/thread.hpp" @@ -59,6 +60,8 @@ 
public: class VMThread: public NamedThread { private: + volatile bool _is_running; + static ThreadPriority _current_priority; static bool _should_terminate; @@ -84,6 +87,7 @@ class VMThread: public NamedThread { guarantee(false, "VMThread deletion must fix the race with VM termination"); } + bool is_running() const { return Atomic::load(&_is_running); } // Tester bool is_VM_thread() const { return true; } diff --git a/src/hotspot/share/services/management.cpp b/src/hotspot/share/services/management.cpp index 7f103ea784974bb34c6508051a65872ec8956f8c..09c8750f6f9abc547c6fce396e64c35da06df4bc 100644 --- a/src/hotspot/share/services/management.cpp +++ b/src/hotspot/share/services/management.cpp @@ -2015,7 +2015,7 @@ JVM_ENTRY(void, jmm_GetDiagnosticCommandInfo(JNIEnv *env, jobjectArray cmds, JVM_END JVM_ENTRY(void, jmm_GetDiagnosticCommandArgumentsInfo(JNIEnv *env, - jstring command, dcmdArgInfo* infoArray)) + jstring command, dcmdArgInfo* infoArray, jint count)) ResourceMark rm(THREAD); oop cmd = JNIHandles::resolve_external_guard(command); if (cmd == NULL) { @@ -2039,10 +2039,12 @@ JVM_ENTRY(void, jmm_GetDiagnosticCommandArgumentsInfo(JNIEnv *env, } DCmdMark mark(dcmd); GrowableArray* array = dcmd->argument_info_array(); - if (array->length() == 0) { - return; + const int num_args = array->length(); + if (num_args != count) { + assert(false, "jmm_GetDiagnosticCommandArgumentsInfo count mismatch (%d vs %d)", count, num_args); + THROW_MSG(vmSymbols::java_lang_InternalError(), "jmm_GetDiagnosticCommandArgumentsInfo count mismatch"); } - for (int i = 0; i < array->length(); i++) { + for (int i = 0; i < num_args; i++) { infoArray[i].name = array->at(i)->name(); infoArray[i].description = array->at(i)->description(); infoArray[i].type = array->at(i)->type(); diff --git a/src/hotspot/share/utilities/accessFlags.hpp b/src/hotspot/share/utilities/accessFlags.hpp index cb3663349a82030d675997cde8811fb7b403e69e..83d1b6579062d80587cd1af030a9587f22ae91c1 100644 --- a/src/hotspot/share/utilities/accessFlags.hpp +++ b/src/hotspot/share/utilities/accessFlags.hpp @@ -69,6 +69,7 @@ enum { JVM_ACC_IS_SHARED_CLASS = 0x02000000, // True if klass is shared JVM_ACC_IS_HIDDEN_CLASS = 0x04000000, // True if klass is hidden JVM_ACC_IS_VALUE_BASED_CLASS = 0x08000000, // True if klass is marked as a ValueBased class + JVM_ACC_IS_BEING_REDEFINED = 0x00100000, // True if the klass is being redefined. 
// Klass* and Method* flags JVM_ACC_HAS_LOCAL_VARIABLE_TABLE= 0x00200000, @@ -159,6 +160,10 @@ class AccessFlags { void set_has_localvariable_table() { atomic_set_bits(JVM_ACC_HAS_LOCAL_VARIABLE_TABLE); } void clear_has_localvariable_table() { atomic_clear_bits(JVM_ACC_HAS_LOCAL_VARIABLE_TABLE); } + bool is_being_redefined() const { return (_flags & JVM_ACC_IS_BEING_REDEFINED) != 0; } + void set_is_being_redefined() { atomic_set_bits(JVM_ACC_IS_BEING_REDEFINED); } + void clear_is_being_redefined() { atomic_clear_bits(JVM_ACC_IS_BEING_REDEFINED); } + // field flags bool is_field_access_watched() const { return (_flags & JVM_ACC_FIELD_ACCESS_WATCHED) != 0; } bool is_field_modification_watched() const diff --git a/src/hotspot/share/utilities/growableArray.hpp b/src/hotspot/share/utilities/growableArray.hpp index 8ab6f6f4af66d4bfdcbece178518f632a38c7787..e333388f11c85a804d76f28cc3d6c7a4ca44818f 100644 --- a/src/hotspot/share/utilities/growableArray.hpp +++ b/src/hotspot/share/utilities/growableArray.hpp @@ -320,6 +320,17 @@ public: return min; } + void truncate_to(int idx) { + for (int i = 0, j = idx; j < length(); i++, j++) { + at_put(i, at(j)); + } + trunc_to(length() - idx); + } + + void truncate_from(int idx) { + trunc_to(idx); + } + size_t data_size_in_bytes() const { return _len * sizeof(E); } diff --git a/src/java.base/share/classes/java/io/File.java b/src/java.base/share/classes/java/io/File.java index d809ec5edc991bb1721ec182c2fec547406039f4..3f7cd86cf7027eaa3b126a06e37e75a4ca7db517 100644 --- a/src/java.base/share/classes/java/io/File.java +++ b/src/java.base/share/classes/java/io/File.java @@ -220,7 +220,7 @@ public class File * string for convenience. This string contains a single character, namely * {@link #separatorChar}. */ - public static final String separator = "" + separatorChar; + public static final String separator = String.valueOf(separatorChar); /** * The system-dependent path-separator character. This field is @@ -239,7 +239,7 @@ public class File * for convenience. This string contains a single character, namely * {@link #pathSeparatorChar}. */ - public static final String pathSeparator = "" + pathSeparatorChar; + public static final String pathSeparator = String.valueOf(pathSeparatorChar); /* -- Constructors -- */ diff --git a/src/java.base/share/classes/java/io/ObjectInputFilter.java b/src/java.base/share/classes/java/io/ObjectInputFilter.java index c9722ea23e742aecd9f3c8b7f0fa21fb8fb0d1cf..d5cae8043b1b9eacf0b4aeeb8020ad1f95bd96ab 100644 --- a/src/java.base/share/classes/java/io/ObjectInputFilter.java +++ b/src/java.base/share/classes/java/io/ObjectInputFilter.java @@ -523,6 +523,8 @@ public interface ObjectInputFilter { * {@systemProperty jdk.serialFilter}, its value is used to configure the filter. * If the system property is not defined, and the {@link java.security.Security} property * {@code jdk.serialFilter} is defined then it is used to configure the filter. + * The filter is created as if {@link #createFilter(String) createFilter} is called; + * if the filter string is invalid, an {@link ExceptionInInitializerError} is thrown. * Otherwise, the filter is not configured during initialization and * can be set with {@link #setSerialFilter(ObjectInputFilter) Config.setSerialFilter}. 
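The added ObjectInputFilter wording just above specifies that the startup filter behaves as if built by createFilter, so an invalid pattern now fails fast. For comparison, a minimal sketch of the equivalent programmatic setup; the pattern string is illustrative only and not part of this change:

```java
import java.io.ObjectInputFilter;

public class SerialFilterSetup {
    public static void main(String[] args) {
        // Roughly what -Djdk.serialFilter="java.base/*;!*" does at startup:
        // an invalid pattern throws IllegalArgumentException here, which
        // during class initialization surfaces as the
        // ExceptionInInitializerError documented above.
        ObjectInputFilter filter =
                ObjectInputFilter.Config.createFilter("java.base/*;!*");
        ObjectInputFilter.Config.setSerialFilter(filter);
        System.out.println(ObjectInputFilter.Config.getSerialFilter());
    }
}
```

Note that Config.setSerialFilter rejects a second call with IllegalStateException, mirroring the once-only property-based configuration.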
* Setting the {@code jdk.serialFilter} with {@link System#setProperty(String, String) diff --git a/src/java.base/share/classes/java/io/OutputStreamWriter.java b/src/java.base/share/classes/java/io/OutputStreamWriter.java index f9acde201b760f5385a2bb4123f7cee3e5fd6b1a..43b0b10e46abfa2f48a3b87754c0b8577fda0b96 100644 --- a/src/java.base/share/classes/java/io/OutputStreamWriter.java +++ b/src/java.base/share/classes/java/io/OutputStreamWriter.java @@ -99,7 +99,9 @@ public class OutputStreamWriter extends Writer { } /** - * Creates an OutputStreamWriter that uses the default character encoding. + * Creates an OutputStreamWriter that uses the default character encoding, or + * where {@code out} is a {@code PrintStream}, the charset used by the print + * stream. * * @param out An OutputStream * @see Charset#defaultCharset() @@ -107,7 +109,7 @@ public class OutputStreamWriter extends Writer { public OutputStreamWriter(OutputStream out) { super(out); se = StreamEncoder.forOutputStreamWriter(out, this, - Charset.defaultCharset()); + out instanceof PrintStream ps ? ps.charset() : Charset.defaultCharset()); } /** diff --git a/src/java.base/share/classes/java/io/PrintStream.java b/src/java.base/share/classes/java/io/PrintStream.java index acf1a4612823396b649b92a30afbbc312a974052..0281ed66ca82e08991faf7f33ea97ab589e17795 100644 --- a/src/java.base/share/classes/java/io/PrintStream.java +++ b/src/java.base/share/classes/java/io/PrintStream.java @@ -68,6 +68,7 @@ public class PrintStream extends FilterOutputStream private final boolean autoFlush; private boolean trouble = false; private Formatter formatter; + private final Charset charset; /** * Track both the text- and character-output streams, so that their buffers @@ -108,7 +109,8 @@ public class PrintStream extends FilterOutputStream private PrintStream(boolean autoFlush, OutputStream out) { super(out); this.autoFlush = autoFlush; - this.charOut = new OutputStreamWriter(this); + this.charset = out instanceof PrintStream ps ? ps.charset() : Charset.defaultCharset(); + this.charOut = new OutputStreamWriter(this, charset); this.textOut = new BufferedWriter(charOut); } @@ -124,7 +126,8 @@ public class PrintStream extends FilterOutputStream /** * Creates a new print stream, without automatic line flushing, with the * specified OutputStream. Characters written to the stream are converted - * to bytes using the default charset. + * to bytes using the default charset, or where {@code out} is a + * {@code PrintStream}, the charset used by the print stream. * * @param out The output stream to which values and objects will be * printed @@ -139,7 +142,8 @@ public class PrintStream extends FilterOutputStream /** * Creates a new print stream, with the specified OutputStream and line * flushing. Characters written to the stream are converted to bytes using - * the default charset. + * the default charset, or where {@code out} is a {@code PrintStream}, + * the charset used by the print stream. 
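The OutputStreamWriter and PrintStream constructor changes above stop a wrapper from silently falling back to the default charset when the wrapped stream is a PrintStream whose charset is known. A small sketch of the effect, relying on the PrintStream.charset() accessor added further down in this patch:

```java
import java.io.ByteArrayOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;

public class CharsetInheritanceDemo {
    public static void main(String[] args) {
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        // A PrintStream constructed with an explicit charset...
        PrintStream ps = new PrintStream(sink, true, StandardCharsets.UTF_16BE);
        // ...is now recognized by the no-charset OutputStreamWriter
        // constructor, which previously always used Charset.defaultCharset():
        OutputStreamWriter writer = new OutputStreamWriter(ps);
        System.out.println(ps.charset());         // UTF-16BE
        System.out.println(writer.getEncoding()); // UnicodeBigUnmarked
    }
}
```

PrintWriter(OutputStream, boolean) below receives the same treatment, and System.newPrintStream later in the patch switches to the two-argument Charset.forName(name, fallback) so an unknown console encoding falls back to UTF-8 instead of being swallowed.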
* * @param out The output stream to which values and objects will be * printed @@ -201,6 +205,7 @@ public class PrintStream extends FilterOutputStream this.autoFlush = autoFlush; this.charOut = new OutputStreamWriter(this, charset); this.textOut = new BufferedWriter(charOut); + this.charset = charset; } /** @@ -1374,4 +1379,12 @@ public class PrintStream extends FilterOutputStream return this; } + /** + * {@return the charset used in this {@code PrintStream} instance} + * + * @since 18 + */ + public Charset charset() { + return charset; + } } diff --git a/src/java.base/share/classes/java/io/PrintWriter.java b/src/java.base/share/classes/java/io/PrintWriter.java index 447769d7fb320196d51fa76dbe7901cce692cdbc..874a03cce3bbfbd7e83afeb72fe8a75ba548f50b 100644 --- a/src/java.base/share/classes/java/io/PrintWriter.java +++ b/src/java.base/share/classes/java/io/PrintWriter.java @@ -118,7 +118,8 @@ public class PrintWriter extends Writer { * Creates a new PrintWriter, without automatic line flushing, from an * existing OutputStream. This convenience constructor creates the * necessary intermediate OutputStreamWriter, which will convert characters - * into bytes using the default charset. + * into bytes using the default charset, or where {@code out} is a + * {@code PrintStream}, the charset used by the print stream. * * @param out An output stream * @@ -132,8 +133,9 @@ public class PrintWriter extends Writer { /** * Creates a new PrintWriter from an existing OutputStream. This * convenience constructor creates the necessary intermediate - * OutputStreamWriter, which will convert characters into bytes using the - * default charset. + * OutputStreamWriter, which will convert characters into bytes using + * the default charset, or where {@code out} is a {@code PrintStream}, + * the charset used by the print stream. * * @param out An output stream * @param autoFlush A boolean; if true, the {@code println}, @@ -144,7 +146,7 @@ public class PrintWriter extends Writer { * @see Charset#defaultCharset() */ public PrintWriter(OutputStream out, boolean autoFlush) { - this(out, autoFlush, Charset.defaultCharset()); + this(out, autoFlush, out instanceof PrintStream ps ? ps.charset() : Charset.defaultCharset()); } /** diff --git a/src/java.base/share/classes/java/io/SequenceInputStream.java b/src/java.base/share/classes/java/io/SequenceInputStream.java index 8eea5ab6098e79dc167f9108b664b157e1442056..89b0507a5520ac15e76fe246ac07bbaec82595d6 100644 --- a/src/java.base/share/classes/java/io/SequenceInputStream.java +++ b/src/java.base/share/classes/java/io/SequenceInputStream.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1994, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1994, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -98,7 +98,7 @@ public class SequenceInputStream extends InputStream { private void peekNextStream() { if (e.hasMoreElements()) { - in = (InputStream) e.nextElement(); + in = e.nextElement(); if (in == null) throw new NullPointerException(); } else { diff --git a/src/java.base/share/classes/java/lang/ClassValue.java b/src/java.base/share/classes/java/lang/ClassValue.java index beb07bb2782e46b2de48249178d22a13b8d5bf9d..18f0775cc848f803c43aebc742fef35cfda184c4 100644 --- a/src/java.base/share/classes/java/lang/ClassValue.java +++ b/src/java.base/share/classes/java/lang/ClassValue.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -685,7 +685,7 @@ public abstract class ClassValue { if (haveReplacement >= 0) { if (cache[(replacementPos+1) & mask] != null) { // Be conservative, to avoid breaking up a non-null run. - cache[replacementPos & mask] = (Entry) Entry.DEAD_ENTRY; + cache[replacementPos & mask] = Entry.DEAD_ENTRY; } else { cache[replacementPos & mask] = null; cacheLoad -= 1; diff --git a/src/java.base/share/classes/java/lang/Enum.java b/src/java.base/share/classes/java/lang/Enum.java index 54a1f09c9ebd75ed0139ab9a486d7dc10c7734b0..636ad7c757e471c46c86a8ed4b524880b89387af 100644 --- a/src/java.base/share/classes/java/lang/Enum.java +++ b/src/java.base/share/classes/java/lang/Enum.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -193,7 +193,7 @@ public abstract class Enum> * method is the order in which the constants are declared. */ public final int compareTo(E o) { - Enum other = (Enum)o; + Enum other = o; Enum self = this; if (self.getClass() != other.getClass() && // optimization self.getDeclaringClass() != other.getDeclaringClass()) diff --git a/src/java.base/share/classes/java/lang/RuntimePermission.java b/src/java.base/share/classes/java/lang/RuntimePermission.java index db985101f7c8d236dd87d872775a492ec26f45d1..7df4c5fac5ba00d668267fa71286bf1a3b85f46e 100644 --- a/src/java.base/share/classes/java/lang/RuntimePermission.java +++ b/src/java.base/share/classes/java/lang/RuntimePermission.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -378,6 +378,16 @@ import java.lang.module.ModuleFinder; * {@linkplain ModuleFinder#ofSystem system modules} in the runtime image. * * + * + * inetAddressResolverProvider + * This {@code RuntimePermission} is required to be granted to + * classes which subclass and implement {@code java.net.spi.InetAddressResolverProvider}. + * The permission is checked during invocation of the abstract base class constructor. + * This permission ensures trust in classes which provide resolvers used by + * {@link java.net.InetAddress} hostname and address resolution methods. 
+ * See {@link java.net.spi.InetAddressResolverProvider} for more information. + * + * * * * diff --git a/src/java.base/share/classes/java/lang/StackTraceElement.java b/src/java.base/share/classes/java/lang/StackTraceElement.java index 259f9255986cc3e485de5f404d72b62e6eecfda7..9a39bbc1aec208d175e847fa13ede10a947430e4 100644 --- a/src/java.base/share/classes/java/lang/StackTraceElement.java +++ b/src/java.base/share/classes/java/lang/StackTraceElement.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -443,7 +443,7 @@ public final class StackTraceElement implements java.io.Serializable { */ private synchronized void computeFormat() { try { - Class cls = (Class) declaringClassObject; + Class cls = declaringClassObject; ClassLoader loader = cls.getClassLoader0(); Module m = cls.getModule(); byte bits = 0; diff --git a/src/java.base/share/classes/java/lang/System.java b/src/java.base/share/classes/java/lang/System.java index 6d4137c7aa463966e81ffa4f222eb9c61c0f4fd3..12a8281343319970c74df440dcaa136a34ef1d88 100644 --- a/src/java.base/share/classes/java/lang/System.java +++ b/src/java.base/share/classes/java/lang/System.java @@ -33,7 +33,6 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; -import java.io.UnsupportedEncodingException; import java.lang.annotation.Annotation; import java.lang.invoke.MethodHandle; import java.lang.invoke.MethodType; @@ -45,9 +44,9 @@ import java.lang.reflect.Method; import java.lang.reflect.Modifier; import java.net.URI; import java.net.URL; -import java.nio.charset.CharacterCodingException; import java.nio.channels.Channel; import java.nio.channels.spi.SelectorProvider; +import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.security.AccessControlContext; import java.security.AccessController; @@ -84,6 +83,7 @@ import jdk.internal.vm.annotation.Stable; import sun.nio.fs.DefaultFileSystemProvider; import sun.reflect.annotation.AnnotationType; import sun.nio.ch.Interruptible; +import sun.nio.cs.UTF_8; import sun.security.util.SecurityConstants; /** @@ -188,6 +188,11 @@ public final class System { @SuppressWarnings("removal") private static volatile SecurityManager security; // read by VM + // `sun.jnu.encoding` if it is not supported. Otherwise null. + // It is initialized in `initPhase1()` before any charset providers + // are initialized. + private static String notSupportedJnuEncoding; + // return true if a security manager is allowed private static boolean allowSecurityManager() { return (allowSecurityManager != NEVER); @@ -2017,10 +2022,9 @@ public final class System { * Create PrintStream for stdout/err based on encoding. 
*/ private static PrintStream newPrintStream(FileOutputStream fos, String enc) { - if (enc != null) { - try { - return new PrintStream(new BufferedOutputStream(fos, 128), true, enc); - } catch (UnsupportedEncodingException uee) {} + if (enc != null) { + return new PrintStream(new BufferedOutputStream(fos, 128), true, + Charset.forName(enc, UTF_8.INSTANCE)); } return new PrintStream(new BufferedOutputStream(fos, 128), true); } @@ -2113,6 +2117,13 @@ public final class System { VM.saveProperties(tempProps); props = createProperties(tempProps); + // Check if sun.jnu.encoding is supported. If not, replace it with UTF-8. + var jnuEncoding = props.getProperty("sun.jnu.encoding"); + if (jnuEncoding == null || !Charset.isSupported(jnuEncoding)) { + notSupportedJnuEncoding = jnuEncoding == null ? "null" : jnuEncoding; + props.setProperty("sun.jnu.encoding", "UTF-8"); + } + StaticProperty.javaHome(); // Load StaticProperty to cache the property values lineSeparator = props.getProperty("line.separator"); @@ -2141,7 +2152,6 @@ public final class System { Thread current = Thread.currentThread(); current.getThreadGroup().add(current); - // Subsystems that are invoked during initialization can invoke // VM.isBooted() in order to avoid doing things that should // wait until the VM is fully initialized. The initialization level @@ -2248,6 +2258,14 @@ public final class System { WARNING: The Security Manager is deprecated and will be removed in a future release"""); } + // Emit a warning if `sun.jnu.encoding` is not supported. + if (notSupportedJnuEncoding != null) { + System.err.println( + "WARNING: The encoding of the underlying platform's" + + " file system is not supported: " + + notSupportedJnuEncoding); + } + initialErrStream = System.err; // initializing the system class loader diff --git a/src/java.base/share/classes/java/net/Inet4AddressImpl.java b/src/java.base/share/classes/java/net/Inet4AddressImpl.java index 4cbe28f998b1e1f022beb827cf7df80d2adc548d..c7feaf4c195e6e8d0d389e011cb2c2140387f69f 100644 --- a/src/java.base/share/classes/java/net/Inet4AddressImpl.java +++ b/src/java.base/share/classes/java/net/Inet4AddressImpl.java @@ -24,6 +24,9 @@ */ package java.net; import java.io.IOException; +import java.net.spi.InetAddressResolver.LookupPolicy; + +import static java.net.spi.InetAddressResolver.LookupPolicy.IPV4; /* * Package private implementation of InetAddressImpl for IPv4. 
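The Inet4AddressImpl and Inet6AddressImpl hunks that follow route the native per-family lookups through the new resolver SPI. For orientation, a rough sketch of a third-party provider plugging into that SPI, assuming the shapes visible elsewhere in this patch (InetAddressResolverProvider.get(Configuration), Configuration.builtinResolver(), LookupPolicy); the class name and logging are purely illustrative:

```java
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.net.spi.InetAddressResolver;
import java.net.spi.InetAddressResolver.LookupPolicy;
import java.net.spi.InetAddressResolverProvider;
import java.util.stream.Stream;

// Hypothetical provider, registered via
// META-INF/services/java.net.spi.InetAddressResolverProvider.
// Constructing it requires RuntimePermission("inetAddressResolverProvider")
// when a security manager is set, per the RuntimePermission entry above.
public class LoggingResolverProvider extends InetAddressResolverProvider {
    @Override
    public InetAddressResolver get(Configuration configuration) {
        InetAddressResolver builtin = configuration.builtinResolver();
        return new InetAddressResolver() {
            @Override
            public Stream<InetAddress> lookupByName(String host, LookupPolicy policy)
                    throws UnknownHostException {
                System.err.println("lookup: " + host);     // illustrative logging
                return builtin.lookupByName(host, policy); // delegate to built-in
            }
            @Override
            public String lookupByAddress(byte[] addr) throws UnknownHostException {
                return builtin.lookupByAddress(addr);
            }
        };
    }

    @Override
    public String name() {
        return "logging"; // illustrative provider name
    }
}
```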
@@ -32,8 +35,14 @@ import java.io.IOException; */ final class Inet4AddressImpl implements InetAddressImpl { public native String getLocalHostName() throws UnknownHostException; - public native InetAddress[] - lookupAllHostAddr(String hostname) throws UnknownHostException; + public InetAddress[] lookupAllHostAddr(String hostname, LookupPolicy lookupPolicy) + throws UnknownHostException { + if ((lookupPolicy.characteristics() & IPV4) == 0) { + throw new UnknownHostException(hostname); + } + return lookupAllHostAddr(hostname); + } + private native InetAddress[] lookupAllHostAddr(String hostname) throws UnknownHostException; public native String getHostByAddr(byte[] addr) throws UnknownHostException; private native boolean isReachable0(byte[] addr, int timeout, byte[] ifaddr, int ttl) throws IOException; diff --git a/src/java.base/share/classes/java/net/Inet6AddressImpl.java b/src/java.base/share/classes/java/net/Inet6AddressImpl.java index f956a50e311229b900362aeb35c95a6f5b2b202f..87fefbe37ba32f18e71daecb33120759c95cd17d 100644 --- a/src/java.base/share/classes/java/net/Inet6AddressImpl.java +++ b/src/java.base/share/classes/java/net/Inet6AddressImpl.java @@ -25,10 +25,9 @@ package java.net; import java.io.IOException; +import java.net.spi.InetAddressResolver.LookupPolicy; -import static java.net.InetAddress.IPv6; -import static java.net.InetAddress.PREFER_IPV6_VALUE; -import static java.net.InetAddress.PREFER_SYSTEM_VALUE; +import static java.net.InetAddress.PLATFORM_LOOKUP_POLICY; /* * Package private implementation of InetAddressImpl for dual @@ -48,8 +47,13 @@ final class Inet6AddressImpl implements InetAddressImpl { public native String getLocalHostName() throws UnknownHostException; - public native InetAddress[] lookupAllHostAddr(String hostname) - throws UnknownHostException; + public InetAddress[] lookupAllHostAddr(String hostname, LookupPolicy lookupPolicy) + throws UnknownHostException { + return lookupAllHostAddr(hostname, lookupPolicy.characteristics()); + } + + private native InetAddress[] lookupAllHostAddr(String hostname, int characteristics) + throws UnknownHostException; public native String getHostByAddr(byte[] addr) throws UnknownHostException; @@ -96,8 +100,9 @@ final class Inet6AddressImpl implements InetAddressImpl { public synchronized InetAddress anyLocalAddress() { if (anyLocalAddress == null) { - if (InetAddress.preferIPv6Address == PREFER_IPV6_VALUE || - InetAddress.preferIPv6Address == PREFER_SYSTEM_VALUE) { + int flags = PLATFORM_LOOKUP_POLICY.characteristics(); + if (InetAddress.ipv6AddressesFirst(flags) || + InetAddress.systemAddressesOrder(flags)) { anyLocalAddress = new Inet6Address(); anyLocalAddress.holder().hostName = "::"; } else { @@ -109,9 +114,9 @@ final class Inet6AddressImpl implements InetAddressImpl { public synchronized InetAddress loopbackAddress() { if (loopbackAddress == null) { - boolean preferIPv6Address = - InetAddress.preferIPv6Address == PREFER_IPV6_VALUE || - InetAddress.preferIPv6Address == PREFER_SYSTEM_VALUE; + int flags = PLATFORM_LOOKUP_POLICY.characteristics(); + boolean preferIPv6Address = InetAddress.ipv6AddressesFirst(flags) || + InetAddress.systemAddressesOrder(flags); for (int i = 0; i < 2; i++) { InetAddress address; diff --git a/src/java.base/share/classes/java/net/InetAddress.java b/src/java.base/share/classes/java/net/InetAddress.java index 38e7fc629216a1f31e1e1ef60fd6464a14096511..3875216768be9deab04ee261dc940cba55f0c336 100644 --- a/src/java.base/share/classes/java/net/InetAddress.java +++ 
b/src/java.base/share/classes/java/net/InetAddress.java @@ -25,6 +25,11 @@ package java.net; +import java.net.spi.InetAddressResolver; +import java.net.spi.InetAddressResolverProvider; +import java.net.spi.InetAddressResolver.LookupPolicy; +import java.security.AccessController; +import java.security.PrivilegedAction; import java.util.List; import java.util.NavigableSet; import java.util.ArrayList; @@ -40,19 +45,31 @@ import java.io.ObjectInputStream.GetField; import java.io.ObjectOutputStream; import java.io.ObjectOutputStream.PutField; import java.lang.annotation.Native; +import java.util.ServiceLoader; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.atomic.AtomicLong; import java.util.Arrays; +import java.util.concurrent.locks.ReentrantLock; +import java.util.stream.Stream; + +import jdk.internal.misc.VM; import jdk.internal.access.JavaNetInetAddressAccess; import jdk.internal.access.SharedSecrets; +import jdk.internal.vm.annotation.Stable; +import sun.net.ResolverProviderConfiguration; import sun.security.action.*; import sun.net.InetAddressCachePolicy; import sun.net.util.IPAddressUtil; import sun.nio.cs.UTF_8; +import static java.net.spi.InetAddressResolver.LookupPolicy.IPV4; +import static java.net.spi.InetAddressResolver.LookupPolicy.IPV4_FIRST; +import static java.net.spi.InetAddressResolver.LookupPolicy.IPV6; +import static java.net.spi.InetAddressResolver.LookupPolicy.IPV6_FIRST; + /** * This class represents an Internet Protocol (IP) address. * @@ -128,25 +145,35 @@ import sun.nio.cs.UTF_8; * address format, please refer to Inet6Address#format. * - *
<p> There is a couple of
- * System Properties affecting how IPv4 and IPv6 addresses are used.</p>
+ * <p> There is a couple of
+ * System Properties affecting how IPv4 and IPv6 addresses are used.
 *
- * <h3> Host Name Resolution </h3>
+ * <h2> Host Name Resolution </h2>
+ *
+ * <p> The InetAddress class provides methods to resolve host names to
+ * their IP addresses and vice versa. The actual resolution is delegated to an
+ * {@linkplain InetAddressResolver InetAddress resolver}.
 *
- * Host name-to-IP address resolution is accomplished through
- * the use of a combination of local machine configuration information
- * and network naming services such as the Domain Name System (DNS)
- * and Network Information Service(NIS). The particular naming
- * services(s) being used is by default the local machine configured
- * one. For any host name, its corresponding IP address is returned.
+ * <p> Host name-to-IP address resolution maps a host name to an IP address.
+ * For any host name, its corresponding IP address is returned.
 *
 * <p> Reverse name resolution means that for any IP address,
 * the host associated with the IP address is returned.
 *
- * <p> The InetAddress class provides methods to resolve host names to
- * their IP addresses and vice versa.
+ * <p> The built-in InetAddress resolver implementation does
+ * host name-to-IP address resolution and vice versa through the use of
+ * a combination of local machine configuration information and network
+ * naming services such as the Domain Name System (DNS) and the Lightweight Directory
+ * Access Protocol (LDAP).
+ * The particular naming services that the built-in resolver uses by default
+ * depends on the configuration of the local machine.
+ *
+ * <p> {@code InetAddress} has a service provider mechanism for InetAddress resolvers
+ * that allows a custom InetAddress resolver to be used instead of the built-in implementation.
+ * {@link InetAddressResolverProvider} is the service provider class. Its API docs provide all the
+ * details on this mechanism.
 *
- * <h3> InetAddress Caching </h3>
+ * <h2> InetAddress Caching </h2>
* * The InetAddress class has a cache to store successful as well as * unsuccessful host name resolutions. @@ -198,10 +225,6 @@ import sun.nio.cs.UTF_8; */ public class InetAddress implements java.io.Serializable { - @Native static final int PREFER_IPV4_VALUE = 0; - @Native static final int PREFER_IPV6_VALUE = 1; - @Native static final int PREFER_SYSTEM_VALUE = 2; - /** * Specify the address family: Internet Protocol, Version 4 * @since 1.4 @@ -214,9 +237,6 @@ public class InetAddress implements java.io.Serializable { */ @Native static final int IPv6 = 2; - /* Specify address family preference */ - static final transient int preferIPv6Address; - static class InetAddressHolder { /** * Reserve the original application specified hostname. @@ -288,8 +308,11 @@ public class InetAddress implements java.io.Serializable { return holder; } - /* Used to store the name service provider */ - private static transient NameService nameService; + /* Used to store the system-wide resolver */ + @Stable + private static volatile InetAddressResolver resolver; + + private static final InetAddressResolver BUILTIN_RESOLVER; /** * Used to store the best available hostname. @@ -301,22 +324,25 @@ public class InetAddress implements java.io.Serializable { @java.io.Serial private static final long serialVersionUID = 3286316764910316507L; + // "java.net.preferIPv4Stack" system property value + private static final String PREFER_IPV4_STACK_VALUE; + + // "java.net.preferIPv6Addresses" system property value + private static final String PREFER_IPV6_ADDRESSES_VALUE; + + // "jdk.net.hosts.file" system property value + private static final String HOSTS_FILE_NAME; + /* * Load net library into runtime, and perform initializations. */ static { - String str = GetPropertyAction.privilegedGetProperty("java.net.preferIPv6Addresses"); - if (str == null) { - preferIPv6Address = PREFER_IPV4_VALUE; - } else if (str.equalsIgnoreCase("true")) { - preferIPv6Address = PREFER_IPV6_VALUE; - } else if (str.equalsIgnoreCase("false")) { - preferIPv6Address = PREFER_IPV4_VALUE; - } else if (str.equalsIgnoreCase("system")) { - preferIPv6Address = PREFER_SYSTEM_VALUE; - } else { - preferIPv6Address = PREFER_IPV4_VALUE; - } + PREFER_IPV4_STACK_VALUE = + GetPropertyAction.privilegedGetProperty("java.net.preferIPv4Stack"); + PREFER_IPV6_ADDRESSES_VALUE = + GetPropertyAction.privilegedGetProperty("java.net.preferIPv6Addresses"); + HOSTS_FILE_NAME = + GetPropertyAction.privilegedGetProperty("jdk.net.hosts.file"); jdk.internal.loader.BootLoader.loadLibrary("net"); SharedSecrets.setJavaNetInetAddressAccess( new JavaNetInetAddressAccess() { @@ -324,13 +350,6 @@ public class InetAddress implements java.io.Serializable { return ia.holder.getOriginalHostName(); } - public InetAddress getByName(String hostName, - InetAddress hostAddress) - throws UnknownHostException - { - return InetAddress.getByName(hostName, hostAddress); - } - public int addressValue(Inet4Address inet4Address) { return inet4Address.addressValue(); } @@ -343,6 +362,131 @@ public class InetAddress implements java.io.Serializable { init(); } + /** + * Creates an address lookup policy from {@code "java.net.preferIPv4Stack"}, + * {@code "java.net.preferIPv6Addresses"} system property values, and O/S configuration. 
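Before the decision tree in the method body below, a usage sketch of the LookupPolicy flag combinations it produces; the combination shown assumes a dual-stack host running with java.net.preferIPv6Addresses=true:

```java
import java.net.spi.InetAddressResolver.LookupPolicy;

import static java.net.spi.InetAddressResolver.LookupPolicy.IPV4;
import static java.net.spi.InetAddressResolver.LookupPolicy.IPV6;
import static java.net.spi.InetAddressResolver.LookupPolicy.IPV6_FIRST;

public class LookupPolicyDemo {
    public static void main(String[] args) {
        // Both address families allowed, IPv6 results ordered first:
        LookupPolicy policy = LookupPolicy.of(IPV4 | IPV6 | IPV6_FIRST);
        int flags = policy.characteristics();
        System.out.println((flags & IPV6_FIRST) != 0); // true
        System.out.println((flags & IPV4) != 0);       // true
    }
}
```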
+ */ + private static final LookupPolicy initializePlatformLookupPolicy() { + // Calculate AddressFamily value first + boolean ipv4Available = isIPv4Available(); + if ("true".equals(PREFER_IPV4_STACK_VALUE) && ipv4Available) { + return LookupPolicy.of(IPV4); + } + // Check if IPv6 is not supported + if (InetAddress.impl instanceof Inet4AddressImpl) { + return LookupPolicy.of(IPV4); + } + // Check if system supports IPv4, if not use IPv6 + if (!ipv4Available) { + return LookupPolicy.of(IPV6); + } + // If both address families are needed - check preferIPv6Addresses value + if (PREFER_IPV6_ADDRESSES_VALUE != null) { + if (PREFER_IPV6_ADDRESSES_VALUE.equalsIgnoreCase("true")) { + return LookupPolicy.of(IPV4 | IPV6 | IPV6_FIRST); + } + if (PREFER_IPV6_ADDRESSES_VALUE.equalsIgnoreCase("false")) { + return LookupPolicy.of(IPV4 | IPV6 | IPV4_FIRST); + } + if (PREFER_IPV6_ADDRESSES_VALUE.equalsIgnoreCase("system")) { + return LookupPolicy.of(IPV4 | IPV6); + } + } + // Default value with both address families needed - IPv4 addresses come first + return LookupPolicy.of(IPV4 | IPV6 | IPV4_FIRST); + } + + static boolean systemAddressesOrder(int lookupCharacteristics) { + return (lookupCharacteristics & (IPV4_FIRST | IPV6_FIRST)) == 0; + } + + static boolean ipv4AddressesFirst(int lookupCharacteristics) { + return (lookupCharacteristics & IPV4_FIRST) != 0; + } + + static boolean ipv6AddressesFirst(int lookupCharacteristics) { + return (lookupCharacteristics & IPV6_FIRST) != 0; + } + + // Native method to check if IPv4 is available + private static native boolean isIPv4Available(); + + /** + * The {@code RuntimePermission("inetAddressResolverProvider")} is + * necessary to subclass and instantiate the {@code InetAddressResolverProvider} + * class, as well as to obtain resolver from an instance of that class, + * and it is also required to obtain the operating system name resolution configurations. + */ + private static final RuntimePermission INET_ADDRESS_RESOLVER_PERMISSION = + new RuntimePermission("inetAddressResolverProvider"); + + private static final ReentrantLock RESOLVER_LOCK = new ReentrantLock(); + private static volatile InetAddressResolver bootstrapResolver; + + @SuppressWarnings("removal") + private static InetAddressResolver resolver() { + InetAddressResolver cns = resolver; + if (cns != null) { + return cns; + } + if (VM.isBooted()) { + RESOLVER_LOCK.lock(); + boolean bootstrapSet = false; + try { + cns = resolver; + if (cns != null) { + return cns; + } + // Protection against provider calling InetAddress APIs during initialization + if (bootstrapResolver != null) { + return bootstrapResolver; + } + bootstrapResolver = BUILTIN_RESOLVER; + bootstrapSet = true; + + if (HOSTS_FILE_NAME != null) { + // The default resolver service is already host file resolver + cns = BUILTIN_RESOLVER; + } else if (System.getSecurityManager() != null) { + PrivilegedAction pa = InetAddress::loadResolver; + cns = AccessController.doPrivileged( + pa, null, INET_ADDRESS_RESOLVER_PERMISSION); + } else { + cns = loadResolver(); + } + + InetAddress.resolver = cns; + return cns; + } finally { + // We want to clear bootstrap resolver reference only after an attempt to + // instantiate a resolver has been completed. 
+ if (bootstrapSet) { + bootstrapResolver = null; + } + RESOLVER_LOCK.unlock(); + } + } else { + return BUILTIN_RESOLVER; + } + } + + private static InetAddressResolver loadResolver() { + return ServiceLoader.load(InetAddressResolverProvider.class) + .findFirst() + .map(nsp -> nsp.get(builtinConfiguration())) + .orElse(BUILTIN_RESOLVER); + } + + private static InetAddressResolverProvider.Configuration builtinConfiguration() { + return new ResolverProviderConfiguration(BUILTIN_RESOLVER, () -> { + try { + return impl.getLocalHostName(); + } catch (UnknownHostException unknownHostException) { + return "localhost"; + } + }); + } + /** * Constructor for the Socket.accept() method. * This creates an empty InetAddress, which is filled in by @@ -555,7 +699,7 @@ public class InetAddress implements java.io.Serializable { * this host name will be remembered and returned; * otherwise, a reverse name lookup will be performed * and the result will be returned based on the system - * configured name lookup service. If a lookup of the name service + * configured resolver. If a lookup of the name service * is required, call * {@link #getCanonicalHostName() getCanonicalHostName}. * @@ -656,10 +800,11 @@ public class InetAddress implements java.io.Serializable { * @see SecurityManager#checkConnect */ private static String getHostFromNameService(InetAddress addr, boolean check) { - String host = null; + String host; + var resolver = resolver(); try { // first lookup the hostname - host = nameService.getHostByAddr(addr.getAddress()); + host = resolver.lookupByAddress(addr.getAddress()); /* check to see if calling code is allowed to know * the hostname for this IP address, ie, connect to the host @@ -691,11 +836,12 @@ public class InetAddress implements java.io.Serializable { host = addr.getHostAddress(); return host; } - } catch (SecurityException e) { + } catch (RuntimeException | UnknownHostException e) { + // 'resolver.lookupByAddress' and 'InetAddress.getAllByName0' delegate to + // the system-wide resolver, which could be a custom one. At that point we + // treat any unexpected RuntimeException thrown by the resolver as we would + // treat an UnknownHostException or an unmatched host name. host = addr.getHostAddress(); - } catch (UnknownHostException e) { - host = addr.getHostAddress(); - // let next provider resolve the hostname } return host; } @@ -755,8 +901,9 @@ public class InetAddress implements java.io.Serializable { * string returned is of the form: hostname / literal IP * address. * - * If the host name is unresolved, no reverse name service lookup - * is performed. The hostname part will be represented by an empty string. + * If the host name is unresolved, no reverse lookup + * is performed. The hostname part will be represented + * by an empty string. * * @return a string representation of this IP address. 
*/ @@ -821,11 +968,9 @@ public class InetAddress implements java.io.Serializable { // in cache when the result is obtained private static final class NameServiceAddresses implements Addresses { private final String host; - private final InetAddress reqAddr; - NameServiceAddresses(String host, InetAddress reqAddr) { + NameServiceAddresses(String host) { this.host = host; - this.reqAddr = reqAddr; } @Override @@ -849,7 +994,7 @@ public class InetAddress implements java.io.Serializable { UnknownHostException ex; int cachePolicy; try { - inetAddresses = getAddressesFromNameService(host, reqAddr); + inetAddresses = getAddressesFromNameService(host); ex = null; cachePolicy = InetAddressCachePolicy.get(); } catch (UnknownHostException uhe) { @@ -875,7 +1020,7 @@ public class InetAddress implements java.io.Serializable { expirySet.add(cachedAddresses); } } - if (inetAddresses == null) { + if (inetAddresses == null || inetAddresses.length == 0) { throw ex == null ? new UnknownHostException(host) : ex; } return inetAddresses; @@ -889,81 +1034,48 @@ public class InetAddress implements java.io.Serializable { } /** - * NameService provides host and address lookup service - * - * @since 9 - */ - private interface NameService { - - /** - * Lookup a host mapping by name. Retrieve the IP addresses - * associated with a host - * - * @param host the specified hostname - * @return array of IP addresses for the requested host - * @throws UnknownHostException - * if no IP address for the {@code host} could be found - */ - InetAddress[] lookupAllHostAddr(String host) - throws UnknownHostException; - - /** - * Lookup the host corresponding to the IP address provided - * - * @param addr byte array representing an IP address - * @return {@code String} representing the host name mapping - * @throws UnknownHostException - * if no host found for the specified IP address - */ - String getHostByAddr(byte[] addr) throws UnknownHostException; - - } - - /** - * The default NameService implementation, which delegates to the underlying + * The default InetAddressResolver implementation, which delegates to the underlying * OS network libraries to resolve host address mappings. * * @since 9 */ - private static final class PlatformNameService implements NameService { + private static final class PlatformResolver implements InetAddressResolver { - public InetAddress[] lookupAllHostAddr(String host) - throws UnknownHostException - { - return impl.lookupAllHostAddr(host); + public Stream lookupByName(String host, LookupPolicy policy) + throws UnknownHostException { + Objects.requireNonNull(host); + Objects.requireNonNull(policy); + return Arrays.stream(impl.lookupAllHostAddr(host, policy)); } - public String getHostByAddr(byte[] addr) - throws UnknownHostException - { + public String lookupByAddress(byte[] addr) + throws UnknownHostException { + Objects.requireNonNull(addr); + if (addr.length != Inet4Address.INADDRSZ && addr.length != Inet6Address.INADDRSZ) { + throw new IllegalArgumentException("Invalid address length"); + } return impl.getHostByAddr(addr); } } /** - * The HostsFileNameService provides host address mapping + * The HostsFileResolver provides host address mapping * by reading the entries in a hosts file, which is specified by * {@code jdk.net.hosts.file} system property * *
<p> The file format is that which corresponds with the /etc/hosts file
 * IP Address host alias list.
 *
- * <p> When the file lookup is enabled it replaces the default NameService
+ * <p>
When the file lookup is enabled it replaces the default InetAddressResolver * implementation * * @since 9 */ - private static final class HostsFileNameService implements NameService { - - private static final InetAddress[] EMPTY_ARRAY = new InetAddress[0]; - - // Specify if only IPv4 addresses should be returned by HostsFileService implementation - private static final boolean preferIPv4Stack = Boolean.parseBoolean( - GetPropertyAction.privilegedGetProperty("java.net.preferIPv4Stack")); + private static final class HostsFileResolver implements InetAddressResolver { private final String hostsFile; - public HostsFileNameService(String hostsFileName) { + public HostsFileResolver(String hostsFileName) { this.hostsFile = hostsFileName; } @@ -974,17 +1086,22 @@ public class InetAddress implements java.io.Serializable { * * @param addr byte array representing an IP address * @return {@code String} representing the host name mapping - * @throws UnknownHostException - * if no host found for the specified IP address + * @throws UnknownHostException if no host found for the specified IP address + * @throws IllegalArgumentException if IP address is of illegal length + * @throws NullPointerException if addr is {@code null} */ @Override - public String getHostByAddr(byte[] addr) throws UnknownHostException { + public String lookupByAddress(byte[] addr) throws UnknownHostException { String hostEntry; String host = null; + Objects.requireNonNull(addr); + // Check the length of the address array + if (addr.length != Inet4Address.INADDRSZ && addr.length != Inet6Address.INADDRSZ) { + throw new IllegalArgumentException("Invalid address length"); + } try (Scanner hostsFileScanner = new Scanner(new File(hostsFile), - UTF_8.INSTANCE)) - { + UTF_8.INSTANCE)) { while (hostsFileScanner.hasNextLine()) { hostEntry = hostsFileScanner.nextLine(); if (!hostEntry.startsWith("#")) { @@ -1020,22 +1137,31 @@ public class InetAddress implements java.io.Serializable { * with the specified host name. 
* * @param host the specified hostname - * @return array of IP addresses for the requested host + * @param lookupPolicy IP addresses lookup policy which specifies addresses + * family and their order + * @return stream of IP addresses for the requested host + * @throws NullPointerException if either parameter is {@code null} * @throws UnknownHostException * if no IP address for the {@code host} could be found */ - public InetAddress[] lookupAllHostAddr(String host) + public Stream lookupByName(String host, LookupPolicy lookupPolicy) throws UnknownHostException { String hostEntry; String addrStr; byte addr[]; + + Objects.requireNonNull(host); + Objects.requireNonNull(lookupPolicy); List inetAddresses = new ArrayList<>(); List inet4Addresses = new ArrayList<>(); List inet6Addresses = new ArrayList<>(); + int flags = lookupPolicy.characteristics(); + boolean needIPv4 = (flags & IPv4) != 0; + boolean needIPv6 = (flags & IPv6) != 0; // lookup the file and create a list InetAddress for the specified host try (Scanner hostsFileScanner = new Scanner(new File(hostsFile), - UTF_8.INSTANCE)) { + UTF_8.INSTANCE)) { while (hostsFileScanner.hasNextLine()) { hostEntry = hostsFileScanner.nextLine(); if (!hostEntry.startsWith("#")) { @@ -1047,10 +1173,10 @@ public class InetAddress implements java.io.Serializable { if (addr != null) { InetAddress address = InetAddress.getByAddress(host, addr); inetAddresses.add(address); - if (address instanceof Inet4Address) { + if (address instanceof Inet4Address && needIPv4) { inet4Addresses.add(address); } - if (address instanceof Inet6Address) { + if (address instanceof Inet6Address && needIPv6) { inet6Addresses.add(address); } } @@ -1062,33 +1188,38 @@ public class InetAddress implements java.io.Serializable { throw new UnknownHostException("Unable to resolve host " + host + " as hosts file " + hostsFile + " not found "); } - - List res; - // If "preferIPv4Stack" system property is set to "true" then return - // only IPv4 addresses - if (preferIPv4Stack) { - res = inet4Addresses; - } else { - // Otherwise, analyse "preferIPv6Addresses" value - res = switch (preferIPv6Address) { - case PREFER_IPV4_VALUE -> concatAddresses(inet4Addresses, inet6Addresses); - case PREFER_IPV6_VALUE -> concatAddresses(inet6Addresses, inet4Addresses); - default -> inetAddresses; - }; + // Check if only IPv4 addresses are requested + if (needIPv4 && !needIPv6) { + checkResultsList(inet4Addresses, host); + return inet4Addresses.stream(); } - - if (res.isEmpty()) { - throw new UnknownHostException("Unable to resolve host " + host - + " in hosts file " + hostsFile); + // Check if only IPv6 addresses are requested + if (!needIPv4 && needIPv6) { + checkResultsList(inet6Addresses, host); + return inet6Addresses.stream(); } - return res.toArray(EMPTY_ARRAY); + // If both type of addresses are requested: + // First, check if there is any results. Then arrange + // addresses according to LookupPolicy value. 
+ checkResultsList(inetAddresses, host); + if (ipv6AddressesFirst(flags)) { + return Stream.concat(inet6Addresses.stream(), inet4Addresses.stream()); + } else if (ipv4AddressesFirst(flags)) { + return Stream.concat(inet4Addresses.stream(), inet6Addresses.stream()); + } + // Only "system" addresses order is possible at this stage + assert systemAddressesOrder(flags); + return inetAddresses.stream(); } - private static List concatAddresses(List firstPart, - List secondPart) { - List result = new ArrayList<>(firstPart); - result.addAll(secondPart); - return result; + // Checks if result list with addresses is not empty. + // If it is empty throw an UnknownHostException. + private void checkResultsList(List addressesList, String hostName) + throws UnknownHostException { + if (addressesList.isEmpty()) { + throw new UnknownHostException("Unable to resolve host " + hostName + + " in hosts file " + hostsFile); + } } private String removeComments(String hostsEntry) { @@ -1130,45 +1261,52 @@ public class InetAddress implements java.io.Serializable { static final InetAddressImpl impl; + /** + * Platform-wide {@code LookupPolicy} initialized from {@code "java.net.preferIPv4Stack"}, + * {@code "java.net.preferIPv6Addresses"} system properties. + */ + static final LookupPolicy PLATFORM_LOOKUP_POLICY; + static { // create the impl impl = InetAddressImplFactory.create(); - // create name service - nameService = createNameService(); + // impl must be initialized before calling this method + PLATFORM_LOOKUP_POLICY = initializePlatformLookupPolicy(); + + // create built-in resolver + BUILTIN_RESOLVER = createBuiltinInetAddressResolver(); } /** - * Create an instance of the NameService interface based on + * Create an instance of the InetAddressResolver interface based on * the setting of the {@code jdk.net.hosts.file} system property. * - *
<p>
The default NameService is the PlatformNameService, which typically + *
<p>
The default InetAddressResolver is the PlatformResolver, which typically * delegates name and address resolution calls to the underlying * OS network libraries. * - *
<p>
A HostsFileNameService is created if the {@code jdk.net.hosts.file} + *
<p>
A HostsFileResolver is created if the {@code jdk.net.hosts.file} * system property is set. If the specified file doesn't exist, the name or * address lookup will result in an UnknownHostException. Thus, a non-existent * hosts file is handled as if the file were empty. * - * @return a NameService + * @return an InetAddressResolver */ - private static NameService createNameService() { - - String hostsFileName = - GetPropertyAction.privilegedGetProperty("jdk.net.hosts.file"); - NameService theNameService; - if (hostsFileName != null) { - theNameService = new HostsFileNameService(hostsFileName); + private static InetAddressResolver createBuiltinInetAddressResolver() { + InetAddressResolver theResolver; + if (HOSTS_FILE_NAME != null) { + theResolver = new HostsFileResolver(HOSTS_FILE_NAME); } else { - theNameService = new PlatformNameService(); + theResolver = new PlatformResolver(); } - return theNameService; + return theResolver; } /** * Creates an InetAddress based on the provided host name and IP address. - * No name service is checked for the validity of the address. + * The system-wide {@linkplain InetAddressResolver resolver} is not used to check + * the validity of the address. * - *
<p>
The host name can either be a machine name, such as * "{@code www.example.com}", or a textual representation of its IP @@ -1251,15 +1389,9 @@ public class InetAddress implements java.io.Serializable { return InetAddress.getAllByName(host)[0]; } - // called from deployment cache manager - private static InetAddress getByName(String host, InetAddress reqAddr) - throws UnknownHostException { - return InetAddress.getAllByName(host, reqAddr)[0]; - } - /** * Given the name of a host, returns an array of its IP addresses, - * based on the configured name service on the system. + * based on the configured system {@linkplain InetAddressResolver resolver}. * *
<p>
The host name can either be a machine name, such as * "{@code www.example.com}", or a textual representation of its IP @@ -1298,11 +1430,6 @@ public class InetAddress implements java.io.Serializable { */ public static InetAddress[] getAllByName(String host) throws UnknownHostException { - return getAllByName(host, null); - } - - private static InetAddress[] getAllByName(String host, InetAddress reqAddr) - throws UnknownHostException { if (host == null || host.isEmpty()) { InetAddress[] ret = new InetAddress[1]; @@ -1364,7 +1491,7 @@ public class InetAddress implements java.io.Serializable { // We were expecting an IPv6 Literal, but got something else throw new UnknownHostException("["+host+"]"); } - return getAllByName0(host, reqAddr, true, true); + return getAllByName0(host, true, true); } /** @@ -1414,25 +1541,18 @@ public class InetAddress implements java.io.Serializable { return zone; } - private static InetAddress[] getAllByName0 (String host) - throws UnknownHostException - { - return getAllByName0(host, true); - } - /** * package private so SocketPermission can call it */ static InetAddress[] getAllByName0 (String host, boolean check) throws UnknownHostException { - return getAllByName0 (host, null, check, true); + return getAllByName0(host, check, true); } /** * Designated lookup method. * * @param host host name to look up - * @param reqAddr requested address to be the 1st in returned array * @param check perform security check * @param useCache use cached value if not expired else always * perform name service lookup (and cache the result) @@ -1440,7 +1560,6 @@ public class InetAddress implements java.io.Serializable { * @throws UnknownHostException if host name is not found */ private static InetAddress[] getAllByName0(String host, - InetAddress reqAddr, boolean check, boolean useCache) throws UnknownHostException { @@ -1498,7 +1617,7 @@ public class InetAddress implements java.io.Serializable { // the name service and install it within cache... Addresses oldAddrs = cache.putIfAbsent( host, - addrs = new NameServiceAddresses(host, reqAddr) + addrs = new NameServiceAddresses(host) ); if (oldAddrs != null) { // lost putIfAbsent race addrs = oldAddrs; @@ -1509,47 +1628,30 @@ public class InetAddress implements java.io.Serializable { return addrs.get().clone(); } - static InetAddress[] getAddressesFromNameService(String host, InetAddress reqAddr) + static InetAddress[] getAddressesFromNameService(String host) throws UnknownHostException { - InetAddress[] addresses = null; + Stream addresses = null; UnknownHostException ex = null; + var resolver = resolver(); try { - addresses = nameService.lookupAllHostAddr(host); - } catch (UnknownHostException uhe) { + addresses = resolver.lookupByName(host, PLATFORM_LOOKUP_POLICY); + } catch (RuntimeException | UnknownHostException x) { if (host.equalsIgnoreCase("localhost")) { - addresses = new InetAddress[]{impl.loopbackAddress()}; - } else { + addresses = Stream.of(impl.loopbackAddress()); + } else if (x instanceof UnknownHostException uhe) { ex = uhe; + } else { + ex = new UnknownHostException(); + ex.initCause(x); } } - - if (addresses == null) { + InetAddress[] result = addresses == null ? null + : addresses.toArray(InetAddress[]::new); + if (result == null || result.length == 0) { throw ex == null ? new UnknownHostException(host) : ex; } - - // More to do? - if (reqAddr != null && addresses.length > 1 && !addresses[0].equals(reqAddr)) { - // Find it? 
- int i = 1; - for (; i < addresses.length; i++) { - if (addresses[i].equals(reqAddr)) { - break; - } - } - // Rotate - if (i < addresses.length) { - InetAddress tmp, tmp2 = reqAddr; - for (int j = 0; j < i; j++) { - tmp = addresses[j]; - addresses[j] = tmp2; - tmp2 = tmp; - } - addresses[i] = tmp2; - } - } - - return addresses; + return result; } /** @@ -1557,8 +1659,7 @@ public class InetAddress implements java.io.Serializable { * The argument is in network byte order: the highest order * byte of the address is in {@code getAddress()[0]}. * - *
<p>
This method doesn't block, i.e. no reverse name service lookup - * is performed. + *
<p>
This method doesn't block, i.e. no reverse lookup is performed. * *
<p>
IPv4 address byte array must be 4 bytes long and IPv6 byte array * must be 16 bytes long @@ -1637,7 +1738,7 @@ public class InetAddress implements java.io.Serializable { // call getAllByName0 without security checks and // without using cached data try { - localAddr = getAllByName0(local, null, false, false)[0]; + localAddr = getAllByName0(local, false, false)[0]; } catch (UnknownHostException uhe) { // Rethrow with a more informative error message. UnknownHostException uhe2 = diff --git a/src/java.base/share/classes/java/net/InetAddressImpl.java b/src/java.base/share/classes/java/net/InetAddressImpl.java index a2f8ea010528e11d736f0be388a034f4ad66383a..f0364ffaedf46b86a124d9a71851504e83638b36 100644 --- a/src/java.base/share/classes/java/net/InetAddressImpl.java +++ b/src/java.base/share/classes/java/net/InetAddressImpl.java @@ -24,7 +24,10 @@ */ package java.net; + import java.io.IOException; +import java.net.spi.InetAddressResolver.LookupPolicy; + /* * Package private interface to "implementation" used by * {@link InetAddress}. @@ -38,7 +41,7 @@ sealed interface InetAddressImpl permits Inet4AddressImpl, Inet6AddressImpl { String getLocalHostName() throws UnknownHostException; InetAddress[] - lookupAllHostAddr(String hostname) throws UnknownHostException; + lookupAllHostAddr(String hostname, LookupPolicy lookupPolicy) throws UnknownHostException; String getHostByAddr(byte[] addr) throws UnknownHostException; InetAddress anyLocalAddress(); diff --git a/src/java.base/share/classes/java/net/doc-files/net-properties.html b/src/java.base/share/classes/java/net/doc-files/net-properties.html index ea0311b71614627141a1e77f81e95097d7314f61..0c2d3e232dad4ef40d03d713ec36b1679dbc5cbd 100644 --- a/src/java.base/share/classes/java/net/doc-files/net-properties.html +++ b/src/java.base/share/classes/java/net/doc-files/net-properties.html @@ -1,6 +1,6 @@ */ - @ForceInline - public final + @Override + public abstract ByteVector lanewise(VectorOperators.Unary op, - VectorMask m) { - return blend(lanewise(op), m); + VectorMask m); + @ForceInline + final + ByteVector lanewiseTemplate(VectorOperators.Unary op, + Class> maskClass, + VectorMask m) { + m.check(maskClass, this); + if (opKind(op, VO_SPECIAL)) { + if (op == ZOMO) { + return blend(broadcast(-1), compare(NE, 0, m)); + } + if (op == NOT) { + return lanewise(XOR, broadcast(-1), m); + } else if (op == NEG) { + return lanewise(NOT, m).lanewise(ADD, broadcast(1), m); + } + } + int opc = opCode(op); + return VectorSupport.unaryOp( + opc, getClass(), maskClass, byte.class, length(), + this, m, + UN_IMPL.find(op, opc, ByteVector::unaryOperations)); + } + + private static final + ImplCache>> + UN_IMPL = new ImplCache<>(Unary.class, ByteVector.class); + + private static UnaryOperation> unaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_NEG: return (v0, m) -> + v0.uOp(m, (i, a) -> (byte) -a); + case VECTOR_OP_ABS: return (v0, m) -> + v0.uOp(m, (i, a) -> (byte) Math.abs(a)); + default: return null; + } } // Binary lanewise support @@ -599,6 +646,7 @@ public abstract class ByteVector extends AbstractVector { Vector v) { ByteVector that = (ByteVector) v; that.check(this); + if (opKind(op, VO_SPECIAL | VO_SHIFT)) { if (op == FIRST_NONZERO) { // FIXME: Support this in the JIT. 
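The masked unary template above pushes the mask into the intrinsic (VectorSupport.unaryOp now takes a maskClass argument) instead of the old blend(lanewise(op), m) fallback, while NOT and NEG are still decomposed into XOR/ADD under the same mask. The user-visible contract is unchanged: unset lanes keep their original value. A minimal sketch against the incubating API; the species choice and the negateWhere helper are illustrative, and data must hold at least SPECIES.length() bytes:

    import jdk.incubator.vector.*;

    public class MaskedNegDemo {
        static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_128;

        // Negates only the lanes selected by maskBits; all other lanes
        // pass through untouched, matching lanewiseTemplate's semantics.
        static ByteVector negateWhere(byte[] data, long maskBits) {
            ByteVector v = ByteVector.fromArray(SPECIES, data, 0);
            VectorMask<Byte> m = VectorMask.fromLong(SPECIES, maskBits);
            return v.lanewise(VectorOperators.NEG, m);
        }
    }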
@@ -617,74 +665,110 @@ public abstract class ByteVector extends AbstractVector { that = that.lanewise(NOT); op = AND; } else if (op == DIV) { - VectorMask eqz = that.eq((byte)0); + VectorMask eqz = that.eq((byte) 0); if (eqz.anyTrue()) { throw that.divZeroException(); } } } + int opc = opCode(op); return VectorSupport.binaryOp( - opc, getClass(), byte.class, length(), - this, that, - BIN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (byte)(a + b)); - case VECTOR_OP_SUB: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (byte)(a - b)); - case VECTOR_OP_MUL: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (byte)(a * b)); - case VECTOR_OP_DIV: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (byte)(a / b)); - case VECTOR_OP_MAX: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (byte)Math.max(a, b)); - case VECTOR_OP_MIN: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (byte)Math.min(a, b)); - case VECTOR_OP_AND: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (byte)(a & b)); - case VECTOR_OP_OR: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (byte)(a | b)); - case VECTOR_OP_XOR: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (byte)(a ^ b)); - case VECTOR_OP_LSHIFT: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> (byte)(a << n)); - case VECTOR_OP_RSHIFT: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> (byte)(a >> n)); - case VECTOR_OP_URSHIFT: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> (byte)((a & LSHR_SETUP_MASK) >>> n)); - case VECTOR_OP_LROTATE: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> rotateLeft(a, (int)n)); - case VECTOR_OP_RROTATE: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> rotateRight(a, (int)n)); - default: return null; - }})); + opc, getClass(), null, byte.class, length(), + this, that, null, + BIN_IMPL.find(op, opc, ByteVector::binaryOperations)); } - private static final - ImplCache> BIN_IMPL - = new ImplCache<>(Binary.class, ByteVector.class); /** * {@inheritDoc} * @see #lanewise(VectorOperators.Binary,byte,VectorMask) */ - @ForceInline - public final + @Override + public abstract ByteVector lanewise(VectorOperators.Binary op, Vector v, - VectorMask m) { + VectorMask m); + @ForceInline + final + ByteVector lanewiseTemplate(VectorOperators.Binary op, + Class> maskClass, + Vector v, VectorMask m) { ByteVector that = (ByteVector) v; - if (op == DIV) { - VectorMask eqz = that.eq((byte)0); - if (eqz.and(m).anyTrue()) { - throw that.divZeroException(); + that.check(this); + m.check(maskClass, this); + + if (opKind(op, VO_SPECIAL | VO_SHIFT)) { + if (op == FIRST_NONZERO) { + // FIXME: Support this in the JIT. + VectorMask thisNZ + = this.viewAsIntegralLanes().compare(NE, (byte) 0); + that = that.blend((byte) 0, thisNZ.cast(vspecies())); + op = OR_UNCHECKED; + } + if (opKind(op, VO_SHIFT)) { + // As per shift specification for Java, mask the shift count. + // This allows the JIT to ignore some ISA details. + that = that.lanewise(AND, SHIFT_MASK); + } + if (op == AND_NOT) { + // FIXME: Support this in the JIT. 
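One subtlety in the masked binary path just below: a zero divisor still throws if it sits in a set lane (the eqz.and(m).anyTrue() check), but zeroes in unset lanes are patched away with lanewise(NOT, eqz) so the hardware divide cannot trap. That makes the mask itself the idiomatic guard against division by zero. A sketch, where safeDiv is a hypothetical helper:

    import jdk.incubator.vector.*;

    public class MaskedDivDemo {
        static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_128;

        // Divides a by b lane-wise, but only where b is non-zero; lanes
        // with a zero divisor keep a's value instead of throwing
        // ArithmeticException.
        static ByteVector safeDiv(ByteVector a, ByteVector b) {
            VectorMask<Byte> nonZero = b.compare(VectorOperators.NE, (byte) 0);
            return a.lanewise(VectorOperators.DIV, b, nonZero);
        }
    }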
+ that = that.lanewise(NOT); + op = AND; + } else if (op == DIV) { + VectorMask eqz = that.eq((byte)0); + if (eqz.and(m).anyTrue()) { + throw that.divZeroException(); + } + // suppress div/0 exceptions in unset lanes + that = that.lanewise(NOT, eqz); } - // suppress div/0 exceptions in unset lanes - that = that.lanewise(NOT, eqz); - return blend(lanewise(DIV, that), m); } - return blend(lanewise(op, v), m); + + int opc = opCode(op); + return VectorSupport.binaryOp( + opc, getClass(), maskClass, byte.class, length(), + this, that, m, + BIN_IMPL.find(op, opc, ByteVector::binaryOperations)); + } + + private static final + ImplCache>> + BIN_IMPL = new ImplCache<>(Binary.class, ByteVector.class); + + private static BinaryOperation> binaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (byte)(a + b)); + case VECTOR_OP_SUB: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (byte)(a - b)); + case VECTOR_OP_MUL: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (byte)(a * b)); + case VECTOR_OP_DIV: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (byte)(a / b)); + case VECTOR_OP_MAX: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (byte)Math.max(a, b)); + case VECTOR_OP_MIN: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (byte)Math.min(a, b)); + case VECTOR_OP_AND: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (byte)(a & b)); + case VECTOR_OP_OR: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (byte)(a | b)); + case VECTOR_OP_XOR: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (byte)(a ^ b)); + case VECTOR_OP_LSHIFT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> (byte)(a << n)); + case VECTOR_OP_RSHIFT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> (byte)(a >> n)); + case VECTOR_OP_URSHIFT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> (byte)((a & LSHR_SETUP_MASK) >>> n)); + case VECTOR_OP_LROTATE: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n)); + case VECTOR_OP_RROTATE: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n)); + default: return null; + } } + // FIXME: Maybe all of the public final methods in this file (the // simple ones that just call lanewise) should be pushed down to // the X-VectorBits template. 
They can't optimize properly at @@ -747,7 +831,13 @@ public abstract class ByteVector extends AbstractVector { ByteVector lanewise(VectorOperators.Binary op, byte e, VectorMask m) { - return blend(lanewise(op, e), m); + if (opKind(op, VO_SHIFT) && (byte)(int)e == e) { + return lanewiseShift(op, (int) e, m); + } + if (op == AND_NOT) { + op = AND; e = (byte) ~e; + } + return lanewise(op, broadcast(e), m); } /** @@ -767,8 +857,7 @@ public abstract class ByteVector extends AbstractVector { byte e1 = (byte) e; if ((long)e1 != e // allow shift ops to clip down their int parameters - && !(opKind(op, VO_SHIFT) && (int)e1 == e) - ) { + && !(opKind(op, VO_SHIFT) && (int)e1 == e)) { vspecies().checkValue(e); // for exception } return lanewise(op, e1); @@ -788,7 +877,13 @@ public abstract class ByteVector extends AbstractVector { public final ByteVector lanewise(VectorOperators.Binary op, long e, VectorMask m) { - return blend(lanewise(op, e), m); + byte e1 = (byte) e; + if ((long)e1 != e + // allow shift ops to clip down their int parameters + && !(opKind(op, VO_SHIFT) && (int)e1 == e)) { + vspecies().checkValue(e); // for exception + } + return lanewise(op, e1, m); } /*package-private*/ @@ -805,27 +900,52 @@ public abstract class ByteVector extends AbstractVector { e &= SHIFT_MASK; int opc = opCode(op); return VectorSupport.broadcastInt( - opc, getClass(), byte.class, length(), - this, e, - BIN_INT_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_LSHIFT: return (v, n) -> - v.uOp((i, a) -> (byte)(a << n)); - case VECTOR_OP_RSHIFT: return (v, n) -> - v.uOp((i, a) -> (byte)(a >> n)); - case VECTOR_OP_URSHIFT: return (v, n) -> - v.uOp((i, a) -> (byte)((a & LSHR_SETUP_MASK) >>> n)); - case VECTOR_OP_LROTATE: return (v, n) -> - v.uOp((i, a) -> rotateLeft(a, (int)n)); - case VECTOR_OP_RROTATE: return (v, n) -> - v.uOp((i, a) -> rotateRight(a, (int)n)); - default: return null; - }})); + opc, getClass(), null, byte.class, length(), + this, e, null, + BIN_INT_IMPL.find(op, opc, ByteVector::broadcastIntOperations)); } + + /*package-private*/ + abstract ByteVector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m); + + /*package-private*/ + @ForceInline + final ByteVector + lanewiseShiftTemplate(VectorOperators.Binary op, + Class> maskClass, + int e, VectorMask m) { + m.check(maskClass, this); + assert(opKind(op, VO_SHIFT)); + // As per shift specification for Java, mask the shift count. + e &= SHIFT_MASK; + int opc = opCode(op); + return VectorSupport.broadcastInt( + opc, getClass(), maskClass, byte.class, length(), + this, e, m, + BIN_INT_IMPL.find(op, opc, ByteVector::broadcastIntOperations)); + } + private static final - ImplCache> BIN_INT_IMPL + ImplCache>> BIN_INT_IMPL = new ImplCache<>(Binary.class, ByteVector.class); + private static VectorBroadcastIntOp> broadcastIntOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_LSHIFT: return (v, n, m) -> + v.uOp(m, (i, a) -> (byte)(a << n)); + case VECTOR_OP_RSHIFT: return (v, n, m) -> + v.uOp(m, (i, a) -> (byte)(a >> n)); + case VECTOR_OP_URSHIFT: return (v, n, m) -> + v.uOp(m, (i, a) -> (byte)((a & LSHR_SETUP_MASK) >>> n)); + case VECTOR_OP_LROTATE: return (v, n, m) -> + v.uOp(m, (i, a) -> rotateLeft(a, (int)n)); + case VECTOR_OP_RROTATE: return (v, n, m) -> + v.uOp(m, (i, a) -> rotateRight(a, (int)n)); + default: return null; + } + } + // As per shift specification for Java, mask the shift count. // We mask 0X3F (long), 0X1F (int), 0x0F (short), 0x7 (byte). 
// The latter two maskings go beyond the JLS, but seem reasonable @@ -878,16 +998,10 @@ public abstract class ByteVector extends AbstractVector { } int opc = opCode(op); return VectorSupport.ternaryOp( - opc, getClass(), byte.class, length(), - this, that, tother, - TERN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - default: return null; - }})); + opc, getClass(), null, byte.class, length(), + this, that, tother, null, + TERN_IMPL.find(op, opc, ByteVector::ternaryOperations)); } - private static final - ImplCache> TERN_IMPL - = new ImplCache<>(Ternary.class, ByteVector.class); /** * {@inheritDoc} @@ -895,13 +1009,48 @@ public abstract class ByteVector extends AbstractVector { * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask) * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask) */ - @ForceInline - public final + @Override + public abstract ByteVector lanewise(VectorOperators.Ternary op, Vector v1, Vector v2, - VectorMask m) { - return blend(lanewise(op, v1, v2), m); + VectorMask m); + @ForceInline + final + ByteVector lanewiseTemplate(VectorOperators.Ternary op, + Class> maskClass, + Vector v1, + Vector v2, + VectorMask m) { + ByteVector that = (ByteVector) v1; + ByteVector tother = (ByteVector) v2; + // It's a word: https://www.dictionary.com/browse/tother + // See also Chapter 11 of Dickens, Our Mutual Friend: + // "Totherest Governor," replied Mr Riderhood... + that.check(this); + tother.check(this); + m.check(maskClass, this); + + if (op == BITWISE_BLEND) { + // FIXME: Support this in the JIT. + that = this.lanewise(XOR, that).lanewise(AND, tother); + return this.lanewise(XOR, that, m); + } + int opc = opCode(op); + return VectorSupport.ternaryOp( + opc, getClass(), maskClass, byte.class, length(), + this, that, tother, m, + TERN_IMPL.find(op, opc, ByteVector::ternaryOperations)); + } + + private static final + ImplCache>> + TERN_IMPL = new ImplCache<>(Ternary.class, ByteVector.class); + + private static TernaryOperation> ternaryOperations(int opc_) { + switch (opc_) { + default: return null; + } } /** @@ -958,7 +1107,7 @@ public abstract class ByteVector extends AbstractVector { byte e1, byte e2, VectorMask m) { - return blend(lanewise(op, e1, e2), m); + return lanewise(op, broadcast(e1), broadcast(e2), m); } /** @@ -1016,7 +1165,7 @@ public abstract class ByteVector extends AbstractVector { Vector v1, byte e2, VectorMask m) { - return blend(lanewise(op, v1, e2), m); + return lanewise(op, v1, broadcast(e2), m); } /** @@ -1073,7 +1222,7 @@ public abstract class ByteVector extends AbstractVector { byte e1, Vector v2, VectorMask m) { - return blend(lanewise(op, e1, v2), m); + return lanewise(op, broadcast(e1), v2, m); } // (Thus endeth the Great and Mighty Ternary Ogdoad.) 
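In the masked ternary template only BITWISE_BLEND needs special handling: it is decomposed as a ^ ((a ^ b) & sel), so just the final XOR has to carry the mask. From the caller's side this stays a single masked lanewise call. A short sketch with a hypothetical helper:

    import jdk.incubator.vector.*;

    public class MaskedBlendDemo {
        static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_128;

        // Bitwise-blends b into a under selector sel, but only in lanes
        // where m is set; lanes with m unset keep a's value entirely.
        static ByteVector blendBits(ByteVector a, ByteVector b,
                                    ByteVector sel, VectorMask<Byte> m) {
            return a.lanewise(VectorOperators.BITWISE_BLEND, b, sel, m);
        }
    }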
@@ -1745,15 +1894,13 @@ public abstract class ByteVector extends AbstractVector { final > M compareTemplate(Class maskType, Comparison op, Vector v) { - Objects.requireNonNull(v); - ByteSpecies vsp = vspecies(); ByteVector that = (ByteVector) v; that.check(this); int opc = opCode(op); return VectorSupport.compare( opc, getClass(), maskType, byte.class, length(), - this, that, - (cond, v0, v1) -> { + this, that, null, + (cond, v0, v1, m1) -> { AbstractMask m = v0.bTest(cond, v1, (cond_, i, a, b) -> compareWithOp(cond, a, b)); @@ -1763,6 +1910,28 @@ public abstract class ByteVector extends AbstractVector { }); } + /*package-private*/ + @ForceInline + final + > + M compareTemplate(Class maskType, Comparison op, Vector v, M m) { + ByteVector that = (ByteVector) v; + that.check(this); + m.check(maskType, this); + int opc = opCode(op); + return VectorSupport.compare( + opc, getClass(), maskType, byte.class, length(), + this, that, m, + (cond, v0, v1, m1) -> { + AbstractMask cmpM + = v0.bTest(cond, v1, (cond_, i, a, b) + -> compareWithOp(cond, a, b)); + @SuppressWarnings("unchecked") + M m2 = (M) cmpM.and(m1); + return m2; + }); + } + @ForceInline private static boolean compareWithOp(int cond, byte a, byte b) { return switch (cond) { @@ -1780,18 +1949,6 @@ public abstract class ByteVector extends AbstractVector { }; } - /** - * {@inheritDoc} - */ - @Override - @ForceInline - public final - VectorMask compare(VectorOperators.Comparison op, - Vector v, - VectorMask m) { - return compare(op, v).and(m); - } - /** * Tests this vector by comparing it with an input scalar, * according to the given comparison operation. @@ -1850,7 +2007,7 @@ public abstract class ByteVector extends AbstractVector { public final VectorMask compare(VectorOperators.Comparison op, byte e, VectorMask m) { - return compare(op, e).and(m); + return compare(op, broadcast(e), m); } /** @@ -2101,9 +2258,9 @@ public abstract class ByteVector extends AbstractVector { ByteVector rearrangeTemplate(Class shuffletype, S shuffle) { shuffle.checkIndexes(); return VectorSupport.rearrangeOp( - getClass(), shuffletype, byte.class, length(), - this, shuffle, - (v1, s_) -> v1.uOp((i, a) -> { + getClass(), shuffletype, null, byte.class, length(), + this, shuffle, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); return v1.lane(ei); })); @@ -2120,24 +2277,25 @@ public abstract class ByteVector extends AbstractVector { /*package-private*/ @ForceInline final - > + , M extends VectorMask> ByteVector rearrangeTemplate(Class shuffletype, + Class masktype, S shuffle, - VectorMask m) { - ByteVector unmasked = - VectorSupport.rearrangeOp( - getClass(), shuffletype, byte.class, length(), - this, shuffle, - (v1, s_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); - return ei < 0 ? 0 : v1.lane(ei); - })); + M m) { + + m.check(masktype, this); VectorMask valid = shuffle.laneIsValid(); if (m.andNot(valid).anyTrue()) { shuffle.checkIndexes(); throw new AssertionError(); } - return broadcast((byte)0).blend(unmasked, m); + return VectorSupport.rearrangeOp( + getClass(), shuffletype, masktype, byte.class, length(), + this, shuffle, m, + (v1, s_, m_) -> v1.uOp((i, a) -> { + int ei = s_.laneSource(i); + return ei < 0 || !m_.laneIsSet(i) ? 
0 : v1.lane(ei); + })); } /** @@ -2160,17 +2318,17 @@ public abstract class ByteVector extends AbstractVector { S ws = (S) shuffle.wrapIndexes(); ByteVector r0 = VectorSupport.rearrangeOp( - getClass(), shuffletype, byte.class, length(), - this, ws, - (v0, s_) -> v0.uOp((i, a) -> { + getClass(), shuffletype, null, byte.class, length(), + this, ws, null, + (v0, s_, m_) -> v0.uOp((i, a) -> { int ei = s_.laneSource(i); return v0.lane(ei); })); ByteVector r1 = VectorSupport.rearrangeOp( - getClass(), shuffletype, byte.class, length(), - v, ws, - (v1, s_) -> v1.uOp((i, a) -> { + getClass(), shuffletype, null, byte.class, length(), + v, ws, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); return v1.lane(ei); })); @@ -2433,9 +2591,18 @@ public abstract class ByteVector extends AbstractVector { @ForceInline final byte reduceLanesTemplate(VectorOperators.Associative op, + Class> maskClass, VectorMask m) { - ByteVector v = reduceIdentityVector(op).blend(this, m); - return v.reduceLanesTemplate(op); + m.check(maskClass, this); + if (op == FIRST_NONZERO) { + ByteVector v = reduceIdentityVector(op).blend(this, m); + return v.reduceLanesTemplate(op); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), maskClass, byte.class, length(), + this, m, + REDUCE_IMPL.find(op, opc, ByteVector::reductionOperations))); } /*package-private*/ @@ -2450,30 +2617,34 @@ public abstract class ByteVector extends AbstractVector { } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), byte.class, length(), - this, - REDUCE_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return v -> - toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a + b))); - case VECTOR_OP_MUL: return v -> - toBits(v.rOp((byte)1, (i, a, b) -> (byte)(a * b))); - case VECTOR_OP_MIN: return v -> - toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (byte) Math.min(a, b))); - case VECTOR_OP_MAX: return v -> - toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (byte) Math.max(a, b))); - case VECTOR_OP_AND: return v -> - toBits(v.rOp((byte)-1, (i, a, b) -> (byte)(a & b))); - case VECTOR_OP_OR: return v -> - toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a | b))); - case VECTOR_OP_XOR: return v -> - toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a ^ b))); - default: return null; - }}))); + opc, getClass(), null, byte.class, length(), + this, null, + REDUCE_IMPL.find(op, opc, ByteVector::reductionOperations))); } + private static final - ImplCache> REDUCE_IMPL - = new ImplCache<>(Associative.class, ByteVector.class); + ImplCache>> + REDUCE_IMPL = new ImplCache<>(Associative.class, ByteVector.class); + + private static ReductionOperation> reductionOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a + b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp((byte)1, m, (i, a, b) -> (byte)(a * b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (byte) Math.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (byte) Math.max(a, b))); + case VECTOR_OP_AND: return (v, m) -> + toBits(v.rOp((byte)-1, m, (i, a, b) -> (byte)(a & b))); + case VECTOR_OP_OR: return (v, m) -> + toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a | b))); + case VECTOR_OP_XOR: return (v, m) -> + toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a ^ b))); + default: return null; + } + } private @ForceInline @@ -2699,9 +2870,7 @@ public abstract class ByteVector extends 
AbstractVector { VectorMask m) { ByteSpecies vsp = (ByteSpecies) species; if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) { - ByteVector zero = vsp.zero(); - ByteVector v = zero.fromByteArray0(a, offset); - return zero.blend(v.maybeSwap(bo), m); + return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo); } // FIXME: optimize @@ -2763,8 +2932,7 @@ public abstract class ByteVector extends AbstractVector { VectorMask m) { ByteSpecies vsp = (ByteSpecies) species; if (offset >= 0 && offset <= (a.length - species.length())) { - ByteVector zero = vsp.zero(); - return zero.blend(zero.fromArray0(a, offset), m); + return vsp.dummyVector().fromArray0(a, offset, m); } // FIXME: optimize @@ -2921,7 +3089,7 @@ public abstract class ByteVector extends AbstractVector { ByteSpecies vsp = (ByteSpecies) species; if (offset >= 0 && offset <= (a.length - species.length())) { ByteVector zero = vsp.zero(); - return zero.blend(zero.fromBooleanArray0(a, offset), m); + return vsp.dummyVector().fromBooleanArray0(a, offset, m); } // FIXME: optimize @@ -3099,9 +3267,7 @@ public abstract class ByteVector extends AbstractVector { VectorMask m) { ByteSpecies vsp = (ByteSpecies) species; if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) { - ByteVector zero = vsp.zero(); - ByteVector v = zero.fromByteBuffer0(bb, offset); - return zero.blend(v.maybeSwap(bo), m); + return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo); } // FIXME: optimize @@ -3173,10 +3339,9 @@ public abstract class ByteVector extends AbstractVector { if (m.allTrue()) { intoArray(a, offset); } else { - // FIXME: optimize ByteSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 1, a.length); - stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v); + intoArray0(a, offset, m); } } @@ -3329,10 +3494,9 @@ public abstract class ByteVector extends AbstractVector { if (m.allTrue()) { intoBooleanArray(a, offset); } else { - // FIXME: optimize ByteSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 1, a.length); - stOp(a, offset, m, (arr, off, i, e) -> arr[off+i] = (e & 1) != 0); + intoBooleanArray0(a, offset, m); } } @@ -3451,12 +3615,9 @@ public abstract class ByteVector extends AbstractVector { if (m.allTrue()) { intoByteArray(a, offset, bo); } else { - // FIXME: optimize ByteSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 1, a.length); - ByteBuffer wb = wrapper(a, bo); - this.stOp(wb, offset, m, - (wb_, o, i, e) -> wb_.put(o + i * 1, e)); + maybeSwap(bo).intoByteArray0(a, offset, m); } } @@ -3468,7 +3629,7 @@ public abstract class ByteVector extends AbstractVector { public final void intoByteBuffer(ByteBuffer bb, int offset, ByteOrder bo) { - if (bb.isReadOnly()) { + if (ScopedMemoryAccess.isReadOnly(bb)) { throw new ReadOnlyBufferException(); } offset = checkFromIndexSize(offset, byteSize(), bb.limit()); @@ -3487,15 +3648,12 @@ public abstract class ByteVector extends AbstractVector { if (m.allTrue()) { intoByteBuffer(bb, offset, bo); } else { - // FIXME: optimize if (bb.isReadOnly()) { throw new ReadOnlyBufferException(); } ByteSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit()); - ByteBuffer wb = wrapper(bb, bo); - this.stOp(wb, offset, m, - (wb_, o, i, e) -> wb_.put(o + i * 1, e)); + maybeSwap(bo).intoByteBuffer0(bb, offset, m); } } @@ -3533,6 +3691,24 @@ public abstract class ByteVector extends AbstractVector { (arr_, off_, i) -> arr_[off_ + i])); } + /*package-private*/ + abstract + ByteVector fromArray0(byte[] a, int 
offset, VectorMask m); + @ForceInline + final + > + ByteVector fromArray0Template(Class maskClass, byte[] a, int offset, M m) { + m.check(species()); + ByteSpecies vsp = vspecies(); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> s.ldOp(arr, off, vm, + (arr_, off_, i) -> arr_[off_ + i])); + } + + /*package-private*/ abstract @@ -3549,6 +3725,23 @@ public abstract class ByteVector extends AbstractVector { (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0))); } + /*package-private*/ + abstract + ByteVector fromBooleanArray0(boolean[] a, int offset, VectorMask m); + @ForceInline + final + > + ByteVector fromBooleanArray0Template(Class maskClass, boolean[] a, int offset, M m) { + m.check(species()); + ByteSpecies vsp = vspecies(); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, booleanArrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> s.ldOp(arr, off, vm, + (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0))); + } + @Override abstract ByteVector fromByteArray0(byte[] a, int offset); @@ -3567,6 +3760,25 @@ public abstract class ByteVector extends AbstractVector { }); } + abstract + ByteVector fromByteArray0(byte[] a, int offset, VectorMask m); + @ForceInline + final + > + ByteVector fromByteArray0Template(Class maskClass, byte[] a, int offset, M m) { + ByteSpecies vsp = vspecies(); + m.check(vsp); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + return s.ldOp(wb, off, vm, + (wb_, o, i) -> wb_.get(o + i * 1)); + }); + } + abstract ByteVector fromByteBuffer0(ByteBuffer bb, int offset); @ForceInline @@ -3583,6 +3795,24 @@ public abstract class ByteVector extends AbstractVector { }); } + abstract + ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m); + @ForceInline + final + > + ByteVector fromByteBuffer0Template(Class maskClass, ByteBuffer bb, int offset, M m) { + ByteSpecies vsp = vspecies(); + m.check(vsp); + return ScopedMemoryAccess.loadFromByteBufferMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + bb, offset, m, vsp, + (buf, off, s, vm) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + return s.ldOp(wb, off, vm, + (wb_, o, i) -> wb_.get(o + i * 1)); + }); + } + // Unchecked storing operations in native byte order. // Caller is responsible for applying index checks, masking, and // byte swapping. 
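These fromArray0/fromBooleanArray0/fromByteArray0/fromByteBuffer0 masked templates are what the public masked loads bottom out in: unset lanes come back as zero and are not read from memory, which is what makes the classic tail-loop idiom safe. A sketch; xorAll is a hypothetical example and also exercises the masked reduction path added in this change:

    import jdk.incubator.vector.*;

    public class MaskedLoadDemo {
        static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_128;

        // XOR-reduces an array of any length; the final partial vector is
        // loaded under a mask from indexInRange, so no lane reads past
        // a.length and unset lanes contribute the XOR identity (zero).
        static byte xorAll(byte[] a) {
            byte acc = 0;
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                VectorMask<Byte> m = SPECIES.indexInRange(i, a.length);
                ByteVector v = ByteVector.fromArray(SPECIES, a, i, m);
                acc = (byte) (acc ^ v.reduceLanes(VectorOperators.XOR, m));
            }
            return acc;
        }
    }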
@@ -3602,6 +3832,42 @@ public abstract class ByteVector extends AbstractVector { (arr_, off_, i, e) -> arr_[off_+i] = e)); } + abstract + void intoArray0(byte[] a, int offset, VectorMask m); + @ForceInline + final + > + void intoArray0Template(Class maskClass, byte[] a, int offset, M m) { + m.check(species()); + ByteSpecies vsp = vspecies(); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> arr_[off_ + i] = e)); + } + + + abstract + void intoBooleanArray0(boolean[] a, int offset, VectorMask m); + @ForceInline + final + > + void intoBooleanArray0Template(Class maskClass, boolean[] a, int offset, M m) { + m.check(species()); + ByteSpecies vsp = vspecies(); + ByteVector normalized = this.and((byte) 1); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, booleanArrayAddress(a, offset), + normalized, m, a, offset, + (arr, off, v, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0)); + } + abstract void intoByteArray0(byte[] a, int offset); @ForceInline @@ -3619,6 +3885,25 @@ public abstract class ByteVector extends AbstractVector { }); } + abstract + void intoByteArray0(byte[] a, int offset, VectorMask m); + @ForceInline + final + > + void intoByteArray0Template(Class maskClass, byte[] a, int offset, M m) { + ByteSpecies vsp = vspecies(); + m.check(vsp); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + v.stOp(wb, off, vm, + (tb_, o, i, e) -> tb_.put(o + i * 1, e)); + }); + } + @ForceInline final void intoByteBuffer0(ByteBuffer bb, int offset) { @@ -3633,6 +3918,25 @@ public abstract class ByteVector extends AbstractVector { }); } + abstract + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m); + @ForceInline + final + > + void intoByteBuffer0Template(Class maskClass, ByteBuffer bb, int offset, M m) { + ByteSpecies vsp = vspecies(); + m.check(vsp); + ScopedMemoryAccess.storeIntoByteBufferMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + this, m, bb, offset, + (buf, off, v, vm) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + v.stOp(wb, off, vm, + (wb_, o, i, e) -> wb_.put(o + i * 1, e)); + }); + } + + // End of low-level memory operations. 
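The store side mirrors this: intoArray0/intoBooleanArray0/intoByteArray0/intoByteBuffer0 with a mask write only the set lanes (note intoBooleanArray0Template normalizing with and((byte) 1) first), replacing the scalar stOp fallbacks the public methods used before. A tail-store sketch with a hypothetical helper; dst is assumed at least as long as src:

    import jdk.incubator.vector.*;

    public class MaskedStoreDemo {
        static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_128;

        // Copies src into dst; the last partial vector is loaded and
        // stored under the same mask, so no lane is touched past
        // src.length.
        static void copy(byte[] src, byte[] dst) {
            for (int i = 0; i < src.length; i += SPECIES.length()) {
                VectorMask<Byte> m = SPECIES.indexInRange(i, src.length);
                ByteVector.fromArray(SPECIES, src, i, m).intoArray(dst, i, m);
            }
        }
    }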
private static @@ -3959,7 +4263,7 @@ public abstract class ByteVector extends AbstractVector { /*package-private*/ @ForceInline ByteVector ldOp(M memory, int offset, - AbstractMask m, + VectorMask m, FLdOp f) { return dummyVector().ldOp(memory, offset, m, f); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java index 6ac3f40f80bbf9471fb85bfac5db881fde4e28cd..62f2eb5eff586a3f8f64c315ab1baf5a930b6862 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java @@ -236,8 +236,8 @@ final class Double128Vector extends DoubleVector { @ForceInline final @Override - double rOp(double v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + double rOp(double v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,22 +273,42 @@ final class Double128Vector extends DoubleVector { return (Double128Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Double128Vector lanewise(Unary op, VectorMask m) { + return (Double128Vector) super.lanewiseTemplate(op, Double128Mask.class, (Double128Mask) m); // specialize + } + @Override @ForceInline public Double128Vector lanewise(Binary op, Vector v) { return (Double128Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Double128Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Double128Vector) super.lanewiseTemplate(op, Double128Mask.class, v, (Double128Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Double128Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Double128Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Double128Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Double128Vector) super.lanewiseTemplate(op, Double128Mask.class, v1, v2, (Double128Mask) m); // specialize + } + @Override @ForceInline public final @@ -308,7 +328,7 @@ final class Double128Vector extends DoubleVector { @ForceInline public final double reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Double128Mask.class, (Double128Mask) m); // specialized } @Override @@ -321,7 +341,7 @@ final class Double128Vector extends DoubleVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Double128Mask.class, (Double128Mask) m); // specialized } @ForceInline @@ -357,6 +377,13 @@ final class Double128Vector extends DoubleVector { return super.compareTemplate(Double128Mask.class, op, s); // specialize } + @Override + @ForceInline + public final Double128Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Double128Mask.class, op, v, (Double128Mask) m); + } + + @Override @ForceInline public Double128Vector blend(Vector v, VectorMask m) { @@ -413,6 +440,7 @@ final class Double128Vector extends DoubleVector { VectorMask m) { return (Double128Vector) super.rearrangeTemplate(Double128Shuffle.class, + Double128Mask.class, (Double128Shuffle) 
shuffle, (Double128Mask) m); // specialize } @@ -580,16 +608,12 @@ final class Double128Vector extends DoubleVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Double128Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -615,9 +639,9 @@ final class Double128Vector extends DoubleVector { public Double128Mask and(VectorMask mask) { Objects.requireNonNull(mask); Double128Mask m = (Double128Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Double128Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Double128Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -625,9 +649,9 @@ final class Double128Vector extends DoubleVector { public Double128Mask or(VectorMask mask) { Objects.requireNonNull(mask); Double128Mask m = (Double128Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Double128Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Double128Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -635,9 +659,9 @@ final class Double128Vector extends DoubleVector { Double128Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Double128Mask m = (Double128Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Double128Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Double128Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -645,22 +669,32 @@ final class Double128Vector extends DoubleVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Double128Mask.class, long.class, VLENGTH, this, - (m) -> trueCountHelper(((Double128Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Double128Mask.class, long.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Double128Mask.class, long.class, VLENGTH, this, - (m) -> firstTrueHelper(((Double128Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Double128Mask.class, long.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Double128Mask.class, long.class, VLENGTH, this, - (m) -> lastTrueHelper(((Double128Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Double128Mask.class, 
long.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Double128Mask.class, long.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -771,6 +805,20 @@ final class Double128Vector extends DoubleVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + DoubleVector fromArray0(double[] a, int offset, VectorMask m) { + return super.fromArray0Template(Double128Mask.class, a, offset, (Double128Mask) m); // specialize + } + + @ForceInline + @Override + final + DoubleVector fromArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(Double128Mask.class, a, offset, indexMap, mapOffset, (Double128Mask) m); + } + @ForceInline @@ -780,6 +828,13 @@ final class Double128Vector extends DoubleVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + DoubleVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Double128Mask.class, a, offset, (Double128Mask) m); // specialize + } + @ForceInline @Override final @@ -787,6 +842,13 @@ final class Double128Vector extends DoubleVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Double128Mask.class, bb, offset, (Double128Mask) m); // specialize + } + @ForceInline @Override final @@ -794,6 +856,21 @@ final class Double128Vector extends DoubleVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(double[] a, int offset, VectorMask m) { + super.intoArray0Template(Double128Mask.class, a, offset, (Double128Mask) m); + } + + @ForceInline + @Override + final + void intoArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(Double128Mask.class, a, offset, indexMap, mapOffset, (Double128Mask) m); + } + + @ForceInline @Override final @@ -801,6 +878,21 @@ final class Double128Vector extends DoubleVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Double128Mask.class, a, offset, (Double128Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Double128Mask.class, bb, offset, (Double128Mask) m); + } + + // End of specialized low-level memory operations. 
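The new toLong() packs the lane flags into a long (lane 0 in bit 0), guarded by the length() > Long.SIZE check, and trueCount/firstTrue/lastTrue now narrow the long-returning intrinsic with an explicit (int) cast. A round-trip sketch on the two-lane 128-bit double species:

    import jdk.incubator.vector.*;

    public class MaskBitsDemo {
        static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_128;

        public static void main(String[] args) {
            // Two lanes at 128 bits; set lane 0 only.
            VectorMask<Double> m = VectorMask.fromLong(SPECIES, 0b01);
            System.out.println(m.toLong());    // 1
            System.out.println(m.trueCount()); // 1
            System.out.println(m.firstTrue()); // 0
            System.out.println(m.lastTrue());  // 0
        }
    }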
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java index 194668361aa402b5ec3da040379618f1feeabeda..547684af87d29652d788386febe4e02e35a6f490 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java @@ -236,8 +236,8 @@ final class Double256Vector extends DoubleVector { @ForceInline final @Override - double rOp(double v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + double rOp(double v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,22 +273,42 @@ final class Double256Vector extends DoubleVector { return (Double256Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Double256Vector lanewise(Unary op, VectorMask m) { + return (Double256Vector) super.lanewiseTemplate(op, Double256Mask.class, (Double256Mask) m); // specialize + } + @Override @ForceInline public Double256Vector lanewise(Binary op, Vector v) { return (Double256Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Double256Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Double256Vector) super.lanewiseTemplate(op, Double256Mask.class, v, (Double256Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Double256Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Double256Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Double256Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Double256Vector) super.lanewiseTemplate(op, Double256Mask.class, v1, v2, (Double256Mask) m); // specialize + } + @Override @ForceInline public final @@ -308,7 +328,7 @@ final class Double256Vector extends DoubleVector { @ForceInline public final double reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Double256Mask.class, (Double256Mask) m); // specialized } @Override @@ -321,7 +341,7 @@ final class Double256Vector extends DoubleVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Double256Mask.class, (Double256Mask) m); // specialized } @ForceInline @@ -357,6 +377,13 @@ final class Double256Vector extends DoubleVector { return super.compareTemplate(Double256Mask.class, op, s); // specialize } + @Override + @ForceInline + public final Double256Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Double256Mask.class, op, v, (Double256Mask) m); + } + + @Override @ForceInline public Double256Vector blend(Vector v, VectorMask m) { @@ -413,6 +440,7 @@ final class Double256Vector extends DoubleVector { VectorMask m) { return (Double256Vector) super.rearrangeTemplate(Double256Shuffle.class, + Double256Mask.class, (Double256Shuffle) shuffle, (Double256Mask) m); // specialize } @@ -584,16 +612,12 @@ final class Double256Vector extends DoubleVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new 
IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Double256Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -619,9 +643,9 @@ final class Double256Vector extends DoubleVector { public Double256Mask and(VectorMask mask) { Objects.requireNonNull(mask); Double256Mask m = (Double256Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Double256Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Double256Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -629,9 +653,9 @@ final class Double256Vector extends DoubleVector { public Double256Mask or(VectorMask mask) { Objects.requireNonNull(mask); Double256Mask m = (Double256Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Double256Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Double256Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -639,9 +663,9 @@ final class Double256Vector extends DoubleVector { Double256Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Double256Mask m = (Double256Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Double256Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Double256Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -649,22 +673,32 @@ final class Double256Vector extends DoubleVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Double256Mask.class, long.class, VLENGTH, this, - (m) -> trueCountHelper(((Double256Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Double256Mask.class, long.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Double256Mask.class, long.class, VLENGTH, this, - (m) -> firstTrueHelper(((Double256Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Double256Mask.class, long.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Double256Mask.class, long.class, VLENGTH, this, - (m) -> lastTrueHelper(((Double256Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Double256Mask.class, long.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes 
for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Double256Mask.class, long.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -775,6 +809,20 @@ final class Double256Vector extends DoubleVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + DoubleVector fromArray0(double[] a, int offset, VectorMask m) { + return super.fromArray0Template(Double256Mask.class, a, offset, (Double256Mask) m); // specialize + } + + @ForceInline + @Override + final + DoubleVector fromArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(Double256Mask.class, a, offset, indexMap, mapOffset, (Double256Mask) m); + } + @ForceInline @@ -784,6 +832,13 @@ final class Double256Vector extends DoubleVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + DoubleVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Double256Mask.class, a, offset, (Double256Mask) m); // specialize + } + @ForceInline @Override final @@ -791,6 +846,13 @@ final class Double256Vector extends DoubleVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Double256Mask.class, bb, offset, (Double256Mask) m); // specialize + } + @ForceInline @Override final @@ -798,6 +860,21 @@ final class Double256Vector extends DoubleVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(double[] a, int offset, VectorMask m) { + super.intoArray0Template(Double256Mask.class, a, offset, (Double256Mask) m); + } + + @ForceInline + @Override + final + void intoArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(Double256Mask.class, a, offset, indexMap, mapOffset, (Double256Mask) m); + } + + @ForceInline @Override final @@ -805,6 +882,21 @@ final class Double256Vector extends DoubleVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Double256Mask.class, a, offset, (Double256Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Double256Mask.class, bb, offset, (Double256Mask) m); + } + + // End of specialized low-level memory operations. 
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java index 2b090b9cefdb6feb7eb7e2bbce5f3e8823968ff3..bacc0cde0881f2a5dc4e8c7fd9395b07fb52012b 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java @@ -236,8 +236,8 @@ final class Double512Vector extends DoubleVector { @ForceInline final @Override - double rOp(double v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + double rOp(double v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,22 +273,42 @@ final class Double512Vector extends DoubleVector { return (Double512Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Double512Vector lanewise(Unary op, VectorMask m) { + return (Double512Vector) super.lanewiseTemplate(op, Double512Mask.class, (Double512Mask) m); // specialize + } + @Override @ForceInline public Double512Vector lanewise(Binary op, Vector v) { return (Double512Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Double512Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Double512Vector) super.lanewiseTemplate(op, Double512Mask.class, v, (Double512Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Double512Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Double512Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Double512Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Double512Vector) super.lanewiseTemplate(op, Double512Mask.class, v1, v2, (Double512Mask) m); // specialize + } + @Override @ForceInline public final @@ -308,7 +328,7 @@ final class Double512Vector extends DoubleVector { @ForceInline public final double reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Double512Mask.class, (Double512Mask) m); // specialized } @Override @@ -321,7 +341,7 @@ final class Double512Vector extends DoubleVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Double512Mask.class, (Double512Mask) m); // specialized } @ForceInline @@ -357,6 +377,13 @@ final class Double512Vector extends DoubleVector { return super.compareTemplate(Double512Mask.class, op, s); // specialize } + @Override + @ForceInline + public final Double512Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Double512Mask.class, op, v, (Double512Mask) m); + } + + @Override @ForceInline public Double512Vector blend(Vector v, VectorMask m) { @@ -413,6 +440,7 @@ final class Double512Vector extends DoubleVector { VectorMask m) { return (Double512Vector) super.rearrangeTemplate(Double512Shuffle.class, + Double512Mask.class, (Double512Shuffle) shuffle, (Double512Mask) m); // specialize } @@ -592,16 +620,12 @@ final class Double512Vector extends DoubleVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new 
IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Double512Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -627,9 +651,9 @@ final class Double512Vector extends DoubleVector { public Double512Mask and(VectorMask mask) { Objects.requireNonNull(mask); Double512Mask m = (Double512Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Double512Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Double512Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -637,9 +661,9 @@ final class Double512Vector extends DoubleVector { public Double512Mask or(VectorMask mask) { Objects.requireNonNull(mask); Double512Mask m = (Double512Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Double512Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Double512Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -647,9 +671,9 @@ final class Double512Vector extends DoubleVector { Double512Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Double512Mask m = (Double512Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Double512Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Double512Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -657,22 +681,32 @@ final class Double512Vector extends DoubleVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Double512Mask.class, long.class, VLENGTH, this, - (m) -> trueCountHelper(((Double512Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Double512Mask.class, long.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Double512Mask.class, long.class, VLENGTH, this, - (m) -> firstTrueHelper(((Double512Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Double512Mask.class, long.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Double512Mask.class, long.class, VLENGTH, this, - (m) -> lastTrueHelper(((Double512Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Double512Mask.class, long.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes 
for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Double512Mask.class, long.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -783,6 +817,20 @@ final class Double512Vector extends DoubleVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + DoubleVector fromArray0(double[] a, int offset, VectorMask m) { + return super.fromArray0Template(Double512Mask.class, a, offset, (Double512Mask) m); // specialize + } + + @ForceInline + @Override + final + DoubleVector fromArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(Double512Mask.class, a, offset, indexMap, mapOffset, (Double512Mask) m); + } + @ForceInline @@ -792,6 +840,13 @@ final class Double512Vector extends DoubleVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + DoubleVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Double512Mask.class, a, offset, (Double512Mask) m); // specialize + } + @ForceInline @Override final @@ -799,6 +854,13 @@ final class Double512Vector extends DoubleVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Double512Mask.class, bb, offset, (Double512Mask) m); // specialize + } + @ForceInline @Override final @@ -806,6 +868,21 @@ final class Double512Vector extends DoubleVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(double[] a, int offset, VectorMask m) { + super.intoArray0Template(Double512Mask.class, a, offset, (Double512Mask) m); + } + + @ForceInline + @Override + final + void intoArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(Double512Mask.class, a, offset, indexMap, mapOffset, (Double512Mask) m); + } + + @ForceInline @Override final @@ -813,6 +890,21 @@ final class Double512Vector extends DoubleVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Double512Mask.class, a, offset, (Double512Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Double512Mask.class, bb, offset, (Double512Mask) m); + } + + // End of specialized low-level memory operations. 
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java index 48b6d3e63ead90b4ed27f0744e751a70655afc93..29977c72c6aeaf271a2888cbc67df202db0e1aec 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java @@ -236,8 +236,8 @@ final class Double64Vector extends DoubleVector { @ForceInline final @Override - double rOp(double v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + double rOp(double v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,22 +273,42 @@ final class Double64Vector extends DoubleVector { return (Double64Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Double64Vector lanewise(Unary op, VectorMask m) { + return (Double64Vector) super.lanewiseTemplate(op, Double64Mask.class, (Double64Mask) m); // specialize + } + @Override @ForceInline public Double64Vector lanewise(Binary op, Vector v) { return (Double64Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Double64Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Double64Vector) super.lanewiseTemplate(op, Double64Mask.class, v, (Double64Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Double64Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Double64Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Double64Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Double64Vector) super.lanewiseTemplate(op, Double64Mask.class, v1, v2, (Double64Mask) m); // specialize + } + @Override @ForceInline public final @@ -308,7 +328,7 @@ final class Double64Vector extends DoubleVector { @ForceInline public final double reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Double64Mask.class, (Double64Mask) m); // specialized } @Override @@ -321,7 +341,7 @@ final class Double64Vector extends DoubleVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Double64Mask.class, (Double64Mask) m); // specialized } @ForceInline @@ -357,6 +377,13 @@ final class Double64Vector extends DoubleVector { return super.compareTemplate(Double64Mask.class, op, s); // specialize } + @Override + @ForceInline + public final Double64Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Double64Mask.class, op, v, (Double64Mask) m); + } + + @Override @ForceInline public Double64Vector blend(Vector v, VectorMask m) { @@ -413,6 +440,7 @@ final class Double64Vector extends DoubleVector { VectorMask m) { return (Double64Vector) super.rearrangeTemplate(Double64Shuffle.class, + Double64Mask.class, (Double64Shuffle) shuffle, (Double64Mask) m); // specialize } @@ -578,16 +606,12 @@ final class Double64Vector extends DoubleVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask 
length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Double64Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -613,9 +637,9 @@ final class Double64Vector extends DoubleVector { public Double64Mask and(VectorMask mask) { Objects.requireNonNull(mask); Double64Mask m = (Double64Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Double64Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Double64Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -623,9 +647,9 @@ final class Double64Vector extends DoubleVector { public Double64Mask or(VectorMask mask) { Objects.requireNonNull(mask); Double64Mask m = (Double64Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Double64Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Double64Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -633,9 +657,9 @@ final class Double64Vector extends DoubleVector { Double64Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Double64Mask m = (Double64Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Double64Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Double64Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -643,22 +667,32 @@ final class Double64Vector extends DoubleVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Double64Mask.class, long.class, VLENGTH, this, - (m) -> trueCountHelper(((Double64Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Double64Mask.class, long.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Double64Mask.class, long.class, VLENGTH, this, - (m) -> firstTrueHelper(((Double64Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Double64Mask.class, long.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Double64Mask.class, long.class, VLENGTH, this, - (m) -> lastTrueHelper(((Double64Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Double64Mask.class, long.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return 
VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Double64Mask.class, long.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -769,6 +803,20 @@ final class Double64Vector extends DoubleVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + DoubleVector fromArray0(double[] a, int offset, VectorMask m) { + return super.fromArray0Template(Double64Mask.class, a, offset, (Double64Mask) m); // specialize + } + + @ForceInline + @Override + final + DoubleVector fromArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(Double64Mask.class, a, offset, indexMap, mapOffset, (Double64Mask) m); + } + @ForceInline @@ -778,6 +826,13 @@ final class Double64Vector extends DoubleVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + DoubleVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Double64Mask.class, a, offset, (Double64Mask) m); // specialize + } + @ForceInline @Override final @@ -785,6 +840,13 @@ final class Double64Vector extends DoubleVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Double64Mask.class, bb, offset, (Double64Mask) m); // specialize + } + @ForceInline @Override final @@ -792,6 +854,21 @@ final class Double64Vector extends DoubleVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(double[] a, int offset, VectorMask m) { + super.intoArray0Template(Double64Mask.class, a, offset, (Double64Mask) m); + } + + @ForceInline + @Override + final + void intoArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(Double64Mask.class, a, offset, indexMap, mapOffset, (Double64Mask) m); + } + + @ForceInline @Override final @@ -799,6 +876,21 @@ final class Double64Vector extends DoubleVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Double64Mask.class, a, offset, (Double64Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Double64Mask.class, bb, offset, (Double64Mask) m); + } + + // End of specialized low-level memory operations. 
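Double64Vector picks up the same masked specializations plus the new toLong() mask query. A sketch of what the new entry points compute (method names here are illustrative):

import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;

class MaskQuerySketch {
    // Pack the comparison result into a bitmask; toLong() now lowers to
    // VECTOR_OP_MASK_TOLONG and, as the guard above shows, throws
    // UnsupportedOperationException for species with more than 64 lanes.
    static long negativeLaneBits(DoubleVector v) {
        VectorMask<Double> neg = v.compare(VectorOperators.LT, 0.0);
        return neg.toLong();
    }

    // Masked reduction now routes through
    // reduceLanesTemplate(op, maskClass, m) instead of blending first.
    static double sumOfNegatives(DoubleVector v) {
        VectorMask<Double> neg = v.compare(VectorOperators.LT, 0.0);
        return v.reduceLanes(VectorOperators.ADD, neg);
    }
}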
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java index b12021a123da37b2885811607e588e59d26c1c51..c9db9f93b40a9092288c6ba4e21ad5a6ff00d28f 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java @@ -236,8 +236,8 @@ final class DoubleMaxVector extends DoubleVector { @ForceInline final @Override - double rOp(double v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + double rOp(double v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,22 +273,42 @@ final class DoubleMaxVector extends DoubleVector { return (DoubleMaxVector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public DoubleMaxVector lanewise(Unary op, VectorMask m) { + return (DoubleMaxVector) super.lanewiseTemplate(op, DoubleMaxMask.class, (DoubleMaxMask) m); // specialize + } + @Override @ForceInline public DoubleMaxVector lanewise(Binary op, Vector v) { return (DoubleMaxVector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public DoubleMaxVector lanewise(Binary op, Vector v, VectorMask m) { + return (DoubleMaxVector) super.lanewiseTemplate(op, DoubleMaxMask.class, v, (DoubleMaxMask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final DoubleMaxVector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (DoubleMaxVector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + DoubleMaxVector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (DoubleMaxVector) super.lanewiseTemplate(op, DoubleMaxMask.class, v1, v2, (DoubleMaxMask) m); // specialize + } + @Override @ForceInline public final @@ -308,7 +328,7 @@ final class DoubleMaxVector extends DoubleVector { @ForceInline public final double reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, DoubleMaxMask.class, (DoubleMaxMask) m); // specialized } @Override @@ -321,7 +341,7 @@ final class DoubleMaxVector extends DoubleVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, DoubleMaxMask.class, (DoubleMaxMask) m); // specialized } @ForceInline @@ -357,6 +377,13 @@ final class DoubleMaxVector extends DoubleVector { return super.compareTemplate(DoubleMaxMask.class, op, s); // specialize } + @Override + @ForceInline + public final DoubleMaxMask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(DoubleMaxMask.class, op, v, (DoubleMaxMask) m); + } + + @Override @ForceInline public DoubleMaxVector blend(Vector v, VectorMask m) { @@ -413,6 +440,7 @@ final class DoubleMaxVector extends DoubleVector { VectorMask m) { return (DoubleMaxVector) super.rearrangeTemplate(DoubleMaxShuffle.class, + DoubleMaxMask.class, (DoubleMaxShuffle) shuffle, (DoubleMaxMask) m); // specialize } @@ -577,16 +605,12 @@ final class DoubleMaxVector extends DoubleVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new 
IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - DoubleMaxMask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -612,9 +636,9 @@ final class DoubleMaxVector extends DoubleVector { public DoubleMaxMask and(VectorMask mask) { Objects.requireNonNull(mask); DoubleMaxMask m = (DoubleMaxMask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, DoubleMaxMask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, DoubleMaxMask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -622,9 +646,9 @@ final class DoubleMaxVector extends DoubleVector { public DoubleMaxMask or(VectorMask mask) { Objects.requireNonNull(mask); DoubleMaxMask m = (DoubleMaxMask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, DoubleMaxMask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, DoubleMaxMask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -632,9 +656,9 @@ final class DoubleMaxVector extends DoubleVector { DoubleMaxMask xor(VectorMask mask) { Objects.requireNonNull(mask); DoubleMaxMask m = (DoubleMaxMask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, DoubleMaxMask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, DoubleMaxMask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -642,22 +666,32 @@ final class DoubleMaxVector extends DoubleVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, DoubleMaxMask.class, long.class, VLENGTH, this, - (m) -> trueCountHelper(((DoubleMaxMask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, DoubleMaxMask.class, long.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, DoubleMaxMask.class, long.class, VLENGTH, this, - (m) -> firstTrueHelper(((DoubleMaxMask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, DoubleMaxMask.class, long.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, DoubleMaxMask.class, long.class, VLENGTH, this, - (m) -> lastTrueHelper(((DoubleMaxMask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, DoubleMaxMask.class, long.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes 
for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, DoubleMaxMask.class, long.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -768,6 +802,20 @@ final class DoubleMaxVector extends DoubleVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + DoubleVector fromArray0(double[] a, int offset, VectorMask m) { + return super.fromArray0Template(DoubleMaxMask.class, a, offset, (DoubleMaxMask) m); // specialize + } + + @ForceInline + @Override + final + DoubleVector fromArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(DoubleMaxMask.class, a, offset, indexMap, mapOffset, (DoubleMaxMask) m); + } + @ForceInline @@ -777,6 +825,13 @@ final class DoubleMaxVector extends DoubleVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + DoubleVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(DoubleMaxMask.class, a, offset, (DoubleMaxMask) m); // specialize + } + @ForceInline @Override final @@ -784,6 +839,13 @@ final class DoubleMaxVector extends DoubleVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(DoubleMaxMask.class, bb, offset, (DoubleMaxMask) m); // specialize + } + @ForceInline @Override final @@ -791,6 +853,21 @@ final class DoubleMaxVector extends DoubleVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(double[] a, int offset, VectorMask m) { + super.intoArray0Template(DoubleMaxMask.class, a, offset, (DoubleMaxMask) m); + } + + @ForceInline + @Override + final + void intoArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(DoubleMaxMask.class, a, offset, indexMap, mapOffset, (DoubleMaxMask) m); + } + + @ForceInline @Override final @@ -798,6 +875,21 @@ final class DoubleMaxVector extends DoubleVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(DoubleMaxMask.class, a, offset, (DoubleMaxMask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(DoubleMaxMask.class, bb, offset, (DoubleMaxMask) m); + } + + // End of specialized low-level memory operations. 
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java index 4c3746b745f1e1bdbcb2d4a752f07451b6f98177..691a4def745301984b2448f9e8527c8d2cc2b961 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java @@ -29,7 +29,6 @@ import java.nio.ByteOrder; import java.nio.ReadOnlyBufferException; import java.util.Arrays; import java.util.Objects; -import java.util.function.BinaryOperator; import java.util.function.Function; import java.util.function.UnaryOperator; @@ -173,6 +172,9 @@ public abstract class DoubleVector extends AbstractVector { final DoubleVector uOpTemplate(VectorMask m, FUnOp f) { + if (m == null) { + return uOpTemplate(f); + } double[] vec = vec(); double[] res = new double[length()]; boolean[] mbits = ((AbstractMask)m).getBits(); @@ -216,6 +218,9 @@ public abstract class DoubleVector extends AbstractVector { DoubleVector bOpTemplate(Vector o, VectorMask m, FBinOp f) { + if (m == null) { + return bOpTemplate(o, f); + } double[] res = new double[length()]; double[] vec1 = this.vec(); double[] vec2 = ((DoubleVector)o).vec(); @@ -265,6 +270,9 @@ public abstract class DoubleVector extends AbstractVector { Vector o2, VectorMask m, FTriOp f) { + if (m == null) { + return tOpTemplate(o1, o2, f); + } double[] res = new double[length()]; double[] vec1 = this.vec(); double[] vec2 = ((DoubleVector)o1).vec(); @@ -280,7 +288,22 @@ public abstract class DoubleVector extends AbstractVector { /*package-private*/ abstract - double rOp(double v, FBinOp f); + double rOp(double v, VectorMask m, FBinOp f); + + @ForceInline + final + double rOpTemplate(double v, VectorMask m, FBinOp f) { + if (m == null) { + return rOpTemplate(v, f); + } + double[] vec = vec(); + boolean[] mbits = ((AbstractMask)m).getBits(); + for (int i = 0; i < vec.length; i++) { + v = mbits[i] ? 
f.apply(i, v, vec[i]) : v; + } + return v; + } + @ForceInline final double rOpTemplate(double v, FBinOp f) { @@ -540,61 +563,80 @@ public abstract class DoubleVector extends AbstractVector { } int opc = opCode(op); return VectorSupport.unaryOp( - opc, getClass(), double.class, length(), - this, - UN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_NEG: return v0 -> - v0.uOp((i, a) -> (double) -a); - case VECTOR_OP_ABS: return v0 -> - v0.uOp((i, a) -> (double) Math.abs(a)); - case VECTOR_OP_SIN: return v0 -> - v0.uOp((i, a) -> (double) Math.sin(a)); - case VECTOR_OP_COS: return v0 -> - v0.uOp((i, a) -> (double) Math.cos(a)); - case VECTOR_OP_TAN: return v0 -> - v0.uOp((i, a) -> (double) Math.tan(a)); - case VECTOR_OP_ASIN: return v0 -> - v0.uOp((i, a) -> (double) Math.asin(a)); - case VECTOR_OP_ACOS: return v0 -> - v0.uOp((i, a) -> (double) Math.acos(a)); - case VECTOR_OP_ATAN: return v0 -> - v0.uOp((i, a) -> (double) Math.atan(a)); - case VECTOR_OP_EXP: return v0 -> - v0.uOp((i, a) -> (double) Math.exp(a)); - case VECTOR_OP_LOG: return v0 -> - v0.uOp((i, a) -> (double) Math.log(a)); - case VECTOR_OP_LOG10: return v0 -> - v0.uOp((i, a) -> (double) Math.log10(a)); - case VECTOR_OP_SQRT: return v0 -> - v0.uOp((i, a) -> (double) Math.sqrt(a)); - case VECTOR_OP_CBRT: return v0 -> - v0.uOp((i, a) -> (double) Math.cbrt(a)); - case VECTOR_OP_SINH: return v0 -> - v0.uOp((i, a) -> (double) Math.sinh(a)); - case VECTOR_OP_COSH: return v0 -> - v0.uOp((i, a) -> (double) Math.cosh(a)); - case VECTOR_OP_TANH: return v0 -> - v0.uOp((i, a) -> (double) Math.tanh(a)); - case VECTOR_OP_EXPM1: return v0 -> - v0.uOp((i, a) -> (double) Math.expm1(a)); - case VECTOR_OP_LOG1P: return v0 -> - v0.uOp((i, a) -> (double) Math.log1p(a)); - default: return null; - }})); + opc, getClass(), null, double.class, length(), + this, null, + UN_IMPL.find(op, opc, DoubleVector::unaryOperations)); } - private static final - ImplCache> UN_IMPL - = new ImplCache<>(Unary.class, DoubleVector.class); /** * {@inheritDoc} */ - @ForceInline - public final + @Override + public abstract DoubleVector lanewise(VectorOperators.Unary op, - VectorMask m) { - return blend(lanewise(op), m); + VectorMask m); + @ForceInline + final + DoubleVector lanewiseTemplate(VectorOperators.Unary op, + Class> maskClass, + VectorMask m) { + m.check(maskClass, this); + if (opKind(op, VO_SPECIAL)) { + if (op == ZOMO) { + return blend(broadcast(-1), compare(NE, 0, m)); + } + } + int opc = opCode(op); + return VectorSupport.unaryOp( + opc, getClass(), maskClass, double.class, length(), + this, m, + UN_IMPL.find(op, opc, DoubleVector::unaryOperations)); + } + + private static final + ImplCache>> + UN_IMPL = new ImplCache<>(Unary.class, DoubleVector.class); + + private static UnaryOperation> unaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_NEG: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) -a); + case VECTOR_OP_ABS: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) Math.abs(a)); + case VECTOR_OP_SIN: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) Math.sin(a)); + case VECTOR_OP_COS: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) Math.cos(a)); + case VECTOR_OP_TAN: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) Math.tan(a)); + case VECTOR_OP_ASIN: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) Math.asin(a)); + case VECTOR_OP_ACOS: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) Math.acos(a)); + case VECTOR_OP_ATAN: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) Math.atan(a)); + case VECTOR_OP_EXP: return (v0, m) -> + 
v0.uOp(m, (i, a) -> (double) Math.exp(a)); + case VECTOR_OP_LOG: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) Math.log(a)); + case VECTOR_OP_LOG10: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) Math.log10(a)); + case VECTOR_OP_SQRT: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) Math.sqrt(a)); + case VECTOR_OP_CBRT: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) Math.cbrt(a)); + case VECTOR_OP_SINH: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) Math.sinh(a)); + case VECTOR_OP_COSH: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) Math.cosh(a)); + case VECTOR_OP_TANH: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) Math.tanh(a)); + case VECTOR_OP_EXPM1: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) Math.expm1(a)); + case VECTOR_OP_LOG1P: return (v0, m) -> + v0.uOp(m, (i, a) -> (double) Math.log1p(a)); + default: return null; + } } // Binary lanewise support @@ -614,6 +656,7 @@ public abstract class DoubleVector extends AbstractVector { Vector v) { DoubleVector that = (DoubleVector) v; that.check(this); + if (opKind(op, VO_SPECIAL )) { if (op == FIRST_NONZERO) { // FIXME: Support this in the JIT. @@ -627,48 +670,75 @@ public abstract class DoubleVector extends AbstractVector { .viewAsFloatingLanes(); } } + int opc = opCode(op); return VectorSupport.binaryOp( - opc, getClass(), double.class, length(), - this, that, - BIN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (double)(a + b)); - case VECTOR_OP_SUB: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (double)(a - b)); - case VECTOR_OP_MUL: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (double)(a * b)); - case VECTOR_OP_DIV: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (double)(a / b)); - case VECTOR_OP_MAX: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (double)Math.max(a, b)); - case VECTOR_OP_MIN: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (double)Math.min(a, b)); - case VECTOR_OP_ATAN2: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (double) Math.atan2(a, b)); - case VECTOR_OP_POW: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (double) Math.pow(a, b)); - case VECTOR_OP_HYPOT: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (double) Math.hypot(a, b)); - default: return null; - }})); + opc, getClass(), null, double.class, length(), + this, that, null, + BIN_IMPL.find(op, opc, DoubleVector::binaryOperations)); } - private static final - ImplCache> BIN_IMPL - = new ImplCache<>(Binary.class, DoubleVector.class); /** * {@inheritDoc} * @see #lanewise(VectorOperators.Binary,double,VectorMask) */ - @ForceInline - public final + @Override + public abstract DoubleVector lanewise(VectorOperators.Binary op, Vector v, - VectorMask m) { - return blend(lanewise(op, v), m); + VectorMask m); + @ForceInline + final + DoubleVector lanewiseTemplate(VectorOperators.Binary op, + Class> maskClass, + Vector v, VectorMask m) { + DoubleVector that = (DoubleVector) v; + that.check(this); + m.check(maskClass, this); + + if (opKind(op, VO_SPECIAL )) { + if (op == FIRST_NONZERO) { + return blend(lanewise(op, v), m); + } + } + + int opc = opCode(op); + return VectorSupport.binaryOp( + opc, getClass(), maskClass, double.class, length(), + this, that, m, + BIN_IMPL.find(op, opc, DoubleVector::binaryOperations)); } + + private static final + ImplCache>> + BIN_IMPL = new ImplCache<>(Binary.class, DoubleVector.class); + + private static BinaryOperation> binaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (double)(a + b)); + case 
VECTOR_OP_SUB: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (double)(a - b)); + case VECTOR_OP_MUL: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (double)(a * b)); + case VECTOR_OP_DIV: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (double)(a / b)); + case VECTOR_OP_MAX: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (double)Math.max(a, b)); + case VECTOR_OP_MIN: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (double)Math.min(a, b)); + case VECTOR_OP_OR: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> fromBits(toBits(a) | toBits(b))); + case VECTOR_OP_ATAN2: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (double) Math.atan2(a, b)); + case VECTOR_OP_POW: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (double) Math.pow(a, b)); + case VECTOR_OP_HYPOT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (double) Math.hypot(a, b)); + default: return null; + } + } + // FIXME: Maybe all of the public final methods in this file (the // simple ones that just call lanewise) should be pushed down to // the X-VectorBits template. They can't optimize properly at @@ -725,7 +795,7 @@ public abstract class DoubleVector extends AbstractVector { DoubleVector lanewise(VectorOperators.Binary op, double e, VectorMask m) { - return blend(lanewise(op, e), m); + return lanewise(op, broadcast(e), m); } /** @@ -743,8 +813,7 @@ public abstract class DoubleVector extends AbstractVector { DoubleVector lanewise(VectorOperators.Binary op, long e) { double e1 = (double) e; - if ((long)e1 != e - ) { + if ((long)e1 != e) { vspecies().checkValue(e); // for exception } return lanewise(op, e1); @@ -764,7 +833,11 @@ public abstract class DoubleVector extends AbstractVector { public final DoubleVector lanewise(VectorOperators.Binary op, long e, VectorMask m) { - return blend(lanewise(op, e), m); + double e1 = (double) e; + if ((long)e1 != e) { + vspecies().checkValue(e); // for exception + } + return lanewise(op, e1, m); } @@ -806,18 +879,10 @@ public abstract class DoubleVector extends AbstractVector { tother.check(this); int opc = opCode(op); return VectorSupport.ternaryOp( - opc, getClass(), double.class, length(), - this, that, tother, - TERN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_FMA: return (v0, v1_, v2_) -> - v0.tOp(v1_, v2_, (i, a, b, c) -> Math.fma(a, b, c)); - default: return null; - }})); + opc, getClass(), null, double.class, length(), + this, that, tother, null, + TERN_IMPL.find(op, opc, DoubleVector::ternaryOperations)); } - private static final - ImplCache> TERN_IMPL - = new ImplCache<>(Ternary.class, DoubleVector.class); /** * {@inheritDoc} @@ -825,13 +890,45 @@ public abstract class DoubleVector extends AbstractVector { * @see #lanewise(VectorOperators.Ternary,Vector,double,VectorMask) * @see #lanewise(VectorOperators.Ternary,double,Vector,VectorMask) */ - @ForceInline - public final + @Override + public abstract DoubleVector lanewise(VectorOperators.Ternary op, Vector v1, Vector v2, - VectorMask m) { - return blend(lanewise(op, v1, v2), m); + VectorMask m); + @ForceInline + final + DoubleVector lanewiseTemplate(VectorOperators.Ternary op, + Class> maskClass, + Vector v1, + Vector v2, + VectorMask m) { + DoubleVector that = (DoubleVector) v1; + DoubleVector tother = (DoubleVector) v2; + // It's a word: https://www.dictionary.com/browse/tother + // See also Chapter 11 of Dickens, Our Mutual Friend: + // "Totherest Governor," replied Mr Riderhood... 
+ that.check(this); + tother.check(this); + m.check(maskClass, this); + + int opc = opCode(op); + return VectorSupport.ternaryOp( + opc, getClass(), maskClass, double.class, length(), + this, that, tother, m, + TERN_IMPL.find(op, opc, DoubleVector::ternaryOperations)); + } + + private static final + ImplCache>> + TERN_IMPL = new ImplCache<>(Ternary.class, DoubleVector.class); + + private static TernaryOperation> ternaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_FMA: return (v0, v1_, v2_, m) -> + v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c)); + default: return null; + } } /** @@ -888,7 +985,7 @@ public abstract class DoubleVector extends AbstractVector { double e1, double e2, VectorMask m) { - return blend(lanewise(op, e1, e2), m); + return lanewise(op, broadcast(e1), broadcast(e2), m); } /** @@ -946,7 +1043,7 @@ public abstract class DoubleVector extends AbstractVector { Vector v1, double e2, VectorMask m) { - return blend(lanewise(op, v1, e2), m); + return lanewise(op, v1, broadcast(e2), m); } /** @@ -1003,7 +1100,7 @@ public abstract class DoubleVector extends AbstractVector { double e1, Vector v2, VectorMask m) { - return blend(lanewise(op, e1, v2), m); + return lanewise(op, broadcast(e1), v2, m); } // (Thus endeth the Great and Mighty Ternary Ogdoad.) @@ -1647,15 +1744,13 @@ public abstract class DoubleVector extends AbstractVector { final > M compareTemplate(Class maskType, Comparison op, Vector v) { - Objects.requireNonNull(v); - DoubleSpecies vsp = vspecies(); DoubleVector that = (DoubleVector) v; that.check(this); int opc = opCode(op); return VectorSupport.compare( opc, getClass(), maskType, double.class, length(), - this, that, - (cond, v0, v1) -> { + this, that, null, + (cond, v0, v1, m1) -> { AbstractMask m = v0.bTest(cond, v1, (cond_, i, a, b) -> compareWithOp(cond, a, b)); @@ -1665,6 +1760,28 @@ public abstract class DoubleVector extends AbstractVector { }); } + /*package-private*/ + @ForceInline + final + > + M compareTemplate(Class maskType, Comparison op, Vector v, M m) { + DoubleVector that = (DoubleVector) v; + that.check(this); + m.check(maskType, this); + int opc = opCode(op); + return VectorSupport.compare( + opc, getClass(), maskType, double.class, length(), + this, that, m, + (cond, v0, v1, m1) -> { + AbstractMask cmpM + = v0.bTest(cond, v1, (cond_, i, a, b) + -> compareWithOp(cond, a, b)); + @SuppressWarnings("unchecked") + M m2 = (M) cmpM.and(m1); + return m2; + }); + } + @ForceInline private static boolean compareWithOp(int cond, double a, double b) { return switch (cond) { @@ -1678,18 +1795,6 @@ public abstract class DoubleVector extends AbstractVector { }; } - /** - * {@inheritDoc} - */ - @Override - @ForceInline - public final - VectorMask compare(VectorOperators.Comparison op, - Vector v, - VectorMask m) { - return compare(op, v).and(m); - } - /** * Tests this vector by comparing it with an input scalar, * according to the given comparison operation. 
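The deleted default above computed the masked comparison as compare(op, v).and(m); the new abstract method plus compareTemplate(maskType, op, v, m) produces the same mask in a single intrinsic call. The equivalence, as a sketch with illustrative names:

import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;

class MaskedCompareSketch {
    static VectorMask<Double> lessThan(DoubleVector a, DoubleVector b,
                                       VectorMask<Double> m) {
        VectorMask<Double> viaAnd = a.compare(VectorOperators.LT, b).and(m);
        VectorMask<Double> fused  = a.compare(VectorOperators.LT, b, m);
        // Both forms are specified to agree lane for lane; double species
        // never exceed 64 lanes, so toLong() is safe here.
        assert viaAnd.toLong() == fused.toLong();
        return fused;
    }
}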
@@ -1748,7 +1853,7 @@ public abstract class DoubleVector extends AbstractVector { public final VectorMask compare(VectorOperators.Comparison op, double e, VectorMask m) { - return compare(op, e).and(m); + return compare(op, broadcast(e), m); } /** @@ -1999,9 +2104,9 @@ public abstract class DoubleVector extends AbstractVector { DoubleVector rearrangeTemplate(Class shuffletype, S shuffle) { shuffle.checkIndexes(); return VectorSupport.rearrangeOp( - getClass(), shuffletype, double.class, length(), - this, shuffle, - (v1, s_) -> v1.uOp((i, a) -> { + getClass(), shuffletype, null, double.class, length(), + this, shuffle, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); return v1.lane(ei); })); @@ -2018,24 +2123,25 @@ public abstract class DoubleVector extends AbstractVector { /*package-private*/ @ForceInline final - > + , M extends VectorMask> DoubleVector rearrangeTemplate(Class shuffletype, + Class masktype, S shuffle, - VectorMask m) { - DoubleVector unmasked = - VectorSupport.rearrangeOp( - getClass(), shuffletype, double.class, length(), - this, shuffle, - (v1, s_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); - return ei < 0 ? 0 : v1.lane(ei); - })); + M m) { + + m.check(masktype, this); VectorMask valid = shuffle.laneIsValid(); if (m.andNot(valid).anyTrue()) { shuffle.checkIndexes(); throw new AssertionError(); } - return broadcast((double)0).blend(unmasked, m); + return VectorSupport.rearrangeOp( + getClass(), shuffletype, masktype, double.class, length(), + this, shuffle, m, + (v1, s_, m_) -> v1.uOp((i, a) -> { + int ei = s_.laneSource(i); + return ei < 0 || !m_.laneIsSet(i) ? 0 : v1.lane(ei); + })); } /** @@ -2058,17 +2164,17 @@ public abstract class DoubleVector extends AbstractVector { S ws = (S) shuffle.wrapIndexes(); DoubleVector r0 = VectorSupport.rearrangeOp( - getClass(), shuffletype, double.class, length(), - this, ws, - (v0, s_) -> v0.uOp((i, a) -> { + getClass(), shuffletype, null, double.class, length(), + this, ws, null, + (v0, s_, m_) -> v0.uOp((i, a) -> { int ei = s_.laneSource(i); return v0.lane(ei); })); DoubleVector r1 = VectorSupport.rearrangeOp( - getClass(), shuffletype, double.class, length(), - v, ws, - (v1, s_) -> v1.uOp((i, a) -> { + getClass(), shuffletype, null, double.class, length(), + v, ws, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); return v1.lane(ei); })); @@ -2309,9 +2415,18 @@ public abstract class DoubleVector extends AbstractVector { @ForceInline final double reduceLanesTemplate(VectorOperators.Associative op, + Class> maskClass, VectorMask m) { - DoubleVector v = reduceIdentityVector(op).blend(this, m); - return v.reduceLanesTemplate(op); + m.check(maskClass, this); + if (op == FIRST_NONZERO) { + DoubleVector v = reduceIdentityVector(op).blend(this, m); + return v.reduceLanesTemplate(op); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), maskClass, double.class, length(), + this, m, + REDUCE_IMPL.find(op, opc, DoubleVector::reductionOperations))); } /*package-private*/ @@ -2326,24 +2441,28 @@ public abstract class DoubleVector extends AbstractVector { } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), double.class, length(), - this, - REDUCE_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return v -> - toBits(v.rOp((double)0, (i, a, b) -> (double)(a + b))); - case VECTOR_OP_MUL: return v -> - toBits(v.rOp((double)1, (i, a, b) -> (double)(a * b))); - case VECTOR_OP_MIN: return v -> - 
toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (double) Math.min(a, b))); - case VECTOR_OP_MAX: return v -> - toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (double) Math.max(a, b))); - default: return null; - }}))); + opc, getClass(), null, double.class, length(), + this, null, + REDUCE_IMPL.find(op, opc, DoubleVector::reductionOperations))); } + private static final - ImplCache> REDUCE_IMPL - = new ImplCache<>(Associative.class, DoubleVector.class); + ImplCache>> + REDUCE_IMPL = new ImplCache<>(Associative.class, DoubleVector.class); + + private static ReductionOperation> reductionOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp((double)0, m, (i, a, b) -> (double)(a + b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp((double)1, m, (i, a, b) -> (double)(a * b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (double) Math.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (double) Math.max(a, b))); + default: return null; + } + } private @ForceInline @@ -2549,9 +2668,7 @@ public abstract class DoubleVector extends AbstractVector { VectorMask m) { DoubleSpecies vsp = (DoubleSpecies) species; if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) { - DoubleVector zero = vsp.zero(); - DoubleVector v = zero.fromByteArray0(a, offset); - return zero.blend(v.maybeSwap(bo), m); + return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo); } // FIXME: optimize @@ -2613,8 +2730,7 @@ public abstract class DoubleVector extends AbstractVector { VectorMask m) { DoubleSpecies vsp = (DoubleSpecies) species; if (offset >= 0 && offset <= (a.length - species.length())) { - DoubleVector zero = vsp.zero(); - return zero.blend(zero.fromArray0(a, offset), m); + return vsp.dummyVector().fromArray0(a, offset, m); } // FIXME: optimize @@ -2690,13 +2806,13 @@ public abstract class DoubleVector extends AbstractVector { vix = VectorIntrinsics.checkIndex(vix, a.length); return VectorSupport.loadWithMap( - vectorType, double.class, vsp.laneCount(), - IntVector.species(vsp.indexShape()).vectorType(), - a, ARRAY_BASE, vix, + vectorType, null, double.class, vsp.laneCount(), + isp.vectorType(), + a, ARRAY_BASE, vix, null, a, offset, indexMap, mapOffset, vsp, - (double[] c, int idx, int[] iMap, int idy, DoubleSpecies s) -> + (c, idx, iMap, idy, s, vm) -> s.vOp(n -> c[idx + iMap[idy+n]])); - } + } /** * Gathers a new vector composed of elements from an array of type @@ -2744,9 +2860,8 @@ public abstract class DoubleVector extends AbstractVector { return fromArray(species, a, offset, indexMap, mapOffset); } else { - // FIXME: Cannot vectorize yet, if there's a mask. 
DoubleSpecies vsp = (DoubleSpecies) species; - return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]); + return vsp.dummyVector().fromArray0(a, offset, indexMap, mapOffset, m); } } @@ -2840,9 +2955,7 @@ public abstract class DoubleVector extends AbstractVector { VectorMask m) { DoubleSpecies vsp = (DoubleSpecies) species; if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) { - DoubleVector zero = vsp.zero(); - DoubleVector v = zero.fromByteBuffer0(bb, offset); - return zero.blend(v.maybeSwap(bo), m); + return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo); } // FIXME: optimize @@ -2914,10 +3027,9 @@ public abstract class DoubleVector extends AbstractVector { if (m.allTrue()) { intoArray(a, offset); } else { - // FIXME: optimize DoubleSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 1, a.length); - stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v); + intoArray0(a, offset, m); } } @@ -2980,12 +3092,12 @@ public abstract class DoubleVector extends AbstractVector { vix = VectorIntrinsics.checkIndex(vix, a.length); VectorSupport.storeWithMap( - vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + vsp.vectorType(), null, vsp.elementType(), vsp.laneCount(), isp.vectorType(), a, arrayAddress(a, 0), vix, - this, + this, null, a, offset, indexMap, mapOffset, - (arr, off, v, map, mo) + (arr, off, v, map, mo, vm) -> v.stOp(arr, off, (arr_, off_, i, e) -> { int j = map[mo + i]; @@ -3032,12 +3144,7 @@ public abstract class DoubleVector extends AbstractVector { intoArray(a, offset, indexMap, mapOffset); } else { - // FIXME: Cannot vectorize yet, if there's a mask. - stOp(a, offset, m, - (arr, off, i, e) -> { - int j = indexMap[mapOffset + i]; - arr[off + j] = e; - }); + intoArray0(a, offset, indexMap, mapOffset, m); } } @@ -3067,12 +3174,9 @@ public abstract class DoubleVector extends AbstractVector { if (m.allTrue()) { intoByteArray(a, offset, bo); } else { - // FIXME: optimize DoubleSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 8, a.length); - ByteBuffer wb = wrapper(a, bo); - this.stOp(wb, offset, m, - (wb_, o, i, e) -> wb_.putDouble(o + i * 8, e)); + maybeSwap(bo).intoByteArray0(a, offset, m); } } @@ -3084,7 +3188,7 @@ public abstract class DoubleVector extends AbstractVector { public final void intoByteBuffer(ByteBuffer bb, int offset, ByteOrder bo) { - if (bb.isReadOnly()) { + if (ScopedMemoryAccess.isReadOnly(bb)) { throw new ReadOnlyBufferException(); } offset = checkFromIndexSize(offset, byteSize(), bb.limit()); @@ -3103,15 +3207,12 @@ public abstract class DoubleVector extends AbstractVector { if (m.allTrue()) { intoByteBuffer(bb, offset, bo); } else { - // FIXME: optimize if (bb.isReadOnly()) { throw new ReadOnlyBufferException(); } DoubleSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 8, bb.limit()); - ByteBuffer wb = wrapper(bb, bo); - this.stOp(wb, offset, m, - (wb_, o, i, e) -> wb_.putDouble(o + i * 8, e)); + maybeSwap(bo).intoByteBuffer0(bb, offset, m); } } @@ -3149,6 +3250,75 @@ public abstract class DoubleVector extends AbstractVector { (arr_, off_, i) -> arr_[off_ + i])); } + /*package-private*/ + abstract + DoubleVector fromArray0(double[] a, int offset, VectorMask m); + @ForceInline + final + > + DoubleVector fromArray0Template(Class maskClass, double[] a, int offset, M m) { + m.check(species()); + DoubleSpecies vsp = vspecies(); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), m, + a, offset, vsp, + 
(arr, off, s, vm) -> s.ldOp(arr, off, vm, + (arr_, off_, i) -> arr_[off_ + i])); + } + + /*package-private*/ + abstract + DoubleVector fromArray0(double[] a, int offset, + int[] indexMap, int mapOffset, + VectorMask m); + @ForceInline + final + > + DoubleVector fromArray0Template(Class maskClass, double[] a, int offset, + int[] indexMap, int mapOffset, M m) { + DoubleSpecies vsp = vspecies(); + IntVector.IntSpecies isp = IntVector.species(vsp.indexShape()); + Objects.requireNonNull(a); + Objects.requireNonNull(indexMap); + m.check(vsp); + Class vectorType = vsp.vectorType(); + + if (vsp.laneCount() == 1) { + return DoubleVector.fromArray(vsp, a, offset + indexMap[mapOffset], m); + } + + // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k] + IntVector vix; + if (isp.laneCount() != vsp.laneCount()) { + // For DoubleMaxVector, if vector length is non-power-of-two or + // 2048 bits, indexShape of Double species is S_MAX_BIT. + // Assume that vector length is 2048, then the lane count of Double + // vector is 32. When converting Double species to int species, + // indexShape is still S_MAX_BIT, but the lane count of int vector + // is 64. So when loading index vector (IntVector), only lower half + // of index data is needed. + vix = IntVector + .fromArray(isp, indexMap, mapOffset, IntMaxVector.IntMaxMask.LOWER_HALF_TRUE_MASK) + .add(offset); + } else { + vix = IntVector + .fromArray(isp, indexMap, mapOffset) + .add(offset); + } + + // FIXME: Check index under mask controlling. + vix = VectorIntrinsics.checkIndex(vix, a.length); + + return VectorSupport.loadWithMap( + vectorType, maskClass, double.class, vsp.laneCount(), + isp.vectorType(), + a, ARRAY_BASE, vix, m, + a, offset, indexMap, mapOffset, vsp, + (c, idx, iMap, idy, s, vm) -> + s.vOp(vm, n -> c[idx + iMap[idy+n]])); + } + @Override @@ -3169,6 +3339,25 @@ public abstract class DoubleVector extends AbstractVector { }); } + abstract + DoubleVector fromByteArray0(byte[] a, int offset, VectorMask m); + @ForceInline + final + > + DoubleVector fromByteArray0Template(Class maskClass, byte[] a, int offset, M m) { + DoubleSpecies vsp = vspecies(); + m.check(vsp); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + return s.ldOp(wb, off, vm, + (wb_, o, i) -> wb_.getDouble(o + i * 8)); + }); + } + abstract DoubleVector fromByteBuffer0(ByteBuffer bb, int offset); @ForceInline @@ -3185,6 +3374,24 @@ public abstract class DoubleVector extends AbstractVector { }); } + abstract + DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m); + @ForceInline + final + > + DoubleVector fromByteBuffer0Template(Class maskClass, ByteBuffer bb, int offset, M m) { + DoubleSpecies vsp = vspecies(); + m.check(vsp); + return ScopedMemoryAccess.loadFromByteBufferMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + bb, offset, m, vsp, + (buf, off, s, vm) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + return s.ldOp(wb, off, vm, + (wb_, o, i) -> wb_.getDouble(o + i * 8)); + }); + } + // Unchecked storing operations in native byte order. // Caller is responsible for applying index checks, masking, and // byte swapping. 
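fromArray0Template(maskClass, ...) above lets a masked gather reach loadWithMap directly instead of the scalar vOp fallback. A sketch of the public gather call it backs (SPECIES_PREFERRED and the method name are illustrative):

import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorSpecies;

class MaskedGatherSketch {
    static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_PREFERRED;

    // Lane n receives a[offset + indexMap[n]] where the mask is set;
    // unset lanes receive the default value (0.0).
    static DoubleVector gather(double[] a, int offset,
                               int[] indexMap, VectorMask<Double> m) {
        return DoubleVector.fromArray(SPECIES, a, offset, indexMap, 0, m);
    }
}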
@@ -3204,6 +3411,77 @@ public abstract class DoubleVector extends AbstractVector { (arr_, off_, i, e) -> arr_[off_+i] = e)); } + abstract + void intoArray0(double[] a, int offset, VectorMask m); + @ForceInline + final + > + void intoArray0Template(Class maskClass, double[] a, int offset, M m) { + m.check(species()); + DoubleSpecies vsp = vspecies(); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> arr_[off_ + i] = e)); + } + + abstract + void intoArray0(double[] a, int offset, + int[] indexMap, int mapOffset, + VectorMask m); + @ForceInline + final + > + void intoArray0Template(Class maskClass, double[] a, int offset, + int[] indexMap, int mapOffset, M m) { + m.check(species()); + DoubleSpecies vsp = vspecies(); + IntVector.IntSpecies isp = IntVector.species(vsp.indexShape()); + if (vsp.laneCount() == 1) { + intoArray(a, offset + indexMap[mapOffset], m); + return; + } + + // Index vector: vix[0:n] = i -> offset + indexMap[mo + i] + IntVector vix; + if (isp.laneCount() != vsp.laneCount()) { + // For DoubleMaxVector, if vector length is 2048 bits, indexShape + // of Double species is S_MAX_BIT. and the lane count of Double + // vector is 32. When converting Double species to int species, + // indexShape is still S_MAX_BIT, but the lane count of int vector + // is 64. So when loading index vector (IntVector), only lower half + // of index data is needed. + vix = IntVector + .fromArray(isp, indexMap, mapOffset, IntMaxVector.IntMaxMask.LOWER_HALF_TRUE_MASK) + .add(offset); + } else { + vix = IntVector + .fromArray(isp, indexMap, mapOffset) + .add(offset); + } + + + // FIXME: Check index under mask controlling. 
+ vix = VectorIntrinsics.checkIndex(vix, a.length); + + VectorSupport.storeWithMap( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + isp.vectorType(), + a, arrayAddress(a, 0), vix, + this, m, + a, offset, indexMap, mapOffset, + (arr, off, v, map, mo, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> { + int j = map[mo + i]; + arr[off + j] = e; + })); + } + + abstract void intoByteArray0(byte[] a, int offset); @ForceInline @@ -3221,6 +3499,25 @@ public abstract class DoubleVector extends AbstractVector { }); } + abstract + void intoByteArray0(byte[] a, int offset, VectorMask m); + @ForceInline + final + > + void intoByteArray0Template(Class maskClass, byte[] a, int offset, M m) { + DoubleSpecies vsp = vspecies(); + m.check(vsp); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + v.stOp(wb, off, vm, + (tb_, o, i, e) -> tb_.putDouble(o + i * 8, e)); + }); + } + @ForceInline final void intoByteBuffer0(ByteBuffer bb, int offset) { @@ -3235,6 +3532,25 @@ public abstract class DoubleVector extends AbstractVector { }); } + abstract + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m); + @ForceInline + final + > + void intoByteBuffer0Template(Class maskClass, ByteBuffer bb, int offset, M m) { + DoubleSpecies vsp = vspecies(); + m.check(vsp); + ScopedMemoryAccess.storeIntoByteBufferMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + this, m, bb, offset, + (buf, off, v, vm) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + v.stOp(wb, off, vm, + (wb_, o, i, e) -> wb_.putDouble(o + i * 8, e)); + }); + } + + // End of low-level memory operations. 
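With intoArray0Template / intoByteArray0Template / intoByteBuffer0Template in place, the masked store entry points stop using the per-lane stOp fallbacks flagged by the removed FIXME comments. A sketch of the public calls that now intrinsify (the buffer is assumed writable and large enough for the vector's byteSize() at the given offset):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.VectorMask;

class MaskedStoreSketch {
    static void store(DoubleVector v, double[] a, int off,
                      ByteBuffer bb, VectorMask<Double> m) {
        v.intoArray(a, off, m);                              // lowers to storeMasked
        v.intoByteBuffer(bb, 0, ByteOrder.nativeOrder(), m); // lowers to storeIntoByteBufferMasked
    }
}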
private static @@ -3552,7 +3868,7 @@ public abstract class DoubleVector extends AbstractVector { /*package-private*/ @ForceInline DoubleVector ldOp(M memory, int offset, - AbstractMask m, + VectorMask m, FLdOp f) { return dummyVector().ldOp(memory, offset, m, f); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java index 29b8052cb1696f4b085ca77506b6b9a70f63b723..4e0dd018d269986843332e216b4ddafe64b72000 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java @@ -236,8 +236,8 @@ final class Float128Vector extends FloatVector { @ForceInline final @Override - float rOp(float v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + float rOp(float v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,22 +273,42 @@ final class Float128Vector extends FloatVector { return (Float128Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Float128Vector lanewise(Unary op, VectorMask m) { + return (Float128Vector) super.lanewiseTemplate(op, Float128Mask.class, (Float128Mask) m); // specialize + } + @Override @ForceInline public Float128Vector lanewise(Binary op, Vector v) { return (Float128Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Float128Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Float128Vector) super.lanewiseTemplate(op, Float128Mask.class, v, (Float128Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Float128Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Float128Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Float128Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Float128Vector) super.lanewiseTemplate(op, Float128Mask.class, v1, v2, (Float128Mask) m); // specialize + } + @Override @ForceInline public final @@ -308,7 +328,7 @@ final class Float128Vector extends FloatVector { @ForceInline public final float reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Float128Mask.class, (Float128Mask) m); // specialized } @Override @@ -321,7 +341,7 @@ final class Float128Vector extends FloatVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Float128Mask.class, (Float128Mask) m); // specialized } @ForceInline @@ -357,6 +377,13 @@ final class Float128Vector extends FloatVector { return super.compareTemplate(Float128Mask.class, op, s); // specialize } + @Override + @ForceInline + public final Float128Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Float128Mask.class, op, v, (Float128Mask) m); + } + + @Override @ForceInline public Float128Vector blend(Vector v, VectorMask m) { @@ -413,6 +440,7 @@ final class Float128Vector extends FloatVector { VectorMask m) { return (Float128Vector) super.rearrangeTemplate(Float128Shuffle.class, + Float128Mask.class, (Float128Shuffle) shuffle, (Float128Mask) m); // specialize } @@ 
-584,16 +612,12 @@ final class Float128Vector extends FloatVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Float128Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -619,9 +643,9 @@ final class Float128Vector extends FloatVector { public Float128Mask and(VectorMask mask) { Objects.requireNonNull(mask); Float128Mask m = (Float128Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Float128Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Float128Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -629,9 +653,9 @@ final class Float128Vector extends FloatVector { public Float128Mask or(VectorMask mask) { Objects.requireNonNull(mask); Float128Mask m = (Float128Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Float128Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Float128Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -639,9 +663,9 @@ final class Float128Vector extends FloatVector { Float128Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Float128Mask m = (Float128Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Float128Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Float128Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -649,22 +673,32 @@ final class Float128Vector extends FloatVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Float128Mask.class, int.class, VLENGTH, this, - (m) -> trueCountHelper(((Float128Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Float128Mask.class, int.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Float128Mask.class, int.class, VLENGTH, this, - (m) -> firstTrueHelper(((Float128Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Float128Mask.class, int.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Float128Mask.class, int.class, VLENGTH, this, - (m) -> lastTrueHelper(((Float128Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Float128Mask.class, int.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + 
@ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Float128Mask.class, int.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -775,6 +809,20 @@ final class Float128Vector extends FloatVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + FloatVector fromArray0(float[] a, int offset, VectorMask m) { + return super.fromArray0Template(Float128Mask.class, a, offset, (Float128Mask) m); // specialize + } + + @ForceInline + @Override + final + FloatVector fromArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(Float128Mask.class, a, offset, indexMap, mapOffset, (Float128Mask) m); + } + @ForceInline @@ -784,6 +832,13 @@ final class Float128Vector extends FloatVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + FloatVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Float128Mask.class, a, offset, (Float128Mask) m); // specialize + } + @ForceInline @Override final @@ -791,6 +846,13 @@ final class Float128Vector extends FloatVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Float128Mask.class, bb, offset, (Float128Mask) m); // specialize + } + @ForceInline @Override final @@ -798,6 +860,21 @@ final class Float128Vector extends FloatVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(float[] a, int offset, VectorMask m) { + super.intoArray0Template(Float128Mask.class, a, offset, (Float128Mask) m); + } + + @ForceInline + @Override + final + void intoArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(Float128Mask.class, a, offset, indexMap, mapOffset, (Float128Mask) m); + } + + @ForceInline @Override final @@ -805,6 +882,21 @@ final class Float128Vector extends FloatVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Float128Mask.class, a, offset, (Float128Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Float128Mask.class, bb, offset, (Float128Mask) m); + } + + // End of specialized low-level memory operations. 
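The rewritten mask cast above drops the size-matched reinterpret fast path and lowers every cast to one VECTOR_OP_CAST intrinsic with a maskFactory-based fallback. Publicly this surfaces as VectorMask.cast, which requires equal lane counts; a small sketch (the species choices and class name are illustrative):

    import jdk.incubator.vector.FloatVector;
    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorSpecies;

    // Illustrative: re-types a 4-lane float mask as a 4-lane int mask.
    public class MaskCastSketch {
        public static void main(String[] args) {
            VectorSpecies<Float> FS = FloatVector.SPECIES_128;  // 4 float lanes
            VectorSpecies<Integer> IS = IntVector.SPECIES_128;  // 4 int lanes
            VectorMask<Float> fm = FS.indexInRange(0, 3);       // T, T, T, F
            VectorMask<Integer> im = fm.cast(IS);               // same lane pattern
            System.out.println(fm.toLong() == im.toLong());     // true
        }
    }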
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java index 9e8bb030c15ad7b4aef068ba591116f117660464..7812876f4eb59f8874ed9c977d653e22230681cb 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java @@ -236,8 +236,8 @@ final class Float256Vector extends FloatVector { @ForceInline final @Override - float rOp(float v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + float rOp(float v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,22 +273,42 @@ final class Float256Vector extends FloatVector { return (Float256Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Float256Vector lanewise(Unary op, VectorMask m) { + return (Float256Vector) super.lanewiseTemplate(op, Float256Mask.class, (Float256Mask) m); // specialize + } + @Override @ForceInline public Float256Vector lanewise(Binary op, Vector v) { return (Float256Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Float256Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Float256Vector) super.lanewiseTemplate(op, Float256Mask.class, v, (Float256Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Float256Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Float256Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Float256Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Float256Vector) super.lanewiseTemplate(op, Float256Mask.class, v1, v2, (Float256Mask) m); // specialize + } + @Override @ForceInline public final @@ -308,7 +328,7 @@ final class Float256Vector extends FloatVector { @ForceInline public final float reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Float256Mask.class, (Float256Mask) m); // specialized } @Override @@ -321,7 +341,7 @@ final class Float256Vector extends FloatVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Float256Mask.class, (Float256Mask) m); // specialized } @ForceInline @@ -357,6 +377,13 @@ final class Float256Vector extends FloatVector { return super.compareTemplate(Float256Mask.class, op, s); // specialize } + @Override + @ForceInline + public final Float256Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Float256Mask.class, op, v, (Float256Mask) m); + } + + @Override @ForceInline public Float256Vector blend(Vector v, VectorMask m) { @@ -413,6 +440,7 @@ final class Float256Vector extends FloatVector { VectorMask m) { return (Float256Vector) super.rearrangeTemplate(Float256Shuffle.class, + Float256Mask.class, (Float256Shuffle) shuffle, (Float256Mask) m); // specialize } @@ -592,16 +620,12 @@ final class Float256Vector extends FloatVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and 
species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Float256Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -627,9 +651,9 @@ final class Float256Vector extends FloatVector { public Float256Mask and(VectorMask mask) { Objects.requireNonNull(mask); Float256Mask m = (Float256Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Float256Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Float256Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -637,9 +661,9 @@ final class Float256Vector extends FloatVector { public Float256Mask or(VectorMask mask) { Objects.requireNonNull(mask); Float256Mask m = (Float256Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Float256Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Float256Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -647,9 +671,9 @@ final class Float256Vector extends FloatVector { Float256Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Float256Mask m = (Float256Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Float256Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Float256Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -657,22 +681,32 @@ final class Float256Vector extends FloatVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Float256Mask.class, int.class, VLENGTH, this, - (m) -> trueCountHelper(((Float256Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Float256Mask.class, int.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Float256Mask.class, int.class, VLENGTH, this, - (m) -> firstTrueHelper(((Float256Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Float256Mask.class, int.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Float256Mask.class, int.class, VLENGTH, this, - (m) -> lastTrueHelper(((Float256Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Float256Mask.class, int.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, 
Float256Mask.class, int.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -783,6 +817,20 @@ final class Float256Vector extends FloatVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + FloatVector fromArray0(float[] a, int offset, VectorMask m) { + return super.fromArray0Template(Float256Mask.class, a, offset, (Float256Mask) m); // specialize + } + + @ForceInline + @Override + final + FloatVector fromArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(Float256Mask.class, a, offset, indexMap, mapOffset, (Float256Mask) m); + } + @ForceInline @@ -792,6 +840,13 @@ final class Float256Vector extends FloatVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + FloatVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Float256Mask.class, a, offset, (Float256Mask) m); // specialize + } + @ForceInline @Override final @@ -799,6 +854,13 @@ final class Float256Vector extends FloatVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Float256Mask.class, bb, offset, (Float256Mask) m); // specialize + } + @ForceInline @Override final @@ -806,6 +868,21 @@ final class Float256Vector extends FloatVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(float[] a, int offset, VectorMask m) { + super.intoArray0Template(Float256Mask.class, a, offset, (Float256Mask) m); + } + + @ForceInline + @Override + final + void intoArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(Float256Mask.class, a, offset, indexMap, mapOffset, (Float256Mask) m); + } + + @ForceInline @Override final @@ -813,6 +890,21 @@ final class Float256Vector extends FloatVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Float256Mask.class, a, offset, (Float256Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Float256Mask.class, bb, offset, (Float256Mask) m); + } + + // End of specialized low-level memory operations. 
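The new toLong() override packs the mask into a long bitmask through the VECTOR_OP_MASK_TOLONG intrinsic, guarded against species with more than 64 lanes. A usage sketch of its contract (the lane values and class name are illustrative):

    import jdk.incubator.vector.FloatVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorSpecies;

    public class MaskToLongSketch {
        static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_256; // 8 lanes

        public static void main(String[] args) {
            boolean[] bits = {true, false, true, true, false, false, false, true};
            VectorMask<Float> m = VectorMask.fromArray(SPECIES, bits, 0);
            long packed = m.toLong();          // bit i is 1 iff lane i is set
            System.out.println(Long.toBinaryString(packed)); // prints 10001101
        }
    }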
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java index ec85efe1b7e2b71ab00709637859b496557cbd49..a8936709baadb9c7e3a379b59fa7bae36376fda6 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java @@ -236,8 +236,8 @@ final class Float512Vector extends FloatVector { @ForceInline final @Override - float rOp(float v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + float rOp(float v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,22 +273,42 @@ final class Float512Vector extends FloatVector { return (Float512Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Float512Vector lanewise(Unary op, VectorMask m) { + return (Float512Vector) super.lanewiseTemplate(op, Float512Mask.class, (Float512Mask) m); // specialize + } + @Override @ForceInline public Float512Vector lanewise(Binary op, Vector v) { return (Float512Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Float512Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Float512Vector) super.lanewiseTemplate(op, Float512Mask.class, v, (Float512Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Float512Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Float512Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Float512Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Float512Vector) super.lanewiseTemplate(op, Float512Mask.class, v1, v2, (Float512Mask) m); // specialize + } + @Override @ForceInline public final @@ -308,7 +328,7 @@ final class Float512Vector extends FloatVector { @ForceInline public final float reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Float512Mask.class, (Float512Mask) m); // specialized } @Override @@ -321,7 +341,7 @@ final class Float512Vector extends FloatVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Float512Mask.class, (Float512Mask) m); // specialized } @ForceInline @@ -357,6 +377,13 @@ final class Float512Vector extends FloatVector { return super.compareTemplate(Float512Mask.class, op, s); // specialize } + @Override + @ForceInline + public final Float512Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Float512Mask.class, op, v, (Float512Mask) m); + } + + @Override @ForceInline public Float512Vector blend(Vector v, VectorMask m) { @@ -413,6 +440,7 @@ final class Float512Vector extends FloatVector { VectorMask m) { return (Float512Vector) super.rearrangeTemplate(Float512Shuffle.class, + Float512Mask.class, (Float512Shuffle) shuffle, (Float512Mask) m); // specialize } @@ -608,16 +636,12 @@ final class Float512Vector extends FloatVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and 
species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Float512Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -643,9 +667,9 @@ final class Float512Vector extends FloatVector { public Float512Mask and(VectorMask mask) { Objects.requireNonNull(mask); Float512Mask m = (Float512Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Float512Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Float512Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -653,9 +677,9 @@ final class Float512Vector extends FloatVector { public Float512Mask or(VectorMask mask) { Objects.requireNonNull(mask); Float512Mask m = (Float512Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Float512Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Float512Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -663,9 +687,9 @@ final class Float512Vector extends FloatVector { Float512Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Float512Mask m = (Float512Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Float512Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Float512Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -673,22 +697,32 @@ final class Float512Vector extends FloatVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Float512Mask.class, int.class, VLENGTH, this, - (m) -> trueCountHelper(((Float512Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Float512Mask.class, int.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Float512Mask.class, int.class, VLENGTH, this, - (m) -> firstTrueHelper(((Float512Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Float512Mask.class, int.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Float512Mask.class, int.class, VLENGTH, this, - (m) -> lastTrueHelper(((Float512Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Float512Mask.class, int.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, 
Float512Mask.class, int.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -799,6 +833,20 @@ final class Float512Vector extends FloatVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + FloatVector fromArray0(float[] a, int offset, VectorMask m) { + return super.fromArray0Template(Float512Mask.class, a, offset, (Float512Mask) m); // specialize + } + + @ForceInline + @Override + final + FloatVector fromArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(Float512Mask.class, a, offset, indexMap, mapOffset, (Float512Mask) m); + } + @ForceInline @@ -808,6 +856,13 @@ final class Float512Vector extends FloatVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + FloatVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Float512Mask.class, a, offset, (Float512Mask) m); // specialize + } + @ForceInline @Override final @@ -815,6 +870,13 @@ final class Float512Vector extends FloatVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Float512Mask.class, bb, offset, (Float512Mask) m); // specialize + } + @ForceInline @Override final @@ -822,6 +884,21 @@ final class Float512Vector extends FloatVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(float[] a, int offset, VectorMask m) { + super.intoArray0Template(Float512Mask.class, a, offset, (Float512Mask) m); + } + + @ForceInline + @Override + final + void intoArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(Float512Mask.class, a, offset, indexMap, mapOffset, (Float512Mask) m); + } + + @ForceInline @Override final @@ -829,6 +906,21 @@ final class Float512Vector extends FloatVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Float512Mask.class, a, offset, (Float512Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Float512Mask.class, bb, offset, (Float512Mask) m); + } + + // End of specialized low-level memory operations. 
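The reduceLanes and reduceLanesToLong overrides above now hand the concrete mask class to reduceLanesTemplate, so a masked reduction can intrinsify directly instead of first blending with the identity. From user code this is the familiar masked reduction; a sketch of a tail-masked sum (the method and array names are assumptions):

    import jdk.incubator.vector.FloatVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    public class MaskedReduceSketch {
        static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_PREFERRED;

        // Sums a[] with a tail mask; lanes unset in the mask contribute
        // the identity value for ADD, which is 0.
        static float maskedSum(float[] a) {
            float sum = 0f;
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                VectorMask<Float> m = SPECIES.indexInRange(i, a.length);
                FloatVector v = FloatVector.fromArray(SPECIES, a, i, m);
                sum += v.reduceLanes(VectorOperators.ADD, m);
            }
            return sum;
        }
    }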
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java index a1e3ba216e920b04d94fbc1afd7ee5bd8df77cde..0c91d46e64e46788d840b52abbd186b2deb4f3bb 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java @@ -236,8 +236,8 @@ final class Float64Vector extends FloatVector { @ForceInline final @Override - float rOp(float v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + float rOp(float v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,22 +273,42 @@ final class Float64Vector extends FloatVector { return (Float64Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Float64Vector lanewise(Unary op, VectorMask m) { + return (Float64Vector) super.lanewiseTemplate(op, Float64Mask.class, (Float64Mask) m); // specialize + } + @Override @ForceInline public Float64Vector lanewise(Binary op, Vector v) { return (Float64Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Float64Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Float64Vector) super.lanewiseTemplate(op, Float64Mask.class, v, (Float64Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Float64Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Float64Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Float64Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Float64Vector) super.lanewiseTemplate(op, Float64Mask.class, v1, v2, (Float64Mask) m); // specialize + } + @Override @ForceInline public final @@ -308,7 +328,7 @@ final class Float64Vector extends FloatVector { @ForceInline public final float reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Float64Mask.class, (Float64Mask) m); // specialized } @Override @@ -321,7 +341,7 @@ final class Float64Vector extends FloatVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Float64Mask.class, (Float64Mask) m); // specialized } @ForceInline @@ -357,6 +377,13 @@ final class Float64Vector extends FloatVector { return super.compareTemplate(Float64Mask.class, op, s); // specialize } + @Override + @ForceInline + public final Float64Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Float64Mask.class, op, v, (Float64Mask) m); + } + + @Override @ForceInline public Float64Vector blend(Vector v, VectorMask m) { @@ -413,6 +440,7 @@ final class Float64Vector extends FloatVector { VectorMask m) { return (Float64Vector) super.rearrangeTemplate(Float64Shuffle.class, + Float64Mask.class, (Float64Shuffle) shuffle, (Float64Mask) m); // specialize } @@ -580,16 +608,12 @@ final class Float64Vector extends FloatVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == 
species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Float64Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -615,9 +639,9 @@ final class Float64Vector extends FloatVector { public Float64Mask and(VectorMask mask) { Objects.requireNonNull(mask); Float64Mask m = (Float64Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Float64Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Float64Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -625,9 +649,9 @@ final class Float64Vector extends FloatVector { public Float64Mask or(VectorMask mask) { Objects.requireNonNull(mask); Float64Mask m = (Float64Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Float64Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Float64Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -635,9 +659,9 @@ final class Float64Vector extends FloatVector { Float64Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Float64Mask m = (Float64Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Float64Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Float64Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -645,22 +669,32 @@ final class Float64Vector extends FloatVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Float64Mask.class, int.class, VLENGTH, this, - (m) -> trueCountHelper(((Float64Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Float64Mask.class, int.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Float64Mask.class, int.class, VLENGTH, this, - (m) -> firstTrueHelper(((Float64Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Float64Mask.class, int.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Float64Mask.class, int.class, VLENGTH, this, - (m) -> lastTrueHelper(((Float64Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Float64Mask.class, int.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Float64Mask.class, int.class, VLENGTH, this, + (m) -> 
toLongHelper(m.getBits())); } // Reductions @@ -771,6 +805,20 @@ final class Float64Vector extends FloatVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + FloatVector fromArray0(float[] a, int offset, VectorMask m) { + return super.fromArray0Template(Float64Mask.class, a, offset, (Float64Mask) m); // specialize + } + + @ForceInline + @Override + final + FloatVector fromArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(Float64Mask.class, a, offset, indexMap, mapOffset, (Float64Mask) m); + } + @ForceInline @@ -780,6 +828,13 @@ final class Float64Vector extends FloatVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + FloatVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Float64Mask.class, a, offset, (Float64Mask) m); // specialize + } + @ForceInline @Override final @@ -787,6 +842,13 @@ final class Float64Vector extends FloatVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Float64Mask.class, bb, offset, (Float64Mask) m); // specialize + } + @ForceInline @Override final @@ -794,6 +856,21 @@ final class Float64Vector extends FloatVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(float[] a, int offset, VectorMask m) { + super.intoArray0Template(Float64Mask.class, a, offset, (Float64Mask) m); + } + + @ForceInline + @Override + final + void intoArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(Float64Mask.class, a, offset, indexMap, mapOffset, (Float64Mask) m); + } + + @ForceInline @Override final @@ -801,6 +878,21 @@ final class Float64Vector extends FloatVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Float64Mask.class, a, offset, (Float64Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Float64Mask.class, bb, offset, (Float64Mask) m); + } + + // End of specialized low-level memory operations. 
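The masked lanewise overloads above no longer expand to blend(lanewise(op), m); the mask is passed straight into the unary/binary intrinsics, and lanes unset in the mask retain this vector's original value. A sketch of the public behavior (species choice and data are illustrative):

    import jdk.incubator.vector.FloatVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    public class MaskedLanewiseSketch {
        static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_PREFERRED;

        public static void main(String[] args) {
            float[] a = new float[SPECIES.length()];
            float[] b = new float[SPECIES.length()];
            for (int i = 0; i < a.length; i++) { a[i] = i; b[i] = 2f; }
            VectorMask<Float> firstHalf = SPECIES.indexInRange(0, a.length / 2);
            FloatVector va = FloatVector.fromArray(SPECIES, a, 0);
            FloatVector vb = FloatVector.fromArray(SPECIES, b, 0);
            // Masked-off lanes keep va's original value (no blend round-trip).
            FloatVector neg = va.lanewise(VectorOperators.NEG, firstHalf);
            FloatVector mul = va.lanewise(VectorOperators.MUL, vb, firstHalf);
            System.out.println(neg + " " + mul);
        }
    }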
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java index 71d6ac05b69c6fe0724e53a1baecc8b29af06222..f2e1bd05c13d290677c414ae909b4597f2b43e88 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java @@ -236,8 +236,8 @@ final class FloatMaxVector extends FloatVector { @ForceInline final @Override - float rOp(float v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + float rOp(float v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,22 +273,42 @@ final class FloatMaxVector extends FloatVector { return (FloatMaxVector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public FloatMaxVector lanewise(Unary op, VectorMask m) { + return (FloatMaxVector) super.lanewiseTemplate(op, FloatMaxMask.class, (FloatMaxMask) m); // specialize + } + @Override @ForceInline public FloatMaxVector lanewise(Binary op, Vector v) { return (FloatMaxVector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public FloatMaxVector lanewise(Binary op, Vector v, VectorMask m) { + return (FloatMaxVector) super.lanewiseTemplate(op, FloatMaxMask.class, v, (FloatMaxMask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final FloatMaxVector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (FloatMaxVector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + FloatMaxVector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (FloatMaxVector) super.lanewiseTemplate(op, FloatMaxMask.class, v1, v2, (FloatMaxMask) m); // specialize + } + @Override @ForceInline public final @@ -308,7 +328,7 @@ final class FloatMaxVector extends FloatVector { @ForceInline public final float reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, FloatMaxMask.class, (FloatMaxMask) m); // specialized } @Override @@ -321,7 +341,7 @@ final class FloatMaxVector extends FloatVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, FloatMaxMask.class, (FloatMaxMask) m); // specialized } @ForceInline @@ -357,6 +377,13 @@ final class FloatMaxVector extends FloatVector { return super.compareTemplate(FloatMaxMask.class, op, s); // specialize } + @Override + @ForceInline + public final FloatMaxMask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(FloatMaxMask.class, op, v, (FloatMaxMask) m); + } + + @Override @ForceInline public FloatMaxVector blend(Vector v, VectorMask m) { @@ -413,6 +440,7 @@ final class FloatMaxVector extends FloatVector { VectorMask m) { return (FloatMaxVector) super.rearrangeTemplate(FloatMaxShuffle.class, + FloatMaxMask.class, (FloatMaxShuffle) shuffle, (FloatMaxMask) m); // specialize } @@ -577,16 +605,12 @@ final class FloatMaxVector extends FloatVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and 
species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - FloatMaxMask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -612,9 +636,9 @@ final class FloatMaxVector extends FloatVector { public FloatMaxMask and(VectorMask mask) { Objects.requireNonNull(mask); FloatMaxMask m = (FloatMaxMask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, FloatMaxMask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, FloatMaxMask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -622,9 +646,9 @@ final class FloatMaxVector extends FloatVector { public FloatMaxMask or(VectorMask mask) { Objects.requireNonNull(mask); FloatMaxMask m = (FloatMaxMask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, FloatMaxMask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, FloatMaxMask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -632,9 +656,9 @@ final class FloatMaxVector extends FloatVector { FloatMaxMask xor(VectorMask mask) { Objects.requireNonNull(mask); FloatMaxMask m = (FloatMaxMask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, FloatMaxMask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, FloatMaxMask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -642,22 +666,32 @@ final class FloatMaxVector extends FloatVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, FloatMaxMask.class, int.class, VLENGTH, this, - (m) -> trueCountHelper(((FloatMaxMask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, FloatMaxMask.class, int.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, FloatMaxMask.class, int.class, VLENGTH, this, - (m) -> firstTrueHelper(((FloatMaxMask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, FloatMaxMask.class, int.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, FloatMaxMask.class, int.class, VLENGTH, this, - (m) -> lastTrueHelper(((FloatMaxMask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, FloatMaxMask.class, int.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, 
FloatMaxMask.class, int.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -768,6 +802,20 @@ final class FloatMaxVector extends FloatVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + FloatVector fromArray0(float[] a, int offset, VectorMask m) { + return super.fromArray0Template(FloatMaxMask.class, a, offset, (FloatMaxMask) m); // specialize + } + + @ForceInline + @Override + final + FloatVector fromArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(FloatMaxMask.class, a, offset, indexMap, mapOffset, (FloatMaxMask) m); + } + @ForceInline @@ -777,6 +825,13 @@ final class FloatMaxVector extends FloatVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + FloatVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(FloatMaxMask.class, a, offset, (FloatMaxMask) m); // specialize + } + @ForceInline @Override final @@ -784,6 +839,13 @@ final class FloatMaxVector extends FloatVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(FloatMaxMask.class, bb, offset, (FloatMaxMask) m); // specialize + } + @ForceInline @Override final @@ -791,6 +853,21 @@ final class FloatMaxVector extends FloatVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(float[] a, int offset, VectorMask m) { + super.intoArray0Template(FloatMaxMask.class, a, offset, (FloatMaxMask) m); + } + + @ForceInline + @Override + final + void intoArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(FloatMaxMask.class, a, offset, indexMap, mapOffset, (FloatMaxMask) m); + } + + @ForceInline @Override final @@ -798,6 +875,21 @@ final class FloatMaxVector extends FloatVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(FloatMaxMask.class, a, offset, (FloatMaxMask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(FloatMaxMask.class, bb, offset, (FloatMaxMask) m); + } + + // End of specialized low-level memory operations. 
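The masked compare override above folds the mask into the compare intrinsic itself; as the removed default in FloatVector.java below shows, it is equivalent to compare(op, v).and(m) but avoids the separate AND. A sketch using the scalar overload (the values and class name are illustrative):

    import jdk.incubator.vector.FloatVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    // Illustrative: a lane is set only where the comparison holds
    // AND the input mask is set.
    public class MaskedCompareSketch {
        static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_PREFERRED;

        public static void main(String[] args) {
            float[] a = new float[SPECIES.length()];
            for (int i = 0; i < a.length; i++) a[i] = i - 2.0f;
            FloatVector v = FloatVector.fromArray(SPECIES, a, 0);
            VectorMask<Float> valid = SPECIES.indexInRange(0, a.length - 1);
            VectorMask<Float> negAndValid = v.compare(VectorOperators.LT, 0.0f, valid);
            // Same result as v.compare(LT, 0f).and(valid), in one step.
            System.out.println(negAndValid.trueCount());
        }
    }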
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java index 7a741cd2e5e2003b9feb18d2f1f67fcaae2e3aec..d5dbc2f9efa36fd125f5c3cf7d812ca0b6c2ca20 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java @@ -29,7 +29,6 @@ import java.nio.ByteOrder; import java.nio.ReadOnlyBufferException; import java.util.Arrays; import java.util.Objects; -import java.util.function.BinaryOperator; import java.util.function.Function; import java.util.function.UnaryOperator; @@ -173,6 +172,9 @@ public abstract class FloatVector extends AbstractVector { final FloatVector uOpTemplate(VectorMask m, FUnOp f) { + if (m == null) { + return uOpTemplate(f); + } float[] vec = vec(); float[] res = new float[length()]; boolean[] mbits = ((AbstractMask)m).getBits(); @@ -216,6 +218,9 @@ public abstract class FloatVector extends AbstractVector { FloatVector bOpTemplate(Vector o, VectorMask m, FBinOp f) { + if (m == null) { + return bOpTemplate(o, f); + } float[] res = new float[length()]; float[] vec1 = this.vec(); float[] vec2 = ((FloatVector)o).vec(); @@ -265,6 +270,9 @@ public abstract class FloatVector extends AbstractVector { Vector o2, VectorMask m, FTriOp f) { + if (m == null) { + return tOpTemplate(o1, o2, f); + } float[] res = new float[length()]; float[] vec1 = this.vec(); float[] vec2 = ((FloatVector)o1).vec(); @@ -280,7 +288,22 @@ public abstract class FloatVector extends AbstractVector { /*package-private*/ abstract - float rOp(float v, FBinOp f); + float rOp(float v, VectorMask m, FBinOp f); + + @ForceInline + final + float rOpTemplate(float v, VectorMask m, FBinOp f) { + if (m == null) { + return rOpTemplate(v, f); + } + float[] vec = vec(); + boolean[] mbits = ((AbstractMask)m).getBits(); + for (int i = 0; i < vec.length; i++) { + v = mbits[i] ? 
f.apply(i, v, vec[i]) : v; + } + return v; + } + @ForceInline final float rOpTemplate(float v, FBinOp f) { @@ -540,61 +563,80 @@ public abstract class FloatVector extends AbstractVector { } int opc = opCode(op); return VectorSupport.unaryOp( - opc, getClass(), float.class, length(), - this, - UN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_NEG: return v0 -> - v0.uOp((i, a) -> (float) -a); - case VECTOR_OP_ABS: return v0 -> - v0.uOp((i, a) -> (float) Math.abs(a)); - case VECTOR_OP_SIN: return v0 -> - v0.uOp((i, a) -> (float) Math.sin(a)); - case VECTOR_OP_COS: return v0 -> - v0.uOp((i, a) -> (float) Math.cos(a)); - case VECTOR_OP_TAN: return v0 -> - v0.uOp((i, a) -> (float) Math.tan(a)); - case VECTOR_OP_ASIN: return v0 -> - v0.uOp((i, a) -> (float) Math.asin(a)); - case VECTOR_OP_ACOS: return v0 -> - v0.uOp((i, a) -> (float) Math.acos(a)); - case VECTOR_OP_ATAN: return v0 -> - v0.uOp((i, a) -> (float) Math.atan(a)); - case VECTOR_OP_EXP: return v0 -> - v0.uOp((i, a) -> (float) Math.exp(a)); - case VECTOR_OP_LOG: return v0 -> - v0.uOp((i, a) -> (float) Math.log(a)); - case VECTOR_OP_LOG10: return v0 -> - v0.uOp((i, a) -> (float) Math.log10(a)); - case VECTOR_OP_SQRT: return v0 -> - v0.uOp((i, a) -> (float) Math.sqrt(a)); - case VECTOR_OP_CBRT: return v0 -> - v0.uOp((i, a) -> (float) Math.cbrt(a)); - case VECTOR_OP_SINH: return v0 -> - v0.uOp((i, a) -> (float) Math.sinh(a)); - case VECTOR_OP_COSH: return v0 -> - v0.uOp((i, a) -> (float) Math.cosh(a)); - case VECTOR_OP_TANH: return v0 -> - v0.uOp((i, a) -> (float) Math.tanh(a)); - case VECTOR_OP_EXPM1: return v0 -> - v0.uOp((i, a) -> (float) Math.expm1(a)); - case VECTOR_OP_LOG1P: return v0 -> - v0.uOp((i, a) -> (float) Math.log1p(a)); - default: return null; - }})); + opc, getClass(), null, float.class, length(), + this, null, + UN_IMPL.find(op, opc, FloatVector::unaryOperations)); } - private static final - ImplCache> UN_IMPL - = new ImplCache<>(Unary.class, FloatVector.class); /** * {@inheritDoc} */ - @ForceInline - public final + @Override + public abstract FloatVector lanewise(VectorOperators.Unary op, - VectorMask m) { - return blend(lanewise(op), m); + VectorMask m); + @ForceInline + final + FloatVector lanewiseTemplate(VectorOperators.Unary op, + Class> maskClass, + VectorMask m) { + m.check(maskClass, this); + if (opKind(op, VO_SPECIAL)) { + if (op == ZOMO) { + return blend(broadcast(-1), compare(NE, 0, m)); + } + } + int opc = opCode(op); + return VectorSupport.unaryOp( + opc, getClass(), maskClass, float.class, length(), + this, m, + UN_IMPL.find(op, opc, FloatVector::unaryOperations)); + } + + private static final + ImplCache>> + UN_IMPL = new ImplCache<>(Unary.class, FloatVector.class); + + private static UnaryOperation> unaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_NEG: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) -a); + case VECTOR_OP_ABS: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.abs(a)); + case VECTOR_OP_SIN: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.sin(a)); + case VECTOR_OP_COS: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.cos(a)); + case VECTOR_OP_TAN: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.tan(a)); + case VECTOR_OP_ASIN: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.asin(a)); + case VECTOR_OP_ACOS: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.acos(a)); + case VECTOR_OP_ATAN: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.atan(a)); + case VECTOR_OP_EXP: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.exp(a)); + 
case VECTOR_OP_LOG: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.log(a)); + case VECTOR_OP_LOG10: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.log10(a)); + case VECTOR_OP_SQRT: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.sqrt(a)); + case VECTOR_OP_CBRT: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.cbrt(a)); + case VECTOR_OP_SINH: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.sinh(a)); + case VECTOR_OP_COSH: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.cosh(a)); + case VECTOR_OP_TANH: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.tanh(a)); + case VECTOR_OP_EXPM1: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.expm1(a)); + case VECTOR_OP_LOG1P: return (v0, m) -> + v0.uOp(m, (i, a) -> (float) Math.log1p(a)); + default: return null; + } } // Binary lanewise support @@ -614,6 +656,7 @@ public abstract class FloatVector extends AbstractVector { Vector v) { FloatVector that = (FloatVector) v; that.check(this); + if (opKind(op, VO_SPECIAL )) { if (op == FIRST_NONZERO) { // FIXME: Support this in the JIT. @@ -627,48 +670,75 @@ public abstract class FloatVector extends AbstractVector { .viewAsFloatingLanes(); } } + int opc = opCode(op); return VectorSupport.binaryOp( - opc, getClass(), float.class, length(), - this, that, - BIN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (float)(a + b)); - case VECTOR_OP_SUB: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (float)(a - b)); - case VECTOR_OP_MUL: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (float)(a * b)); - case VECTOR_OP_DIV: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (float)(a / b)); - case VECTOR_OP_MAX: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (float)Math.max(a, b)); - case VECTOR_OP_MIN: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (float)Math.min(a, b)); - case VECTOR_OP_ATAN2: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (float) Math.atan2(a, b)); - case VECTOR_OP_POW: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (float) Math.pow(a, b)); - case VECTOR_OP_HYPOT: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (float) Math.hypot(a, b)); - default: return null; - }})); + opc, getClass(), null, float.class, length(), + this, that, null, + BIN_IMPL.find(op, opc, FloatVector::binaryOperations)); } - private static final - ImplCache> BIN_IMPL - = new ImplCache<>(Binary.class, FloatVector.class); /** * {@inheritDoc} * @see #lanewise(VectorOperators.Binary,float,VectorMask) */ - @ForceInline - public final + @Override + public abstract FloatVector lanewise(VectorOperators.Binary op, Vector v, - VectorMask m) { - return blend(lanewise(op, v), m); + VectorMask m); + @ForceInline + final + FloatVector lanewiseTemplate(VectorOperators.Binary op, + Class> maskClass, + Vector v, VectorMask m) { + FloatVector that = (FloatVector) v; + that.check(this); + m.check(maskClass, this); + + if (opKind(op, VO_SPECIAL )) { + if (op == FIRST_NONZERO) { + return blend(lanewise(op, v), m); + } + } + + int opc = opCode(op); + return VectorSupport.binaryOp( + opc, getClass(), maskClass, float.class, length(), + this, that, m, + BIN_IMPL.find(op, opc, FloatVector::binaryOperations)); } + + private static final + ImplCache>> + BIN_IMPL = new ImplCache<>(Binary.class, FloatVector.class); + + private static BinaryOperation> binaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (float)(a + b)); + case VECTOR_OP_SUB: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (float)(a - 
b)); + case VECTOR_OP_MUL: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (float)(a * b)); + case VECTOR_OP_DIV: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (float)(a / b)); + case VECTOR_OP_MAX: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (float)Math.max(a, b)); + case VECTOR_OP_MIN: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (float)Math.min(a, b)); + case VECTOR_OP_OR: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> fromBits(toBits(a) | toBits(b))); + case VECTOR_OP_ATAN2: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (float) Math.atan2(a, b)); + case VECTOR_OP_POW: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (float) Math.pow(a, b)); + case VECTOR_OP_HYPOT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (float) Math.hypot(a, b)); + default: return null; + } + } + // FIXME: Maybe all of the public final methods in this file (the // simple ones that just call lanewise) should be pushed down to // the X-VectorBits template. They can't optimize properly at @@ -725,7 +795,7 @@ public abstract class FloatVector extends AbstractVector { FloatVector lanewise(VectorOperators.Binary op, float e, VectorMask m) { - return blend(lanewise(op, e), m); + return lanewise(op, broadcast(e), m); } /** @@ -743,8 +813,7 @@ public abstract class FloatVector extends AbstractVector { FloatVector lanewise(VectorOperators.Binary op, long e) { float e1 = (float) e; - if ((long)e1 != e - ) { + if ((long)e1 != e) { vspecies().checkValue(e); // for exception } return lanewise(op, e1); @@ -764,7 +833,11 @@ public abstract class FloatVector extends AbstractVector { public final FloatVector lanewise(VectorOperators.Binary op, long e, VectorMask m) { - return blend(lanewise(op, e), m); + float e1 = (float) e; + if ((long)e1 != e) { + vspecies().checkValue(e); // for exception + } + return lanewise(op, e1, m); } @@ -806,18 +879,10 @@ public abstract class FloatVector extends AbstractVector { tother.check(this); int opc = opCode(op); return VectorSupport.ternaryOp( - opc, getClass(), float.class, length(), - this, that, tother, - TERN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_FMA: return (v0, v1_, v2_) -> - v0.tOp(v1_, v2_, (i, a, b, c) -> Math.fma(a, b, c)); - default: return null; - }})); + opc, getClass(), null, float.class, length(), + this, that, tother, null, + TERN_IMPL.find(op, opc, FloatVector::ternaryOperations)); } - private static final - ImplCache> TERN_IMPL - = new ImplCache<>(Ternary.class, FloatVector.class); /** * {@inheritDoc} @@ -825,13 +890,45 @@ public abstract class FloatVector extends AbstractVector { * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask) * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask) */ - @ForceInline - public final + @Override + public abstract FloatVector lanewise(VectorOperators.Ternary op, Vector v1, Vector v2, - VectorMask m) { - return blend(lanewise(op, v1, v2), m); + VectorMask m); + @ForceInline + final + FloatVector lanewiseTemplate(VectorOperators.Ternary op, + Class> maskClass, + Vector v1, + Vector v2, + VectorMask m) { + FloatVector that = (FloatVector) v1; + FloatVector tother = (FloatVector) v2; + // It's a word: https://www.dictionary.com/browse/tother + // See also Chapter 11 of Dickens, Our Mutual Friend: + // "Totherest Governor," replied Mr Riderhood... 
+ that.check(this); + tother.check(this); + m.check(maskClass, this); + + int opc = opCode(op); + return VectorSupport.ternaryOp( + opc, getClass(), maskClass, float.class, length(), + this, that, tother, m, + TERN_IMPL.find(op, opc, FloatVector::ternaryOperations)); + } + + private static final + ImplCache>> + TERN_IMPL = new ImplCache<>(Ternary.class, FloatVector.class); + + private static TernaryOperation> ternaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_FMA: return (v0, v1_, v2_, m) -> + v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c)); + default: return null; + } } /** @@ -888,7 +985,7 @@ public abstract class FloatVector extends AbstractVector { float e1, float e2, VectorMask m) { - return blend(lanewise(op, e1, e2), m); + return lanewise(op, broadcast(e1), broadcast(e2), m); } /** @@ -946,7 +1043,7 @@ public abstract class FloatVector extends AbstractVector { Vector v1, float e2, VectorMask m) { - return blend(lanewise(op, v1, e2), m); + return lanewise(op, v1, broadcast(e2), m); } /** @@ -1003,7 +1100,7 @@ public abstract class FloatVector extends AbstractVector { float e1, Vector v2, VectorMask m) { - return blend(lanewise(op, e1, v2), m); + return lanewise(op, broadcast(e1), v2, m); } // (Thus endeth the Great and Mighty Ternary Ogdoad.) @@ -1659,15 +1756,13 @@ public abstract class FloatVector extends AbstractVector { final > M compareTemplate(Class maskType, Comparison op, Vector v) { - Objects.requireNonNull(v); - FloatSpecies vsp = vspecies(); FloatVector that = (FloatVector) v; that.check(this); int opc = opCode(op); return VectorSupport.compare( opc, getClass(), maskType, float.class, length(), - this, that, - (cond, v0, v1) -> { + this, that, null, + (cond, v0, v1, m1) -> { AbstractMask m = v0.bTest(cond, v1, (cond_, i, a, b) -> compareWithOp(cond, a, b)); @@ -1677,6 +1772,28 @@ public abstract class FloatVector extends AbstractVector { }); } + /*package-private*/ + @ForceInline + final + > + M compareTemplate(Class maskType, Comparison op, Vector v, M m) { + FloatVector that = (FloatVector) v; + that.check(this); + m.check(maskType, this); + int opc = opCode(op); + return VectorSupport.compare( + opc, getClass(), maskType, float.class, length(), + this, that, m, + (cond, v0, v1, m1) -> { + AbstractMask cmpM + = v0.bTest(cond, v1, (cond_, i, a, b) + -> compareWithOp(cond, a, b)); + @SuppressWarnings("unchecked") + M m2 = (M) cmpM.and(m1); + return m2; + }); + } + @ForceInline private static boolean compareWithOp(int cond, float a, float b) { return switch (cond) { @@ -1690,18 +1807,6 @@ public abstract class FloatVector extends AbstractVector { }; } - /** - * {@inheritDoc} - */ - @Override - @ForceInline - public final - VectorMask compare(VectorOperators.Comparison op, - Vector v, - VectorMask m) { - return compare(op, v).and(m); - } - /** * Tests this vector by comparing it with an input scalar, * according to the given comparison operation. 
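The hunks above replace the blend-based defaults for masked binary/ternary lanewise and masked compare with paths that carry the mask class and mask straight into VectorSupport. As an illustrative sketch of what these entry points do at the public API level (the demo class and variable names are hypothetical, not part of the patch, and the example assumes the incubating module is enabled with --add-modules jdk.incubator.vector):

import jdk.incubator.vector.*;

public class MaskedLanewiseDemo {
    static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_256;

    public static void main(String[] args) {
        float[] a = {1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f};
        float[] b = {8f, 7f, 6f, 5f, 4f, 3f, 2f, 1f};
        FloatVector va = FloatVector.fromArray(SPECIES, a, 0);
        FloatVector vb = FloatVector.fromArray(SPECIES, b, 0);

        // Masked compare: GT restricted to the even lanes; the result equals
        // compare(op, v).and(m), now computed by one masked intrinsic call.
        VectorMask<Float> even = VectorMask.fromLong(SPECIES, 0b01010101L);
        VectorMask<Float> gt = va.compare(VectorOperators.GT, vb, even);

        // Masked ternary FMA: unset lanes keep va's original value instead of
        // being patched in by a separate blend step.
        FloatVector one = FloatVector.broadcast(SPECIES, 1f);
        FloatVector r = va.lanewise(VectorOperators.FMA, vb, one, gt);
        System.out.println(r);
    }
}

The scalar overloads in the diff (taking float e1, e2) now broadcast their arguments and call this same masked vector path rather than blending an unmasked result.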
@@ -1760,7 +1865,7 @@ public abstract class FloatVector extends AbstractVector { public final VectorMask compare(VectorOperators.Comparison op, float e, VectorMask m) { - return compare(op, e).and(m); + return compare(op, broadcast(e), m); } /** @@ -2011,9 +2116,9 @@ public abstract class FloatVector extends AbstractVector { FloatVector rearrangeTemplate(Class shuffletype, S shuffle) { shuffle.checkIndexes(); return VectorSupport.rearrangeOp( - getClass(), shuffletype, float.class, length(), - this, shuffle, - (v1, s_) -> v1.uOp((i, a) -> { + getClass(), shuffletype, null, float.class, length(), + this, shuffle, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); return v1.lane(ei); })); @@ -2030,24 +2135,25 @@ public abstract class FloatVector extends AbstractVector { /*package-private*/ @ForceInline final - > + , M extends VectorMask> FloatVector rearrangeTemplate(Class shuffletype, + Class masktype, S shuffle, - VectorMask m) { - FloatVector unmasked = - VectorSupport.rearrangeOp( - getClass(), shuffletype, float.class, length(), - this, shuffle, - (v1, s_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); - return ei < 0 ? 0 : v1.lane(ei); - })); + M m) { + + m.check(masktype, this); VectorMask valid = shuffle.laneIsValid(); if (m.andNot(valid).anyTrue()) { shuffle.checkIndexes(); throw new AssertionError(); } - return broadcast((float)0).blend(unmasked, m); + return VectorSupport.rearrangeOp( + getClass(), shuffletype, masktype, float.class, length(), + this, shuffle, m, + (v1, s_, m_) -> v1.uOp((i, a) -> { + int ei = s_.laneSource(i); + return ei < 0 || !m_.laneIsSet(i) ? 0 : v1.lane(ei); + })); } /** @@ -2070,17 +2176,17 @@ public abstract class FloatVector extends AbstractVector { S ws = (S) shuffle.wrapIndexes(); FloatVector r0 = VectorSupport.rearrangeOp( - getClass(), shuffletype, float.class, length(), - this, ws, - (v0, s_) -> v0.uOp((i, a) -> { + getClass(), shuffletype, null, float.class, length(), + this, ws, null, + (v0, s_, m_) -> v0.uOp((i, a) -> { int ei = s_.laneSource(i); return v0.lane(ei); })); FloatVector r1 = VectorSupport.rearrangeOp( - getClass(), shuffletype, float.class, length(), - v, ws, - (v1, s_) -> v1.uOp((i, a) -> { + getClass(), shuffletype, null, float.class, length(), + v, ws, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); return v1.lane(ei); })); @@ -2329,9 +2435,18 @@ public abstract class FloatVector extends AbstractVector { @ForceInline final float reduceLanesTemplate(VectorOperators.Associative op, + Class> maskClass, VectorMask m) { - FloatVector v = reduceIdentityVector(op).blend(this, m); - return v.reduceLanesTemplate(op); + m.check(maskClass, this); + if (op == FIRST_NONZERO) { + FloatVector v = reduceIdentityVector(op).blend(this, m); + return v.reduceLanesTemplate(op); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), maskClass, float.class, length(), + this, m, + REDUCE_IMPL.find(op, opc, FloatVector::reductionOperations))); } /*package-private*/ @@ -2346,24 +2461,28 @@ public abstract class FloatVector extends AbstractVector { } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), float.class, length(), - this, - REDUCE_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return v -> - toBits(v.rOp((float)0, (i, a, b) -> (float)(a + b))); - case VECTOR_OP_MUL: return v -> - toBits(v.rOp((float)1, (i, a, b) -> (float)(a * b))); - case VECTOR_OP_MIN: return v -> - toBits(v.rOp(MAX_OR_INF, (i, a, b) 
-> (float) Math.min(a, b))); - case VECTOR_OP_MAX: return v -> - toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (float) Math.max(a, b))); - default: return null; - }}))); + opc, getClass(), null, float.class, length(), + this, null, + REDUCE_IMPL.find(op, opc, FloatVector::reductionOperations))); } + private static final - ImplCache> REDUCE_IMPL - = new ImplCache<>(Associative.class, FloatVector.class); + ImplCache>> + REDUCE_IMPL = new ImplCache<>(Associative.class, FloatVector.class); + + private static ReductionOperation> reductionOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp((float)0, m, (i, a, b) -> (float)(a + b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp((float)1, m, (i, a, b) -> (float)(a * b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (float) Math.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (float) Math.max(a, b))); + default: return null; + } + } private @ForceInline @@ -2573,9 +2692,7 @@ public abstract class FloatVector extends AbstractVector { VectorMask m) { FloatSpecies vsp = (FloatSpecies) species; if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) { - FloatVector zero = vsp.zero(); - FloatVector v = zero.fromByteArray0(a, offset); - return zero.blend(v.maybeSwap(bo), m); + return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo); } // FIXME: optimize @@ -2637,8 +2754,7 @@ public abstract class FloatVector extends AbstractVector { VectorMask m) { FloatSpecies vsp = (FloatSpecies) species; if (offset >= 0 && offset <= (a.length - species.length())) { - FloatVector zero = vsp.zero(); - return zero.blend(zero.fromArray0(a, offset), m); + return vsp.dummyVector().fromArray0(a, offset, m); } // FIXME: optimize @@ -2696,13 +2812,13 @@ public abstract class FloatVector extends AbstractVector { vix = VectorIntrinsics.checkIndex(vix, a.length); return VectorSupport.loadWithMap( - vectorType, float.class, vsp.laneCount(), - IntVector.species(vsp.indexShape()).vectorType(), - a, ARRAY_BASE, vix, + vectorType, null, float.class, vsp.laneCount(), + isp.vectorType(), + a, ARRAY_BASE, vix, null, a, offset, indexMap, mapOffset, vsp, - (float[] c, int idx, int[] iMap, int idy, FloatSpecies s) -> + (c, idx, iMap, idy, s, vm) -> s.vOp(n -> c[idx + iMap[idy+n]])); - } + } /** * Gathers a new vector composed of elements from an array of type @@ -2750,9 +2866,8 @@ public abstract class FloatVector extends AbstractVector { return fromArray(species, a, offset, indexMap, mapOffset); } else { - // FIXME: Cannot vectorize yet, if there's a mask. 
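Backing up to the reduction hunks just above: reduceLanesTemplate now hands the mask class to VectorSupport.reductionCoerced, and the shared reductionOperations table supplies each operation's identity for unset lanes (0 for ADD, 1 for MUL, and MAX_OR_INF/MIN_OR_INF for MIN/MAX). A minimal sketch of the resulting behavior, under the same hypothetical-demo-class caveat as before:

import jdk.incubator.vector.*;

public class MaskedReduceDemo {
    static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_128;

    public static void main(String[] args) {
        FloatVector v = FloatVector.fromArray(SPECIES, new float[] {1f, 2f, 3f, 4f}, 0);
        VectorMask<Float> m = VectorMask.fromValues(SPECIES, true, false, true, false);
        // Unset lanes contribute the identity, so only lanes 0 and 2 count.
        System.out.println(v.reduceLanes(VectorOperators.ADD, m)); // 4.0
        System.out.println(v.reduceLanes(VectorOperators.MUL, m)); // 3.0
        System.out.println(v.reduceLanes(VectorOperators.MAX, m)); // 3.0
    }
}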
FloatSpecies vsp = (FloatSpecies) species; - return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]); + return vsp.dummyVector().fromArray0(a, offset, indexMap, mapOffset, m); } } @@ -2846,9 +2961,7 @@ public abstract class FloatVector extends AbstractVector { VectorMask m) { FloatSpecies vsp = (FloatSpecies) species; if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) { - FloatVector zero = vsp.zero(); - FloatVector v = zero.fromByteBuffer0(bb, offset); - return zero.blend(v.maybeSwap(bo), m); + return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo); } // FIXME: optimize @@ -2920,10 +3033,9 @@ public abstract class FloatVector extends AbstractVector { if (m.allTrue()) { intoArray(a, offset); } else { - // FIXME: optimize FloatSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 1, a.length); - stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v); + intoArray0(a, offset, m); } } @@ -2967,12 +3079,12 @@ public abstract class FloatVector extends AbstractVector { vix = VectorIntrinsics.checkIndex(vix, a.length); VectorSupport.storeWithMap( - vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + vsp.vectorType(), null, vsp.elementType(), vsp.laneCount(), isp.vectorType(), a, arrayAddress(a, 0), vix, - this, + this, null, a, offset, indexMap, mapOffset, - (arr, off, v, map, mo) + (arr, off, v, map, mo, vm) -> v.stOp(arr, off, (arr_, off_, i, e) -> { int j = map[mo + i]; @@ -3019,12 +3131,7 @@ public abstract class FloatVector extends AbstractVector { intoArray(a, offset, indexMap, mapOffset); } else { - // FIXME: Cannot vectorize yet, if there's a mask. - stOp(a, offset, m, - (arr, off, i, e) -> { - int j = indexMap[mapOffset + i]; - arr[off + j] = e; - }); + intoArray0(a, offset, indexMap, mapOffset, m); } } @@ -3054,12 +3161,9 @@ public abstract class FloatVector extends AbstractVector { if (m.allTrue()) { intoByteArray(a, offset, bo); } else { - // FIXME: optimize FloatSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 4, a.length); - ByteBuffer wb = wrapper(a, bo); - this.stOp(wb, offset, m, - (wb_, o, i, e) -> wb_.putFloat(o + i * 4, e)); + maybeSwap(bo).intoByteArray0(a, offset, m); } } @@ -3071,7 +3175,7 @@ public abstract class FloatVector extends AbstractVector { public final void intoByteBuffer(ByteBuffer bb, int offset, ByteOrder bo) { - if (bb.isReadOnly()) { + if (ScopedMemoryAccess.isReadOnly(bb)) { throw new ReadOnlyBufferException(); } offset = checkFromIndexSize(offset, byteSize(), bb.limit()); @@ -3090,15 +3194,12 @@ public abstract class FloatVector extends AbstractVector { if (m.allTrue()) { intoByteBuffer(bb, offset, bo); } else { - // FIXME: optimize if (bb.isReadOnly()) { throw new ReadOnlyBufferException(); } FloatSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit()); - ByteBuffer wb = wrapper(bb, bo); - this.stOp(wb, offset, m, - (wb_, o, i, e) -> wb_.putFloat(o + i * 4, e)); + maybeSwap(bo).intoByteBuffer0(bb, offset, m); } } @@ -3136,6 +3237,57 @@ public abstract class FloatVector extends AbstractVector { (arr_, off_, i) -> arr_[off_ + i])); } + /*package-private*/ + abstract + FloatVector fromArray0(float[] a, int offset, VectorMask m); + @ForceInline + final + > + FloatVector fromArray0Template(Class maskClass, float[] a, int offset, M m) { + m.check(species()); + FloatSpecies vsp = vspecies(); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> 
s.ldOp(arr, off, vm, + (arr_, off_, i) -> arr_[off_ + i])); + } + + /*package-private*/ + abstract + FloatVector fromArray0(float[] a, int offset, + int[] indexMap, int mapOffset, + VectorMask m); + @ForceInline + final + > + FloatVector fromArray0Template(Class maskClass, float[] a, int offset, + int[] indexMap, int mapOffset, M m) { + FloatSpecies vsp = vspecies(); + IntVector.IntSpecies isp = IntVector.species(vsp.indexShape()); + Objects.requireNonNull(a); + Objects.requireNonNull(indexMap); + m.check(vsp); + Class vectorType = vsp.vectorType(); + + // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k] + IntVector vix = IntVector + .fromArray(isp, indexMap, mapOffset) + .add(offset); + + // FIXME: Check index under mask controlling. + vix = VectorIntrinsics.checkIndex(vix, a.length); + + return VectorSupport.loadWithMap( + vectorType, maskClass, float.class, vsp.laneCount(), + isp.vectorType(), + a, ARRAY_BASE, vix, m, + a, offset, indexMap, mapOffset, vsp, + (c, idx, iMap, idy, s, vm) -> + s.vOp(vm, n -> c[idx + iMap[idy+n]])); + } + @Override @@ -3156,6 +3308,25 @@ public abstract class FloatVector extends AbstractVector { }); } + abstract + FloatVector fromByteArray0(byte[] a, int offset, VectorMask m); + @ForceInline + final + > + FloatVector fromByteArray0Template(Class maskClass, byte[] a, int offset, M m) { + FloatSpecies vsp = vspecies(); + m.check(vsp); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + return s.ldOp(wb, off, vm, + (wb_, o, i) -> wb_.getFloat(o + i * 4)); + }); + } + abstract FloatVector fromByteBuffer0(ByteBuffer bb, int offset); @ForceInline @@ -3172,6 +3343,24 @@ public abstract class FloatVector extends AbstractVector { }); } + abstract + FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m); + @ForceInline + final + > + FloatVector fromByteBuffer0Template(Class maskClass, ByteBuffer bb, int offset, M m) { + FloatSpecies vsp = vspecies(); + m.check(vsp); + return ScopedMemoryAccess.loadFromByteBufferMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + bb, offset, m, vsp, + (buf, off, s, vm) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + return s.ldOp(wb, off, vm, + (wb_, o, i) -> wb_.getFloat(o + i * 4)); + }); + } + // Unchecked storing operations in native byte order. // Caller is responsible for applying index checks, masking, and // byte swapping. 
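The fromArray0Template / fromByteArray0Template / fromByteBuffer0Template methods above are the low-level masked loads that the public entry points now reach via vsp.dummyVector() instead of a zero-vector blend. A sketch of the user-visible effect (demo class hypothetical; note that the gather template still checks every index, per its FIXME, so map entries should be in range even under a cleared mask bit):

import jdk.incubator.vector.*;

public class MaskedLoadDemo {
    static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_128;

    public static void main(String[] args) {
        float[] a = {10f, 20f, 30f};                                // shorter than one 4-lane vector
        VectorMask<Float> tail = SPECIES.indexInRange(0, a.length); // lanes 0..2 set
        // Masked contiguous load: lane 3 is never read and comes back as 0.0.
        FloatVector v = FloatVector.fromArray(SPECIES, a, 0, tail);
        System.out.println(v); // [10.0, 20.0, 30.0, 0.0]

        // Masked gather through an index map.
        int[] indexMap = {2, 0, 1, 1};
        FloatVector g = FloatVector.fromArray(SPECIES, a, 0, indexMap, 0, tail);
        System.out.println(g); // [30.0, 10.0, 20.0, 0.0]
    }
}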
@@ -3191,6 +3380,58 @@ public abstract class FloatVector extends AbstractVector { (arr_, off_, i, e) -> arr_[off_+i] = e)); } + abstract + void intoArray0(float[] a, int offset, VectorMask m); + @ForceInline + final + > + void intoArray0Template(Class maskClass, float[] a, int offset, M m) { + m.check(species()); + FloatSpecies vsp = vspecies(); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> arr_[off_ + i] = e)); + } + + abstract + void intoArray0(float[] a, int offset, + int[] indexMap, int mapOffset, + VectorMask m); + @ForceInline + final + > + void intoArray0Template(Class maskClass, float[] a, int offset, + int[] indexMap, int mapOffset, M m) { + m.check(species()); + FloatSpecies vsp = vspecies(); + IntVector.IntSpecies isp = IntVector.species(vsp.indexShape()); + // Index vector: vix[0:n] = i -> offset + indexMap[mo + i] + IntVector vix = IntVector + .fromArray(isp, indexMap, mapOffset) + .add(offset); + + // FIXME: Check index under mask controlling. + vix = VectorIntrinsics.checkIndex(vix, a.length); + + VectorSupport.storeWithMap( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + isp.vectorType(), + a, arrayAddress(a, 0), vix, + this, m, + a, offset, indexMap, mapOffset, + (arr, off, v, map, mo, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> { + int j = map[mo + i]; + arr[off + j] = e; + })); + } + + abstract void intoByteArray0(byte[] a, int offset); @ForceInline @@ -3208,6 +3449,25 @@ public abstract class FloatVector extends AbstractVector { }); } + abstract + void intoByteArray0(byte[] a, int offset, VectorMask m); + @ForceInline + final + > + void intoByteArray0Template(Class maskClass, byte[] a, int offset, M m) { + FloatSpecies vsp = vspecies(); + m.check(vsp); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + v.stOp(wb, off, vm, + (tb_, o, i, e) -> tb_.putFloat(o + i * 4, e)); + }); + } + @ForceInline final void intoByteBuffer0(ByteBuffer bb, int offset) { @@ -3222,6 +3482,25 @@ public abstract class FloatVector extends AbstractVector { }); } + abstract + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m); + @ForceInline + final + > + void intoByteBuffer0Template(Class maskClass, ByteBuffer bb, int offset, M m) { + FloatSpecies vsp = vspecies(); + m.check(vsp); + ScopedMemoryAccess.storeIntoByteBufferMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + this, m, bb, offset, + (buf, off, v, vm) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + v.stOp(wb, off, vm, + (wb_, o, i, e) -> wb_.putFloat(o + i * 4, e)); + }); + } + + // End of low-level memory operations. 
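With the masked intoArray0 / intoArray0 (scatter) / intoByteArray0 / intoByteBuffer0 templates above, the classic loop-tail pattern needs no scalar cleanup pass: unselected lanes simply leave the destination untouched. A minimal sketch (hypothetical demo class, incubating module assumed):

import java.util.Arrays;
import jdk.incubator.vector.*;

public class MaskedStoreDemo {
    static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_128;

    public static void main(String[] args) {
        float[] src = {1f, 2f, 3f, 4f, 5f};
        float[] dst = new float[src.length];
        for (int i = 0; i < src.length; i += SPECIES.length()) {
            VectorMask<Float> m = SPECIES.indexInRange(i, src.length);
            FloatVector v = FloatVector.fromArray(SPECIES, src, i, m);
            v.mul(2f).intoArray(dst, i, m);  // masked store: no out-of-bounds write
        }
        System.out.println(Arrays.toString(dst)); // [2.0, 4.0, 6.0, 8.0, 10.0]
    }
}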
private static @@ -3539,7 +3818,7 @@ public abstract class FloatVector extends AbstractVector { /*package-private*/ @ForceInline FloatVector ldOp(M memory, int offset, - AbstractMask m, + VectorMask m, FLdOp f) { return dummyVector().ldOp(memory, offset, m, f); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java index 3b9461cc55a421c37e544795e262dfdae4f5f791..f54042159699ce51d8bf954ec127399f501d6c8d 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java @@ -236,8 +236,8 @@ final class Int128Vector extends IntVector { @ForceInline final @Override - int rOp(int v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + int rOp(int v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,12 +273,24 @@ final class Int128Vector extends IntVector { return (Int128Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Int128Vector lanewise(Unary op, VectorMask m) { + return (Int128Vector) super.lanewiseTemplate(op, Int128Mask.class, (Int128Mask) m); // specialize + } + @Override @ForceInline public Int128Vector lanewise(Binary op, Vector v) { return (Int128Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Int128Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Int128Vector) super.lanewiseTemplate(op, Int128Mask.class, v, (Int128Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline Int128Vector @@ -286,15 +298,30 @@ final class Int128Vector extends IntVector { return (Int128Vector) super.lanewiseShiftTemplate(op, e); // specialize } + /*package-private*/ + @Override + @ForceInline Int128Vector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m) { + return (Int128Vector) super.lanewiseShiftTemplate(op, Int128Mask.class, e, (Int128Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Int128Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Int128Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Int128Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Int128Vector) super.lanewiseTemplate(op, Int128Mask.class, v1, v2, (Int128Mask) m); // specialize + } + @Override @ForceInline public final @@ -314,7 +341,7 @@ final class Int128Vector extends IntVector { @ForceInline public final int reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Int128Mask.class, (Int128Mask) m); // specialized } @Override @@ -327,7 +354,7 @@ final class Int128Vector extends IntVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Int128Mask.class, (Int128Mask) m); // specialized } @ForceInline @@ -363,6 +390,13 @@ final class Int128Vector extends IntVector { return super.compareTemplate(Int128Mask.class, op, s); // specialize } + @Override + @ForceInline + public final Int128Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Int128Mask.class, op, v, 
(Int128Mask) m); + } + + @Override @ForceInline public Int128Vector blend(Vector v, VectorMask m) { @@ -419,6 +453,7 @@ final class Int128Vector extends IntVector { VectorMask m) { return (Int128Vector) super.rearrangeTemplate(Int128Shuffle.class, + Int128Mask.class, (Int128Shuffle) shuffle, (Int128Mask) m); // specialize } @@ -588,16 +623,12 @@ final class Int128Vector extends IntVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Int128Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -623,9 +654,9 @@ final class Int128Vector extends IntVector { public Int128Mask and(VectorMask mask) { Objects.requireNonNull(mask); Int128Mask m = (Int128Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Int128Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Int128Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -633,9 +664,9 @@ final class Int128Vector extends IntVector { public Int128Mask or(VectorMask mask) { Objects.requireNonNull(mask); Int128Mask m = (Int128Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Int128Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Int128Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -643,9 +674,9 @@ final class Int128Vector extends IntVector { Int128Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Int128Mask m = (Int128Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Int128Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Int128Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -653,22 +684,32 @@ final class Int128Vector extends IntVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Int128Mask.class, int.class, VLENGTH, this, - (m) -> trueCountHelper(((Int128Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Int128Mask.class, int.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Int128Mask.class, int.class, VLENGTH, this, - (m) -> firstTrueHelper(((Int128Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Int128Mask.class, int.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Int128Mask.class, 
int.class, VLENGTH, this, - (m) -> lastTrueHelper(((Int128Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Int128Mask.class, int.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Int128Mask.class, int.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -779,6 +820,20 @@ final class Int128Vector extends IntVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + IntVector fromArray0(int[] a, int offset, VectorMask m) { + return super.fromArray0Template(Int128Mask.class, a, offset, (Int128Mask) m); // specialize + } + + @ForceInline + @Override + final + IntVector fromArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(Int128Mask.class, a, offset, indexMap, mapOffset, (Int128Mask) m); + } + @ForceInline @@ -788,6 +843,13 @@ final class Int128Vector extends IntVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + IntVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Int128Mask.class, a, offset, (Int128Mask) m); // specialize + } + @ForceInline @Override final @@ -795,6 +857,13 @@ final class Int128Vector extends IntVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Int128Mask.class, bb, offset, (Int128Mask) m); // specialize + } + @ForceInline @Override final @@ -802,6 +871,21 @@ final class Int128Vector extends IntVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(int[] a, int offset, VectorMask m) { + super.intoArray0Template(Int128Mask.class, a, offset, (Int128Mask) m); + } + + @ForceInline + @Override + final + void intoArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(Int128Mask.class, a, offset, indexMap, mapOffset, (Int128Mask) m); + } + + @ForceInline @Override final @@ -809,6 +893,21 @@ final class Int128Vector extends IntVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Int128Mask.class, a, offset, (Int128Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Int128Mask.class, bb, offset, (Int128Mask) m); + } + + // End of specialized low-level memory operations. 
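That completes the Int128Vector specializations. The mask-query hunks deserve a second look: trueCount/firstTrue/lastTrue now narrow a long result from VectorSupport.maskReductionCoerced, and toLong joins them as a new intrinsic. A sketch of the queries themselves (hypothetical demo class):

import jdk.incubator.vector.*;

public class MaskQueryDemo {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_128;

    public static void main(String[] args) {
        VectorMask<Integer> m = VectorMask.fromValues(SPECIES, true, false, true, true);
        System.out.println(m.trueCount());                    // 3
        System.out.println(m.firstTrue());                    // 0
        System.out.println(m.lastTrue());                     // 3
        // Lane i maps to bit i, so {T,F,T,T} encodes as 0b1101.
        System.out.println(Long.toBinaryString(m.toLong()));  // 1101
    }
}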
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java index 7cfad269b9cedccd3f023766e9c34ca0c64a2647..f5ed7ce09b87069317a722e64922e2e8c56bb7e5 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java @@ -236,8 +236,8 @@ final class Int256Vector extends IntVector { @ForceInline final @Override - int rOp(int v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + int rOp(int v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,12 +273,24 @@ final class Int256Vector extends IntVector { return (Int256Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Int256Vector lanewise(Unary op, VectorMask m) { + return (Int256Vector) super.lanewiseTemplate(op, Int256Mask.class, (Int256Mask) m); // specialize + } + @Override @ForceInline public Int256Vector lanewise(Binary op, Vector v) { return (Int256Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Int256Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Int256Vector) super.lanewiseTemplate(op, Int256Mask.class, v, (Int256Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline Int256Vector @@ -286,15 +298,30 @@ final class Int256Vector extends IntVector { return (Int256Vector) super.lanewiseShiftTemplate(op, e); // specialize } + /*package-private*/ + @Override + @ForceInline Int256Vector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m) { + return (Int256Vector) super.lanewiseShiftTemplate(op, Int256Mask.class, e, (Int256Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Int256Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Int256Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Int256Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Int256Vector) super.lanewiseTemplate(op, Int256Mask.class, v1, v2, (Int256Mask) m); // specialize + } + @Override @ForceInline public final @@ -314,7 +341,7 @@ final class Int256Vector extends IntVector { @ForceInline public final int reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Int256Mask.class, (Int256Mask) m); // specialized } @Override @@ -327,7 +354,7 @@ final class Int256Vector extends IntVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Int256Mask.class, (Int256Mask) m); // specialized } @ForceInline @@ -363,6 +390,13 @@ final class Int256Vector extends IntVector { return super.compareTemplate(Int256Mask.class, op, s); // specialize } + @Override + @ForceInline + public final Int256Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Int256Mask.class, op, v, (Int256Mask) m); + } + + @Override @ForceInline public Int256Vector blend(Vector v, VectorMask m) { @@ -419,6 +453,7 @@ final class Int256Vector extends IntVector { VectorMask m) { return (Int256Vector) 
super.rearrangeTemplate(Int256Shuffle.class, + Int256Mask.class, (Int256Shuffle) shuffle, (Int256Mask) m); // specialize } @@ -596,16 +631,12 @@ final class Int256Vector extends IntVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Int256Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -631,9 +662,9 @@ final class Int256Vector extends IntVector { public Int256Mask and(VectorMask mask) { Objects.requireNonNull(mask); Int256Mask m = (Int256Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Int256Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Int256Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -641,9 +672,9 @@ final class Int256Vector extends IntVector { public Int256Mask or(VectorMask mask) { Objects.requireNonNull(mask); Int256Mask m = (Int256Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Int256Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Int256Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -651,9 +682,9 @@ final class Int256Vector extends IntVector { Int256Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Int256Mask m = (Int256Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Int256Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Int256Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -661,22 +692,32 @@ final class Int256Vector extends IntVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Int256Mask.class, int.class, VLENGTH, this, - (m) -> trueCountHelper(((Int256Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Int256Mask.class, int.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Int256Mask.class, int.class, VLENGTH, this, - (m) -> firstTrueHelper(((Int256Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Int256Mask.class, int.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Int256Mask.class, int.class, VLENGTH, this, - (m) -> lastTrueHelper(((Int256Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Int256Mask.class, int.class, VLENGTH, this, + (m) -> 
lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Int256Mask.class, int.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -787,6 +828,20 @@ final class Int256Vector extends IntVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + IntVector fromArray0(int[] a, int offset, VectorMask m) { + return super.fromArray0Template(Int256Mask.class, a, offset, (Int256Mask) m); // specialize + } + + @ForceInline + @Override + final + IntVector fromArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(Int256Mask.class, a, offset, indexMap, mapOffset, (Int256Mask) m); + } + @ForceInline @@ -796,6 +851,13 @@ final class Int256Vector extends IntVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + IntVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Int256Mask.class, a, offset, (Int256Mask) m); // specialize + } + @ForceInline @Override final @@ -803,6 +865,13 @@ final class Int256Vector extends IntVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Int256Mask.class, bb, offset, (Int256Mask) m); // specialize + } + @ForceInline @Override final @@ -810,6 +879,21 @@ final class Int256Vector extends IntVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(int[] a, int offset, VectorMask m) { + super.intoArray0Template(Int256Mask.class, a, offset, (Int256Mask) m); + } + + @ForceInline + @Override + final + void intoArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(Int256Mask.class, a, offset, indexMap, mapOffset, (Int256Mask) m); + } + + @ForceInline @Override final @@ -817,6 +901,21 @@ final class Int256Vector extends IntVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Int256Mask.class, a, offset, (Int256Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Int256Mask.class, bb, offset, (Int256Mask) m); + } + + // End of specialized low-level memory operations. 
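The same pattern repeats for Int256Vector, including the rewritten maskCast: instead of reinterpreting bits only when the vector sizes match, it now converts with VECTOR_OP_CAST and falls back to rebuilding the mask from its boolean array, so any species with the same lane count is accepted. Sketch (hypothetical demo class):

import jdk.incubator.vector.*;

public class MaskCastDemo {
    public static void main(String[] args) {
        VectorMask<Integer> mi = VectorMask.fromValues(IntVector.SPECIES_256,
                true, false, true, false, true, false, true, false);
        // 8 int lanes (256 bits) -> 8 long lanes (512 bits): lane counts match,
        // vector bit sizes do not, and the cast still succeeds.
        VectorMask<Long> ml = mi.cast(LongVector.SPECIES_512);
        System.out.println(ml.trueCount()); // 4
    }
}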
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java index cf519221a0e94cff517a92addb6300f4c1327224..6da5f59d602c040812e21ebd2053f88f66530e72 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java @@ -236,8 +236,8 @@ final class Int512Vector extends IntVector { @ForceInline final @Override - int rOp(int v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + int rOp(int v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,12 +273,24 @@ final class Int512Vector extends IntVector { return (Int512Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Int512Vector lanewise(Unary op, VectorMask m) { + return (Int512Vector) super.lanewiseTemplate(op, Int512Mask.class, (Int512Mask) m); // specialize + } + @Override @ForceInline public Int512Vector lanewise(Binary op, Vector v) { return (Int512Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Int512Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Int512Vector) super.lanewiseTemplate(op, Int512Mask.class, v, (Int512Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline Int512Vector @@ -286,15 +298,30 @@ final class Int512Vector extends IntVector { return (Int512Vector) super.lanewiseShiftTemplate(op, e); // specialize } + /*package-private*/ + @Override + @ForceInline Int512Vector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m) { + return (Int512Vector) super.lanewiseShiftTemplate(op, Int512Mask.class, e, (Int512Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Int512Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Int512Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Int512Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Int512Vector) super.lanewiseTemplate(op, Int512Mask.class, v1, v2, (Int512Mask) m); // specialize + } + @Override @ForceInline public final @@ -314,7 +341,7 @@ final class Int512Vector extends IntVector { @ForceInline public final int reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Int512Mask.class, (Int512Mask) m); // specialized } @Override @@ -327,7 +354,7 @@ final class Int512Vector extends IntVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Int512Mask.class, (Int512Mask) m); // specialized } @ForceInline @@ -363,6 +390,13 @@ final class Int512Vector extends IntVector { return super.compareTemplate(Int512Mask.class, op, s); // specialize } + @Override + @ForceInline + public final Int512Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Int512Mask.class, op, v, (Int512Mask) m); + } + + @Override @ForceInline public Int512Vector blend(Vector v, VectorMask m) { @@ -419,6 +453,7 @@ final class Int512Vector extends IntVector { VectorMask m) { return (Int512Vector) 
super.rearrangeTemplate(Int512Shuffle.class, + Int512Mask.class, (Int512Shuffle) shuffle, (Int512Mask) m); // specialize } @@ -612,16 +647,12 @@ final class Int512Vector extends IntVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Int512Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -647,9 +678,9 @@ final class Int512Vector extends IntVector { public Int512Mask and(VectorMask mask) { Objects.requireNonNull(mask); Int512Mask m = (Int512Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Int512Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Int512Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -657,9 +688,9 @@ final class Int512Vector extends IntVector { public Int512Mask or(VectorMask mask) { Objects.requireNonNull(mask); Int512Mask m = (Int512Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Int512Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Int512Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -667,9 +698,9 @@ final class Int512Vector extends IntVector { Int512Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Int512Mask m = (Int512Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Int512Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Int512Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -677,22 +708,32 @@ final class Int512Vector extends IntVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Int512Mask.class, int.class, VLENGTH, this, - (m) -> trueCountHelper(((Int512Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Int512Mask.class, int.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Int512Mask.class, int.class, VLENGTH, this, - (m) -> firstTrueHelper(((Int512Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Int512Mask.class, int.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Int512Mask.class, int.class, VLENGTH, this, - (m) -> lastTrueHelper(((Int512Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Int512Mask.class, int.class, VLENGTH, this, + (m) -> 
lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Int512Mask.class, int.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -803,6 +844,20 @@ final class Int512Vector extends IntVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + IntVector fromArray0(int[] a, int offset, VectorMask m) { + return super.fromArray0Template(Int512Mask.class, a, offset, (Int512Mask) m); // specialize + } + + @ForceInline + @Override + final + IntVector fromArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(Int512Mask.class, a, offset, indexMap, mapOffset, (Int512Mask) m); + } + @ForceInline @@ -812,6 +867,13 @@ final class Int512Vector extends IntVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + IntVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Int512Mask.class, a, offset, (Int512Mask) m); // specialize + } + @ForceInline @Override final @@ -819,6 +881,13 @@ final class Int512Vector extends IntVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Int512Mask.class, bb, offset, (Int512Mask) m); // specialize + } + @ForceInline @Override final @@ -826,6 +895,21 @@ final class Int512Vector extends IntVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(int[] a, int offset, VectorMask m) { + super.intoArray0Template(Int512Mask.class, a, offset, (Int512Mask) m); + } + + @ForceInline + @Override + final + void intoArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(Int512Mask.class, a, offset, indexMap, mapOffset, (Int512Mask) m); + } + + @ForceInline @Override final @@ -833,6 +917,21 @@ final class Int512Vector extends IntVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Int512Mask.class, a, offset, (Int512Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Int512Mask.class, bb, offset, (Int512Mask) m); + } + + // End of specialized low-level memory operations. 
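Int512Vector follows suit. One int-specific addition in each of these classes is the masked lanewiseShift specialization, which preserves the scalar-shift-count fast path (no broadcast) when a mask is supplied. Sketch (hypothetical demo class):

import jdk.incubator.vector.*;

public class MaskedShiftDemo {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_128;

    public static void main(String[] args) {
        IntVector v = IntVector.fromArray(SPECIES, new int[] {1, 2, 4, 8}, 0);
        VectorMask<Integer> m = VectorMask.fromValues(SPECIES, true, true, false, false);
        // A scalar shift count routes through lanewiseShift; unset lanes pass through.
        System.out.println(v.lanewise(VectorOperators.LSHL, 4, m)); // [16, 32, 4, 8]
    }
}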
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java index 2775b168328f171471950c160ad5d1037976f78a..c9b1afb21b7a9c2e87f1696377aa09dc08b750fb 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java @@ -236,8 +236,8 @@ final class Int64Vector extends IntVector { @ForceInline final @Override - int rOp(int v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + int rOp(int v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,12 +273,24 @@ final class Int64Vector extends IntVector { return (Int64Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Int64Vector lanewise(Unary op, VectorMask m) { + return (Int64Vector) super.lanewiseTemplate(op, Int64Mask.class, (Int64Mask) m); // specialize + } + @Override @ForceInline public Int64Vector lanewise(Binary op, Vector v) { return (Int64Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Int64Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Int64Vector) super.lanewiseTemplate(op, Int64Mask.class, v, (Int64Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline Int64Vector @@ -286,15 +298,30 @@ final class Int64Vector extends IntVector { return (Int64Vector) super.lanewiseShiftTemplate(op, e); // specialize } + /*package-private*/ + @Override + @ForceInline Int64Vector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m) { + return (Int64Vector) super.lanewiseShiftTemplate(op, Int64Mask.class, e, (Int64Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Int64Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Int64Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Int64Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Int64Vector) super.lanewiseTemplate(op, Int64Mask.class, v1, v2, (Int64Mask) m); // specialize + } + @Override @ForceInline public final @@ -314,7 +341,7 @@ final class Int64Vector extends IntVector { @ForceInline public final int reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Int64Mask.class, (Int64Mask) m); // specialized } @Override @@ -327,7 +354,7 @@ final class Int64Vector extends IntVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Int64Mask.class, (Int64Mask) m); // specialized } @ForceInline @@ -363,6 +390,13 @@ final class Int64Vector extends IntVector { return super.compareTemplate(Int64Mask.class, op, s); // specialize } + @Override + @ForceInline + public final Int64Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Int64Mask.class, op, v, (Int64Mask) m); + } + + @Override @ForceInline public Int64Vector blend(Vector v, VectorMask m) { @@ -419,6 +453,7 @@ final class Int64Vector extends IntVector { VectorMask m) { return (Int64Vector) super.rearrangeTemplate(Int64Shuffle.class, + 
Int64Mask.class, (Int64Shuffle) shuffle, (Int64Mask) m); // specialize } @@ -584,16 +619,12 @@ final class Int64Vector extends IntVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Int64Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -619,9 +650,9 @@ final class Int64Vector extends IntVector { public Int64Mask and(VectorMask mask) { Objects.requireNonNull(mask); Int64Mask m = (Int64Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Int64Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Int64Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -629,9 +660,9 @@ final class Int64Vector extends IntVector { public Int64Mask or(VectorMask mask) { Objects.requireNonNull(mask); Int64Mask m = (Int64Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Int64Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Int64Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -639,9 +670,9 @@ final class Int64Vector extends IntVector { Int64Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Int64Mask m = (Int64Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Int64Mask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Int64Mask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -649,22 +680,32 @@ final class Int64Vector extends IntVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Int64Mask.class, int.class, VLENGTH, this, - (m) -> trueCountHelper(((Int64Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Int64Mask.class, int.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Int64Mask.class, int.class, VLENGTH, this, - (m) -> firstTrueHelper(((Int64Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Int64Mask.class, int.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Int64Mask.class, int.class, VLENGTH, this, - (m) -> lastTrueHelper(((Int64Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Int64Mask.class, int.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long 
toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Int64Mask.class, int.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -775,6 +816,20 @@ final class Int64Vector extends IntVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + IntVector fromArray0(int[] a, int offset, VectorMask m) { + return super.fromArray0Template(Int64Mask.class, a, offset, (Int64Mask) m); // specialize + } + + @ForceInline + @Override + final + IntVector fromArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(Int64Mask.class, a, offset, indexMap, mapOffset, (Int64Mask) m); + } + @ForceInline @@ -784,6 +839,13 @@ final class Int64Vector extends IntVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + IntVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Int64Mask.class, a, offset, (Int64Mask) m); // specialize + } + @ForceInline @Override final @@ -791,6 +853,13 @@ final class Int64Vector extends IntVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Int64Mask.class, bb, offset, (Int64Mask) m); // specialize + } + @ForceInline @Override final @@ -798,6 +867,21 @@ final class Int64Vector extends IntVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(int[] a, int offset, VectorMask m) { + super.intoArray0Template(Int64Mask.class, a, offset, (Int64Mask) m); + } + + @ForceInline + @Override + final + void intoArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(Int64Mask.class, a, offset, indexMap, mapOffset, (Int64Mask) m); + } + + @ForceInline @Override final @@ -805,6 +889,21 @@ final class Int64Vector extends IntVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Int64Mask.class, a, offset, (Int64Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Int64Mask.class, bb, offset, (Int64Mask) m); + } + + // End of specialized low-level memory operations. 
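For orientation, here is a minimal user-level sketch (not part of the patch) of the masked lanewise entry points Int64Vector now specializes above. The class name and data are invented, and it assumes compilation with --add-modules jdk.incubator.vector. Under a mask, unset lanes keep the original lane value, which is what lets these overrides bypass the old blend(lanewise(op), m) fallback.

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    class MaskedLanewiseSketch {
        static final VectorSpecies<Integer> S = IntVector.SPECIES_64; // 2 int lanes

        static void demo() {
            IntVector v = IntVector.fromArray(S, new int[] {1, -2}, 0);
            VectorMask<Integer> m = VectorMask.fromValues(S, true, false);
            // Unary under mask: NEG applies only in lane 0; lane 1 keeps -2.
            IntVector neg = v.lanewise(VectorOperators.NEG, m);    // {-1, -2}
            // Binary under mask: ADD applies only in lane 0.
            IntVector sum = v.lanewise(VectorOperators.ADD, v, m); // {2, -2}
        }
    }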
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java index 2492e5d23e1b621db1d4b9a5782f2efa5c7a6e04..7b28a5947b067896540e5f31b6b92e45744535ef 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java @@ -236,8 +236,8 @@ final class IntMaxVector extends IntVector { @ForceInline final @Override - int rOp(int v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + int rOp(int v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,12 +273,24 @@ final class IntMaxVector extends IntVector { return (IntMaxVector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public IntMaxVector lanewise(Unary op, VectorMask m) { + return (IntMaxVector) super.lanewiseTemplate(op, IntMaxMask.class, (IntMaxMask) m); // specialize + } + @Override @ForceInline public IntMaxVector lanewise(Binary op, Vector v) { return (IntMaxVector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public IntMaxVector lanewise(Binary op, Vector v, VectorMask m) { + return (IntMaxVector) super.lanewiseTemplate(op, IntMaxMask.class, v, (IntMaxMask) m); // specialize + } + /*package-private*/ @Override @ForceInline IntMaxVector @@ -286,15 +298,30 @@ final class IntMaxVector extends IntVector { return (IntMaxVector) super.lanewiseShiftTemplate(op, e); // specialize } + /*package-private*/ + @Override + @ForceInline IntMaxVector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m) { + return (IntMaxVector) super.lanewiseShiftTemplate(op, IntMaxMask.class, e, (IntMaxMask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final IntMaxVector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (IntMaxVector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + IntMaxVector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (IntMaxVector) super.lanewiseTemplate(op, IntMaxMask.class, v1, v2, (IntMaxMask) m); // specialize + } + @Override @ForceInline public final @@ -314,7 +341,7 @@ final class IntMaxVector extends IntVector { @ForceInline public final int reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, IntMaxMask.class, (IntMaxMask) m); // specialized } @Override @@ -327,7 +354,7 @@ final class IntMaxVector extends IntVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, IntMaxMask.class, (IntMaxMask) m); // specialized } @ForceInline @@ -363,6 +390,13 @@ final class IntMaxVector extends IntVector { return super.compareTemplate(IntMaxMask.class, op, s); // specialize } + @Override + @ForceInline + public final IntMaxMask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(IntMaxMask.class, op, v, (IntMaxMask) m); + } + + @Override @ForceInline public IntMaxVector blend(Vector v, VectorMask m) { @@ -419,6 +453,7 @@ final class IntMaxVector extends IntVector { VectorMask m) { return (IntMaxVector) 
super.rearrangeTemplate(IntMaxShuffle.class, + IntMaxMask.class, (IntMaxShuffle) shuffle, (IntMaxMask) m); // specialize } @@ -582,16 +617,12 @@ final class IntMaxVector extends IntVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - IntMaxMask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -617,9 +648,9 @@ final class IntMaxVector extends IntVector { public IntMaxMask and(VectorMask mask) { Objects.requireNonNull(mask); IntMaxMask m = (IntMaxMask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, IntMaxMask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, IntMaxMask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -627,9 +658,9 @@ final class IntMaxVector extends IntVector { public IntMaxMask or(VectorMask mask) { Objects.requireNonNull(mask); IntMaxMask m = (IntMaxMask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, IntMaxMask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, IntMaxMask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -637,9 +668,9 @@ final class IntMaxVector extends IntVector { IntMaxMask xor(VectorMask mask) { Objects.requireNonNull(mask); IntMaxMask m = (IntMaxMask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, IntMaxMask.class, int.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, IntMaxMask.class, null, int.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -647,22 +678,32 @@ final class IntMaxVector extends IntVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, IntMaxMask.class, int.class, VLENGTH, this, - (m) -> trueCountHelper(((IntMaxMask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, IntMaxMask.class, int.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, IntMaxMask.class, int.class, VLENGTH, this, - (m) -> firstTrueHelper(((IntMaxMask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, IntMaxMask.class, int.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, IntMaxMask.class, int.class, VLENGTH, this, - (m) -> lastTrueHelper(((IntMaxMask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, IntMaxMask.class, int.class, VLENGTH, this, + (m) -> 
lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, IntMaxMask.class, int.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -784,6 +825,20 @@ final class IntMaxVector extends IntVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + IntVector fromArray0(int[] a, int offset, VectorMask m) { + return super.fromArray0Template(IntMaxMask.class, a, offset, (IntMaxMask) m); // specialize + } + + @ForceInline + @Override + final + IntVector fromArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(IntMaxMask.class, a, offset, indexMap, mapOffset, (IntMaxMask) m); + } + @ForceInline @@ -793,6 +848,13 @@ final class IntMaxVector extends IntVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + IntVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(IntMaxMask.class, a, offset, (IntMaxMask) m); // specialize + } + @ForceInline @Override final @@ -800,6 +862,13 @@ final class IntMaxVector extends IntVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(IntMaxMask.class, bb, offset, (IntMaxMask) m); // specialize + } + @ForceInline @Override final @@ -807,6 +876,21 @@ final class IntMaxVector extends IntVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(int[] a, int offset, VectorMask m) { + super.intoArray0Template(IntMaxMask.class, a, offset, (IntMaxMask) m); + } + + @ForceInline + @Override + final + void intoArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(IntMaxMask.class, a, offset, indexMap, mapOffset, (IntMaxMask) m); + } + + @ForceInline @Override final @@ -814,6 +898,21 @@ final class IntMaxVector extends IntVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(IntMaxMask.class, a, offset, (IntMaxMask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(IntMaxMask.class, bb, offset, (IntMaxMask) m); + } + + // End of specialized low-level memory operations. 
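The trueCount/firstTrue/lastTrue rewrites and the new toLong above are easiest to read against their scalar meaning. A hedged sketch, with an invented class name and a 4-lane species assumed:

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorMask;

    class MaskQuerySketch {
        static void demo() {
            VectorMask<Integer> m =
                VectorMask.fromValues(IntVector.SPECIES_128, true, false, true, true);
            int count = m.trueCount(); // 3 set lanes
            int first = m.firstTrue(); // 0, index of the first set lane
            int last  = m.lastTrue();  // 3, index of the last set lane
            long bits = m.toLong();    // 0b1101 == 13; lane i maps to bit i
        }
    }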
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java index 6fdf612a255a3bc4f4aced273a885af7c1b4b46f..3deb61e990403ac289c3187820c50699f29fff21 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java @@ -29,7 +29,6 @@ import java.nio.ByteOrder; import java.nio.ReadOnlyBufferException; import java.util.Arrays; import java.util.Objects; -import java.util.function.BinaryOperator; import java.util.function.Function; import java.util.function.UnaryOperator; @@ -173,6 +172,9 @@ public abstract class IntVector extends AbstractVector { final IntVector uOpTemplate(VectorMask m, FUnOp f) { + if (m == null) { + return uOpTemplate(f); + } int[] vec = vec(); int[] res = new int[length()]; boolean[] mbits = ((AbstractMask)m).getBits(); @@ -216,6 +218,9 @@ public abstract class IntVector extends AbstractVector { IntVector bOpTemplate(Vector o, VectorMask m, FBinOp f) { + if (m == null) { + return bOpTemplate(o, f); + } int[] res = new int[length()]; int[] vec1 = this.vec(); int[] vec2 = ((IntVector)o).vec(); @@ -265,6 +270,9 @@ public abstract class IntVector extends AbstractVector { Vector o2, VectorMask m, FTriOp f) { + if (m == null) { + return tOpTemplate(o1, o2, f); + } int[] res = new int[length()]; int[] vec1 = this.vec(); int[] vec2 = ((IntVector)o1).vec(); @@ -280,7 +288,22 @@ public abstract class IntVector extends AbstractVector { /*package-private*/ abstract - int rOp(int v, FBinOp f); + int rOp(int v, VectorMask m, FBinOp f); + + @ForceInline + final + int rOpTemplate(int v, VectorMask m, FBinOp f) { + if (m == null) { + return rOpTemplate(v, f); + } + int[] vec = vec(); + boolean[] mbits = ((AbstractMask)m).getBits(); + for (int i = 0; i < vec.length; i++) { + v = mbits[i] ? f.apply(i, v, vec[i]) : v; + } + return v; + } + @ForceInline final int rOpTemplate(int v, FBinOp f) { @@ -549,37 +572,61 @@ public abstract class IntVector extends AbstractVector { return blend(broadcast(-1), compare(NE, 0)); } if (op == NOT) { - return broadcast(-1).lanewiseTemplate(XOR, this); + return broadcast(-1).lanewise(XOR, this); } else if (op == NEG) { // FIXME: Support this in the JIT. 
- return broadcast(0).lanewiseTemplate(SUB, this); + return broadcast(0).lanewise(SUB, this); } } int opc = opCode(op); return VectorSupport.unaryOp( - opc, getClass(), int.class, length(), - this, - UN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_NEG: return v0 -> - v0.uOp((i, a) -> (int) -a); - case VECTOR_OP_ABS: return v0 -> - v0.uOp((i, a) -> (int) Math.abs(a)); - default: return null; - }})); + opc, getClass(), null, int.class, length(), + this, null, + UN_IMPL.find(op, opc, IntVector::unaryOperations)); } - private static final - ImplCache> UN_IMPL - = new ImplCache<>(Unary.class, IntVector.class); /** * {@inheritDoc} */ - @ForceInline - public final + @Override + public abstract IntVector lanewise(VectorOperators.Unary op, - VectorMask m) { - return blend(lanewise(op), m); + VectorMask m); + @ForceInline + final + IntVector lanewiseTemplate(VectorOperators.Unary op, + Class> maskClass, + VectorMask m) { + m.check(maskClass, this); + if (opKind(op, VO_SPECIAL)) { + if (op == ZOMO) { + return blend(broadcast(-1), compare(NE, 0, m)); + } + if (op == NOT) { + return lanewise(XOR, broadcast(-1), m); + } else if (op == NEG) { + return lanewise(NOT, m).lanewise(ADD, broadcast(1), m); + } + } + int opc = opCode(op); + return VectorSupport.unaryOp( + opc, getClass(), maskClass, int.class, length(), + this, m, + UN_IMPL.find(op, opc, IntVector::unaryOperations)); + } + + private static final + ImplCache>> + UN_IMPL = new ImplCache<>(Unary.class, IntVector.class); + + private static UnaryOperation> unaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_NEG: return (v0, m) -> + v0.uOp(m, (i, a) -> (int) -a); + case VECTOR_OP_ABS: return (v0, m) -> + v0.uOp(m, (i, a) -> (int) Math.abs(a)); + default: return null; + } } // Binary lanewise support @@ -599,6 +646,7 @@ public abstract class IntVector extends AbstractVector { Vector v) { IntVector that = (IntVector) v; that.check(this); + if (opKind(op, VO_SPECIAL | VO_SHIFT)) { if (op == FIRST_NONZERO) { // FIXME: Support this in the JIT. 
@@ -617,74 +665,110 @@ public abstract class IntVector extends AbstractVector { that = that.lanewise(NOT); op = AND; } else if (op == DIV) { - VectorMask eqz = that.eq((int)0); + VectorMask eqz = that.eq((int) 0); if (eqz.anyTrue()) { throw that.divZeroException(); } } } + int opc = opCode(op); return VectorSupport.binaryOp( - opc, getClass(), int.class, length(), - this, that, - BIN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (int)(a + b)); - case VECTOR_OP_SUB: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (int)(a - b)); - case VECTOR_OP_MUL: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (int)(a * b)); - case VECTOR_OP_DIV: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (int)(a / b)); - case VECTOR_OP_MAX: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (int)Math.max(a, b)); - case VECTOR_OP_MIN: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (int)Math.min(a, b)); - case VECTOR_OP_AND: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (int)(a & b)); - case VECTOR_OP_OR: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (int)(a | b)); - case VECTOR_OP_XOR: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (int)(a ^ b)); - case VECTOR_OP_LSHIFT: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> (int)(a << n)); - case VECTOR_OP_RSHIFT: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> (int)(a >> n)); - case VECTOR_OP_URSHIFT: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> (int)((a & LSHR_SETUP_MASK) >>> n)); - case VECTOR_OP_LROTATE: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> rotateLeft(a, (int)n)); - case VECTOR_OP_RROTATE: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> rotateRight(a, (int)n)); - default: return null; - }})); + opc, getClass(), null, int.class, length(), + this, that, null, + BIN_IMPL.find(op, opc, IntVector::binaryOperations)); } - private static final - ImplCache> BIN_IMPL - = new ImplCache<>(Binary.class, IntVector.class); /** * {@inheritDoc} * @see #lanewise(VectorOperators.Binary,int,VectorMask) */ - @ForceInline - public final + @Override + public abstract IntVector lanewise(VectorOperators.Binary op, Vector v, - VectorMask m) { + VectorMask m); + @ForceInline + final + IntVector lanewiseTemplate(VectorOperators.Binary op, + Class> maskClass, + Vector v, VectorMask m) { IntVector that = (IntVector) v; - if (op == DIV) { - VectorMask eqz = that.eq((int)0); - if (eqz.and(m).anyTrue()) { - throw that.divZeroException(); + that.check(this); + m.check(maskClass, this); + + if (opKind(op, VO_SPECIAL | VO_SHIFT)) { + if (op == FIRST_NONZERO) { + // FIXME: Support this in the JIT. + VectorMask thisNZ + = this.viewAsIntegralLanes().compare(NE, (int) 0); + that = that.blend((int) 0, thisNZ.cast(vspecies())); + op = OR_UNCHECKED; + } + if (opKind(op, VO_SHIFT)) { + // As per shift specification for Java, mask the shift count. + // This allows the JIT to ignore some ISA details. + that = that.lanewise(AND, SHIFT_MASK); + } + if (op == AND_NOT) { + // FIXME: Support this in the JIT. 
+ that = that.lanewise(NOT); + op = AND; + } else if (op == DIV) { + VectorMask eqz = that.eq((int)0); + if (eqz.and(m).anyTrue()) { + throw that.divZeroException(); + } + // suppress div/0 exceptions in unset lanes + that = that.lanewise(NOT, eqz); } - // suppress div/0 exceptions in unset lanes - that = that.lanewise(NOT, eqz); - return blend(lanewise(DIV, that), m); } - return blend(lanewise(op, v), m); + + int opc = opCode(op); + return VectorSupport.binaryOp( + opc, getClass(), maskClass, int.class, length(), + this, that, m, + BIN_IMPL.find(op, opc, IntVector::binaryOperations)); + } + + private static final + ImplCache>> + BIN_IMPL = new ImplCache<>(Binary.class, IntVector.class); + + private static BinaryOperation> binaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (int)(a + b)); + case VECTOR_OP_SUB: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (int)(a - b)); + case VECTOR_OP_MUL: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (int)(a * b)); + case VECTOR_OP_DIV: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (int)(a / b)); + case VECTOR_OP_MAX: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (int)Math.max(a, b)); + case VECTOR_OP_MIN: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (int)Math.min(a, b)); + case VECTOR_OP_AND: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (int)(a & b)); + case VECTOR_OP_OR: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (int)(a | b)); + case VECTOR_OP_XOR: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (int)(a ^ b)); + case VECTOR_OP_LSHIFT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> (int)(a << n)); + case VECTOR_OP_RSHIFT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> (int)(a >> n)); + case VECTOR_OP_URSHIFT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> (int)((a & LSHR_SETUP_MASK) >>> n)); + case VECTOR_OP_LROTATE: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n)); + case VECTOR_OP_RROTATE: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n)); + default: return null; + } } + // FIXME: Maybe all of the public final methods in this file (the // simple ones that just call lanewise) should be pushed down to // the X-VectorBits template. 
They can't optimize properly at @@ -747,7 +831,13 @@ public abstract class IntVector extends AbstractVector { IntVector lanewise(VectorOperators.Binary op, int e, VectorMask m) { - return blend(lanewise(op, e), m); + if (opKind(op, VO_SHIFT) && (int)(int)e == e) { + return lanewiseShift(op, (int) e, m); + } + if (op == AND_NOT) { + op = AND; e = (int) ~e; + } + return lanewise(op, broadcast(e), m); } /** @@ -767,8 +857,7 @@ public abstract class IntVector extends AbstractVector { int e1 = (int) e; if ((long)e1 != e // allow shift ops to clip down their int parameters - && !(opKind(op, VO_SHIFT) && (int)e1 == e) - ) { + && !(opKind(op, VO_SHIFT) && (int)e1 == e)) { vspecies().checkValue(e); // for exception } return lanewise(op, e1); @@ -788,7 +877,13 @@ public abstract class IntVector extends AbstractVector { public final IntVector lanewise(VectorOperators.Binary op, long e, VectorMask m) { - return blend(lanewise(op, e), m); + int e1 = (int) e; + if ((long)e1 != e + // allow shift ops to clip down their int parameters + && !(opKind(op, VO_SHIFT) && (int)e1 == e)) { + vspecies().checkValue(e); // for exception + } + return lanewise(op, e1, m); } /*package-private*/ @@ -805,27 +900,52 @@ public abstract class IntVector extends AbstractVector { e &= SHIFT_MASK; int opc = opCode(op); return VectorSupport.broadcastInt( - opc, getClass(), int.class, length(), - this, e, - BIN_INT_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_LSHIFT: return (v, n) -> - v.uOp((i, a) -> (int)(a << n)); - case VECTOR_OP_RSHIFT: return (v, n) -> - v.uOp((i, a) -> (int)(a >> n)); - case VECTOR_OP_URSHIFT: return (v, n) -> - v.uOp((i, a) -> (int)((a & LSHR_SETUP_MASK) >>> n)); - case VECTOR_OP_LROTATE: return (v, n) -> - v.uOp((i, a) -> rotateLeft(a, (int)n)); - case VECTOR_OP_RROTATE: return (v, n) -> - v.uOp((i, a) -> rotateRight(a, (int)n)); - default: return null; - }})); + opc, getClass(), null, int.class, length(), + this, e, null, + BIN_INT_IMPL.find(op, opc, IntVector::broadcastIntOperations)); } + + /*package-private*/ + abstract IntVector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m); + + /*package-private*/ + @ForceInline + final IntVector + lanewiseShiftTemplate(VectorOperators.Binary op, + Class> maskClass, + int e, VectorMask m) { + m.check(maskClass, this); + assert(opKind(op, VO_SHIFT)); + // As per shift specification for Java, mask the shift count. + e &= SHIFT_MASK; + int opc = opCode(op); + return VectorSupport.broadcastInt( + opc, getClass(), maskClass, int.class, length(), + this, e, m, + BIN_INT_IMPL.find(op, opc, IntVector::broadcastIntOperations)); + } + private static final - ImplCache> BIN_INT_IMPL + ImplCache>> BIN_INT_IMPL = new ImplCache<>(Binary.class, IntVector.class); + private static VectorBroadcastIntOp> broadcastIntOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_LSHIFT: return (v, n, m) -> + v.uOp(m, (i, a) -> (int)(a << n)); + case VECTOR_OP_RSHIFT: return (v, n, m) -> + v.uOp(m, (i, a) -> (int)(a >> n)); + case VECTOR_OP_URSHIFT: return (v, n, m) -> + v.uOp(m, (i, a) -> (int)((a & LSHR_SETUP_MASK) >>> n)); + case VECTOR_OP_LROTATE: return (v, n, m) -> + v.uOp(m, (i, a) -> rotateLeft(a, (int)n)); + case VECTOR_OP_RROTATE: return (v, n, m) -> + v.uOp(m, (i, a) -> rotateRight(a, (int)n)); + default: return null; + } + } + // As per shift specification for Java, mask the shift count. // We mask 0X3F (long), 0X1F (int), 0x0F (short), 0x7 (byte). 
// The latter two maskings go beyond the JLS, but seem reasonable @@ -877,16 +997,10 @@ public abstract class IntVector extends AbstractVector { } int opc = opCode(op); return VectorSupport.ternaryOp( - opc, getClass(), int.class, length(), - this, that, tother, - TERN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - default: return null; - }})); + opc, getClass(), null, int.class, length(), + this, that, tother, null, + TERN_IMPL.find(op, opc, IntVector::ternaryOperations)); } - private static final - ImplCache> TERN_IMPL - = new ImplCache<>(Ternary.class, IntVector.class); /** * {@inheritDoc} @@ -894,13 +1008,48 @@ public abstract class IntVector extends AbstractVector { * @see #lanewise(VectorOperators.Ternary,Vector,int,VectorMask) * @see #lanewise(VectorOperators.Ternary,int,Vector,VectorMask) */ - @ForceInline - public final + @Override + public abstract IntVector lanewise(VectorOperators.Ternary op, Vector v1, Vector v2, - VectorMask m) { - return blend(lanewise(op, v1, v2), m); + VectorMask m); + @ForceInline + final + IntVector lanewiseTemplate(VectorOperators.Ternary op, + Class> maskClass, + Vector v1, + Vector v2, + VectorMask m) { + IntVector that = (IntVector) v1; + IntVector tother = (IntVector) v2; + // It's a word: https://www.dictionary.com/browse/tother + // See also Chapter 11 of Dickens, Our Mutual Friend: + // "Totherest Governor," replied Mr Riderhood... + that.check(this); + tother.check(this); + m.check(maskClass, this); + + if (op == BITWISE_BLEND) { + // FIXME: Support this in the JIT. + that = this.lanewise(XOR, that).lanewise(AND, tother); + return this.lanewise(XOR, that, m); + } + int opc = opCode(op); + return VectorSupport.ternaryOp( + opc, getClass(), maskClass, int.class, length(), + this, that, tother, m, + TERN_IMPL.find(op, opc, IntVector::ternaryOperations)); + } + + private static final + ImplCache>> + TERN_IMPL = new ImplCache<>(Ternary.class, IntVector.class); + + private static TernaryOperation> ternaryOperations(int opc_) { + switch (opc_) { + default: return null; + } } /** @@ -957,7 +1106,7 @@ public abstract class IntVector extends AbstractVector { int e1, int e2, VectorMask m) { - return blend(lanewise(op, e1, e2), m); + return lanewise(op, broadcast(e1), broadcast(e2), m); } /** @@ -1015,7 +1164,7 @@ public abstract class IntVector extends AbstractVector { Vector v1, int e2, VectorMask m) { - return blend(lanewise(op, v1, e2), m); + return lanewise(op, v1, broadcast(e2), m); } /** @@ -1072,7 +1221,7 @@ public abstract class IntVector extends AbstractVector { int e1, Vector v2, VectorMask m) { - return blend(lanewise(op, e1, v2), m); + return lanewise(op, broadcast(e1), v2, m); } // (Thus endeth the Great and Mighty Ternary Ogdoad.) 
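All of the ternary overloads above now funnel into the single masked template rather than blending afterwards. A sketch of the bitwise-blend ternary that lanewiseTemplate handles specially, under a mask; names and values are illustrative only:

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    class MaskedTernarySketch {
        static void demo() {
            VectorSpecies<Integer> s = IntVector.SPECIES_128; // 4 int lanes
            IntVector a   = IntVector.broadcast(s, 0xF0F0F0F0);
            IntVector b   = IntVector.broadcast(s, 0x0F0F0F0F);
            IntVector sel = IntVector.broadcast(s, 0x00FF00FF);
            VectorMask<Integer> m = VectorMask.fromLong(s, 0b0011); // lanes 0-1
            // Set lanes compute (a & ~sel) | (b & sel); unset lanes keep a.
            IntVector r = a.lanewise(VectorOperators.BITWISE_BLEND, b, sel, m);
        }
    }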
@@ -1744,15 +1893,13 @@ public abstract class IntVector extends AbstractVector { final > M compareTemplate(Class maskType, Comparison op, Vector v) { - Objects.requireNonNull(v); - IntSpecies vsp = vspecies(); IntVector that = (IntVector) v; that.check(this); int opc = opCode(op); return VectorSupport.compare( opc, getClass(), maskType, int.class, length(), - this, that, - (cond, v0, v1) -> { + this, that, null, + (cond, v0, v1, m1) -> { AbstractMask m = v0.bTest(cond, v1, (cond_, i, a, b) -> compareWithOp(cond, a, b)); @@ -1762,6 +1909,28 @@ public abstract class IntVector extends AbstractVector { }); } + /*package-private*/ + @ForceInline + final + > + M compareTemplate(Class maskType, Comparison op, Vector v, M m) { + IntVector that = (IntVector) v; + that.check(this); + m.check(maskType, this); + int opc = opCode(op); + return VectorSupport.compare( + opc, getClass(), maskType, int.class, length(), + this, that, m, + (cond, v0, v1, m1) -> { + AbstractMask cmpM + = v0.bTest(cond, v1, (cond_, i, a, b) + -> compareWithOp(cond, a, b)); + @SuppressWarnings("unchecked") + M m2 = (M) cmpM.and(m1); + return m2; + }); + } + @ForceInline private static boolean compareWithOp(int cond, int a, int b) { return switch (cond) { @@ -1779,18 +1948,6 @@ public abstract class IntVector extends AbstractVector { }; } - /** - * {@inheritDoc} - */ - @Override - @ForceInline - public final - VectorMask compare(VectorOperators.Comparison op, - Vector v, - VectorMask m) { - return compare(op, v).and(m); - } - /** * Tests this vector by comparing it with an input scalar, * according to the given comparison operation. @@ -1849,7 +2006,7 @@ public abstract class IntVector extends AbstractVector { public final VectorMask compare(VectorOperators.Comparison op, int e, VectorMask m) { - return compare(op, e).and(m); + return compare(op, broadcast(e), m); } /** @@ -2100,9 +2257,9 @@ public abstract class IntVector extends AbstractVector { IntVector rearrangeTemplate(Class shuffletype, S shuffle) { shuffle.checkIndexes(); return VectorSupport.rearrangeOp( - getClass(), shuffletype, int.class, length(), - this, shuffle, - (v1, s_) -> v1.uOp((i, a) -> { + getClass(), shuffletype, null, int.class, length(), + this, shuffle, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); return v1.lane(ei); })); @@ -2119,24 +2276,25 @@ public abstract class IntVector extends AbstractVector { /*package-private*/ @ForceInline final - > + , M extends VectorMask> IntVector rearrangeTemplate(Class shuffletype, + Class masktype, S shuffle, - VectorMask m) { - IntVector unmasked = - VectorSupport.rearrangeOp( - getClass(), shuffletype, int.class, length(), - this, shuffle, - (v1, s_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); - return ei < 0 ? 0 : v1.lane(ei); - })); + M m) { + + m.check(masktype, this); VectorMask valid = shuffle.laneIsValid(); if (m.andNot(valid).anyTrue()) { shuffle.checkIndexes(); throw new AssertionError(); } - return broadcast((int)0).blend(unmasked, m); + return VectorSupport.rearrangeOp( + getClass(), shuffletype, masktype, int.class, length(), + this, shuffle, m, + (v1, s_, m_) -> v1.uOp((i, a) -> { + int ei = s_.laneSource(i); + return ei < 0 || !m_.laneIsSet(i) ? 
0 : v1.lane(ei); + })); } /** @@ -2159,17 +2317,17 @@ public abstract class IntVector extends AbstractVector { S ws = (S) shuffle.wrapIndexes(); IntVector r0 = VectorSupport.rearrangeOp( - getClass(), shuffletype, int.class, length(), - this, ws, - (v0, s_) -> v0.uOp((i, a) -> { + getClass(), shuffletype, null, int.class, length(), + this, ws, null, + (v0, s_, m_) -> v0.uOp((i, a) -> { int ei = s_.laneSource(i); return v0.lane(ei); })); IntVector r1 = VectorSupport.rearrangeOp( - getClass(), shuffletype, int.class, length(), - v, ws, - (v1, s_) -> v1.uOp((i, a) -> { + getClass(), shuffletype, null, int.class, length(), + v, ws, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); return v1.lane(ei); })); @@ -2432,9 +2590,18 @@ public abstract class IntVector extends AbstractVector { @ForceInline final int reduceLanesTemplate(VectorOperators.Associative op, + Class> maskClass, VectorMask m) { - IntVector v = reduceIdentityVector(op).blend(this, m); - return v.reduceLanesTemplate(op); + m.check(maskClass, this); + if (op == FIRST_NONZERO) { + IntVector v = reduceIdentityVector(op).blend(this, m); + return v.reduceLanesTemplate(op); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), maskClass, int.class, length(), + this, m, + REDUCE_IMPL.find(op, opc, IntVector::reductionOperations))); } /*package-private*/ @@ -2449,30 +2616,34 @@ public abstract class IntVector extends AbstractVector { } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), int.class, length(), - this, - REDUCE_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return v -> - toBits(v.rOp((int)0, (i, a, b) -> (int)(a + b))); - case VECTOR_OP_MUL: return v -> - toBits(v.rOp((int)1, (i, a, b) -> (int)(a * b))); - case VECTOR_OP_MIN: return v -> - toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (int) Math.min(a, b))); - case VECTOR_OP_MAX: return v -> - toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (int) Math.max(a, b))); - case VECTOR_OP_AND: return v -> - toBits(v.rOp((int)-1, (i, a, b) -> (int)(a & b))); - case VECTOR_OP_OR: return v -> - toBits(v.rOp((int)0, (i, a, b) -> (int)(a | b))); - case VECTOR_OP_XOR: return v -> - toBits(v.rOp((int)0, (i, a, b) -> (int)(a ^ b))); - default: return null; - }}))); + opc, getClass(), null, int.class, length(), + this, null, + REDUCE_IMPL.find(op, opc, IntVector::reductionOperations))); } + private static final - ImplCache> REDUCE_IMPL - = new ImplCache<>(Associative.class, IntVector.class); + ImplCache>> + REDUCE_IMPL = new ImplCache<>(Associative.class, IntVector.class); + + private static ReductionOperation> reductionOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp((int)0, m, (i, a, b) -> (int)(a + b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp((int)1, m, (i, a, b) -> (int)(a * b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (int) Math.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (int) Math.max(a, b))); + case VECTOR_OP_AND: return (v, m) -> + toBits(v.rOp((int)-1, m, (i, a, b) -> (int)(a & b))); + case VECTOR_OP_OR: return (v, m) -> + toBits(v.rOp((int)0, m, (i, a, b) -> (int)(a | b))); + case VECTOR_OP_XOR: return (v, m) -> + toBits(v.rOp((int)0, m, (i, a, b) -> (int)(a ^ b))); + default: return null; + } + } private @ForceInline @@ -2691,9 +2862,7 @@ public abstract class IntVector extends AbstractVector { VectorMask m) { IntSpecies vsp 
= (IntSpecies) species; if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) { - IntVector zero = vsp.zero(); - IntVector v = zero.fromByteArray0(a, offset); - return zero.blend(v.maybeSwap(bo), m); + return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo); } // FIXME: optimize @@ -2755,8 +2924,7 @@ public abstract class IntVector extends AbstractVector { VectorMask m) { IntSpecies vsp = (IntSpecies) species; if (offset >= 0 && offset <= (a.length - species.length())) { - IntVector zero = vsp.zero(); - return zero.blend(zero.fromArray0(a, offset), m); + return vsp.dummyVector().fromArray0(a, offset, m); } // FIXME: optimize @@ -2814,13 +2982,13 @@ public abstract class IntVector extends AbstractVector { vix = VectorIntrinsics.checkIndex(vix, a.length); return VectorSupport.loadWithMap( - vectorType, int.class, vsp.laneCount(), - IntVector.species(vsp.indexShape()).vectorType(), - a, ARRAY_BASE, vix, + vectorType, null, int.class, vsp.laneCount(), + isp.vectorType(), + a, ARRAY_BASE, vix, null, a, offset, indexMap, mapOffset, vsp, - (int[] c, int idx, int[] iMap, int idy, IntSpecies s) -> + (c, idx, iMap, idy, s, vm) -> s.vOp(n -> c[idx + iMap[idy+n]])); - } + } /** * Gathers a new vector composed of elements from an array of type @@ -2868,9 +3036,8 @@ public abstract class IntVector extends AbstractVector { return fromArray(species, a, offset, indexMap, mapOffset); } else { - // FIXME: Cannot vectorize yet, if there's a mask. IntSpecies vsp = (IntSpecies) species; - return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]); + return vsp.dummyVector().fromArray0(a, offset, indexMap, mapOffset, m); } } @@ -2964,9 +3131,7 @@ public abstract class IntVector extends AbstractVector { VectorMask m) { IntSpecies vsp = (IntSpecies) species; if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) { - IntVector zero = vsp.zero(); - IntVector v = zero.fromByteBuffer0(bb, offset); - return zero.blend(v.maybeSwap(bo), m); + return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo); } // FIXME: optimize @@ -3038,10 +3203,9 @@ public abstract class IntVector extends AbstractVector { if (m.allTrue()) { intoArray(a, offset); } else { - // FIXME: optimize IntSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 1, a.length); - stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v); + intoArray0(a, offset, m); } } @@ -3085,12 +3249,12 @@ public abstract class IntVector extends AbstractVector { vix = VectorIntrinsics.checkIndex(vix, a.length); VectorSupport.storeWithMap( - vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + vsp.vectorType(), null, vsp.elementType(), vsp.laneCount(), isp.vectorType(), a, arrayAddress(a, 0), vix, - this, + this, null, a, offset, indexMap, mapOffset, - (arr, off, v, map, mo) + (arr, off, v, map, mo, vm) -> v.stOp(arr, off, (arr_, off_, i, e) -> { int j = map[mo + i]; @@ -3137,12 +3301,7 @@ public abstract class IntVector extends AbstractVector { intoArray(a, offset, indexMap, mapOffset); } else { - // FIXME: Cannot vectorize yet, if there's a mask. 
- stOp(a, offset, m, - (arr, off, i, e) -> { - int j = indexMap[mapOffset + i]; - arr[off + j] = e; - }); + intoArray0(a, offset, indexMap, mapOffset, m); } } @@ -3172,12 +3331,9 @@ public abstract class IntVector extends AbstractVector { if (m.allTrue()) { intoByteArray(a, offset, bo); } else { - // FIXME: optimize IntSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 4, a.length); - ByteBuffer wb = wrapper(a, bo); - this.stOp(wb, offset, m, - (wb_, o, i, e) -> wb_.putInt(o + i * 4, e)); + maybeSwap(bo).intoByteArray0(a, offset, m); } } @@ -3189,7 +3345,7 @@ public abstract class IntVector extends AbstractVector { public final void intoByteBuffer(ByteBuffer bb, int offset, ByteOrder bo) { - if (bb.isReadOnly()) { + if (ScopedMemoryAccess.isReadOnly(bb)) { throw new ReadOnlyBufferException(); } offset = checkFromIndexSize(offset, byteSize(), bb.limit()); @@ -3208,15 +3364,12 @@ public abstract class IntVector extends AbstractVector { if (m.allTrue()) { intoByteBuffer(bb, offset, bo); } else { - // FIXME: optimize if (bb.isReadOnly()) { throw new ReadOnlyBufferException(); } IntSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit()); - ByteBuffer wb = wrapper(bb, bo); - this.stOp(wb, offset, m, - (wb_, o, i, e) -> wb_.putInt(o + i * 4, e)); + maybeSwap(bo).intoByteBuffer0(bb, offset, m); } } @@ -3254,6 +3407,57 @@ public abstract class IntVector extends AbstractVector { (arr_, off_, i) -> arr_[off_ + i])); } + /*package-private*/ + abstract + IntVector fromArray0(int[] a, int offset, VectorMask m); + @ForceInline + final + > + IntVector fromArray0Template(Class maskClass, int[] a, int offset, M m) { + m.check(species()); + IntSpecies vsp = vspecies(); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> s.ldOp(arr, off, vm, + (arr_, off_, i) -> arr_[off_ + i])); + } + + /*package-private*/ + abstract + IntVector fromArray0(int[] a, int offset, + int[] indexMap, int mapOffset, + VectorMask m); + @ForceInline + final + > + IntVector fromArray0Template(Class maskClass, int[] a, int offset, + int[] indexMap, int mapOffset, M m) { + IntSpecies vsp = vspecies(); + IntVector.IntSpecies isp = IntVector.species(vsp.indexShape()); + Objects.requireNonNull(a); + Objects.requireNonNull(indexMap); + m.check(vsp); + Class vectorType = vsp.vectorType(); + + // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k] + IntVector vix = IntVector + .fromArray(isp, indexMap, mapOffset) + .add(offset); + + // FIXME: Check index under mask controlling. 
+ vix = VectorIntrinsics.checkIndex(vix, a.length); + + return VectorSupport.loadWithMap( + vectorType, maskClass, int.class, vsp.laneCount(), + isp.vectorType(), + a, ARRAY_BASE, vix, m, + a, offset, indexMap, mapOffset, vsp, + (c, idx, iMap, idy, s, vm) -> + s.vOp(vm, n -> c[idx + iMap[idy+n]])); + } + @Override @@ -3274,6 +3478,25 @@ public abstract class IntVector extends AbstractVector { }); } + abstract + IntVector fromByteArray0(byte[] a, int offset, VectorMask m); + @ForceInline + final + > + IntVector fromByteArray0Template(Class maskClass, byte[] a, int offset, M m) { + IntSpecies vsp = vspecies(); + m.check(vsp); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + return s.ldOp(wb, off, vm, + (wb_, o, i) -> wb_.getInt(o + i * 4)); + }); + } + abstract IntVector fromByteBuffer0(ByteBuffer bb, int offset); @ForceInline @@ -3290,6 +3513,24 @@ public abstract class IntVector extends AbstractVector { }); } + abstract + IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m); + @ForceInline + final + > + IntVector fromByteBuffer0Template(Class maskClass, ByteBuffer bb, int offset, M m) { + IntSpecies vsp = vspecies(); + m.check(vsp); + return ScopedMemoryAccess.loadFromByteBufferMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + bb, offset, m, vsp, + (buf, off, s, vm) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + return s.ldOp(wb, off, vm, + (wb_, o, i) -> wb_.getInt(o + i * 4)); + }); + } + // Unchecked storing operations in native byte order. // Caller is responsible for applying index checks, masking, and // byte swapping. @@ -3309,6 +3550,58 @@ public abstract class IntVector extends AbstractVector { (arr_, off_, i, e) -> arr_[off_+i] = e)); } + abstract + void intoArray0(int[] a, int offset, VectorMask m); + @ForceInline + final + > + void intoArray0Template(Class maskClass, int[] a, int offset, M m) { + m.check(species()); + IntSpecies vsp = vspecies(); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> arr_[off_ + i] = e)); + } + + abstract + void intoArray0(int[] a, int offset, + int[] indexMap, int mapOffset, + VectorMask m); + @ForceInline + final + > + void intoArray0Template(Class maskClass, int[] a, int offset, + int[] indexMap, int mapOffset, M m) { + m.check(species()); + IntSpecies vsp = vspecies(); + IntVector.IntSpecies isp = IntVector.species(vsp.indexShape()); + // Index vector: vix[0:n] = i -> offset + indexMap[mo + i] + IntVector vix = IntVector + .fromArray(isp, indexMap, mapOffset) + .add(offset); + + // FIXME: Check index under mask controlling. 
+ vix = VectorIntrinsics.checkIndex(vix, a.length); + + VectorSupport.storeWithMap( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + isp.vectorType(), + a, arrayAddress(a, 0), vix, + this, m, + a, offset, indexMap, mapOffset, + (arr, off, v, map, mo, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> { + int j = map[mo + i]; + arr[off + j] = e; + })); + } + + abstract void intoByteArray0(byte[] a, int offset); @ForceInline @@ -3326,6 +3619,25 @@ public abstract class IntVector extends AbstractVector { }); } + abstract + void intoByteArray0(byte[] a, int offset, VectorMask m); + @ForceInline + final + > + void intoByteArray0Template(Class maskClass, byte[] a, int offset, M m) { + IntSpecies vsp = vspecies(); + m.check(vsp); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + v.stOp(wb, off, vm, + (tb_, o, i, e) -> tb_.putInt(o + i * 4, e)); + }); + } + @ForceInline final void intoByteBuffer0(ByteBuffer bb, int offset) { @@ -3340,6 +3652,25 @@ public abstract class IntVector extends AbstractVector { }); } + abstract + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m); + @ForceInline + final + > + void intoByteBuffer0Template(Class maskClass, ByteBuffer bb, int offset, M m) { + IntSpecies vsp = vspecies(); + m.check(vsp); + ScopedMemoryAccess.storeIntoByteBufferMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + this, m, bb, offset, + (buf, off, v, vm) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + v.stOp(wb, off, vm, + (wb_, o, i, e) -> wb_.putInt(o + i * 4, e)); + }); + } + + // End of low-level memory operations. 
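The payoff of the masked fromArray0/intoArray0 templates above is that a bounds mask can ride through an entire loop body, including the final partial vector, with no scalar epilogue. A hedged sketch; the method, array, and factor are invented, and SPECIES_PREFERRED is just a convenient choice:

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    class MaskedTailLoopSketch {
        static final VectorSpecies<Integer> S = IntVector.SPECIES_PREFERRED;

        static void scale(int[] a, int factor) {
            for (int i = 0; i < a.length; i += S.length()) {
                // In-range lanes are set; lanes past a.length are unset, so
                // the tail iteration loads and stores only valid elements.
                VectorMask<Integer> m = S.indexInRange(i, a.length);
                IntVector v = IntVector.fromArray(S, a, i, m);    // masked load
                v.lanewise(VectorOperators.MUL, factor, m)
                 .intoArray(a, i, m);                             // masked store
            }
        }
    }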
private static @@ -3657,7 +3988,7 @@ public abstract class IntVector extends AbstractVector { /*package-private*/ @ForceInline IntVector ldOp(M memory, int offset, - AbstractMask m, + VectorMask m, FLdOp f) { return dummyVector().ldOp(memory, offset, m, f); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java index 76e132522c9a1e9510ca6b2a320214760fdccd9e..81dbb215f0df0bfe3443c657dfbe12d8601a3af7 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java @@ -231,8 +231,8 @@ final class Long128Vector extends LongVector { @ForceInline final @Override - long rOp(long v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + long rOp(long v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -268,12 +268,24 @@ final class Long128Vector extends LongVector { return (Long128Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Long128Vector lanewise(Unary op, VectorMask m) { + return (Long128Vector) super.lanewiseTemplate(op, Long128Mask.class, (Long128Mask) m); // specialize + } + @Override @ForceInline public Long128Vector lanewise(Binary op, Vector v) { return (Long128Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Long128Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Long128Vector) super.lanewiseTemplate(op, Long128Mask.class, v, (Long128Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline Long128Vector @@ -281,15 +293,30 @@ final class Long128Vector extends LongVector { return (Long128Vector) super.lanewiseShiftTemplate(op, e); // specialize } + /*package-private*/ + @Override + @ForceInline Long128Vector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m) { + return (Long128Vector) super.lanewiseShiftTemplate(op, Long128Mask.class, e, (Long128Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Long128Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Long128Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Long128Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Long128Vector) super.lanewiseTemplate(op, Long128Mask.class, v1, v2, (Long128Mask) m); // specialize + } + @Override @ForceInline public final @@ -309,7 +336,7 @@ final class Long128Vector extends LongVector { @ForceInline public final long reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Long128Mask.class, (Long128Mask) m); // specialized } @Override @@ -322,7 +349,7 @@ final class Long128Vector extends LongVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Long128Mask.class, (Long128Mask) m); // specialized } @ForceInline @@ -353,6 +380,13 @@ final class Long128Vector extends LongVector { } + @Override + @ForceInline + public final Long128Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Long128Mask.class, op, v, (Long128Mask) m); + } + 
+ @Override @ForceInline public Long128Vector blend(Vector v, VectorMask m) { @@ -409,6 +443,7 @@ final class Long128Vector extends LongVector { VectorMask m) { return (Long128Vector) super.rearrangeTemplate(Long128Shuffle.class, + Long128Mask.class, (Long128Shuffle) shuffle, (Long128Mask) m); // specialize } @@ -574,16 +609,12 @@ final class Long128Vector extends LongVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Long128Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -609,9 +640,9 @@ final class Long128Vector extends LongVector { public Long128Mask and(VectorMask mask) { Objects.requireNonNull(mask); Long128Mask m = (Long128Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Long128Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Long128Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -619,9 +650,9 @@ final class Long128Vector extends LongVector { public Long128Mask or(VectorMask mask) { Objects.requireNonNull(mask); Long128Mask m = (Long128Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Long128Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Long128Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -629,9 +660,9 @@ final class Long128Vector extends LongVector { Long128Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Long128Mask m = (Long128Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Long128Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Long128Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -639,22 +670,32 @@ final class Long128Vector extends LongVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Long128Mask.class, long.class, VLENGTH, this, - (m) -> trueCountHelper(((Long128Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Long128Mask.class, long.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Long128Mask.class, long.class, VLENGTH, this, - (m) -> firstTrueHelper(((Long128Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Long128Mask.class, long.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return 
VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Long128Mask.class, long.class, VLENGTH, this, - (m) -> lastTrueHelper(((Long128Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Long128Mask.class, long.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Long128Mask.class, long.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -765,6 +806,20 @@ final class Long128Vector extends LongVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + LongVector fromArray0(long[] a, int offset, VectorMask m) { + return super.fromArray0Template(Long128Mask.class, a, offset, (Long128Mask) m); // specialize + } + + @ForceInline + @Override + final + LongVector fromArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(Long128Mask.class, a, offset, indexMap, mapOffset, (Long128Mask) m); + } + @ForceInline @@ -774,6 +829,13 @@ final class Long128Vector extends LongVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + LongVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Long128Mask.class, a, offset, (Long128Mask) m); // specialize + } + @ForceInline @Override final @@ -781,6 +843,13 @@ final class Long128Vector extends LongVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Long128Mask.class, bb, offset, (Long128Mask) m); // specialize + } + @ForceInline @Override final @@ -788,6 +857,21 @@ final class Long128Vector extends LongVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(long[] a, int offset, VectorMask m) { + super.intoArray0Template(Long128Mask.class, a, offset, (Long128Mask) m); + } + + @ForceInline + @Override + final + void intoArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(Long128Mask.class, a, offset, indexMap, mapOffset, (Long128Mask) m); + } + + @ForceInline @Override final @@ -795,6 +879,21 @@ final class Long128Vector extends LongVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Long128Mask.class, a, offset, (Long128Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Long128Mask.class, bb, offset, (Long128Mask) m); + } + + // End of specialized low-level memory operations. 
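Long128Vector's new compare(op, v, m) above computes the comparison directly under the mask, where the removed IntVector default post-anded with m. A small sketch with invented values:

    import jdk.incubator.vector.LongVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorOperators;

    class MaskedCompareSketch {
        static void demo() {
            var s = LongVector.SPECIES_128; // 2 long lanes
            LongVector v = LongVector.fromArray(s, new long[] {5L, -7L}, 0);
            VectorMask<Long> m = VectorMask.fromLong(s, 0b01); // lane 0 only
            // Lane 1 satisfies LT 0 but is unset in m, so it stays unset here.
            VectorMask<Long> lt = v.compare(VectorOperators.LT, 0L, m); // {F, F}
        }
    }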
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java index a3ba9be431c0a9be9da9694ea738259b3232dc7b..569bd91eafa3a3845d72383b2224ae5267c65fa2 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java @@ -231,8 +231,8 @@ final class Long256Vector extends LongVector { @ForceInline final @Override - long rOp(long v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + long rOp(long v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -268,12 +268,24 @@ final class Long256Vector extends LongVector { return (Long256Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Long256Vector lanewise(Unary op, VectorMask m) { + return (Long256Vector) super.lanewiseTemplate(op, Long256Mask.class, (Long256Mask) m); // specialize + } + @Override @ForceInline public Long256Vector lanewise(Binary op, Vector v) { return (Long256Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Long256Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Long256Vector) super.lanewiseTemplate(op, Long256Mask.class, v, (Long256Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline Long256Vector @@ -281,15 +293,30 @@ final class Long256Vector extends LongVector { return (Long256Vector) super.lanewiseShiftTemplate(op, e); // specialize } + /*package-private*/ + @Override + @ForceInline Long256Vector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m) { + return (Long256Vector) super.lanewiseShiftTemplate(op, Long256Mask.class, e, (Long256Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Long256Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Long256Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Long256Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Long256Vector) super.lanewiseTemplate(op, Long256Mask.class, v1, v2, (Long256Mask) m); // specialize + } + @Override @ForceInline public final @@ -309,7 +336,7 @@ final class Long256Vector extends LongVector { @ForceInline public final long reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Long256Mask.class, (Long256Mask) m); // specialized } @Override @@ -322,7 +349,7 @@ final class Long256Vector extends LongVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Long256Mask.class, (Long256Mask) m); // specialized } @ForceInline @@ -353,6 +380,13 @@ final class Long256Vector extends LongVector { } + @Override + @ForceInline + public final Long256Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Long256Mask.class, op, v, (Long256Mask) m); + } + + @Override @ForceInline public Long256Vector blend(Vector v, VectorMask m) { @@ -409,6 +443,7 @@ final class Long256Vector extends LongVector { VectorMask m) { return (Long256Vector) 
super.rearrangeTemplate(Long256Shuffle.class, + Long256Mask.class, (Long256Shuffle) shuffle, (Long256Mask) m); // specialize } @@ -578,16 +613,12 @@ final class Long256Vector extends LongVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Long256Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -613,9 +644,9 @@ final class Long256Vector extends LongVector { public Long256Mask and(VectorMask mask) { Objects.requireNonNull(mask); Long256Mask m = (Long256Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Long256Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Long256Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -623,9 +654,9 @@ final class Long256Vector extends LongVector { public Long256Mask or(VectorMask mask) { Objects.requireNonNull(mask); Long256Mask m = (Long256Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Long256Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Long256Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -633,9 +664,9 @@ final class Long256Vector extends LongVector { Long256Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Long256Mask m = (Long256Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Long256Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Long256Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -643,22 +674,32 @@ final class Long256Vector extends LongVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Long256Mask.class, long.class, VLENGTH, this, - (m) -> trueCountHelper(((Long256Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Long256Mask.class, long.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Long256Mask.class, long.class, VLENGTH, this, - (m) -> firstTrueHelper(((Long256Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Long256Mask.class, long.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Long256Mask.class, long.class, VLENGTH, this, - (m) -> lastTrueHelper(((Long256Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, 
Long256Mask.class, long.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Long256Mask.class, long.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -769,6 +810,20 @@ final class Long256Vector extends LongVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + LongVector fromArray0(long[] a, int offset, VectorMask m) { + return super.fromArray0Template(Long256Mask.class, a, offset, (Long256Mask) m); // specialize + } + + @ForceInline + @Override + final + LongVector fromArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(Long256Mask.class, a, offset, indexMap, mapOffset, (Long256Mask) m); + } + @ForceInline @@ -778,6 +833,13 @@ final class Long256Vector extends LongVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + LongVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Long256Mask.class, a, offset, (Long256Mask) m); // specialize + } + @ForceInline @Override final @@ -785,6 +847,13 @@ final class Long256Vector extends LongVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Long256Mask.class, bb, offset, (Long256Mask) m); // specialize + } + @ForceInline @Override final @@ -792,6 +861,21 @@ final class Long256Vector extends LongVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(long[] a, int offset, VectorMask m) { + super.intoArray0Template(Long256Mask.class, a, offset, (Long256Mask) m); + } + + @ForceInline + @Override + final + void intoArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(Long256Mask.class, a, offset, indexMap, mapOffset, (Long256Mask) m); + } + + @ForceInline @Override final @@ -799,6 +883,21 @@ final class Long256Vector extends LongVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Long256Mask.class, a, offset, (Long256Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Long256Mask.class, bb, offset, (Long256Mask) m); + } + + // End of specialized low-level memory operations. 
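// ================================================
// A minimal usage sketch (not part of the patch) of the masked loads and
// stores that the fromArray0/intoArray0 overloads above now back directly,
// replacing the earlier load-then-blend and scalar-loop fallbacks.
// Illustrative species and data; --add-modules jdk.incubator.vector.
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorMask;

class MaskedLoadStoreDemo {
    public static void main(String[] args) {
        var species = LongVector.SPECIES_256;                  // 4 long lanes
        long[] src = { 1, 2, 3, 4 };
        long[] dst = new long[4];
        VectorMask<Long> m = VectorMask.fromLong(species, 0b0101); // lanes 0, 2
        LongVector v = LongVector.fromArray(species, src, 0, m);   // unset lanes load 0
        v.intoArray(dst, 0, m);                                    // unset lanes untouched
        System.out.println(java.util.Arrays.toString(dst));        // [1, 0, 3, 0]
    }
}
// ================================================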
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java index 5a222ffc53976ebc3fc58d8a35406f1319c23011..543baf97d47127ff11f06cdb9872f6e51695a884 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java @@ -231,8 +231,8 @@ final class Long512Vector extends LongVector { @ForceInline final @Override - long rOp(long v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + long rOp(long v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -268,12 +268,24 @@ final class Long512Vector extends LongVector { return (Long512Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Long512Vector lanewise(Unary op, VectorMask m) { + return (Long512Vector) super.lanewiseTemplate(op, Long512Mask.class, (Long512Mask) m); // specialize + } + @Override @ForceInline public Long512Vector lanewise(Binary op, Vector v) { return (Long512Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Long512Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Long512Vector) super.lanewiseTemplate(op, Long512Mask.class, v, (Long512Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline Long512Vector @@ -281,15 +293,30 @@ final class Long512Vector extends LongVector { return (Long512Vector) super.lanewiseShiftTemplate(op, e); // specialize } + /*package-private*/ + @Override + @ForceInline Long512Vector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m) { + return (Long512Vector) super.lanewiseShiftTemplate(op, Long512Mask.class, e, (Long512Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Long512Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Long512Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Long512Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Long512Vector) super.lanewiseTemplate(op, Long512Mask.class, v1, v2, (Long512Mask) m); // specialize + } + @Override @ForceInline public final @@ -309,7 +336,7 @@ final class Long512Vector extends LongVector { @ForceInline public final long reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Long512Mask.class, (Long512Mask) m); // specialized } @Override @@ -322,7 +349,7 @@ final class Long512Vector extends LongVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Long512Mask.class, (Long512Mask) m); // specialized } @ForceInline @@ -353,6 +380,13 @@ final class Long512Vector extends LongVector { } + @Override + @ForceInline + public final Long512Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Long512Mask.class, op, v, (Long512Mask) m); + } + + @Override @ForceInline public Long512Vector blend(Vector v, VectorMask m) { @@ -409,6 +443,7 @@ final class Long512Vector extends LongVector { VectorMask m) { return (Long512Vector) 
super.rearrangeTemplate(Long512Shuffle.class, + Long512Mask.class, (Long512Shuffle) shuffle, (Long512Mask) m); // specialize } @@ -586,16 +621,12 @@ final class Long512Vector extends LongVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Long512Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -621,9 +652,9 @@ final class Long512Vector extends LongVector { public Long512Mask and(VectorMask mask) { Objects.requireNonNull(mask); Long512Mask m = (Long512Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Long512Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Long512Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -631,9 +662,9 @@ final class Long512Vector extends LongVector { public Long512Mask or(VectorMask mask) { Objects.requireNonNull(mask); Long512Mask m = (Long512Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Long512Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Long512Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -641,9 +672,9 @@ final class Long512Vector extends LongVector { Long512Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Long512Mask m = (Long512Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Long512Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Long512Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -651,22 +682,32 @@ final class Long512Vector extends LongVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Long512Mask.class, long.class, VLENGTH, this, - (m) -> trueCountHelper(((Long512Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Long512Mask.class, long.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Long512Mask.class, long.class, VLENGTH, this, - (m) -> firstTrueHelper(((Long512Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Long512Mask.class, long.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Long512Mask.class, long.class, VLENGTH, this, - (m) -> lastTrueHelper(((Long512Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, 
Long512Mask.class, long.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Long512Mask.class, long.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -777,6 +818,20 @@ final class Long512Vector extends LongVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + LongVector fromArray0(long[] a, int offset, VectorMask m) { + return super.fromArray0Template(Long512Mask.class, a, offset, (Long512Mask) m); // specialize + } + + @ForceInline + @Override + final + LongVector fromArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(Long512Mask.class, a, offset, indexMap, mapOffset, (Long512Mask) m); + } + @ForceInline @@ -786,6 +841,13 @@ final class Long512Vector extends LongVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + LongVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Long512Mask.class, a, offset, (Long512Mask) m); // specialize + } + @ForceInline @Override final @@ -793,6 +855,13 @@ final class Long512Vector extends LongVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Long512Mask.class, bb, offset, (Long512Mask) m); // specialize + } + @ForceInline @Override final @@ -800,6 +869,21 @@ final class Long512Vector extends LongVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(long[] a, int offset, VectorMask m) { + super.intoArray0Template(Long512Mask.class, a, offset, (Long512Mask) m); + } + + @ForceInline + @Override + final + void intoArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(Long512Mask.class, a, offset, indexMap, mapOffset, (Long512Mask) m); + } + + @ForceInline @Override final @@ -807,6 +891,21 @@ final class Long512Vector extends LongVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Long512Mask.class, a, offset, (Long512Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Long512Mask.class, bb, offset, (Long512Mask) m); + } + + // End of specialized low-level memory operations. 
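// ================================================
// A sketch (not part of the patch) of the maskCast rewrite above: the
// same-size VECTOR_OP_REINTERPRET shortcut is gone, and every cast now goes
// through VECTOR_OP_CAST, whose Java fallback simply copies the boolean
// lanes into a mask of the target species. Lane counts must agree; the
// species below are illustrative (both have 4 lanes).
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorMask;

class MaskCastDemo {
    public static void main(String[] args) {
        VectorMask<Long> lm =
            VectorMask.fromLong(LongVector.SPECIES_256, 0b0011);   // 4 lanes
        // 4 lanes -> 4 lanes: lane values are preserved; differing lane
        // counts would throw IllegalArgumentException.
        VectorMask<Integer> im = lm.cast(IntVector.SPECIES_128);
        System.out.println(java.util.Arrays.toString(im.toArray()));
        // [true, true, false, false]
    }
}
// ================================================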
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java index af3cb85811ec9c99c9643f8696fc5df63db6e970..328b399d59eb4c47b074427222f7e39a4c0ab5aa 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java @@ -231,8 +231,8 @@ final class Long64Vector extends LongVector { @ForceInline final @Override - long rOp(long v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + long rOp(long v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -268,12 +268,24 @@ final class Long64Vector extends LongVector { return (Long64Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Long64Vector lanewise(Unary op, VectorMask m) { + return (Long64Vector) super.lanewiseTemplate(op, Long64Mask.class, (Long64Mask) m); // specialize + } + @Override @ForceInline public Long64Vector lanewise(Binary op, Vector v) { return (Long64Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Long64Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Long64Vector) super.lanewiseTemplate(op, Long64Mask.class, v, (Long64Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline Long64Vector @@ -281,15 +293,30 @@ final class Long64Vector extends LongVector { return (Long64Vector) super.lanewiseShiftTemplate(op, e); // specialize } + /*package-private*/ + @Override + @ForceInline Long64Vector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m) { + return (Long64Vector) super.lanewiseShiftTemplate(op, Long64Mask.class, e, (Long64Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Long64Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Long64Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Long64Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Long64Vector) super.lanewiseTemplate(op, Long64Mask.class, v1, v2, (Long64Mask) m); // specialize + } + @Override @ForceInline public final @@ -309,7 +336,7 @@ final class Long64Vector extends LongVector { @ForceInline public final long reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Long64Mask.class, (Long64Mask) m); // specialized } @Override @@ -322,7 +349,7 @@ final class Long64Vector extends LongVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Long64Mask.class, (Long64Mask) m); // specialized } @ForceInline @@ -353,6 +380,13 @@ final class Long64Vector extends LongVector { } + @Override + @ForceInline + public final Long64Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Long64Mask.class, op, v, (Long64Mask) m); + } + + @Override @ForceInline public Long64Vector blend(Vector v, VectorMask m) { @@ -409,6 +443,7 @@ final class Long64Vector extends LongVector { VectorMask m) { return (Long64Vector) super.rearrangeTemplate(Long64Shuffle.class, + Long64Mask.class, 
(Long64Shuffle) shuffle, (Long64Mask) m); // specialize } @@ -572,16 +607,12 @@ final class Long64Vector extends LongVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Long64Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -607,9 +638,9 @@ final class Long64Vector extends LongVector { public Long64Mask and(VectorMask mask) { Objects.requireNonNull(mask); Long64Mask m = (Long64Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Long64Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Long64Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -617,9 +648,9 @@ final class Long64Vector extends LongVector { public Long64Mask or(VectorMask mask) { Objects.requireNonNull(mask); Long64Mask m = (Long64Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Long64Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Long64Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -627,9 +658,9 @@ final class Long64Vector extends LongVector { Long64Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Long64Mask m = (Long64Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Long64Mask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Long64Mask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -637,22 +668,32 @@ final class Long64Vector extends LongVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Long64Mask.class, long.class, VLENGTH, this, - (m) -> trueCountHelper(((Long64Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Long64Mask.class, long.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Long64Mask.class, long.class, VLENGTH, this, - (m) -> firstTrueHelper(((Long64Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Long64Mask.class, long.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Long64Mask.class, long.class, VLENGTH, this, - (m) -> lastTrueHelper(((Long64Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Long64Mask.class, long.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + 
@ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Long64Mask.class, long.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -763,6 +804,20 @@ final class Long64Vector extends LongVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + LongVector fromArray0(long[] a, int offset, VectorMask m) { + return super.fromArray0Template(Long64Mask.class, a, offset, (Long64Mask) m); // specialize + } + + @ForceInline + @Override + final + LongVector fromArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(Long64Mask.class, a, offset, indexMap, mapOffset, (Long64Mask) m); + } + @ForceInline @@ -772,6 +827,13 @@ final class Long64Vector extends LongVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + LongVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Long64Mask.class, a, offset, (Long64Mask) m); // specialize + } + @ForceInline @Override final @@ -779,6 +841,13 @@ final class Long64Vector extends LongVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Long64Mask.class, bb, offset, (Long64Mask) m); // specialize + } + @ForceInline @Override final @@ -786,6 +855,21 @@ final class Long64Vector extends LongVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(long[] a, int offset, VectorMask m) { + super.intoArray0Template(Long64Mask.class, a, offset, (Long64Mask) m); + } + + @ForceInline + @Override + final + void intoArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(Long64Mask.class, a, offset, indexMap, mapOffset, (Long64Mask) m); + } + + @ForceInline @Override final @@ -793,6 +877,21 @@ final class Long64Vector extends LongVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Long64Mask.class, a, offset, (Long64Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Long64Mask.class, bb, offset, (Long64Mask) m); + } + + // End of specialized low-level memory operations. 
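// ================================================
// A sketch (not part of the patch) of the mask logic rewrites above:
// and/or/xor only adapt to the widened VectorSupport.binaryOp signature
// (mask class and mask arguments passed as null), so their behaviour is
// unchanged. xor remains package-private, so this public demo uses andNot.
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorMask;

class MaskLogicDemo {
    public static void main(String[] args) {
        var species = LongVector.SPECIES_256;                      // 4 lanes
        VectorMask<Long> a = VectorMask.fromLong(species, 0b0110);
        VectorMask<Long> b = VectorMask.fromLong(species, 0b0011);
        System.out.println(Long.toBinaryString(a.and(b).toLong()));    // 10
        System.out.println(Long.toBinaryString(a.or(b).toLong()));     // 111
        System.out.println(Long.toBinaryString(a.andNot(b).toLong())); // 100
    }
}
// ================================================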
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java index a38386c49846a932defd355766fb2626677a4ff4..23e9d9f3fb68eaec6ac2189d882314b6c0eb892f 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java @@ -231,8 +231,8 @@ final class LongMaxVector extends LongVector { @ForceInline final @Override - long rOp(long v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + long rOp(long v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -268,12 +268,24 @@ final class LongMaxVector extends LongVector { return (LongMaxVector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public LongMaxVector lanewise(Unary op, VectorMask m) { + return (LongMaxVector) super.lanewiseTemplate(op, LongMaxMask.class, (LongMaxMask) m); // specialize + } + @Override @ForceInline public LongMaxVector lanewise(Binary op, Vector v) { return (LongMaxVector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public LongMaxVector lanewise(Binary op, Vector v, VectorMask m) { + return (LongMaxVector) super.lanewiseTemplate(op, LongMaxMask.class, v, (LongMaxMask) m); // specialize + } + /*package-private*/ @Override @ForceInline LongMaxVector @@ -281,15 +293,30 @@ final class LongMaxVector extends LongVector { return (LongMaxVector) super.lanewiseShiftTemplate(op, e); // specialize } + /*package-private*/ + @Override + @ForceInline LongMaxVector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m) { + return (LongMaxVector) super.lanewiseShiftTemplate(op, LongMaxMask.class, e, (LongMaxMask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final LongMaxVector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (LongMaxVector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + LongMaxVector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (LongMaxVector) super.lanewiseTemplate(op, LongMaxMask.class, v1, v2, (LongMaxMask) m); // specialize + } + @Override @ForceInline public final @@ -309,7 +336,7 @@ final class LongMaxVector extends LongVector { @ForceInline public final long reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, LongMaxMask.class, (LongMaxMask) m); // specialized } @Override @@ -322,7 +349,7 @@ final class LongMaxVector extends LongVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, LongMaxMask.class, (LongMaxMask) m); // specialized } @ForceInline @@ -353,6 +380,13 @@ final class LongMaxVector extends LongVector { } + @Override + @ForceInline + public final LongMaxMask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(LongMaxMask.class, op, v, (LongMaxMask) m); + } + + @Override @ForceInline public LongMaxVector blend(Vector v, VectorMask m) { @@ -409,6 +443,7 @@ final class LongMaxVector extends LongVector { VectorMask m) { return (LongMaxVector) 
super.rearrangeTemplate(LongMaxShuffle.class, + LongMaxMask.class, (LongMaxShuffle) shuffle, (LongMaxMask) m); // specialize } @@ -572,16 +607,12 @@ final class LongMaxVector extends LongVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - LongMaxMask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -607,9 +638,9 @@ final class LongMaxVector extends LongVector { public LongMaxMask and(VectorMask mask) { Objects.requireNonNull(mask); LongMaxMask m = (LongMaxMask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, LongMaxMask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, LongMaxMask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -617,9 +648,9 @@ final class LongMaxVector extends LongVector { public LongMaxMask or(VectorMask mask) { Objects.requireNonNull(mask); LongMaxMask m = (LongMaxMask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, LongMaxMask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, LongMaxMask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -627,9 +658,9 @@ final class LongMaxVector extends LongVector { LongMaxMask xor(VectorMask mask) { Objects.requireNonNull(mask); LongMaxMask m = (LongMaxMask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, LongMaxMask.class, long.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, LongMaxMask.class, null, long.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -637,22 +668,32 @@ final class LongMaxVector extends LongVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, LongMaxMask.class, long.class, VLENGTH, this, - (m) -> trueCountHelper(((LongMaxMask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, LongMaxMask.class, long.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, LongMaxMask.class, long.class, VLENGTH, this, - (m) -> firstTrueHelper(((LongMaxMask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, LongMaxMask.class, long.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, LongMaxMask.class, long.class, VLENGTH, this, - (m) -> lastTrueHelper(((LongMaxMask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, 
LongMaxMask.class, long.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, LongMaxMask.class, long.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -763,6 +804,20 @@ final class LongMaxVector extends LongVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + LongVector fromArray0(long[] a, int offset, VectorMask m) { + return super.fromArray0Template(LongMaxMask.class, a, offset, (LongMaxMask) m); // specialize + } + + @ForceInline + @Override + final + LongVector fromArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + return super.fromArray0Template(LongMaxMask.class, a, offset, indexMap, mapOffset, (LongMaxMask) m); + } + @ForceInline @@ -772,6 +827,13 @@ final class LongMaxVector extends LongVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + LongVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(LongMaxMask.class, a, offset, (LongMaxMask) m); // specialize + } + @ForceInline @Override final @@ -779,6 +841,13 @@ final class LongMaxVector extends LongVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(LongMaxMask.class, bb, offset, (LongMaxMask) m); // specialize + } + @ForceInline @Override final @@ -786,6 +855,21 @@ final class LongMaxVector extends LongVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(long[] a, int offset, VectorMask m) { + super.intoArray0Template(LongMaxMask.class, a, offset, (LongMaxMask) m); + } + + @ForceInline + @Override + final + void intoArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask m) { + super.intoArray0Template(LongMaxMask.class, a, offset, indexMap, mapOffset, (LongMaxMask) m); + } + + @ForceInline @Override final @@ -793,6 +877,21 @@ final class LongMaxVector extends LongVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(LongMaxMask.class, a, offset, (LongMaxMask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(LongMaxMask.class, bb, offset, (LongMaxMask) m); + } + + // End of specialized low-level memory operations. 
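// ================================================
// A sketch (not part of the patch) of the masked reduceLanes specializations
// repeated in each of these per-shape classes: unset lanes contribute the
// operation's identity (0 for ADD, 1 for MUL, and so on). Illustrative
// species and values.
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;

class MaskedReduceDemo {
    public static void main(String[] args) {
        var species = LongVector.SPECIES_256;                      // 4 lanes
        LongVector v =
            LongVector.fromArray(species, new long[] { 5, 7, 11, 13 }, 0);
        VectorMask<Long> m = VectorMask.fromLong(species, 0b0101); // lanes 0, 2
        System.out.println(v.reduceLanes(VectorOperators.ADD, m)); // 5 + 11 = 16
        System.out.println(v.reduceLanes(VectorOperators.MUL, m)); // 5 * 11 = 55
    }
}
// ================================================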
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java index f5069826d44b32696f89a4a318704f63929bc9d8..b48778f5ec3a3252302b3f27336fe36cd9166d08 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java @@ -29,7 +29,6 @@ import java.nio.ByteOrder; import java.nio.ReadOnlyBufferException; import java.util.Arrays; import java.util.Objects; -import java.util.function.BinaryOperator; import java.util.function.Function; import java.util.function.UnaryOperator; @@ -173,6 +172,9 @@ public abstract class LongVector extends AbstractVector { final LongVector uOpTemplate(VectorMask m, FUnOp f) { + if (m == null) { + return uOpTemplate(f); + } long[] vec = vec(); long[] res = new long[length()]; boolean[] mbits = ((AbstractMask)m).getBits(); @@ -216,6 +218,9 @@ public abstract class LongVector extends AbstractVector { LongVector bOpTemplate(Vector o, VectorMask m, FBinOp f) { + if (m == null) { + return bOpTemplate(o, f); + } long[] res = new long[length()]; long[] vec1 = this.vec(); long[] vec2 = ((LongVector)o).vec(); @@ -265,6 +270,9 @@ public abstract class LongVector extends AbstractVector { Vector o2, VectorMask m, FTriOp f) { + if (m == null) { + return tOpTemplate(o1, o2, f); + } long[] res = new long[length()]; long[] vec1 = this.vec(); long[] vec2 = ((LongVector)o1).vec(); @@ -280,7 +288,22 @@ public abstract class LongVector extends AbstractVector { /*package-private*/ abstract - long rOp(long v, FBinOp f); + long rOp(long v, VectorMask m, FBinOp f); + + @ForceInline + final + long rOpTemplate(long v, VectorMask m, FBinOp f) { + if (m == null) { + return rOpTemplate(v, f); + } + long[] vec = vec(); + boolean[] mbits = ((AbstractMask)m).getBits(); + for (int i = 0; i < vec.length; i++) { + v = mbits[i] ? f.apply(i, v, vec[i]) : v; + } + return v; + } + @ForceInline final long rOpTemplate(long v, FBinOp f) { @@ -507,37 +530,61 @@ public abstract class LongVector extends AbstractVector { return blend(broadcast(-1), compare(NE, 0)); } if (op == NOT) { - return broadcast(-1).lanewiseTemplate(XOR, this); + return broadcast(-1).lanewise(XOR, this); } else if (op == NEG) { // FIXME: Support this in the JIT. 
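// ================================================
// A standalone sketch (not part of the patch) of the masked-fold semantics
// the new rOpTemplate above implements: unset lanes are skipped, so the
// accumulator simply carries its current value past them. Plain arrays
// stand in for the vector's lane storage.
class MaskedFoldDemo {
    interface FBinOp { long apply(int i, long acc, long lane); }

    static long rOp(long v, long[] vec, boolean[] mbits, FBinOp f) {
        for (int i = 0; i < vec.length; i++) {
            v = mbits[i] ? f.apply(i, v, vec[i]) : v;  // unset lane: no-op
        }
        return v;
    }

    public static void main(String[] args) {
        long[] lanes = { 5, 7, 11, 13 };
        boolean[] m  = { true, false, true, false };
        // ADD with identity 0 over the set lanes only: 5 + 11 = 16
        System.out.println(rOp(0L, lanes, m, (i, a, b) -> a + b));
    }
}
// ================================================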
- return broadcast(0).lanewiseTemplate(SUB, this); + return broadcast(0).lanewise(SUB, this); } } int opc = opCode(op); return VectorSupport.unaryOp( - opc, getClass(), long.class, length(), - this, - UN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_NEG: return v0 -> - v0.uOp((i, a) -> (long) -a); - case VECTOR_OP_ABS: return v0 -> - v0.uOp((i, a) -> (long) Math.abs(a)); - default: return null; - }})); + opc, getClass(), null, long.class, length(), + this, null, + UN_IMPL.find(op, opc, LongVector::unaryOperations)); } - private static final - ImplCache> UN_IMPL - = new ImplCache<>(Unary.class, LongVector.class); /** * {@inheritDoc} */ - @ForceInline - public final + @Override + public abstract LongVector lanewise(VectorOperators.Unary op, - VectorMask m) { - return blend(lanewise(op), m); + VectorMask m); + @ForceInline + final + LongVector lanewiseTemplate(VectorOperators.Unary op, + Class> maskClass, + VectorMask m) { + m.check(maskClass, this); + if (opKind(op, VO_SPECIAL)) { + if (op == ZOMO) { + return blend(broadcast(-1), compare(NE, 0, m)); + } + if (op == NOT) { + return lanewise(XOR, broadcast(-1), m); + } else if (op == NEG) { + return lanewise(NOT, m).lanewise(ADD, broadcast(1), m); + } + } + int opc = opCode(op); + return VectorSupport.unaryOp( + opc, getClass(), maskClass, long.class, length(), + this, m, + UN_IMPL.find(op, opc, LongVector::unaryOperations)); + } + + private static final + ImplCache>> + UN_IMPL = new ImplCache<>(Unary.class, LongVector.class); + + private static UnaryOperation> unaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_NEG: return (v0, m) -> + v0.uOp(m, (i, a) -> (long) -a); + case VECTOR_OP_ABS: return (v0, m) -> + v0.uOp(m, (i, a) -> (long) Math.abs(a)); + default: return null; + } } // Binary lanewise support @@ -557,6 +604,7 @@ public abstract class LongVector extends AbstractVector { Vector v) { LongVector that = (LongVector) v; that.check(this); + if (opKind(op, VO_SPECIAL | VO_SHIFT)) { if (op == FIRST_NONZERO) { // FIXME: Support this in the JIT. 
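// ================================================
// A sketch (not part of the patch) of the masked NEG path above, which uses
// the two's-complement identity -x == ~x + 1, applied only in set lanes;
// unset lanes pass through unchanged. Illustrative species and values.
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;

class MaskedNegDemo {
    public static void main(String[] args) {
        var species = LongVector.SPECIES_256;                      // 4 lanes
        LongVector v =
            LongVector.fromArray(species, new long[] { 1, -2, 3, -4 }, 0);
        VectorMask<Long> m = VectorMask.fromLong(species, 0b0101); // lanes 0, 2
        long[] out = v.lanewise(VectorOperators.NEG, m).toArray();
        System.out.println(java.util.Arrays.toString(out)); // [-1, -2, -3, -4]
    }
}
// ================================================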
@@ -575,74 +623,110 @@ public abstract class LongVector extends AbstractVector { that = that.lanewise(NOT); op = AND; } else if (op == DIV) { - VectorMask eqz = that.eq((long)0); + VectorMask eqz = that.eq((long) 0); if (eqz.anyTrue()) { throw that.divZeroException(); } } } + int opc = opCode(op); return VectorSupport.binaryOp( - opc, getClass(), long.class, length(), - this, that, - BIN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (long)(a + b)); - case VECTOR_OP_SUB: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (long)(a - b)); - case VECTOR_OP_MUL: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (long)(a * b)); - case VECTOR_OP_DIV: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (long)(a / b)); - case VECTOR_OP_MAX: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (long)Math.max(a, b)); - case VECTOR_OP_MIN: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (long)Math.min(a, b)); - case VECTOR_OP_AND: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (long)(a & b)); - case VECTOR_OP_OR: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (long)(a | b)); - case VECTOR_OP_XOR: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (long)(a ^ b)); - case VECTOR_OP_LSHIFT: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> (long)(a << n)); - case VECTOR_OP_RSHIFT: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> (long)(a >> n)); - case VECTOR_OP_URSHIFT: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> (long)((a & LSHR_SETUP_MASK) >>> n)); - case VECTOR_OP_LROTATE: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> rotateLeft(a, (int)n)); - case VECTOR_OP_RROTATE: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> rotateRight(a, (int)n)); - default: return null; - }})); + opc, getClass(), null, long.class, length(), + this, that, null, + BIN_IMPL.find(op, opc, LongVector::binaryOperations)); } - private static final - ImplCache> BIN_IMPL - = new ImplCache<>(Binary.class, LongVector.class); /** * {@inheritDoc} * @see #lanewise(VectorOperators.Binary,long,VectorMask) */ - @ForceInline - public final + @Override + public abstract LongVector lanewise(VectorOperators.Binary op, Vector v, - VectorMask m) { + VectorMask m); + @ForceInline + final + LongVector lanewiseTemplate(VectorOperators.Binary op, + Class> maskClass, + Vector v, VectorMask m) { LongVector that = (LongVector) v; - if (op == DIV) { - VectorMask eqz = that.eq((long)0); - if (eqz.and(m).anyTrue()) { - throw that.divZeroException(); + that.check(this); + m.check(maskClass, this); + + if (opKind(op, VO_SPECIAL | VO_SHIFT)) { + if (op == FIRST_NONZERO) { + // FIXME: Support this in the JIT. + VectorMask thisNZ + = this.viewAsIntegralLanes().compare(NE, (long) 0); + that = that.blend((long) 0, thisNZ.cast(vspecies())); + op = OR_UNCHECKED; } - // suppress div/0 exceptions in unset lanes - that = that.lanewise(NOT, eqz); - return blend(lanewise(DIV, that), m); + if (opKind(op, VO_SHIFT)) { + // As per shift specification for Java, mask the shift count. + // This allows the JIT to ignore some ISA details. + that = that.lanewise(AND, SHIFT_MASK); + } + if (op == AND_NOT) { + // FIXME: Support this in the JIT. 
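// ================================================
// A sketch (not part of the patch) of masked division as handled above:
// both the removed blend-based path and the new masked template substitute
// a nonzero divisor into unset lanes, so zeros excluded by the mask cannot
// raise ArithmeticException. A zero divisor in a *set* lane still throws.
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;

class MaskedDivDemo {
    public static void main(String[] args) {
        var species = LongVector.SPECIES_256;                      // 4 lanes
        LongVector num = LongVector.broadcast(species, 100);
        LongVector den =
            LongVector.fromArray(species, new long[] { 5, 0, 4, 0 }, 0);
        VectorMask<Long> m = VectorMask.fromLong(species, 0b0101); // skip the zeros
        long[] out = num.lanewise(VectorOperators.DIV, den, m).toArray();
        // Unset lanes keep the value of the first operand.
        System.out.println(java.util.Arrays.toString(out)); // [20, 100, 25, 100]
    }
}
// ================================================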
+ that = that.lanewise(NOT); + op = AND; + } else if (op == DIV) { + VectorMask eqz = that.eq((long)0); + if (eqz.and(m).anyTrue()) { + throw that.divZeroException(); + } + // suppress div/0 exceptions in unset lanes + that = that.lanewise(NOT, eqz); + } + } + + int opc = opCode(op); + return VectorSupport.binaryOp( + opc, getClass(), maskClass, long.class, length(), + this, that, m, + BIN_IMPL.find(op, opc, LongVector::binaryOperations)); + } + + private static final + ImplCache>> + BIN_IMPL = new ImplCache<>(Binary.class, LongVector.class); + + private static BinaryOperation> binaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (long)(a + b)); + case VECTOR_OP_SUB: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (long)(a - b)); + case VECTOR_OP_MUL: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (long)(a * b)); + case VECTOR_OP_DIV: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (long)(a / b)); + case VECTOR_OP_MAX: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (long)Math.max(a, b)); + case VECTOR_OP_MIN: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (long)Math.min(a, b)); + case VECTOR_OP_AND: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (long)(a & b)); + case VECTOR_OP_OR: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (long)(a | b)); + case VECTOR_OP_XOR: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (long)(a ^ b)); + case VECTOR_OP_LSHIFT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> (long)(a << n)); + case VECTOR_OP_RSHIFT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> (long)(a >> n)); + case VECTOR_OP_URSHIFT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> (long)((a & LSHR_SETUP_MASK) >>> n)); + case VECTOR_OP_LROTATE: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n)); + case VECTOR_OP_RROTATE: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n)); + default: return null; } - return blend(lanewise(op, v), m); } + // FIXME: Maybe all of the public final methods in this file (the // simple ones that just call lanewise) should be pushed down to // the X-VectorBits template. 
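// ================================================
// A sketch (not part of the patch) of the shift-count masking applied above:
// counts are ANDed with SHIFT_MASK (0x3F for long lanes) before reaching the
// intrinsic, matching the JLS rule that a long shift uses only the low six
// bits of its count. Illustrative species and values.
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;

class MaskedShiftDemo {
    public static void main(String[] args) {
        var species = LongVector.SPECIES_256;                      // 4 lanes
        LongVector v = LongVector.broadcast(species, 1);
        VectorMask<Long> m = VectorMask.fromLong(species, 0b0011); // lanes 0, 1
        // A count of 68 is reduced to 68 & 0x3F == 4, exactly as 1L << 68 is.
        long[] out = v.lanewise(VectorOperators.LSHL, 68, m).toArray();
        System.out.println(java.util.Arrays.toString(out)); // [16, 16, 1, 1]
    }
}
// ================================================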
They can't optimize properly at @@ -705,7 +789,13 @@ public abstract class LongVector extends AbstractVector { LongVector lanewise(VectorOperators.Binary op, long e, VectorMask m) { - return blend(lanewise(op, e), m); + if (opKind(op, VO_SHIFT) && (long)(int)e == e) { + return lanewiseShift(op, (int) e, m); + } + if (op == AND_NOT) { + op = AND; e = (long) ~e; + } + return lanewise(op, broadcast(e), m); } @@ -723,27 +813,52 @@ public abstract class LongVector extends AbstractVector { e &= SHIFT_MASK; int opc = opCode(op); return VectorSupport.broadcastInt( - opc, getClass(), long.class, length(), - this, e, - BIN_INT_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_LSHIFT: return (v, n) -> - v.uOp((i, a) -> (long)(a << n)); - case VECTOR_OP_RSHIFT: return (v, n) -> - v.uOp((i, a) -> (long)(a >> n)); - case VECTOR_OP_URSHIFT: return (v, n) -> - v.uOp((i, a) -> (long)((a & LSHR_SETUP_MASK) >>> n)); - case VECTOR_OP_LROTATE: return (v, n) -> - v.uOp((i, a) -> rotateLeft(a, (int)n)); - case VECTOR_OP_RROTATE: return (v, n) -> - v.uOp((i, a) -> rotateRight(a, (int)n)); - default: return null; - }})); + opc, getClass(), null, long.class, length(), + this, e, null, + BIN_INT_IMPL.find(op, opc, LongVector::broadcastIntOperations)); + } + + /*package-private*/ + abstract LongVector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m); + + /*package-private*/ + @ForceInline + final LongVector + lanewiseShiftTemplate(VectorOperators.Binary op, + Class> maskClass, + int e, VectorMask m) { + m.check(maskClass, this); + assert(opKind(op, VO_SHIFT)); + // As per shift specification for Java, mask the shift count. + e &= SHIFT_MASK; + int opc = opCode(op); + return VectorSupport.broadcastInt( + opc, getClass(), maskClass, long.class, length(), + this, e, m, + BIN_INT_IMPL.find(op, opc, LongVector::broadcastIntOperations)); } + private static final - ImplCache> BIN_INT_IMPL + ImplCache>> BIN_INT_IMPL = new ImplCache<>(Binary.class, LongVector.class); + private static VectorBroadcastIntOp> broadcastIntOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_LSHIFT: return (v, n, m) -> + v.uOp(m, (i, a) -> (long)(a << n)); + case VECTOR_OP_RSHIFT: return (v, n, m) -> + v.uOp(m, (i, a) -> (long)(a >> n)); + case VECTOR_OP_URSHIFT: return (v, n, m) -> + v.uOp(m, (i, a) -> (long)((a & LSHR_SETUP_MASK) >>> n)); + case VECTOR_OP_LROTATE: return (v, n, m) -> + v.uOp(m, (i, a) -> rotateLeft(a, (int)n)); + case VECTOR_OP_RROTATE: return (v, n, m) -> + v.uOp(m, (i, a) -> rotateRight(a, (int)n)); + default: return null; + } + } + // As per shift specification for Java, mask the shift count. // We mask 0X3F (long), 0X1F (int), 0x0F (short), 0x7 (byte). 
// The latter two maskings go beyond the JLS, but seem reasonable @@ -795,16 +910,10 @@ public abstract class LongVector extends AbstractVector { } int opc = opCode(op); return VectorSupport.ternaryOp( - opc, getClass(), long.class, length(), - this, that, tother, - TERN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - default: return null; - }})); + opc, getClass(), null, long.class, length(), + this, that, tother, null, + TERN_IMPL.find(op, opc, LongVector::ternaryOperations)); } - private static final - ImplCache> TERN_IMPL - = new ImplCache<>(Ternary.class, LongVector.class); /** * {@inheritDoc} @@ -812,13 +921,48 @@ public abstract class LongVector extends AbstractVector { * @see #lanewise(VectorOperators.Ternary,Vector,long,VectorMask) * @see #lanewise(VectorOperators.Ternary,long,Vector,VectorMask) */ - @ForceInline - public final + @Override + public abstract LongVector lanewise(VectorOperators.Ternary op, Vector v1, Vector v2, - VectorMask m) { - return blend(lanewise(op, v1, v2), m); + VectorMask m); + @ForceInline + final + LongVector lanewiseTemplate(VectorOperators.Ternary op, + Class> maskClass, + Vector v1, + Vector v2, + VectorMask m) { + LongVector that = (LongVector) v1; + LongVector tother = (LongVector) v2; + // It's a word: https://www.dictionary.com/browse/tother + // See also Chapter 11 of Dickens, Our Mutual Friend: + // "Totherest Governor," replied Mr Riderhood... + that.check(this); + tother.check(this); + m.check(maskClass, this); + + if (op == BITWISE_BLEND) { + // FIXME: Support this in the JIT. + that = this.lanewise(XOR, that).lanewise(AND, tother); + return this.lanewise(XOR, that, m); + } + int opc = opCode(op); + return VectorSupport.ternaryOp( + opc, getClass(), maskClass, long.class, length(), + this, that, tother, m, + TERN_IMPL.find(op, opc, LongVector::ternaryOperations)); + } + + private static final + ImplCache>> + TERN_IMPL = new ImplCache<>(Ternary.class, LongVector.class); + + private static TernaryOperation> ternaryOperations(int opc_) { + switch (opc_) { + default: return null; + } } /** @@ -875,7 +1019,7 @@ public abstract class LongVector extends AbstractVector { long e1, long e2, VectorMask m) { - return blend(lanewise(op, e1, e2), m); + return lanewise(op, broadcast(e1), broadcast(e2), m); } /** @@ -933,7 +1077,7 @@ public abstract class LongVector extends AbstractVector { Vector v1, long e2, VectorMask m) { - return blend(lanewise(op, v1, e2), m); + return lanewise(op, v1, broadcast(e2), m); } /** @@ -990,7 +1134,7 @@ public abstract class LongVector extends AbstractVector { long e1, Vector v2, VectorMask m) { - return blend(lanewise(op, e1, v2), m); + return lanewise(op, broadcast(e1), v2, m); } // (Thus endeth the Great and Mighty Ternary Ogdoad.) 
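// ================================================
// A scalar sanity check (not part of the patch) of the identity used by the
// masked BITWISE_BLEND path above: blend(a, b, c) == a ^ ((a ^ b) & c) --
// where a bit of the selector c is 1 the result takes b's bit, where it is
// 0 it keeps a's.
class BitwiseBlendDemo {
    static long blend(long a, long b, long c) {
        return a ^ ((a ^ b) & c);
    }

    public static void main(String[] args) {
        long a = 0b1100, b = 0b1010, c = 0b0110;  // select b in bits 1-2
        System.out.println(Long.toBinaryString(blend(a, b, c)));  // 1010
    }
}
// ================================================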
@@ -1662,15 +1806,13 @@ public abstract class LongVector extends AbstractVector { final > M compareTemplate(Class maskType, Comparison op, Vector v) { - Objects.requireNonNull(v); - LongSpecies vsp = vspecies(); LongVector that = (LongVector) v; that.check(this); int opc = opCode(op); return VectorSupport.compare( opc, getClass(), maskType, long.class, length(), - this, that, - (cond, v0, v1) -> { + this, that, null, + (cond, v0, v1, m1) -> { AbstractMask m = v0.bTest(cond, v1, (cond_, i, a, b) -> compareWithOp(cond, a, b)); @@ -1680,6 +1822,28 @@ public abstract class LongVector extends AbstractVector { }); } + /*package-private*/ + @ForceInline + final + > + M compareTemplate(Class maskType, Comparison op, Vector v, M m) { + LongVector that = (LongVector) v; + that.check(this); + m.check(maskType, this); + int opc = opCode(op); + return VectorSupport.compare( + opc, getClass(), maskType, long.class, length(), + this, that, m, + (cond, v0, v1, m1) -> { + AbstractMask cmpM + = v0.bTest(cond, v1, (cond_, i, a, b) + -> compareWithOp(cond, a, b)); + @SuppressWarnings("unchecked") + M m2 = (M) cmpM.and(m1); + return m2; + }); + } + @ForceInline private static boolean compareWithOp(int cond, long a, long b) { return switch (cond) { @@ -1697,18 +1861,6 @@ public abstract class LongVector extends AbstractVector { }; } - /** - * {@inheritDoc} - */ - @Override - @ForceInline - public final - VectorMask compare(VectorOperators.Comparison op, - Vector v, - VectorMask m) { - return compare(op, v).and(m); - } - /** * Tests this vector by comparing it with an input scalar, * according to the given comparison operation. @@ -1767,7 +1919,7 @@ public abstract class LongVector extends AbstractVector { public final VectorMask compare(VectorOperators.Comparison op, long e, VectorMask m) { - return compare(op, e).and(m); + return compare(op, broadcast(e), m); } @@ -1971,9 +2123,9 @@ public abstract class LongVector extends AbstractVector { LongVector rearrangeTemplate(Class shuffletype, S shuffle) { shuffle.checkIndexes(); return VectorSupport.rearrangeOp( - getClass(), shuffletype, long.class, length(), - this, shuffle, - (v1, s_) -> v1.uOp((i, a) -> { + getClass(), shuffletype, null, long.class, length(), + this, shuffle, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); return v1.lane(ei); })); @@ -1990,24 +2142,25 @@ public abstract class LongVector extends AbstractVector { /*package-private*/ @ForceInline final - > + , M extends VectorMask> LongVector rearrangeTemplate(Class shuffletype, + Class masktype, S shuffle, - VectorMask m) { - LongVector unmasked = - VectorSupport.rearrangeOp( - getClass(), shuffletype, long.class, length(), - this, shuffle, - (v1, s_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); - return ei < 0 ? 0 : v1.lane(ei); - })); + M m) { + + m.check(masktype, this); VectorMask valid = shuffle.laneIsValid(); if (m.andNot(valid).anyTrue()) { shuffle.checkIndexes(); throw new AssertionError(); } - return broadcast((long)0).blend(unmasked, m); + return VectorSupport.rearrangeOp( + getClass(), shuffletype, masktype, long.class, length(), + this, shuffle, m, + (v1, s_, m_) -> v1.uOp((i, a) -> { + int ei = s_.laneSource(i); + return ei < 0 || !m_.laneIsSet(i) ? 
0 : v1.lane(ei); + })); } /** @@ -2030,17 +2183,17 @@ public abstract class LongVector extends AbstractVector { S ws = (S) shuffle.wrapIndexes(); LongVector r0 = VectorSupport.rearrangeOp( - getClass(), shuffletype, long.class, length(), - this, ws, - (v0, s_) -> v0.uOp((i, a) -> { + getClass(), shuffletype, null, long.class, length(), + this, ws, null, + (v0, s_, m_) -> v0.uOp((i, a) -> { int ei = s_.laneSource(i); return v0.lane(ei); })); LongVector r1 = VectorSupport.rearrangeOp( - getClass(), shuffletype, long.class, length(), - v, ws, - (v1, s_) -> v1.uOp((i, a) -> { + getClass(), shuffletype, null, long.class, length(), + v, ws, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); return v1.lane(ei); })); @@ -2303,9 +2456,18 @@ public abstract class LongVector extends AbstractVector { @ForceInline final long reduceLanesTemplate(VectorOperators.Associative op, + Class> maskClass, VectorMask m) { - LongVector v = reduceIdentityVector(op).blend(this, m); - return v.reduceLanesTemplate(op); + m.check(maskClass, this); + if (op == FIRST_NONZERO) { + LongVector v = reduceIdentityVector(op).blend(this, m); + return v.reduceLanesTemplate(op); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), maskClass, long.class, length(), + this, m, + REDUCE_IMPL.find(op, opc, LongVector::reductionOperations))); } /*package-private*/ @@ -2320,30 +2482,34 @@ public abstract class LongVector extends AbstractVector { } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), long.class, length(), - this, - REDUCE_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return v -> - toBits(v.rOp((long)0, (i, a, b) -> (long)(a + b))); - case VECTOR_OP_MUL: return v -> - toBits(v.rOp((long)1, (i, a, b) -> (long)(a * b))); - case VECTOR_OP_MIN: return v -> - toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (long) Math.min(a, b))); - case VECTOR_OP_MAX: return v -> - toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (long) Math.max(a, b))); - case VECTOR_OP_AND: return v -> - toBits(v.rOp((long)-1, (i, a, b) -> (long)(a & b))); - case VECTOR_OP_OR: return v -> - toBits(v.rOp((long)0, (i, a, b) -> (long)(a | b))); - case VECTOR_OP_XOR: return v -> - toBits(v.rOp((long)0, (i, a, b) -> (long)(a ^ b))); - default: return null; - }}))); + opc, getClass(), null, long.class, length(), + this, null, + REDUCE_IMPL.find(op, opc, LongVector::reductionOperations))); } + private static final - ImplCache> REDUCE_IMPL - = new ImplCache<>(Associative.class, LongVector.class); + ImplCache>> + REDUCE_IMPL = new ImplCache<>(Associative.class, LongVector.class); + + private static ReductionOperation> reductionOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp((long)0, m, (i, a, b) -> (long)(a + b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp((long)1, m, (i, a, b) -> (long)(a * b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (long) Math.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (long) Math.max(a, b))); + case VECTOR_OP_AND: return (v, m) -> + toBits(v.rOp((long)-1, m, (i, a, b) -> (long)(a & b))); + case VECTOR_OP_OR: return (v, m) -> + toBits(v.rOp((long)0, m, (i, a, b) -> (long)(a | b))); + case VECTOR_OP_XOR: return (v, m) -> + toBits(v.rOp((long)0, m, (i, a, b) -> (long)(a ^ b))); + default: return null; + } + } private @ForceInline @@ -2557,9 +2723,7 @@ public abstract class LongVector extends 
AbstractVector { VectorMask m) { LongSpecies vsp = (LongSpecies) species; if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) { - LongVector zero = vsp.zero(); - LongVector v = zero.fromByteArray0(a, offset); - return zero.blend(v.maybeSwap(bo), m); + return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo); } // FIXME: optimize @@ -2621,8 +2785,7 @@ public abstract class LongVector extends AbstractVector { VectorMask m) { LongSpecies vsp = (LongSpecies) species; if (offset >= 0 && offset <= (a.length - species.length())) { - LongVector zero = vsp.zero(); - return zero.blend(zero.fromArray0(a, offset), m); + return vsp.dummyVector().fromArray0(a, offset, m); } // FIXME: optimize @@ -2698,13 +2861,13 @@ public abstract class LongVector extends AbstractVector { vix = VectorIntrinsics.checkIndex(vix, a.length); return VectorSupport.loadWithMap( - vectorType, long.class, vsp.laneCount(), - IntVector.species(vsp.indexShape()).vectorType(), - a, ARRAY_BASE, vix, + vectorType, null, long.class, vsp.laneCount(), + isp.vectorType(), + a, ARRAY_BASE, vix, null, a, offset, indexMap, mapOffset, vsp, - (long[] c, int idx, int[] iMap, int idy, LongSpecies s) -> + (c, idx, iMap, idy, s, vm) -> s.vOp(n -> c[idx + iMap[idy+n]])); - } + } /** * Gathers a new vector composed of elements from an array of type @@ -2752,9 +2915,8 @@ public abstract class LongVector extends AbstractVector { return fromArray(species, a, offset, indexMap, mapOffset); } else { - // FIXME: Cannot vectorize yet, if there's a mask. LongSpecies vsp = (LongSpecies) species; - return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]); + return vsp.dummyVector().fromArray0(a, offset, indexMap, mapOffset, m); } } @@ -2848,9 +3010,7 @@ public abstract class LongVector extends AbstractVector { VectorMask m) { LongSpecies vsp = (LongSpecies) species; if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) { - LongVector zero = vsp.zero(); - LongVector v = zero.fromByteBuffer0(bb, offset); - return zero.blend(v.maybeSwap(bo), m); + return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo); } // FIXME: optimize @@ -2922,10 +3082,9 @@ public abstract class LongVector extends AbstractVector { if (m.allTrue()) { intoArray(a, offset); } else { - // FIXME: optimize LongSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 1, a.length); - stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v); + intoArray0(a, offset, m); } } @@ -2988,12 +3147,12 @@ public abstract class LongVector extends AbstractVector { vix = VectorIntrinsics.checkIndex(vix, a.length); VectorSupport.storeWithMap( - vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + vsp.vectorType(), null, vsp.elementType(), vsp.laneCount(), isp.vectorType(), a, arrayAddress(a, 0), vix, - this, + this, null, a, offset, indexMap, mapOffset, - (arr, off, v, map, mo) + (arr, off, v, map, mo, vm) -> v.stOp(arr, off, (arr_, off_, i, e) -> { int j = map[mo + i]; @@ -3040,12 +3199,7 @@ public abstract class LongVector extends AbstractVector { intoArray(a, offset, indexMap, mapOffset); } else { - // FIXME: Cannot vectorize yet, if there's a mask. 
- stOp(a, offset, m, - (arr, off, i, e) -> { - int j = indexMap[mapOffset + i]; - arr[off + j] = e; - }); + intoArray0(a, offset, indexMap, mapOffset, m); } } @@ -3075,12 +3229,9 @@ public abstract class LongVector extends AbstractVector { if (m.allTrue()) { intoByteArray(a, offset, bo); } else { - // FIXME: optimize LongSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 8, a.length); - ByteBuffer wb = wrapper(a, bo); - this.stOp(wb, offset, m, - (wb_, o, i, e) -> wb_.putLong(o + i * 8, e)); + maybeSwap(bo).intoByteArray0(a, offset, m); } } @@ -3092,7 +3243,7 @@ public abstract class LongVector extends AbstractVector { public final void intoByteBuffer(ByteBuffer bb, int offset, ByteOrder bo) { - if (bb.isReadOnly()) { + if (ScopedMemoryAccess.isReadOnly(bb)) { throw new ReadOnlyBufferException(); } offset = checkFromIndexSize(offset, byteSize(), bb.limit()); @@ -3111,15 +3262,12 @@ public abstract class LongVector extends AbstractVector { if (m.allTrue()) { intoByteBuffer(bb, offset, bo); } else { - // FIXME: optimize if (bb.isReadOnly()) { throw new ReadOnlyBufferException(); } LongSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 8, bb.limit()); - ByteBuffer wb = wrapper(bb, bo); - this.stOp(wb, offset, m, - (wb_, o, i, e) -> wb_.putLong(o + i * 8, e)); + maybeSwap(bo).intoByteBuffer0(bb, offset, m); } } @@ -3157,6 +3305,75 @@ public abstract class LongVector extends AbstractVector { (arr_, off_, i) -> arr_[off_ + i])); } + /*package-private*/ + abstract + LongVector fromArray0(long[] a, int offset, VectorMask m); + @ForceInline + final + > + LongVector fromArray0Template(Class maskClass, long[] a, int offset, M m) { + m.check(species()); + LongSpecies vsp = vspecies(); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> s.ldOp(arr, off, vm, + (arr_, off_, i) -> arr_[off_ + i])); + } + + /*package-private*/ + abstract + LongVector fromArray0(long[] a, int offset, + int[] indexMap, int mapOffset, + VectorMask m); + @ForceInline + final + > + LongVector fromArray0Template(Class maskClass, long[] a, int offset, + int[] indexMap, int mapOffset, M m) { + LongSpecies vsp = vspecies(); + IntVector.IntSpecies isp = IntVector.species(vsp.indexShape()); + Objects.requireNonNull(a); + Objects.requireNonNull(indexMap); + m.check(vsp); + Class vectorType = vsp.vectorType(); + + if (vsp.laneCount() == 1) { + return LongVector.fromArray(vsp, a, offset + indexMap[mapOffset], m); + } + + // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k] + IntVector vix; + if (isp.laneCount() != vsp.laneCount()) { + // For LongMaxVector, if vector length is non-power-of-two or + // 2048 bits, indexShape of Long species is S_MAX_BIT. + // Assume that vector length is 2048, then the lane count of Long + // vector is 32. When converting Long species to int species, + // indexShape is still S_MAX_BIT, but the lane count of int vector + // is 64. So when loading index vector (IntVector), only lower half + // of index data is needed. + vix = IntVector + .fromArray(isp, indexMap, mapOffset, IntMaxVector.IntMaxMask.LOWER_HALF_TRUE_MASK) + .add(offset); + } else { + vix = IntVector + .fromArray(isp, indexMap, mapOffset) + .add(offset); + } + + // FIXME: Check index under mask controlling. 
+ vix = VectorIntrinsics.checkIndex(vix, a.length); + + return VectorSupport.loadWithMap( + vectorType, maskClass, long.class, vsp.laneCount(), + isp.vectorType(), + a, ARRAY_BASE, vix, m, + a, offset, indexMap, mapOffset, vsp, + (c, idx, iMap, idy, s, vm) -> + s.vOp(vm, n -> c[idx + iMap[idy+n]])); + } + @Override @@ -3177,6 +3394,25 @@ public abstract class LongVector extends AbstractVector { }); } + abstract + LongVector fromByteArray0(byte[] a, int offset, VectorMask m); + @ForceInline + final + > + LongVector fromByteArray0Template(Class maskClass, byte[] a, int offset, M m) { + LongSpecies vsp = vspecies(); + m.check(vsp); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + return s.ldOp(wb, off, vm, + (wb_, o, i) -> wb_.getLong(o + i * 8)); + }); + } + abstract LongVector fromByteBuffer0(ByteBuffer bb, int offset); @ForceInline @@ -3193,6 +3429,24 @@ public abstract class LongVector extends AbstractVector { }); } + abstract + LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m); + @ForceInline + final + > + LongVector fromByteBuffer0Template(Class maskClass, ByteBuffer bb, int offset, M m) { + LongSpecies vsp = vspecies(); + m.check(vsp); + return ScopedMemoryAccess.loadFromByteBufferMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + bb, offset, m, vsp, + (buf, off, s, vm) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + return s.ldOp(wb, off, vm, + (wb_, o, i) -> wb_.getLong(o + i * 8)); + }); + } + // Unchecked storing operations in native byte order. // Caller is responsible for applying index checks, masking, and // byte swapping. @@ -3212,6 +3466,77 @@ public abstract class LongVector extends AbstractVector { (arr_, off_, i, e) -> arr_[off_+i] = e)); } + abstract + void intoArray0(long[] a, int offset, VectorMask m); + @ForceInline + final + > + void intoArray0Template(Class maskClass, long[] a, int offset, M m) { + m.check(species()); + LongSpecies vsp = vspecies(); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> arr_[off_ + i] = e)); + } + + abstract + void intoArray0(long[] a, int offset, + int[] indexMap, int mapOffset, + VectorMask m); + @ForceInline + final + > + void intoArray0Template(Class maskClass, long[] a, int offset, + int[] indexMap, int mapOffset, M m) { + m.check(species()); + LongSpecies vsp = vspecies(); + IntVector.IntSpecies isp = IntVector.species(vsp.indexShape()); + if (vsp.laneCount() == 1) { + intoArray(a, offset + indexMap[mapOffset], m); + return; + } + + // Index vector: vix[0:n] = i -> offset + indexMap[mo + i] + IntVector vix; + if (isp.laneCount() != vsp.laneCount()) { + // For LongMaxVector, if vector length is 2048 bits, indexShape + // of Long species is S_MAX_BIT, and the lane count of Long + // vector is 32. When converting Long species to int species, + // indexShape is still S_MAX_BIT, but the lane count of int vector + // is 64. So when loading index vector (IntVector), only lower half + // of index data is needed.
+ vix = IntVector + .fromArray(isp, indexMap, mapOffset, IntMaxVector.IntMaxMask.LOWER_HALF_TRUE_MASK) + .add(offset); + } else { + vix = IntVector + .fromArray(isp, indexMap, mapOffset) + .add(offset); + } + + + // FIXME: Check index under mask controlling. + vix = VectorIntrinsics.checkIndex(vix, a.length); + + VectorSupport.storeWithMap( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + isp.vectorType(), + a, arrayAddress(a, 0), vix, + this, m, + a, offset, indexMap, mapOffset, + (arr, off, v, map, mo, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> { + int j = map[mo + i]; + arr[off + j] = e; + })); + } + + abstract void intoByteArray0(byte[] a, int offset); @ForceInline @@ -3229,6 +3554,25 @@ public abstract class LongVector extends AbstractVector { }); } + abstract + void intoByteArray0(byte[] a, int offset, VectorMask m); + @ForceInline + final + > + void intoByteArray0Template(Class maskClass, byte[] a, int offset, M m) { + LongSpecies vsp = vspecies(); + m.check(vsp); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + v.stOp(wb, off, vm, + (tb_, o, i, e) -> tb_.putLong(o + i * 8, e)); + }); + } + @ForceInline final void intoByteBuffer0(ByteBuffer bb, int offset) { @@ -3243,6 +3587,25 @@ public abstract class LongVector extends AbstractVector { }); } + abstract + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m); + @ForceInline + final + > + void intoByteBuffer0Template(Class maskClass, ByteBuffer bb, int offset, M m) { + LongSpecies vsp = vspecies(); + m.check(vsp); + ScopedMemoryAccess.storeIntoByteBufferMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + this, m, bb, offset, + (buf, off, v, vm) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + v.stOp(wb, off, vm, + (wb_, o, i, e) -> wb_.putLong(o + i * 8, e)); + }); + } + + // End of low-level memory operations. 
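The masked fromArray0/intoArray0 templates above are the Java fallbacks behind the new intrinsics; user code reaches them through the public masked load and store overloads. A minimal sketch of that surface, assuming only the public jdk.incubator.vector API (the class name MaskedCopy is illustrative):

    import jdk.incubator.vector.LongVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorSpecies;

    class MaskedCopy {
        static final VectorSpecies<Long> SPECIES = LongVector.SPECIES_PREFERRED;

        // With masked loads/stores intrinsified, the tail of the array no
        // longer needs a scalar epilogue: indexInRange yields an all-true
        // mask inside the array and a partial mask for the last chunk.
        static void copy(long[] src, long[] dst) {
            for (int i = 0; i < src.length; i += SPECIES.length()) {
                VectorMask<Long> m = SPECIES.indexInRange(i, src.length);
                LongVector v = LongVector.fromArray(SPECIES, src, i, m);
                v.intoArray(dst, i, m);
            }
        }
    }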
private static @@ -3551,7 +3914,7 @@ public abstract class LongVector extends AbstractVector { /*package-private*/ @ForceInline LongVector ldOp(M memory, int offset, - AbstractMask m, + VectorMask m, FLdOp f) { return dummyVector().ldOp(memory, offset, m, f); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java index 6292160a52b7707180bc05ac700e93d8c67ab1f1..cd52dc003b6b8ab589785bf41769debbe6fc62d3 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java @@ -236,8 +236,8 @@ final class Short128Vector extends ShortVector { @ForceInline final @Override - short rOp(short v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + short rOp(short v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,12 +273,24 @@ final class Short128Vector extends ShortVector { return (Short128Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Short128Vector lanewise(Unary op, VectorMask m) { + return (Short128Vector) super.lanewiseTemplate(op, Short128Mask.class, (Short128Mask) m); // specialize + } + @Override @ForceInline public Short128Vector lanewise(Binary op, Vector v) { return (Short128Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Short128Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Short128Vector) super.lanewiseTemplate(op, Short128Mask.class, v, (Short128Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline Short128Vector @@ -286,15 +298,30 @@ final class Short128Vector extends ShortVector { return (Short128Vector) super.lanewiseShiftTemplate(op, e); // specialize } + /*package-private*/ + @Override + @ForceInline Short128Vector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m) { + return (Short128Vector) super.lanewiseShiftTemplate(op, Short128Mask.class, e, (Short128Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Short128Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Short128Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Short128Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Short128Vector) super.lanewiseTemplate(op, Short128Mask.class, v1, v2, (Short128Mask) m); // specialize + } + @Override @ForceInline public final @@ -314,7 +341,7 @@ final class Short128Vector extends ShortVector { @ForceInline public final short reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Short128Mask.class, (Short128Mask) m); // specialized } @Override @@ -327,7 +354,7 @@ final class Short128Vector extends ShortVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Short128Mask.class, (Short128Mask) m); // specialized } @ForceInline @@ -363,6 +390,13 @@ final class Short128Vector extends ShortVector { return super.compareTemplate(Short128Mask.class, op, s); // specialize } + @Override + @ForceInline + public final Short128Mask 
compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Short128Mask.class, op, v, (Short128Mask) m); + } + + @Override @ForceInline public Short128Vector blend(Vector v, VectorMask m) { @@ -419,6 +453,7 @@ final class Short128Vector extends ShortVector { VectorMask m) { return (Short128Vector) super.rearrangeTemplate(Short128Shuffle.class, + Short128Mask.class, (Short128Shuffle) shuffle, (Short128Mask) m); // specialize } @@ -596,16 +631,12 @@ final class Short128Vector extends ShortVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Short128Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -631,9 +662,9 @@ final class Short128Vector extends ShortVector { public Short128Mask and(VectorMask mask) { Objects.requireNonNull(mask); Short128Mask m = (Short128Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Short128Mask.class, short.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Short128Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -641,9 +672,9 @@ final class Short128Vector extends ShortVector { public Short128Mask or(VectorMask mask) { Objects.requireNonNull(mask); Short128Mask m = (Short128Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Short128Mask.class, short.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Short128Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -651,9 +682,9 @@ final class Short128Vector extends ShortVector { Short128Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Short128Mask m = (Short128Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Short128Mask.class, short.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Short128Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -661,22 +692,32 @@ final class Short128Vector extends ShortVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Short128Mask.class, short.class, VLENGTH, this, - (m) -> trueCountHelper(((Short128Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Short128Mask.class, short.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Short128Mask.class, short.class, VLENGTH, this, - (m) -> firstTrueHelper(((Short128Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Short128Mask.class, 
short.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Short128Mask.class, short.class, VLENGTH, this, - (m) -> lastTrueHelper(((Short128Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Short128Mask.class, short.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Short128Mask.class, short.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -787,6 +828,14 @@ final class Short128Vector extends ShortVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromArray0(short[] a, int offset, VectorMask m) { + return super.fromArray0Template(Short128Mask.class, a, offset, (Short128Mask) m); // specialize + } + + @ForceInline @Override final @@ -794,6 +843,13 @@ final class Short128Vector extends ShortVector { return super.fromCharArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromCharArray0(char[] a, int offset, VectorMask m) { + return super.fromCharArray0Template(Short128Mask.class, a, offset, (Short128Mask) m); // specialize + } + @ForceInline @Override @@ -802,6 +858,13 @@ final class Short128Vector extends ShortVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Short128Mask.class, a, offset, (Short128Mask) m); // specialize + } + @ForceInline @Override final @@ -809,6 +872,13 @@ final class Short128Vector extends ShortVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Short128Mask.class, bb, offset, (Short128Mask) m); // specialize + } + @ForceInline @Override final @@ -816,6 +886,15 @@ final class Short128Vector extends ShortVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(short[] a, int offset, VectorMask m) { + super.intoArray0Template(Short128Mask.class, a, offset, (Short128Mask) m); + } + + + @ForceInline @Override final @@ -823,6 +902,27 @@ final class Short128Vector extends ShortVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Short128Mask.class, a, offset, (Short128Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Short128Mask.class, bb, offset, (Short128Mask) m); + } + + @ForceInline + @Override + final + void intoCharArray0(char[] a, int offset, VectorMask m) { + super.intoCharArray0Template(Short128Mask.class, a, offset, (Short128Mask) m); + } + // End of specialized low-level memory operations. 
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java index 8cc621259a5db354894c0210cd493101b7135393..66f6d409e6aed546271d27f5313c42220661209e 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java @@ -236,8 +236,8 @@ final class Short256Vector extends ShortVector { @ForceInline final @Override - short rOp(short v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + short rOp(short v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,12 +273,24 @@ final class Short256Vector extends ShortVector { return (Short256Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Short256Vector lanewise(Unary op, VectorMask m) { + return (Short256Vector) super.lanewiseTemplate(op, Short256Mask.class, (Short256Mask) m); // specialize + } + @Override @ForceInline public Short256Vector lanewise(Binary op, Vector v) { return (Short256Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Short256Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Short256Vector) super.lanewiseTemplate(op, Short256Mask.class, v, (Short256Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline Short256Vector @@ -286,15 +298,30 @@ final class Short256Vector extends ShortVector { return (Short256Vector) super.lanewiseShiftTemplate(op, e); // specialize } + /*package-private*/ + @Override + @ForceInline Short256Vector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m) { + return (Short256Vector) super.lanewiseShiftTemplate(op, Short256Mask.class, e, (Short256Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Short256Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Short256Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Short256Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Short256Vector) super.lanewiseTemplate(op, Short256Mask.class, v1, v2, (Short256Mask) m); // specialize + } + @Override @ForceInline public final @@ -314,7 +341,7 @@ final class Short256Vector extends ShortVector { @ForceInline public final short reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Short256Mask.class, (Short256Mask) m); // specialized } @Override @@ -327,7 +354,7 @@ final class Short256Vector extends ShortVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Short256Mask.class, (Short256Mask) m); // specialized } @ForceInline @@ -363,6 +390,13 @@ final class Short256Vector extends ShortVector { return super.compareTemplate(Short256Mask.class, op, s); // specialize } + @Override + @ForceInline + public final Short256Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Short256Mask.class, op, v, (Short256Mask) m); + } + + @Override @ForceInline public Short256Vector blend(Vector v, VectorMask m) { @@ 
-419,6 +453,7 @@ final class Short256Vector extends ShortVector { VectorMask m) { return (Short256Vector) super.rearrangeTemplate(Short256Shuffle.class, + Short256Mask.class, (Short256Shuffle) shuffle, (Short256Mask) m); // specialize } @@ -612,16 +647,12 @@ final class Short256Vector extends ShortVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Short256Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -647,9 +678,9 @@ final class Short256Vector extends ShortVector { public Short256Mask and(VectorMask mask) { Objects.requireNonNull(mask); Short256Mask m = (Short256Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Short256Mask.class, short.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Short256Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -657,9 +688,9 @@ final class Short256Vector extends ShortVector { public Short256Mask or(VectorMask mask) { Objects.requireNonNull(mask); Short256Mask m = (Short256Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Short256Mask.class, short.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Short256Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -667,9 +698,9 @@ final class Short256Vector extends ShortVector { Short256Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Short256Mask m = (Short256Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Short256Mask.class, short.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Short256Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -677,22 +708,32 @@ final class Short256Vector extends ShortVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Short256Mask.class, short.class, VLENGTH, this, - (m) -> trueCountHelper(((Short256Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Short256Mask.class, short.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Short256Mask.class, short.class, VLENGTH, this, - (m) -> firstTrueHelper(((Short256Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Short256Mask.class, short.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Short256Mask.class, 
short.class, VLENGTH, this, - (m) -> lastTrueHelper(((Short256Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Short256Mask.class, short.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Short256Mask.class, short.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -803,6 +844,14 @@ final class Short256Vector extends ShortVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromArray0(short[] a, int offset, VectorMask m) { + return super.fromArray0Template(Short256Mask.class, a, offset, (Short256Mask) m); // specialize + } + + @ForceInline @Override final @@ -810,6 +859,13 @@ final class Short256Vector extends ShortVector { return super.fromCharArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromCharArray0(char[] a, int offset, VectorMask m) { + return super.fromCharArray0Template(Short256Mask.class, a, offset, (Short256Mask) m); // specialize + } + @ForceInline @Override @@ -818,6 +874,13 @@ final class Short256Vector extends ShortVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Short256Mask.class, a, offset, (Short256Mask) m); // specialize + } + @ForceInline @Override final @@ -825,6 +888,13 @@ final class Short256Vector extends ShortVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Short256Mask.class, bb, offset, (Short256Mask) m); // specialize + } + @ForceInline @Override final @@ -832,6 +902,15 @@ final class Short256Vector extends ShortVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(short[] a, int offset, VectorMask m) { + super.intoArray0Template(Short256Mask.class, a, offset, (Short256Mask) m); + } + + + @ForceInline @Override final @@ -839,6 +918,27 @@ final class Short256Vector extends ShortVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Short256Mask.class, a, offset, (Short256Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Short256Mask.class, bb, offset, (Short256Mask) m); + } + + @ForceInline + @Override + final + void intoCharArray0(char[] a, int offset, VectorMask m) { + super.intoCharArray0Template(Short256Mask.class, a, offset, (Short256Mask) m); + } + // End of specialized low-level memory operations. 
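The new compare(Comparison, Vector, VectorMask) override and the widened toLong() query combine naturally. A sketch against the public API (negativeLanes is an illustrative helper):

    import jdk.incubator.vector.ShortVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    class MaskedCompare {
        static final VectorSpecies<Short> S = ShortVector.SPECIES_256;

        // Bitmap of in-range lanes holding negative values. The mask is now
        // folded into the comparison itself (see compareTemplate earlier)
        // rather than and-ed onto an unmasked result afterwards.
        static long negativeLanes(short[] a, int offset) {
            VectorMask<Short> inRange = S.indexInRange(offset, a.length);
            ShortVector v = ShortVector.fromArray(S, a, offset, inRange);
            VectorMask<Short> neg = v.compare(VectorOperators.LT, (short) 0, inRange);
            return neg.toLong(); // the new VECTOR_OP_MASK_TOLONG reduction
        }
    }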
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java index b19019084b40c1f4976bc294715b535d15324327..a2a8a07ddd3eddbe084fe6a8569c5645b12b941a 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java @@ -236,8 +236,8 @@ final class Short512Vector extends ShortVector { @ForceInline final @Override - short rOp(short v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + short rOp(short v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,12 +273,24 @@ final class Short512Vector extends ShortVector { return (Short512Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Short512Vector lanewise(Unary op, VectorMask m) { + return (Short512Vector) super.lanewiseTemplate(op, Short512Mask.class, (Short512Mask) m); // specialize + } + @Override @ForceInline public Short512Vector lanewise(Binary op, Vector v) { return (Short512Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Short512Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Short512Vector) super.lanewiseTemplate(op, Short512Mask.class, v, (Short512Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline Short512Vector @@ -286,15 +298,30 @@ final class Short512Vector extends ShortVector { return (Short512Vector) super.lanewiseShiftTemplate(op, e); // specialize } + /*package-private*/ + @Override + @ForceInline Short512Vector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m) { + return (Short512Vector) super.lanewiseShiftTemplate(op, Short512Mask.class, e, (Short512Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Short512Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Short512Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Short512Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Short512Vector) super.lanewiseTemplate(op, Short512Mask.class, v1, v2, (Short512Mask) m); // specialize + } + @Override @ForceInline public final @@ -314,7 +341,7 @@ final class Short512Vector extends ShortVector { @ForceInline public final short reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Short512Mask.class, (Short512Mask) m); // specialized } @Override @@ -327,7 +354,7 @@ final class Short512Vector extends ShortVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Short512Mask.class, (Short512Mask) m); // specialized } @ForceInline @@ -363,6 +390,13 @@ final class Short512Vector extends ShortVector { return super.compareTemplate(Short512Mask.class, op, s); // specialize } + @Override + @ForceInline + public final Short512Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Short512Mask.class, op, v, (Short512Mask) m); + } + + @Override @ForceInline public Short512Vector blend(Vector v, VectorMask m) { @@ 
-419,6 +453,7 @@ final class Short512Vector extends ShortVector { VectorMask m) { return (Short512Vector) super.rearrangeTemplate(Short512Shuffle.class, + Short512Mask.class, (Short512Shuffle) shuffle, (Short512Mask) m); // specialize } @@ -644,16 +679,12 @@ final class Short512Vector extends ShortVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Short512Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -679,9 +710,9 @@ final class Short512Vector extends ShortVector { public Short512Mask and(VectorMask mask) { Objects.requireNonNull(mask); Short512Mask m = (Short512Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Short512Mask.class, short.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Short512Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -689,9 +720,9 @@ final class Short512Vector extends ShortVector { public Short512Mask or(VectorMask mask) { Objects.requireNonNull(mask); Short512Mask m = (Short512Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Short512Mask.class, short.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Short512Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -699,9 +730,9 @@ final class Short512Vector extends ShortVector { Short512Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Short512Mask m = (Short512Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Short512Mask.class, short.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Short512Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -709,22 +740,32 @@ final class Short512Vector extends ShortVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Short512Mask.class, short.class, VLENGTH, this, - (m) -> trueCountHelper(((Short512Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Short512Mask.class, short.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Short512Mask.class, short.class, VLENGTH, this, - (m) -> firstTrueHelper(((Short512Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Short512Mask.class, short.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Short512Mask.class, 
short.class, VLENGTH, this, - (m) -> lastTrueHelper(((Short512Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Short512Mask.class, short.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Short512Mask.class, short.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -835,6 +876,14 @@ final class Short512Vector extends ShortVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromArray0(short[] a, int offset, VectorMask m) { + return super.fromArray0Template(Short512Mask.class, a, offset, (Short512Mask) m); // specialize + } + + @ForceInline @Override final @@ -842,6 +891,13 @@ final class Short512Vector extends ShortVector { return super.fromCharArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromCharArray0(char[] a, int offset, VectorMask m) { + return super.fromCharArray0Template(Short512Mask.class, a, offset, (Short512Mask) m); // specialize + } + @ForceInline @Override @@ -850,6 +906,13 @@ final class Short512Vector extends ShortVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Short512Mask.class, a, offset, (Short512Mask) m); // specialize + } + @ForceInline @Override final @@ -857,6 +920,13 @@ final class Short512Vector extends ShortVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Short512Mask.class, bb, offset, (Short512Mask) m); // specialize + } + @ForceInline @Override final @@ -864,6 +934,15 @@ final class Short512Vector extends ShortVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(short[] a, int offset, VectorMask m) { + super.intoArray0Template(Short512Mask.class, a, offset, (Short512Mask) m); + } + + + @ForceInline @Override final @@ -871,6 +950,27 @@ final class Short512Vector extends ShortVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Short512Mask.class, a, offset, (Short512Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Short512Mask.class, bb, offset, (Short512Mask) m); + } + + @ForceInline + @Override + final + void intoCharArray0(char[] a, int offset, VectorMask m) { + super.intoCharArray0Template(Short512Mask.class, a, offset, (Short512Mask) m); + } + // End of specialized low-level memory operations. 
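rearrangeTemplate now receives the mask class as well, so a masked rearrange goes straight to rearrangeOp instead of doing an unmasked shuffle followed by a blend with zero. A public-API sketch (reverseWhere is an illustrative helper):

    import jdk.incubator.vector.ShortVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorShuffle;
    import jdk.incubator.vector.VectorSpecies;

    class MaskedRearrange {
        static final VectorSpecies<Short> S = ShortVector.SPECIES_512;

        // Reverse the lanes of v; lanes excluded by m come back as zero,
        // matching the old blend-with-zero semantics.
        static ShortVector reverseWhere(ShortVector v, VectorMask<Short> m) {
            VectorShuffle<Short> rev = VectorShuffle.fromOp(S, i -> S.length() - 1 - i);
            return v.rearrange(rev, m);
        }
    }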
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java index 17230435481eb8092fb50bc54d2b281e40c48435..ff2b2d7e063f3736f9f6b674770f4ada350aa4d6 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java @@ -236,8 +236,8 @@ final class Short64Vector extends ShortVector { @ForceInline final @Override - short rOp(short v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + short rOp(short v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,12 +273,24 @@ final class Short64Vector extends ShortVector { return (Short64Vector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public Short64Vector lanewise(Unary op, VectorMask m) { + return (Short64Vector) super.lanewiseTemplate(op, Short64Mask.class, (Short64Mask) m); // specialize + } + @Override @ForceInline public Short64Vector lanewise(Binary op, Vector v) { return (Short64Vector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public Short64Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Short64Vector) super.lanewiseTemplate(op, Short64Mask.class, v, (Short64Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline Short64Vector @@ -286,15 +298,30 @@ final class Short64Vector extends ShortVector { return (Short64Vector) super.lanewiseShiftTemplate(op, e); // specialize } + /*package-private*/ + @Override + @ForceInline Short64Vector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m) { + return (Short64Vector) super.lanewiseShiftTemplate(op, Short64Mask.class, e, (Short64Mask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final Short64Vector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (Short64Vector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + Short64Vector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Short64Vector) super.lanewiseTemplate(op, Short64Mask.class, v1, v2, (Short64Mask) m); // specialize + } + @Override @ForceInline public final @@ -314,7 +341,7 @@ final class Short64Vector extends ShortVector { @ForceInline public final short reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Short64Mask.class, (Short64Mask) m); // specialized } @Override @@ -327,7 +354,7 @@ final class Short64Vector extends ShortVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Short64Mask.class, (Short64Mask) m); // specialized } @ForceInline @@ -363,6 +390,13 @@ final class Short64Vector extends ShortVector { return super.compareTemplate(Short64Mask.class, op, s); // specialize } + @Override + @ForceInline + public final Short64Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Short64Mask.class, op, v, (Short64Mask) m); + } + + @Override @ForceInline public Short64Vector blend(Vector v, VectorMask m) { @@ -419,6 +453,7 @@ final class Short64Vector 
extends ShortVector { VectorMask m) { return (Short64Vector) super.rearrangeTemplate(Short64Shuffle.class, + Short64Mask.class, (Short64Shuffle) shuffle, (Short64Mask) m); // specialize } @@ -588,16 +623,12 @@ final class Short64Vector extends ShortVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - Short64Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -623,9 +654,9 @@ final class Short64Vector extends ShortVector { public Short64Mask and(VectorMask mask) { Objects.requireNonNull(mask); Short64Mask m = (Short64Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, Short64Mask.class, short.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, Short64Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -633,9 +664,9 @@ final class Short64Vector extends ShortVector { public Short64Mask or(VectorMask mask) { Objects.requireNonNull(mask); Short64Mask m = (Short64Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, Short64Mask.class, short.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, Short64Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -643,9 +674,9 @@ final class Short64Vector extends ShortVector { Short64Mask xor(VectorMask mask) { Objects.requireNonNull(mask); Short64Mask m = (Short64Mask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, Short64Mask.class, short.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, Short64Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -653,22 +684,32 @@ final class Short64Vector extends ShortVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Short64Mask.class, short.class, VLENGTH, this, - (m) -> trueCountHelper(((Short64Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Short64Mask.class, short.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Short64Mask.class, short.class, VLENGTH, this, - (m) -> firstTrueHelper(((Short64Mask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Short64Mask.class, short.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Short64Mask.class, short.class, VLENGTH, this, - (m) -> lastTrueHelper(((Short64Mask)m).getBits())); + 
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Short64Mask.class, short.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Short64Mask.class, short.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -779,6 +820,14 @@ final class Short64Vector extends ShortVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromArray0(short[] a, int offset, VectorMask m) { + return super.fromArray0Template(Short64Mask.class, a, offset, (Short64Mask) m); // specialize + } + + @ForceInline @Override final @@ -786,6 +835,13 @@ final class Short64Vector extends ShortVector { return super.fromCharArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromCharArray0(char[] a, int offset, VectorMask m) { + return super.fromCharArray0Template(Short64Mask.class, a, offset, (Short64Mask) m); // specialize + } + @ForceInline @Override @@ -794,6 +850,13 @@ final class Short64Vector extends ShortVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(Short64Mask.class, a, offset, (Short64Mask) m); // specialize + } + @ForceInline @Override final @@ -801,6 +864,13 @@ final class Short64Vector extends ShortVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(Short64Mask.class, bb, offset, (Short64Mask) m); // specialize + } + @ForceInline @Override final @@ -808,6 +878,15 @@ final class Short64Vector extends ShortVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(short[] a, int offset, VectorMask m) { + super.intoArray0Template(Short64Mask.class, a, offset, (Short64Mask) m); + } + + + @ForceInline @Override final @@ -815,6 +894,27 @@ final class Short64Vector extends ShortVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(Short64Mask.class, a, offset, (Short64Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(Short64Mask.class, bb, offset, (Short64Mask) m); + } + + @ForceInline + @Override + final + void intoCharArray0(char[] a, int offset, VectorMask m) { + super.intoCharArray0Template(Short64Mask.class, a, offset, (Short64Mask) m); + } + // End of specialized low-level memory operations. 
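The masked fromByteArray0/fromByteBuffer0 overrides back the ByteBuffer-era public overloads that take a mask. A sketch, assuming that public surface (MaskedByteLoad is an illustrative name):

    import java.nio.ByteOrder;
    import jdk.incubator.vector.ShortVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorSpecies;

    class MaskedByteLoad {
        static final VectorSpecies<Short> S = ShortVector.SPECIES_64;

        // Lanes excluded by m read as zero; with this patch the load is a
        // single masked intrinsic rather than an unmasked load blended
        // with a zero vector.
        static ShortVector load(byte[] bytes, int offset, VectorMask<Short> m) {
            return ShortVector.fromByteArray(S, bytes, offset, ByteOrder.LITTLE_ENDIAN, m);
        }
    }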
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java index 7e5d1e5570f4e280a39a875f3a761ac1a770e1bf..7aa01264a738deb58b1137119745700289a8fcd6 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java @@ -236,8 +236,8 @@ final class ShortMaxVector extends ShortVector { @ForceInline final @Override - short rOp(short v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + short rOp(short v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -273,12 +273,24 @@ final class ShortMaxVector extends ShortVector { return (ShortMaxVector) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public ShortMaxVector lanewise(Unary op, VectorMask m) { + return (ShortMaxVector) super.lanewiseTemplate(op, ShortMaxMask.class, (ShortMaxMask) m); // specialize + } + @Override @ForceInline public ShortMaxVector lanewise(Binary op, Vector v) { return (ShortMaxVector) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public ShortMaxVector lanewise(Binary op, Vector v, VectorMask m) { + return (ShortMaxVector) super.lanewiseTemplate(op, ShortMaxMask.class, v, (ShortMaxMask) m); // specialize + } + /*package-private*/ @Override @ForceInline ShortMaxVector @@ -286,15 +298,30 @@ final class ShortMaxVector extends ShortVector { return (ShortMaxVector) super.lanewiseShiftTemplate(op, e); // specialize } + /*package-private*/ + @Override + @ForceInline ShortMaxVector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m) { + return (ShortMaxVector) super.lanewiseShiftTemplate(op, ShortMaxMask.class, e, (ShortMaxMask) m); // specialize + } + /*package-private*/ @Override @ForceInline public final ShortMaxVector - lanewise(VectorOperators.Ternary op, Vector v1, Vector v2) { + lanewise(Ternary op, Vector v1, Vector v2) { return (ShortMaxVector) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + ShortMaxVector + lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (ShortMaxVector) super.lanewiseTemplate(op, ShortMaxMask.class, v1, v2, (ShortMaxMask) m); // specialize + } + @Override @ForceInline public final @@ -314,7 +341,7 @@ final class ShortMaxVector extends ShortVector { @ForceInline public final short reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, ShortMaxMask.class, (ShortMaxMask) m); // specialized } @Override @@ -327,7 +354,7 @@ final class ShortMaxVector extends ShortVector { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, ShortMaxMask.class, (ShortMaxMask) m); // specialized } @ForceInline @@ -363,6 +390,13 @@ final class ShortMaxVector extends ShortVector { return super.compareTemplate(ShortMaxMask.class, op, s); // specialize } + @Override + @ForceInline + public final ShortMaxMask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(ShortMaxMask.class, op, v, (ShortMaxMask) m); + } + + @Override @ForceInline public ShortMaxVector blend(Vector v, VectorMask m) { @@ 
-419,6 +453,7 @@ final class ShortMaxVector extends ShortVector { VectorMask m) { return (ShortMaxVector) super.rearrangeTemplate(ShortMaxShuffle.class, + ShortMaxMask.class, (ShortMaxShuffle) shuffle, (ShortMaxMask) m); // specialize } @@ -582,16 +617,12 @@ final class ShortMaxVector extends ShortVector { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - ShortMaxMask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -617,9 +648,9 @@ final class ShortMaxVector extends ShortVector { public ShortMaxMask and(VectorMask mask) { Objects.requireNonNull(mask); ShortMaxMask m = (ShortMaxMask)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, ShortMaxMask.class, short.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, ShortMaxMask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -627,9 +658,9 @@ final class ShortMaxVector extends ShortVector { public ShortMaxMask or(VectorMask mask) { Objects.requireNonNull(mask); ShortMaxMask m = (ShortMaxMask)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, ShortMaxMask.class, short.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, ShortMaxMask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -637,9 +668,9 @@ final class ShortMaxVector extends ShortVector { ShortMaxMask xor(VectorMask mask) { Objects.requireNonNull(mask); ShortMaxMask m = (ShortMaxMask)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, ShortMaxMask.class, short.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, ShortMaxMask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -647,22 +678,32 @@ final class ShortMaxVector extends ShortVector { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, ShortMaxMask.class, short.class, VLENGTH, this, - (m) -> trueCountHelper(((ShortMaxMask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, ShortMaxMask.class, short.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, ShortMaxMask.class, short.class, VLENGTH, this, - (m) -> firstTrueHelper(((ShortMaxMask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, ShortMaxMask.class, short.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, ShortMaxMask.class, 
short.class, VLENGTH, this, - (m) -> lastTrueHelper(((ShortMaxMask)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, ShortMaxMask.class, short.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, ShortMaxMask.class, short.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -773,6 +814,14 @@ final class ShortMaxVector extends ShortVector { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromArray0(short[] a, int offset, VectorMask m) { + return super.fromArray0Template(ShortMaxMask.class, a, offset, (ShortMaxMask) m); // specialize + } + + @ForceInline @Override final @@ -780,6 +829,13 @@ final class ShortMaxVector extends ShortVector { return super.fromCharArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromCharArray0(char[] a, int offset, VectorMask m) { + return super.fromCharArray0Template(ShortMaxMask.class, a, offset, (ShortMaxMask) m); // specialize + } + @ForceInline @Override @@ -788,6 +844,13 @@ final class ShortMaxVector extends ShortVector { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromByteArray0(byte[] a, int offset, VectorMask m) { + return super.fromByteArray0Template(ShortMaxMask.class, a, offset, (ShortMaxMask) m); // specialize + } + @ForceInline @Override final @@ -795,6 +858,13 @@ final class ShortMaxVector extends ShortVector { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + return super.fromByteBuffer0Template(ShortMaxMask.class, bb, offset, (ShortMaxMask) m); // specialize + } + @ForceInline @Override final @@ -802,6 +872,15 @@ final class ShortMaxVector extends ShortVector { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0(short[] a, int offset, VectorMask m) { + super.intoArray0Template(ShortMaxMask.class, a, offset, (ShortMaxMask) m); + } + + + @ForceInline @Override final @@ -809,6 +888,27 @@ final class ShortMaxVector extends ShortVector { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask m) { + super.intoByteArray0Template(ShortMaxMask.class, a, offset, (ShortMaxMask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m) { + super.intoByteBuffer0Template(ShortMaxMask.class, bb, offset, (ShortMaxMask) m); + } + + @ForceInline + @Override + final + void intoCharArray0(char[] a, int offset, VectorMask m) { + super.intoCharArray0Template(ShortMaxMask.class, a, offset, (ShortMaxMask) m); + } + // End of specialized low-level memory operations. 
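// ================================================
// Editor's note (illustration, not part of the patch): the ShortMaxVector
// overrides above route masked calls into the VectorSupport intrinsics with
// a concrete mask class (ShortMaxMask.class) instead of falling back to
// blend(). A minimal sketch of the public API they specialize, assuming the
// jdk.incubator.vector module is enabled; the class and variable names
// below are illustrative only.

import jdk.incubator.vector.*;

class MaskedLanewiseSketch {
    static final VectorSpecies<Short> S = ShortVector.SPECIES_MAX;

    public static void main(String[] args) {
        short[] a = new short[S.length()];
        for (int i = 0; i < a.length; i++) {
            a[i] = (short) i;
        }
        ShortVector v = ShortVector.fromArray(S, a, 0);
        // mask selecting the even-numbered lanes
        VectorMask<Short> even = VectorMask.fromLong(S, 0x5555_5555_5555_5555L);
        // negate only the even lanes; odd lanes pass through unchanged
        ShortVector w = v.lanewise(VectorOperators.NEG, even);
        // sum only the even lanes (unset lanes are ignored, not zeroed first)
        long sum = w.reduceLanesToLong(VectorOperators.ADD, even);
        System.out.println(w + " -> " + sum);
    }
}
// ================================================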
// ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java index a02e981e809c90284a131bfd2a4b2344713e0211..e1cada48f17ac18350bd8e015133628a646a57d2 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java @@ -29,7 +29,6 @@ import java.nio.ByteOrder; import java.nio.ReadOnlyBufferException; import java.util.Arrays; import java.util.Objects; -import java.util.function.BinaryOperator; import java.util.function.Function; import java.util.function.UnaryOperator; @@ -173,6 +172,9 @@ public abstract class ShortVector extends AbstractVector { final ShortVector uOpTemplate(VectorMask m, FUnOp f) { + if (m == null) { + return uOpTemplate(f); + } short[] vec = vec(); short[] res = new short[length()]; boolean[] mbits = ((AbstractMask)m).getBits(); @@ -216,6 +218,9 @@ public abstract class ShortVector extends AbstractVector { ShortVector bOpTemplate(Vector o, VectorMask m, FBinOp f) { + if (m == null) { + return bOpTemplate(o, f); + } short[] res = new short[length()]; short[] vec1 = this.vec(); short[] vec2 = ((ShortVector)o).vec(); @@ -265,6 +270,9 @@ public abstract class ShortVector extends AbstractVector { Vector o2, VectorMask m, FTriOp f) { + if (m == null) { + return tOpTemplate(o1, o2, f); + } short[] res = new short[length()]; short[] vec1 = this.vec(); short[] vec2 = ((ShortVector)o1).vec(); @@ -280,7 +288,22 @@ public abstract class ShortVector extends AbstractVector { /*package-private*/ abstract - short rOp(short v, FBinOp f); + short rOp(short v, VectorMask m, FBinOp f); + + @ForceInline + final + short rOpTemplate(short v, VectorMask m, FBinOp f) { + if (m == null) { + return rOpTemplate(v, f); + } + short[] vec = vec(); + boolean[] mbits = ((AbstractMask)m).getBits(); + for (int i = 0; i < vec.length; i++) { + v = mbits[i] ? f.apply(i, v, vec[i]) : v; + } + return v; + } + @ForceInline final short rOpTemplate(short v, FBinOp f) { @@ -549,37 +572,61 @@ public abstract class ShortVector extends AbstractVector { return blend(broadcast(-1), compare(NE, 0)); } if (op == NOT) { - return broadcast(-1).lanewiseTemplate(XOR, this); + return broadcast(-1).lanewise(XOR, this); } else if (op == NEG) { // FIXME: Support this in the JIT. 
- return broadcast(0).lanewiseTemplate(SUB, this); + return broadcast(0).lanewise(SUB, this); } } int opc = opCode(op); return VectorSupport.unaryOp( - opc, getClass(), short.class, length(), - this, - UN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_NEG: return v0 -> - v0.uOp((i, a) -> (short) -a); - case VECTOR_OP_ABS: return v0 -> - v0.uOp((i, a) -> (short) Math.abs(a)); - default: return null; - }})); + opc, getClass(), null, short.class, length(), + this, null, + UN_IMPL.find(op, opc, ShortVector::unaryOperations)); } - private static final - ImplCache> UN_IMPL - = new ImplCache<>(Unary.class, ShortVector.class); /** * {@inheritDoc} */ - @ForceInline - public final + @Override + public abstract ShortVector lanewise(VectorOperators.Unary op, - VectorMask m) { - return blend(lanewise(op), m); + VectorMask m); + @ForceInline + final + ShortVector lanewiseTemplate(VectorOperators.Unary op, + Class> maskClass, + VectorMask m) { + m.check(maskClass, this); + if (opKind(op, VO_SPECIAL)) { + if (op == ZOMO) { + return blend(broadcast(-1), compare(NE, 0, m)); + } + if (op == NOT) { + return lanewise(XOR, broadcast(-1), m); + } else if (op == NEG) { + return lanewise(NOT, m).lanewise(ADD, broadcast(1), m); + } + } + int opc = opCode(op); + return VectorSupport.unaryOp( + opc, getClass(), maskClass, short.class, length(), + this, m, + UN_IMPL.find(op, opc, ShortVector::unaryOperations)); + } + + private static final + ImplCache>> + UN_IMPL = new ImplCache<>(Unary.class, ShortVector.class); + + private static UnaryOperation> unaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_NEG: return (v0, m) -> + v0.uOp(m, (i, a) -> (short) -a); + case VECTOR_OP_ABS: return (v0, m) -> + v0.uOp(m, (i, a) -> (short) Math.abs(a)); + default: return null; + } } // Binary lanewise support @@ -599,6 +646,7 @@ public abstract class ShortVector extends AbstractVector { Vector v) { ShortVector that = (ShortVector) v; that.check(this); + if (opKind(op, VO_SPECIAL | VO_SHIFT)) { if (op == FIRST_NONZERO) { // FIXME: Support this in the JIT. 
@@ -617,74 +665,110 @@ public abstract class ShortVector extends AbstractVector { that = that.lanewise(NOT); op = AND; } else if (op == DIV) { - VectorMask eqz = that.eq((short)0); + VectorMask eqz = that.eq((short) 0); if (eqz.anyTrue()) { throw that.divZeroException(); } } } + int opc = opCode(op); return VectorSupport.binaryOp( - opc, getClass(), short.class, length(), - this, that, - BIN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (short)(a + b)); - case VECTOR_OP_SUB: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (short)(a - b)); - case VECTOR_OP_MUL: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (short)(a * b)); - case VECTOR_OP_DIV: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (short)(a / b)); - case VECTOR_OP_MAX: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (short)Math.max(a, b)); - case VECTOR_OP_MIN: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (short)Math.min(a, b)); - case VECTOR_OP_AND: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (short)(a & b)); - case VECTOR_OP_OR: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (short)(a | b)); - case VECTOR_OP_XOR: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> (short)(a ^ b)); - case VECTOR_OP_LSHIFT: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> (short)(a << n)); - case VECTOR_OP_RSHIFT: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> (short)(a >> n)); - case VECTOR_OP_URSHIFT: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> (short)((a & LSHR_SETUP_MASK) >>> n)); - case VECTOR_OP_LROTATE: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> rotateLeft(a, (int)n)); - case VECTOR_OP_RROTATE: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> rotateRight(a, (int)n)); - default: return null; - }})); + opc, getClass(), null, short.class, length(), + this, that, null, + BIN_IMPL.find(op, opc, ShortVector::binaryOperations)); } - private static final - ImplCache> BIN_IMPL - = new ImplCache<>(Binary.class, ShortVector.class); /** * {@inheritDoc} * @see #lanewise(VectorOperators.Binary,short,VectorMask) */ - @ForceInline - public final + @Override + public abstract ShortVector lanewise(VectorOperators.Binary op, Vector v, - VectorMask m) { + VectorMask m); + @ForceInline + final + ShortVector lanewiseTemplate(VectorOperators.Binary op, + Class> maskClass, + Vector v, VectorMask m) { ShortVector that = (ShortVector) v; - if (op == DIV) { - VectorMask eqz = that.eq((short)0); - if (eqz.and(m).anyTrue()) { - throw that.divZeroException(); + that.check(this); + m.check(maskClass, this); + + if (opKind(op, VO_SPECIAL | VO_SHIFT)) { + if (op == FIRST_NONZERO) { + // FIXME: Support this in the JIT. + VectorMask thisNZ + = this.viewAsIntegralLanes().compare(NE, (short) 0); + that = that.blend((short) 0, thisNZ.cast(vspecies())); + op = OR_UNCHECKED; + } + if (opKind(op, VO_SHIFT)) { + // As per shift specification for Java, mask the shift count. + // This allows the JIT to ignore some ISA details. + that = that.lanewise(AND, SHIFT_MASK); + } + if (op == AND_NOT) { + // FIXME: Support this in the JIT. 
+ that = that.lanewise(NOT); + op = AND; + } else if (op == DIV) { + VectorMask eqz = that.eq((short)0); + if (eqz.and(m).anyTrue()) { + throw that.divZeroException(); + } + // suppress div/0 exceptions in unset lanes + that = that.lanewise(NOT, eqz); } - // suppress div/0 exceptions in unset lanes - that = that.lanewise(NOT, eqz); - return blend(lanewise(DIV, that), m); } - return blend(lanewise(op, v), m); + + int opc = opCode(op); + return VectorSupport.binaryOp( + opc, getClass(), maskClass, short.class, length(), + this, that, m, + BIN_IMPL.find(op, opc, ShortVector::binaryOperations)); + } + + private static final + ImplCache>> + BIN_IMPL = new ImplCache<>(Binary.class, ShortVector.class); + + private static BinaryOperation> binaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (short)(a + b)); + case VECTOR_OP_SUB: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (short)(a - b)); + case VECTOR_OP_MUL: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (short)(a * b)); + case VECTOR_OP_DIV: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (short)(a / b)); + case VECTOR_OP_MAX: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (short)Math.max(a, b)); + case VECTOR_OP_MIN: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (short)Math.min(a, b)); + case VECTOR_OP_AND: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (short)(a & b)); + case VECTOR_OP_OR: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (short)(a | b)); + case VECTOR_OP_XOR: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> (short)(a ^ b)); + case VECTOR_OP_LSHIFT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> (short)(a << n)); + case VECTOR_OP_RSHIFT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> (short)(a >> n)); + case VECTOR_OP_URSHIFT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> (short)((a & LSHR_SETUP_MASK) >>> n)); + case VECTOR_OP_LROTATE: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n)); + case VECTOR_OP_RROTATE: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n)); + default: return null; + } } + // FIXME: Maybe all of the public final methods in this file (the // simple ones that just call lanewise) should be pushed down to // the X-VectorBits template. 
They can't optimize properly at @@ -747,7 +831,13 @@ public abstract class ShortVector extends AbstractVector { ShortVector lanewise(VectorOperators.Binary op, short e, VectorMask m) { - return blend(lanewise(op, e), m); + if (opKind(op, VO_SHIFT) && (short)(int)e == e) { + return lanewiseShift(op, (int) e, m); + } + if (op == AND_NOT) { + op = AND; e = (short) ~e; + } + return lanewise(op, broadcast(e), m); } /** @@ -767,8 +857,7 @@ public abstract class ShortVector extends AbstractVector { short e1 = (short) e; if ((long)e1 != e // allow shift ops to clip down their int parameters - && !(opKind(op, VO_SHIFT) && (int)e1 == e) - ) { + && !(opKind(op, VO_SHIFT) && (int)e1 == e)) { vspecies().checkValue(e); // for exception } return lanewise(op, e1); @@ -788,7 +877,13 @@ public abstract class ShortVector extends AbstractVector { public final ShortVector lanewise(VectorOperators.Binary op, long e, VectorMask m) { - return blend(lanewise(op, e), m); + short e1 = (short) e; + if ((long)e1 != e + // allow shift ops to clip down their int parameters + && !(opKind(op, VO_SHIFT) && (int)e1 == e)) { + vspecies().checkValue(e); // for exception + } + return lanewise(op, e1, m); } /*package-private*/ @@ -805,27 +900,52 @@ public abstract class ShortVector extends AbstractVector { e &= SHIFT_MASK; int opc = opCode(op); return VectorSupport.broadcastInt( - opc, getClass(), short.class, length(), - this, e, - BIN_INT_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_LSHIFT: return (v, n) -> - v.uOp((i, a) -> (short)(a << n)); - case VECTOR_OP_RSHIFT: return (v, n) -> - v.uOp((i, a) -> (short)(a >> n)); - case VECTOR_OP_URSHIFT: return (v, n) -> - v.uOp((i, a) -> (short)((a & LSHR_SETUP_MASK) >>> n)); - case VECTOR_OP_LROTATE: return (v, n) -> - v.uOp((i, a) -> rotateLeft(a, (int)n)); - case VECTOR_OP_RROTATE: return (v, n) -> - v.uOp((i, a) -> rotateRight(a, (int)n)); - default: return null; - }})); + opc, getClass(), null, short.class, length(), + this, e, null, + BIN_INT_IMPL.find(op, opc, ShortVector::broadcastIntOperations)); + } + + /*package-private*/ + abstract ShortVector + lanewiseShift(VectorOperators.Binary op, int e, VectorMask m); + + /*package-private*/ + @ForceInline + final ShortVector + lanewiseShiftTemplate(VectorOperators.Binary op, + Class> maskClass, + int e, VectorMask m) { + m.check(maskClass, this); + assert(opKind(op, VO_SHIFT)); + // As per shift specification for Java, mask the shift count. + e &= SHIFT_MASK; + int opc = opCode(op); + return VectorSupport.broadcastInt( + opc, getClass(), maskClass, short.class, length(), + this, e, m, + BIN_INT_IMPL.find(op, opc, ShortVector::broadcastIntOperations)); } + private static final - ImplCache> BIN_INT_IMPL + ImplCache>> BIN_INT_IMPL = new ImplCache<>(Binary.class, ShortVector.class); + private static VectorBroadcastIntOp> broadcastIntOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_LSHIFT: return (v, n, m) -> + v.uOp(m, (i, a) -> (short)(a << n)); + case VECTOR_OP_RSHIFT: return (v, n, m) -> + v.uOp(m, (i, a) -> (short)(a >> n)); + case VECTOR_OP_URSHIFT: return (v, n, m) -> + v.uOp(m, (i, a) -> (short)((a & LSHR_SETUP_MASK) >>> n)); + case VECTOR_OP_LROTATE: return (v, n, m) -> + v.uOp(m, (i, a) -> rotateLeft(a, (int)n)); + case VECTOR_OP_RROTATE: return (v, n, m) -> + v.uOp(m, (i, a) -> rotateRight(a, (int)n)); + default: return null; + } + } + // As per shift specification for Java, mask the shift count. // We mask 0X3F (long), 0X1F (int), 0x0F (short), 0x7 (byte). 
// The latter two maskings go beyond the JLS, but seem reasonable @@ -878,16 +998,10 @@ public abstract class ShortVector extends AbstractVector { } int opc = opCode(op); return VectorSupport.ternaryOp( - opc, getClass(), short.class, length(), - this, that, tother, - TERN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - default: return null; - }})); + opc, getClass(), null, short.class, length(), + this, that, tother, null, + TERN_IMPL.find(op, opc, ShortVector::ternaryOperations)); } - private static final - ImplCache> TERN_IMPL - = new ImplCache<>(Ternary.class, ShortVector.class); /** * {@inheritDoc} @@ -895,13 +1009,48 @@ public abstract class ShortVector extends AbstractVector { * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask) * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask) */ - @ForceInline - public final + @Override + public abstract ShortVector lanewise(VectorOperators.Ternary op, Vector v1, Vector v2, - VectorMask m) { - return blend(lanewise(op, v1, v2), m); + VectorMask m); + @ForceInline + final + ShortVector lanewiseTemplate(VectorOperators.Ternary op, + Class> maskClass, + Vector v1, + Vector v2, + VectorMask m) { + ShortVector that = (ShortVector) v1; + ShortVector tother = (ShortVector) v2; + // It's a word: https://www.dictionary.com/browse/tother + // See also Chapter 11 of Dickens, Our Mutual Friend: + // "Totherest Governor," replied Mr Riderhood... + that.check(this); + tother.check(this); + m.check(maskClass, this); + + if (op == BITWISE_BLEND) { + // FIXME: Support this in the JIT. + that = this.lanewise(XOR, that).lanewise(AND, tother); + return this.lanewise(XOR, that, m); + } + int opc = opCode(op); + return VectorSupport.ternaryOp( + opc, getClass(), maskClass, short.class, length(), + this, that, tother, m, + TERN_IMPL.find(op, opc, ShortVector::ternaryOperations)); + } + + private static final + ImplCache>> + TERN_IMPL = new ImplCache<>(Ternary.class, ShortVector.class); + + private static TernaryOperation> ternaryOperations(int opc_) { + switch (opc_) { + default: return null; + } } /** @@ -958,7 +1107,7 @@ public abstract class ShortVector extends AbstractVector { short e1, short e2, VectorMask m) { - return blend(lanewise(op, e1, e2), m); + return lanewise(op, broadcast(e1), broadcast(e2), m); } /** @@ -1016,7 +1165,7 @@ public abstract class ShortVector extends AbstractVector { Vector v1, short e2, VectorMask m) { - return blend(lanewise(op, v1, e2), m); + return lanewise(op, v1, broadcast(e2), m); } /** @@ -1073,7 +1222,7 @@ public abstract class ShortVector extends AbstractVector { short e1, Vector v2, VectorMask m) { - return blend(lanewise(op, e1, v2), m); + return lanewise(op, broadcast(e1), v2, m); } // (Thus endeth the Great and Mighty Ternary Ogdoad.) 
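// ================================================
// Editor's note (illustration, not part of the patch): with the rewrites
// above, the scalar ternary forms such as lanewise(op, e1, e2, m) now
// broadcast their scalar operands and invoke the fully masked vector
// overload once, rather than computing an unmasked result and blending it
// afterwards. A minimal sketch, assuming the incubator module is enabled;
// names are illustrative only.

import jdk.incubator.vector.*;

class MaskedTernarySketch {
    static final VectorSpecies<Short> S = ShortVector.SPECIES_PREFERRED;

    public static void main(String[] args) {
        ShortVector selector = ShortVector.broadcast(S, (short) 0x00F0);
        // mask selecting the lower half of the lanes
        VectorMask<Short> lo = S.indexInRange(0, S.length() / 2);
        // BITWISE_BLEND of two scalar constants, applied only in the set
        // lanes; unset lanes keep the original value of `selector`
        ShortVector r = selector.lanewise(VectorOperators.BITWISE_BLEND,
                                          (short) 0x00FF, (short) 0x0F0F, lo);
        System.out.println(r);
    }
}
// ================================================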
@@ -1745,15 +1894,13 @@ public abstract class ShortVector extends AbstractVector { final > M compareTemplate(Class maskType, Comparison op, Vector v) { - Objects.requireNonNull(v); - ShortSpecies vsp = vspecies(); ShortVector that = (ShortVector) v; that.check(this); int opc = opCode(op); return VectorSupport.compare( opc, getClass(), maskType, short.class, length(), - this, that, - (cond, v0, v1) -> { + this, that, null, + (cond, v0, v1, m1) -> { AbstractMask m = v0.bTest(cond, v1, (cond_, i, a, b) -> compareWithOp(cond, a, b)); @@ -1763,6 +1910,28 @@ public abstract class ShortVector extends AbstractVector { }); } + /*package-private*/ + @ForceInline + final + > + M compareTemplate(Class maskType, Comparison op, Vector v, M m) { + ShortVector that = (ShortVector) v; + that.check(this); + m.check(maskType, this); + int opc = opCode(op); + return VectorSupport.compare( + opc, getClass(), maskType, short.class, length(), + this, that, m, + (cond, v0, v1, m1) -> { + AbstractMask cmpM + = v0.bTest(cond, v1, (cond_, i, a, b) + -> compareWithOp(cond, a, b)); + @SuppressWarnings("unchecked") + M m2 = (M) cmpM.and(m1); + return m2; + }); + } + @ForceInline private static boolean compareWithOp(int cond, short a, short b) { return switch (cond) { @@ -1780,18 +1949,6 @@ public abstract class ShortVector extends AbstractVector { }; } - /** - * {@inheritDoc} - */ - @Override - @ForceInline - public final - VectorMask compare(VectorOperators.Comparison op, - Vector v, - VectorMask m) { - return compare(op, v).and(m); - } - /** * Tests this vector by comparing it with an input scalar, * according to the given comparison operation. @@ -1850,7 +2007,7 @@ public abstract class ShortVector extends AbstractVector { public final VectorMask compare(VectorOperators.Comparison op, short e, VectorMask m) { - return compare(op, e).and(m); + return compare(op, broadcast(e), m); } /** @@ -2101,9 +2258,9 @@ public abstract class ShortVector extends AbstractVector { ShortVector rearrangeTemplate(Class shuffletype, S shuffle) { shuffle.checkIndexes(); return VectorSupport.rearrangeOp( - getClass(), shuffletype, short.class, length(), - this, shuffle, - (v1, s_) -> v1.uOp((i, a) -> { + getClass(), shuffletype, null, short.class, length(), + this, shuffle, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); return v1.lane(ei); })); @@ -2120,24 +2277,25 @@ public abstract class ShortVector extends AbstractVector { /*package-private*/ @ForceInline final - > + , M extends VectorMask> ShortVector rearrangeTemplate(Class shuffletype, + Class masktype, S shuffle, - VectorMask m) { - ShortVector unmasked = - VectorSupport.rearrangeOp( - getClass(), shuffletype, short.class, length(), - this, shuffle, - (v1, s_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); - return ei < 0 ? 0 : v1.lane(ei); - })); + M m) { + + m.check(masktype, this); VectorMask valid = shuffle.laneIsValid(); if (m.andNot(valid).anyTrue()) { shuffle.checkIndexes(); throw new AssertionError(); } - return broadcast((short)0).blend(unmasked, m); + return VectorSupport.rearrangeOp( + getClass(), shuffletype, masktype, short.class, length(), + this, shuffle, m, + (v1, s_, m_) -> v1.uOp((i, a) -> { + int ei = s_.laneSource(i); + return ei < 0 || !m_.laneIsSet(i) ? 
0 : v1.lane(ei); + })); } /** @@ -2160,17 +2318,17 @@ public abstract class ShortVector extends AbstractVector { S ws = (S) shuffle.wrapIndexes(); ShortVector r0 = VectorSupport.rearrangeOp( - getClass(), shuffletype, short.class, length(), - this, ws, - (v0, s_) -> v0.uOp((i, a) -> { + getClass(), shuffletype, null, short.class, length(), + this, ws, null, + (v0, s_, m_) -> v0.uOp((i, a) -> { int ei = s_.laneSource(i); return v0.lane(ei); })); ShortVector r1 = VectorSupport.rearrangeOp( - getClass(), shuffletype, short.class, length(), - v, ws, - (v1, s_) -> v1.uOp((i, a) -> { + getClass(), shuffletype, null, short.class, length(), + v, ws, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); return v1.lane(ei); })); @@ -2433,9 +2591,18 @@ public abstract class ShortVector extends AbstractVector { @ForceInline final short reduceLanesTemplate(VectorOperators.Associative op, + Class> maskClass, VectorMask m) { - ShortVector v = reduceIdentityVector(op).blend(this, m); - return v.reduceLanesTemplate(op); + m.check(maskClass, this); + if (op == FIRST_NONZERO) { + ShortVector v = reduceIdentityVector(op).blend(this, m); + return v.reduceLanesTemplate(op); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), maskClass, short.class, length(), + this, m, + REDUCE_IMPL.find(op, opc, ShortVector::reductionOperations))); } /*package-private*/ @@ -2450,30 +2617,34 @@ public abstract class ShortVector extends AbstractVector { } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), short.class, length(), - this, - REDUCE_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return v -> - toBits(v.rOp((short)0, (i, a, b) -> (short)(a + b))); - case VECTOR_OP_MUL: return v -> - toBits(v.rOp((short)1, (i, a, b) -> (short)(a * b))); - case VECTOR_OP_MIN: return v -> - toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (short) Math.min(a, b))); - case VECTOR_OP_MAX: return v -> - toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (short) Math.max(a, b))); - case VECTOR_OP_AND: return v -> - toBits(v.rOp((short)-1, (i, a, b) -> (short)(a & b))); - case VECTOR_OP_OR: return v -> - toBits(v.rOp((short)0, (i, a, b) -> (short)(a | b))); - case VECTOR_OP_XOR: return v -> - toBits(v.rOp((short)0, (i, a, b) -> (short)(a ^ b))); - default: return null; - }}))); + opc, getClass(), null, short.class, length(), + this, null, + REDUCE_IMPL.find(op, opc, ShortVector::reductionOperations))); } + private static final - ImplCache> REDUCE_IMPL - = new ImplCache<>(Associative.class, ShortVector.class); + ImplCache>> + REDUCE_IMPL = new ImplCache<>(Associative.class, ShortVector.class); + + private static ReductionOperation> reductionOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp((short)0, m, (i, a, b) -> (short)(a + b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp((short)1, m, (i, a, b) -> (short)(a * b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (short) Math.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (short) Math.max(a, b))); + case VECTOR_OP_AND: return (v, m) -> + toBits(v.rOp((short)-1, m, (i, a, b) -> (short)(a & b))); + case VECTOR_OP_OR: return (v, m) -> + toBits(v.rOp((short)0, m, (i, a, b) -> (short)(a | b))); + case VECTOR_OP_XOR: return (v, m) -> + toBits(v.rOp((short)0, m, (i, a, b) -> (short)(a ^ b))); + default: return null; + } + } private @ForceInline @@ -2699,9 +2870,7 @@ 
public abstract class ShortVector extends AbstractVector { VectorMask m) { ShortSpecies vsp = (ShortSpecies) species; if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) { - ShortVector zero = vsp.zero(); - ShortVector v = zero.fromByteArray0(a, offset); - return zero.blend(v.maybeSwap(bo), m); + return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo); } // FIXME: optimize @@ -2763,8 +2932,7 @@ public abstract class ShortVector extends AbstractVector { VectorMask m) { ShortSpecies vsp = (ShortSpecies) species; if (offset >= 0 && offset <= (a.length - species.length())) { - ShortVector zero = vsp.zero(); - return zero.blend(zero.fromArray0(a, offset), m); + return vsp.dummyVector().fromArray0(a, offset, m); } // FIXME: optimize @@ -2913,8 +3081,7 @@ public abstract class ShortVector extends AbstractVector { VectorMask m) { ShortSpecies vsp = (ShortSpecies) species; if (offset >= 0 && offset <= (a.length - species.length())) { - ShortVector zero = vsp.zero(); - return zero.blend(zero.fromCharArray0(a, offset), m); + return vsp.dummyVector().fromCharArray0(a, offset, m); } // FIXME: optimize @@ -3099,9 +3266,7 @@ public abstract class ShortVector extends AbstractVector { VectorMask m) { ShortSpecies vsp = (ShortSpecies) species; if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) { - ShortVector zero = vsp.zero(); - ShortVector v = zero.fromByteBuffer0(bb, offset); - return zero.blend(v.maybeSwap(bo), m); + return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo); } // FIXME: optimize @@ -3173,10 +3338,9 @@ public abstract class ShortVector extends AbstractVector { if (m.allTrue()) { intoArray(a, offset); } else { - // FIXME: optimize ShortSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 1, a.length); - stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v); + intoArray0(a, offset, m); } } @@ -3321,10 +3485,9 @@ public abstract class ShortVector extends AbstractVector { if (m.allTrue()) { intoCharArray(a, offset); } else { - // FIXME: optimize ShortSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 1, a.length); - stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = (char) v); + intoCharArray0(a, offset, m); } } @@ -3438,12 +3601,9 @@ public abstract class ShortVector extends AbstractVector { if (m.allTrue()) { intoByteArray(a, offset, bo); } else { - // FIXME: optimize ShortSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 2, a.length); - ByteBuffer wb = wrapper(a, bo); - this.stOp(wb, offset, m, - (wb_, o, i, e) -> wb_.putShort(o + i * 2, e)); + maybeSwap(bo).intoByteArray0(a, offset, m); } } @@ -3455,7 +3615,7 @@ public abstract class ShortVector extends AbstractVector { public final void intoByteBuffer(ByteBuffer bb, int offset, ByteOrder bo) { - if (bb.isReadOnly()) { + if (ScopedMemoryAccess.isReadOnly(bb)) { throw new ReadOnlyBufferException(); } offset = checkFromIndexSize(offset, byteSize(), bb.limit()); @@ -3474,15 +3634,12 @@ public abstract class ShortVector extends AbstractVector { if (m.allTrue()) { intoByteBuffer(bb, offset, bo); } else { - // FIXME: optimize if (bb.isReadOnly()) { throw new ReadOnlyBufferException(); } ShortSpecies vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 2, bb.limit()); - ByteBuffer wb = wrapper(bb, bo); - this.stOp(wb, offset, m, - (wb_, o, i, e) -> wb_.putShort(o + i * 2, e)); + maybeSwap(bo).intoByteBuffer0(bb, offset, m); } } @@ -3520,6 +3677,24 @@ public abstract class ShortVector extends AbstractVector { (arr_, off_, i) -> arr_[off_ + 
i])); } + /*package-private*/ + abstract + ShortVector fromArray0(short[] a, int offset, VectorMask m); + @ForceInline + final + > + ShortVector fromArray0Template(Class maskClass, short[] a, int offset, M m) { + m.check(species()); + ShortSpecies vsp = vspecies(); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> s.ldOp(arr, off, vm, + (arr_, off_, i) -> arr_[off_ + i])); + } + + /*package-private*/ abstract ShortVector fromCharArray0(char[] a, int offset); @@ -3535,6 +3710,23 @@ public abstract class ShortVector extends AbstractVector { (arr_, off_, i) -> (short) arr_[off_ + i])); } + /*package-private*/ + abstract + ShortVector fromCharArray0(char[] a, int offset, VectorMask m); + @ForceInline + final + > + ShortVector fromCharArray0Template(Class maskClass, char[] a, int offset, M m) { + m.check(species()); + ShortSpecies vsp = vspecies(); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, charArrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> s.ldOp(arr, off, vm, + (arr_, off_, i) -> (short) arr_[off_ + i])); + } + @Override abstract @@ -3554,6 +3746,25 @@ public abstract class ShortVector extends AbstractVector { }); } + abstract + ShortVector fromByteArray0(byte[] a, int offset, VectorMask m); + @ForceInline + final + > + ShortVector fromByteArray0Template(Class maskClass, byte[] a, int offset, M m) { + ShortSpecies vsp = vspecies(); + m.check(vsp); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + return s.ldOp(wb, off, vm, + (wb_, o, i) -> wb_.getShort(o + i * 2)); + }); + } + abstract ShortVector fromByteBuffer0(ByteBuffer bb, int offset); @ForceInline @@ -3570,6 +3781,24 @@ public abstract class ShortVector extends AbstractVector { }); } + abstract + ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask m); + @ForceInline + final + > + ShortVector fromByteBuffer0Template(Class maskClass, ByteBuffer bb, int offset, M m) { + ShortSpecies vsp = vspecies(); + m.check(vsp); + return ScopedMemoryAccess.loadFromByteBufferMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + bb, offset, m, vsp, + (buf, off, s, vm) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + return s.ldOp(wb, off, vm, + (wb_, o, i) -> wb_.getShort(o + i * 2)); + }); + } + // Unchecked storing operations in native byte order. // Caller is responsible for applying index checks, masking, and // byte swapping. 
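// ================================================
// Editor's note (illustration, not part of the patch): the masked
// fromArray0/fromCharArray0/fromByteArray0/fromByteBuffer0 templates above
// (and the matching store templates just below) back the public masked
// loads and stores, so the classic tail-masked loop can compile down to
// predicated memory operations instead of load-blend-store. A minimal
// sketch; the class and method names are illustrative only.

import jdk.incubator.vector.*;

class MaskedTailLoopSketch {
    static final VectorSpecies<Short> S = ShortVector.SPECIES_PREFERRED;

    // dst[i] = (short) (src[i] + 1) for every i, including the ragged tail
    static void addOne(short[] src, short[] dst) {
        for (int i = 0; i < src.length; i += S.length()) {
            // in-range lanes only; the final iteration gets a partial mask
            VectorMask<Short> m = S.indexInRange(i, src.length);
            ShortVector.fromArray(S, src, i, m)
                       .lanewise(VectorOperators.ADD, (short) 1, m)
                       .intoArray(dst, i, m);
        }
    }
}
// ================================================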
@@ -3589,6 +3818,25 @@ public abstract class ShortVector extends AbstractVector { (arr_, off_, i, e) -> arr_[off_+i] = e)); } + abstract + void intoArray0(short[] a, int offset, VectorMask m); + @ForceInline + final + > + void intoArray0Template(Class maskClass, short[] a, int offset, M m) { + m.check(species()); + ShortSpecies vsp = vspecies(); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> arr_[off_ + i] = e)); + } + + + abstract void intoByteArray0(byte[] a, int offset); @ForceInline @@ -3606,6 +3854,25 @@ public abstract class ShortVector extends AbstractVector { }); } + abstract + void intoByteArray0(byte[] a, int offset, VectorMask m); + @ForceInline + final + > + void intoByteArray0Template(Class maskClass, byte[] a, int offset, M m) { + ShortSpecies vsp = vspecies(); + m.check(vsp); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + v.stOp(wb, off, vm, + (tb_, o, i, e) -> tb_.putShort(o + i * 2, e)); + }); + } + @ForceInline final void intoByteBuffer0(ByteBuffer bb, int offset) { @@ -3620,6 +3887,42 @@ public abstract class ShortVector extends AbstractVector { }); } + abstract + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask m); + @ForceInline + final + > + void intoByteBuffer0Template(Class maskClass, ByteBuffer bb, int offset, M m) { + ShortSpecies vsp = vspecies(); + m.check(vsp); + ScopedMemoryAccess.storeIntoByteBufferMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + this, m, bb, offset, + (buf, off, v, vm) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + v.stOp(wb, off, vm, + (wb_, o, i, e) -> wb_.putShort(o + i * 2, e)); + }); + } + + /*package-private*/ + abstract + void intoCharArray0(char[] a, int offset, VectorMask m); + @ForceInline + final + > + void intoCharArray0Template(Class maskClass, char[] a, int offset, M m) { + m.check(species()); + ShortSpecies vsp = vspecies(); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, charArrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> arr_[off_ + i] = (char) e)); + } + // End of low-level memory operations. private static @@ -3954,7 +4257,7 @@ public abstract class ShortVector extends AbstractVector { /*package-private*/ @ForceInline ShortVector ldOp(M memory, int offset, - AbstractMask m, + VectorMask m, FLdOp f) { return dummyVector().ldOp(memory, offset, m, f); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorMask.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorMask.java index 127ec94ad82bd2bde8078d79b986d194664eaae3..b57a2abbb1f1600728cbcf4db048ab295ad2a447 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorMask.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorMask.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -516,6 +516,8 @@ public abstract class VectorMask<E> extends jdk.internal.vm.vector.VectorSupport * @param i the lane index * * @return true if the lane at index {@code i} is set, otherwise false + * @throws IndexOutOfBoundsException if the index is out of range + * ({@code < 0 || >= length()}) */ public abstract boolean laneIsSet(int i); @@ -553,6 +555,24 @@ */ public abstract VectorMask<E> check(VectorSpecies<E> species); + /** + * Checks that this mask has the given mask class, + * and that its species matches the given vector's species, + * and returns this mask unchanged. + * The effect is similar to this pseudocode: + * {@code getClass() == maskClass && + * vectorSpecies() == vector.species() + * ? this + * : throw new ClassCastException()}. + * + * @param maskClass the class required for this mask + * @param vector the vector whose species is required for this mask + * @param <E> the boxed element type of the required species + * @return the same mask + * @throws ClassCastException if the mask class or species is wrong + */ + abstract <E> VectorMask<E> check(Class<? extends VectorMask<E>> maskClass, Vector<E> vector); + /** * Returns a string representation of this mask, of the form * {@code "Mask[T.TT...]"}, reporting the mask bit diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template index 860ef572d676ce72d096ceecf1599be6d7958bf4..45c2cf9267c798f6a4f6d33b748ac7a0d8b33ce6 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template @@ -29,7 +29,6 @@ import java.nio.ByteOrder; import java.nio.ReadOnlyBufferException; import java.util.Arrays; import java.util.Objects; -import java.util.function.BinaryOperator; import java.util.function.Function; import java.util.function.UnaryOperator; @@ -177,6 +176,9 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { final $abstractvectortype$ uOpTemplate(VectorMask<$Boxtype$> m, FUnOp f) { + if (m == null) { + return uOpTemplate(f); + } $type$[] vec = vec(); $type$[] res = new $type$[length()]; boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits(); @@ -220,6 +222,9 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $abstractvectortype$ bOpTemplate(Vector<$Boxtype$> o, VectorMask<$Boxtype$> m, FBinOp f) { + if (m == null) { + return bOpTemplate(o, f); + } $type$[] res = new $type$[length()]; $type$[] vec1 = this.vec(); $type$[] vec2 = (($abstractvectortype$)o).vec(); @@ -269,6 +274,9 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { Vector<$Boxtype$> o2, VectorMask<$Boxtype$> m, FTriOp f) { + if (m == null) { + return tOpTemplate(o1, o2, f); + } $type$[] res = new $type$[length()]; $type$[] vec1 = this.vec(); $type$[] vec2 = (($abstractvectortype$)o1).vec(); @@ -284,7 +292,22 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /*package-private*/ abstract - $type$ rOp($type$ v, FBinOp f); + $type$ rOp($type$ v, VectorMask<$Boxtype$> m, FBinOp f); + + @ForceInline + final + $type$ rOpTemplate($type$ v, VectorMask<$Boxtype$> m, FBinOp f) { + if (m == null) { + return rOpTemplate(v, f); + } + $type$[] vec = vec(); + boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits(); + for (int i
= 0; i < vec.length; i++) { + v = mbits[i] ? f.apply(i, v, vec[i]) : v; + } + return v; + } + @ForceInline final $type$ rOpTemplate($type$ v, FBinOp f) { @@ -572,72 +595,98 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } #if[BITWISE] if (op == NOT) { - return broadcast(-1).lanewiseTemplate(XOR, this); + return broadcast(-1).lanewise(XOR, this); } else if (op == NEG) { // FIXME: Support this in the JIT. - return broadcast(0).lanewiseTemplate(SUB, this); + return broadcast(0).lanewise(SUB, this); } #end[BITWISE] } int opc = opCode(op); return VectorSupport.unaryOp( - opc, getClass(), $type$.class, length(), - this, - UN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_NEG: return v0 -> - v0.uOp((i, a) -> ($type$) -a); - case VECTOR_OP_ABS: return v0 -> - v0.uOp((i, a) -> ($type$) Math.abs(a)); -#if[FP] - case VECTOR_OP_SIN: return v0 -> - v0.uOp((i, a) -> ($type$) Math.sin(a)); - case VECTOR_OP_COS: return v0 -> - v0.uOp((i, a) -> ($type$) Math.cos(a)); - case VECTOR_OP_TAN: return v0 -> - v0.uOp((i, a) -> ($type$) Math.tan(a)); - case VECTOR_OP_ASIN: return v0 -> - v0.uOp((i, a) -> ($type$) Math.asin(a)); - case VECTOR_OP_ACOS: return v0 -> - v0.uOp((i, a) -> ($type$) Math.acos(a)); - case VECTOR_OP_ATAN: return v0 -> - v0.uOp((i, a) -> ($type$) Math.atan(a)); - case VECTOR_OP_EXP: return v0 -> - v0.uOp((i, a) -> ($type$) Math.exp(a)); - case VECTOR_OP_LOG: return v0 -> - v0.uOp((i, a) -> ($type$) Math.log(a)); - case VECTOR_OP_LOG10: return v0 -> - v0.uOp((i, a) -> ($type$) Math.log10(a)); - case VECTOR_OP_SQRT: return v0 -> - v0.uOp((i, a) -> ($type$) Math.sqrt(a)); - case VECTOR_OP_CBRT: return v0 -> - v0.uOp((i, a) -> ($type$) Math.cbrt(a)); - case VECTOR_OP_SINH: return v0 -> - v0.uOp((i, a) -> ($type$) Math.sinh(a)); - case VECTOR_OP_COSH: return v0 -> - v0.uOp((i, a) -> ($type$) Math.cosh(a)); - case VECTOR_OP_TANH: return v0 -> - v0.uOp((i, a) -> ($type$) Math.tanh(a)); - case VECTOR_OP_EXPM1: return v0 -> - v0.uOp((i, a) -> ($type$) Math.expm1(a)); - case VECTOR_OP_LOG1P: return v0 -> - v0.uOp((i, a) -> ($type$) Math.log1p(a)); -#end[FP] - default: return null; - }})); + opc, getClass(), null, $type$.class, length(), + this, null, + UN_IMPL.find(op, opc, $abstractvectortype$::unaryOperations)); } - private static final - ImplCache> UN_IMPL - = new ImplCache<>(Unary.class, $Type$Vector.class); /** * {@inheritDoc} */ - @ForceInline - public final + @Override + public abstract $abstractvectortype$ lanewise(VectorOperators.Unary op, - VectorMask<$Boxtype$> m) { - return blend(lanewise(op), m); + VectorMask<$Boxtype$> m); + @ForceInline + final + $abstractvectortype$ lanewiseTemplate(VectorOperators.Unary op, + Class> maskClass, + VectorMask<$Boxtype$> m) { + m.check(maskClass, this); + if (opKind(op, VO_SPECIAL)) { + if (op == ZOMO) { + return blend(broadcast(-1), compare(NE, 0, m)); + } +#if[BITWISE] + if (op == NOT) { + return lanewise(XOR, broadcast(-1), m); + } else if (op == NEG) { + return lanewise(NOT, m).lanewise(ADD, broadcast(1), m); + } +#end[BITWISE] + } + int opc = opCode(op); + return VectorSupport.unaryOp( + opc, getClass(), maskClass, $type$.class, length(), + this, m, + UN_IMPL.find(op, opc, $abstractvectortype$::unaryOperations)); + } + + private static final + ImplCache>> + UN_IMPL = new ImplCache<>(Unary.class, $Type$Vector.class); + + private static UnaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> unaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_NEG: return (v0, m) -> + v0.uOp(m, (i, a) 
-> ($type$) -a); + case VECTOR_OP_ABS: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.abs(a)); +#if[FP] + case VECTOR_OP_SIN: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.sin(a)); + case VECTOR_OP_COS: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.cos(a)); + case VECTOR_OP_TAN: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.tan(a)); + case VECTOR_OP_ASIN: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.asin(a)); + case VECTOR_OP_ACOS: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.acos(a)); + case VECTOR_OP_ATAN: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.atan(a)); + case VECTOR_OP_EXP: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.exp(a)); + case VECTOR_OP_LOG: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.log(a)); + case VECTOR_OP_LOG10: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.log10(a)); + case VECTOR_OP_SQRT: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.sqrt(a)); + case VECTOR_OP_CBRT: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.cbrt(a)); + case VECTOR_OP_SINH: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.sinh(a)); + case VECTOR_OP_COSH: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.cosh(a)); + case VECTOR_OP_TANH: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.tanh(a)); + case VECTOR_OP_EXPM1: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.expm1(a)); + case VECTOR_OP_LOG1P: return (v0, m) -> + v0.uOp(m, (i, a) -> ($type$) Math.log1p(a)); +#end[FP] + default: return null; + } } // Binary lanewise support @@ -657,6 +706,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { Vector<$Boxtype$> v) { $abstractvectortype$ that = ($abstractvectortype$) v; that.check(this); + if (opKind(op, VO_SPECIAL {#if[!FP]? | VO_SHIFT})) { if (op == FIRST_NONZERO) { // FIXME: Support this in the JIT. 
@@ -684,87 +734,131 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { that = that.lanewise(NOT); op = AND; } else if (op == DIV) { - VectorMask<$Boxtype$> eqz = that.eq(($type$)0); + VectorMask<$Boxtype$> eqz = that.eq(($type$) 0); if (eqz.anyTrue()) { throw that.divZeroException(); } } #end[BITWISE] } + int opc = opCode(op); return VectorSupport.binaryOp( - opc, getClass(), $type$.class, length(), - this, that, - BIN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> ($type$)(a + b)); - case VECTOR_OP_SUB: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> ($type$)(a - b)); - case VECTOR_OP_MUL: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> ($type$)(a * b)); - case VECTOR_OP_DIV: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> ($type$)(a / b)); - case VECTOR_OP_MAX: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> ($type$)Math.max(a, b)); - case VECTOR_OP_MIN: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> ($type$)Math.min(a, b)); -#if[BITWISE] - case VECTOR_OP_AND: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> ($type$)(a & b)); - case VECTOR_OP_OR: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> ($type$)(a | b)); - case VECTOR_OP_XOR: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> ($type$)(a ^ b)); - case VECTOR_OP_LSHIFT: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> ($type$)(a << n)); - case VECTOR_OP_RSHIFT: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> ($type$)(a >> n)); - case VECTOR_OP_URSHIFT: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> ($type$)((a & LSHR_SETUP_MASK) >>> n)); - case VECTOR_OP_LROTATE: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> rotateLeft(a, (int)n)); - case VECTOR_OP_RROTATE: return (v0, v1) -> - v0.bOp(v1, (i, a, n) -> rotateRight(a, (int)n)); -#end[BITWISE] -#if[FP] - case VECTOR_OP_ATAN2: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> ($type$) Math.atan2(a, b)); - case VECTOR_OP_POW: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> ($type$) Math.pow(a, b)); - case VECTOR_OP_HYPOT: return (v0, v1) -> - v0.bOp(v1, (i, a, b) -> ($type$) Math.hypot(a, b)); -#end[FP] - default: return null; - }})); + opc, getClass(), null, $type$.class, length(), + this, that, null, + BIN_IMPL.find(op, opc, $abstractvectortype$::binaryOperations)); } - private static final - ImplCache> BIN_IMPL - = new ImplCache<>(Binary.class, $Type$Vector.class); /** * {@inheritDoc} * @see #lanewise(VectorOperators.Binary,$type$,VectorMask) */ - @ForceInline - public final + @Override + public abstract $abstractvectortype$ lanewise(VectorOperators.Binary op, Vector<$Boxtype$> v, - VectorMask<$Boxtype$> m) { -#if[BITWISE] + VectorMask<$Boxtype$> m); + @ForceInline + final + $abstractvectortype$ lanewiseTemplate(VectorOperators.Binary op, + Class> maskClass, + Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) { $abstractvectortype$ that = ($abstractvectortype$) v; - if (op == DIV) { - VectorMask<$Boxtype$> eqz = that.eq(($type$)0); - if (eqz.and(m).anyTrue()) { - throw that.divZeroException(); + that.check(this); + m.check(maskClass, this); + + if (opKind(op, VO_SPECIAL {#if[!FP]? | VO_SHIFT})) { + if (op == FIRST_NONZERO) { +#if[FP] + return blend(lanewise(op, v), m); +#else[FP] + // FIXME: Support this in the JIT. + VectorMask<$Boxbitstype$> thisNZ + = this.viewAsIntegralLanes().compare(NE, ($bitstype$) 0); + that = that.blend(($type$) 0, thisNZ.cast(vspecies())); + op = OR_UNCHECKED; +#end[FP] + } +#if[BITWISE] +#if[!FP] + if (opKind(op, VO_SHIFT)) { + // As per shift specification for Java, mask the shift count. 
+ // This allows the JIT to ignore some ISA details. + that = that.lanewise(AND, SHIFT_MASK); + } +#end[!FP] + if (op == AND_NOT) { + // FIXME: Support this in the JIT. + that = that.lanewise(NOT); + op = AND; + } else if (op == DIV) { + VectorMask<$Boxtype$> eqz = that.eq(($type$)0); + if (eqz.and(m).anyTrue()) { + throw that.divZeroException(); + } + // suppress div/0 exceptions in unset lanes + that = that.lanewise(NOT, eqz); } - // suppress div/0 exceptions in unset lanes - that = that.lanewise(NOT, eqz); - return blend(lanewise(DIV, that), m); +#end[BITWISE] } + + int opc = opCode(op); + return VectorSupport.binaryOp( + opc, getClass(), maskClass, $type$.class, length(), + this, that, m, + BIN_IMPL.find(op, opc, $abstractvectortype$::binaryOperations)); + } + + private static final + ImplCache>> + BIN_IMPL = new ImplCache<>(Binary.class, $Type$Vector.class); + + private static BinaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> binaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$)(a + b)); + case VECTOR_OP_SUB: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$)(a - b)); + case VECTOR_OP_MUL: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$)(a * b)); + case VECTOR_OP_DIV: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$)(a / b)); + case VECTOR_OP_MAX: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.max(a, b)); + case VECTOR_OP_MIN: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.min(a, b)); +#if[BITWISE] + case VECTOR_OP_AND: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$)(a & b)); + case VECTOR_OP_OR: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$)(a | b)); + case VECTOR_OP_XOR: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$)(a ^ b)); + case VECTOR_OP_LSHIFT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> ($type$)(a << n)); + case VECTOR_OP_RSHIFT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> ($type$)(a >> n)); + case VECTOR_OP_URSHIFT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> ($type$)((a & LSHR_SETUP_MASK) >>> n)); + case VECTOR_OP_LROTATE: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n)); + case VECTOR_OP_RROTATE: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n)); #end[BITWISE] - return blend(lanewise(op, v), m); +#if[FP] + case VECTOR_OP_OR: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> fromBits(toBits(a) | toBits(b))); + case VECTOR_OP_ATAN2: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.atan2(a, b)); + case VECTOR_OP_POW: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.pow(a, b)); + case VECTOR_OP_HYPOT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.hypot(a, b)); +#end[FP] + default: return null; + } } + // FIXME: Maybe all of the public final methods in this file (the // simple ones that just call lanewise) should be pushed down to // the X-VectorBits template. 
They can't optimize properly at @@ -829,7 +923,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $abstractvectortype$ lanewise(VectorOperators.Binary op, $type$ e, VectorMask<$Boxtype$> m) { - return blend(lanewise(op, e), m); +#if[BITWISE] + if (opKind(op, VO_SHIFT) && ($type$)(int)e == e) { + return lanewiseShift(op, (int) e, m); + } + if (op == AND_NOT) { + op = AND; e = ($type$) ~e; + } +#end[BITWISE] + return lanewise(op, broadcast(e), m); } #if[!long] @@ -848,12 +950,13 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $abstractvectortype$ lanewise(VectorOperators.Binary op, long e) { $type$ e1 = ($type$) e; - if ((long)e1 != e #if[BITWISE] + if ((long)e1 != e // allow shift ops to clip down their int parameters - && !(opKind(op, VO_SHIFT) && (int)e1 == e) + && !(opKind(op, VO_SHIFT) && (int)e1 == e)) { +#else[BITWISE] + if ((long)e1 != e) { #end[BITWISE] - ) { vspecies().checkValue(e); // for exception } return lanewise(op, e1); @@ -873,7 +976,17 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { public final $abstractvectortype$ lanewise(VectorOperators.Binary op, long e, VectorMask<$Boxtype$> m) { - return blend(lanewise(op, e), m); + $type$ e1 = ($type$) e; +#if[BITWISE] + if ((long)e1 != e + // allow shift ops to clip down their int parameters + && !(opKind(op, VO_SHIFT) && (int)e1 == e)) { +#else[BITWISE] + if ((long)e1 != e) { +#end[BITWISE] + vspecies().checkValue(e); // for exception + } + return lanewise(op, e1, m); } #end[!long] @@ -892,27 +1005,52 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { e &= SHIFT_MASK; int opc = opCode(op); return VectorSupport.broadcastInt( - opc, getClass(), $type$.class, length(), - this, e, - BIN_INT_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_LSHIFT: return (v, n) -> - v.uOp((i, a) -> ($type$)(a << n)); - case VECTOR_OP_RSHIFT: return (v, n) -> - v.uOp((i, a) -> ($type$)(a >> n)); - case VECTOR_OP_URSHIFT: return (v, n) -> - v.uOp((i, a) -> ($type$)((a & LSHR_SETUP_MASK) >>> n)); - case VECTOR_OP_LROTATE: return (v, n) -> - v.uOp((i, a) -> rotateLeft(a, (int)n)); - case VECTOR_OP_RROTATE: return (v, n) -> - v.uOp((i, a) -> rotateRight(a, (int)n)); - default: return null; - }})); + opc, getClass(), null, $type$.class, length(), + this, e, null, + BIN_INT_IMPL.find(op, opc, $abstractvectortype$::broadcastIntOperations)); + } + + /*package-private*/ + abstract $abstractvectortype$ + lanewiseShift(VectorOperators.Binary op, int e, VectorMask<$Boxtype$> m); + + /*package-private*/ + @ForceInline + final $abstractvectortype$ + lanewiseShiftTemplate(VectorOperators.Binary op, + Class> maskClass, + int e, VectorMask<$Boxtype$> m) { + m.check(maskClass, this); + assert(opKind(op, VO_SHIFT)); + // As per shift specification for Java, mask the shift count. 
+ e &= SHIFT_MASK; + int opc = opCode(op); + return VectorSupport.broadcastInt( + opc, getClass(), maskClass, $type$.class, length(), + this, e, m, + BIN_INT_IMPL.find(op, opc, $abstractvectortype$::broadcastIntOperations)); } + private static final - ImplCache> BIN_INT_IMPL + ImplCache>> BIN_INT_IMPL = new ImplCache<>(Binary.class, $Type$Vector.class); + private static VectorBroadcastIntOp<$abstractvectortype$, VectorMask<$Boxtype$>> broadcastIntOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_LSHIFT: return (v, n, m) -> + v.uOp(m, (i, a) -> ($type$)(a << n)); + case VECTOR_OP_RSHIFT: return (v, n, m) -> + v.uOp(m, (i, a) -> ($type$)(a >> n)); + case VECTOR_OP_URSHIFT: return (v, n, m) -> + v.uOp(m, (i, a) -> ($type$)((a & LSHR_SETUP_MASK) >>> n)); + case VECTOR_OP_LROTATE: return (v, n, m) -> + v.uOp(m, (i, a) -> rotateLeft(a, (int)n)); + case VECTOR_OP_RROTATE: return (v, n, m) -> + v.uOp(m, (i, a) -> rotateRight(a, (int)n)); + default: return null; + } + } + // As per shift specification for Java, mask the shift count. // We mask 0X3F (long), 0X1F (int), 0x0F (short), 0x7 (byte). // The latter two maskings go beyond the JLS, but seem reasonable @@ -972,20 +1110,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #end[BITWISE] int opc = opCode(op); return VectorSupport.ternaryOp( - opc, getClass(), $type$.class, length(), - this, that, tother, - TERN_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { -#if[FP] - case VECTOR_OP_FMA: return (v0, v1_, v2_) -> - v0.tOp(v1_, v2_, (i, a, b, c) -> Math.fma(a, b, c)); -#end[FP] - default: return null; - }})); + opc, getClass(), null, $type$.class, length(), + this, that, tother, null, + TERN_IMPL.find(op, opc, $abstractvectortype$::ternaryOperations)); } - private static final - ImplCache> TERN_IMPL - = new ImplCache<>(Ternary.class, $Type$Vector.class); /** * {@inheritDoc} @@ -993,13 +1121,54 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask) * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask) */ - @ForceInline - public final + @Override + public abstract $abstractvectortype$ lanewise(VectorOperators.Ternary op, Vector<$Boxtype$> v1, Vector<$Boxtype$> v2, - VectorMask<$Boxtype$> m) { - return blend(lanewise(op, v1, v2), m); + VectorMask<$Boxtype$> m); + @ForceInline + final + $abstractvectortype$ lanewiseTemplate(VectorOperators.Ternary op, + Class> maskClass, + Vector<$Boxtype$> v1, + Vector<$Boxtype$> v2, + VectorMask<$Boxtype$> m) { + $abstractvectortype$ that = ($abstractvectortype$) v1; + $abstractvectortype$ tother = ($abstractvectortype$) v2; + // It's a word: https://www.dictionary.com/browse/tother + // See also Chapter 11 of Dickens, Our Mutual Friend: + // "Totherest Governor," replied Mr Riderhood... + that.check(this); + tother.check(this); + m.check(maskClass, this); + +#if[BITWISE] + if (op == BITWISE_BLEND) { + // FIXME: Support this in the JIT. 
+ that = this.lanewise(XOR, that).lanewise(AND, tother); + return this.lanewise(XOR, that, m); + } +#end[BITWISE] + int opc = opCode(op); + return VectorSupport.ternaryOp( + opc, getClass(), maskClass, $type$.class, length(), + this, that, tother, m, + TERN_IMPL.find(op, opc, $abstractvectortype$::ternaryOperations)); + } + + private static final + ImplCache>> + TERN_IMPL = new ImplCache<>(Ternary.class, $Type$Vector.class); + + private static TernaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> ternaryOperations(int opc_) { + switch (opc_) { +#if[FP] + case VECTOR_OP_FMA: return (v0, v1_, v2_, m) -> + v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c)); +#end[FP] + default: return null; + } } /** @@ -1056,7 +1225,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $type$ e1, $type$ e2, VectorMask<$Boxtype$> m) { - return blend(lanewise(op, e1, e2), m); + return lanewise(op, broadcast(e1), broadcast(e2), m); } /** @@ -1114,7 +1283,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { Vector<$Boxtype$> v1, $type$ e2, VectorMask<$Boxtype$> m) { - return blend(lanewise(op, v1, e2), m); + return lanewise(op, v1, broadcast(e2), m); } /** @@ -1171,7 +1340,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $type$ e1, Vector<$Boxtype$> v2, VectorMask<$Boxtype$> m) { - return blend(lanewise(op, e1, v2), m); + return lanewise(op, broadcast(e1), v2, m); } // (Thus endeth the Great and Mighty Ternary Ogdoad.) @@ -2016,15 +2185,13 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { final > M compareTemplate(Class maskType, Comparison op, Vector<$Boxtype$> v) { - Objects.requireNonNull(v); - $Type$Species vsp = vspecies(); $abstractvectortype$ that = ($abstractvectortype$) v; that.check(this); int opc = opCode(op); return VectorSupport.compare( opc, getClass(), maskType, $type$.class, length(), - this, that, - (cond, v0, v1) -> { + this, that, null, + (cond, v0, v1, m1) -> { AbstractMask<$Boxtype$> m = v0.bTest(cond, v1, (cond_, i, a, b) -> compareWithOp(cond, a, b)); @@ -2034,6 +2201,28 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { }); } + /*package-private*/ + @ForceInline + final + > + M compareTemplate(Class maskType, Comparison op, Vector<$Boxtype$> v, M m) { + $abstractvectortype$ that = ($abstractvectortype$) v; + that.check(this); + m.check(maskType, this); + int opc = opCode(op); + return VectorSupport.compare( + opc, getClass(), maskType, $type$.class, length(), + this, that, m, + (cond, v0, v1, m1) -> { + AbstractMask<$Boxtype$> cmpM + = v0.bTest(cond, v1, (cond_, i, a, b) + -> compareWithOp(cond, a, b)); + @SuppressWarnings("unchecked") + M m2 = (M) cmpM.and(m1); + return m2; + }); + } + @ForceInline private static boolean compareWithOp(int cond, $type$ a, $type$ b) { return switch (cond) { @@ -2053,18 +2242,6 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { }; } - /** - * {@inheritDoc} - */ - @Override - @ForceInline - public final - VectorMask<$Boxtype$> compare(VectorOperators.Comparison op, - Vector<$Boxtype$> v, - VectorMask<$Boxtype$> m) { - return compare(op, v).and(m); - } - /** * Tests this vector by comparing it with an input scalar, * according to the given comparison operation. 
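(A usage sketch, for orientation rather than as part of the patch: the removed compare(op, v, m) above computed the full comparison and then and-ed the mask in afterwards, while the new masked compareTemplate passes the mask down into VectorSupport.compare. The method name countAbove, the IntVector species, and the threshold parameter below are illustrative choices only.)

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorOperators;

    // Count the elements of a[] above a threshold; the loop tail runs
    // under a partial mask, which now reaches the compare intrinsic
    // directly instead of being applied as a separate and() step.
    static int countAbove(int[] a, int threshold) {
        var species = IntVector.SPECIES_PREFERRED;
        int count = 0;
        for (int i = 0; i < a.length; i += species.length()) {
            VectorMask<Integer> inRange = species.indexInRange(i, a.length);
            IntVector v = IntVector.fromArray(species, a, i, inRange);
            count += v.compare(VectorOperators.GT, threshold, inRange).trueCount();
        }
        return count;
    }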
@@ -2123,7 +2300,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { public final VectorMask<$Boxtype$> compare(VectorOperators.Comparison op, $type$ e, VectorMask<$Boxtype$> m) { - return compare(op, e).and(m); + return compare(op, broadcast(e), m); } #if[!long] @@ -2378,9 +2555,9 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $abstractvectortype$ rearrangeTemplate(Class shuffletype, S shuffle) { shuffle.checkIndexes(); return VectorSupport.rearrangeOp( - getClass(), shuffletype, $type$.class, length(), - this, shuffle, - (v1, s_) -> v1.uOp((i, a) -> { + getClass(), shuffletype, null, $type$.class, length(), + this, shuffle, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); return v1.lane(ei); })); @@ -2397,24 +2574,25 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /*package-private*/ @ForceInline final - > + , M extends VectorMask<$Boxtype$>> $abstractvectortype$ rearrangeTemplate(Class shuffletype, + Class masktype, S shuffle, - VectorMask<$Boxtype$> m) { - $abstractvectortype$ unmasked = - VectorSupport.rearrangeOp( - getClass(), shuffletype, $type$.class, length(), - this, shuffle, - (v1, s_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); - return ei < 0 ? 0 : v1.lane(ei); - })); + M m) { + + m.check(masktype, this); VectorMask<$Boxtype$> valid = shuffle.laneIsValid(); if (m.andNot(valid).anyTrue()) { shuffle.checkIndexes(); throw new AssertionError(); } - return broadcast(($type$)0).blend(unmasked, m); + return VectorSupport.rearrangeOp( + getClass(), shuffletype, masktype, $type$.class, length(), + this, shuffle, m, + (v1, s_, m_) -> v1.uOp((i, a) -> { + int ei = s_.laneSource(i); + return ei < 0 || !m_.laneIsSet(i) ? 
0 : v1.lane(ei); + })); } /** @@ -2437,17 +2615,17 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { S ws = (S) shuffle.wrapIndexes(); $abstractvectortype$ r0 = VectorSupport.rearrangeOp( - getClass(), shuffletype, $type$.class, length(), - this, ws, - (v0, s_) -> v0.uOp((i, a) -> { + getClass(), shuffletype, null, $type$.class, length(), + this, ws, null, + (v0, s_, m_) -> v0.uOp((i, a) -> { int ei = s_.laneSource(i); return v0.lane(ei); })); $abstractvectortype$ r1 = VectorSupport.rearrangeOp( - getClass(), shuffletype, $type$.class, length(), - v, ws, - (v1, s_) -> v1.uOp((i, a) -> { + getClass(), shuffletype, null, $type$.class, length(), + v, ws, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); return v1.lane(ei); })); @@ -2839,9 +3017,18 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline final $type$ reduceLanesTemplate(VectorOperators.Associative op, + Class> maskClass, VectorMask<$Boxtype$> m) { - $abstractvectortype$ v = reduceIdentityVector(op).blend(this, m); - return v.reduceLanesTemplate(op); + m.check(maskClass, this); + if (op == FIRST_NONZERO) { + $abstractvectortype$ v = reduceIdentityVector(op).blend(this, m); + return v.reduceLanesTemplate(op); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), maskClass, $type$.class, length(), + this, m, + REDUCE_IMPL.find(op, opc, $abstractvectortype$::reductionOperations))); } /*package-private*/ @@ -2856,32 +3043,36 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), $type$.class, length(), - this, - REDUCE_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return v -> - toBits(v.rOp(($type$)0, (i, a, b) -> ($type$)(a + b))); - case VECTOR_OP_MUL: return v -> - toBits(v.rOp(($type$)1, (i, a, b) -> ($type$)(a * b))); - case VECTOR_OP_MIN: return v -> - toBits(v.rOp(MAX_OR_INF, (i, a, b) -> ($type$) Math.min(a, b))); - case VECTOR_OP_MAX: return v -> - toBits(v.rOp(MIN_OR_INF, (i, a, b) -> ($type$) Math.max(a, b))); + opc, getClass(), null, $type$.class, length(), + this, null, + REDUCE_IMPL.find(op, opc, $abstractvectortype$::reductionOperations))); + } + + private static final + ImplCache>> + REDUCE_IMPL = new ImplCache<>(Associative.class, $Type$Vector.class); + + private static ReductionOperation<$abstractvectortype$, VectorMask<$Boxtype$>> reductionOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a + b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp(($type$)1, m, (i, a, b) -> ($type$)(a * b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> ($type$) Math.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> ($type$) Math.max(a, b))); #if[BITWISE] - case VECTOR_OP_AND: return v -> - toBits(v.rOp(($type$)-1, (i, a, b) -> ($type$)(a & b))); - case VECTOR_OP_OR: return v -> - toBits(v.rOp(($type$)0, (i, a, b) -> ($type$)(a | b))); - case VECTOR_OP_XOR: return v -> - toBits(v.rOp(($type$)0, (i, a, b) -> ($type$)(a ^ b))); + case VECTOR_OP_AND: return (v, m) -> + toBits(v.rOp(($type$)-1, m, (i, a, b) -> ($type$)(a & b))); + case VECTOR_OP_OR: return (v, m) -> + toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a | b))); + case VECTOR_OP_XOR: return (v, m) -> + toBits(v.rOp(($type$)0, m, (i, a, 
b) -> ($type$)(a ^ b))); #end[BITWISE] - default: return null; - }}))); + default: return null; + } } - private static final - ImplCache> REDUCE_IMPL - = new ImplCache<>(Associative.class, $Type$Vector.class); private @ForceInline @@ -3175,9 +3366,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { VectorMask<$Boxtype$> m) { $Type$Species vsp = ($Type$Species) species; if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) { - $abstractvectortype$ zero = vsp.zero(); - $abstractvectortype$ v = zero.fromByteArray0(a, offset); - return zero.blend(v.maybeSwap(bo), m); + return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo); } // FIXME: optimize @@ -3239,8 +3428,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { VectorMask<$Boxtype$> m) { $Type$Species vsp = ($Type$Species) species; if (offset >= 0 && offset <= (a.length - species.length())) { - $abstractvectortype$ zero = vsp.zero(); - return zero.blend(zero.fromArray0(a, offset), m); + return vsp.dummyVector().fromArray0(a, offset, m); } // FIXME: optimize @@ -3333,13 +3521,13 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { vix = VectorIntrinsics.checkIndex(vix, a.length); return VectorSupport.loadWithMap( - vectorType, $type$.class, vsp.laneCount(), - IntVector.species(vsp.indexShape()).vectorType(), - a, ARRAY_BASE, vix, + vectorType, null, $type$.class, vsp.laneCount(), + isp.vectorType(), + a, ARRAY_BASE, vix, null, a, offset, indexMap, mapOffset, vsp, - ($type$[] c, int idx, int[] iMap, int idy, $Type$Species s) -> + (c, idx, iMap, idy, s, vm) -> s.vOp(n -> c[idx + iMap[idy+n]])); - } + } #end[byteOrShort] /** @@ -3399,9 +3587,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { return fromArray(species, a, offset, indexMap, mapOffset); } else { - // FIXME: Cannot vectorize yet, if there's a mask. 
$Type$Species vsp = ($Type$Species) species; - return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]); + return vsp.dummyVector().fromArray0(a, offset, indexMap, mapOffset, m); } } #end[byteOrShort] @@ -3462,8 +3649,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { VectorMask<$Boxtype$> m) { $Type$Species vsp = ($Type$Species) species; if (offset >= 0 && offset <= (a.length - species.length())) { - $abstractvectortype$ zero = vsp.zero(); - return zero.blend(zero.fromCharArray0(a, offset), m); + return vsp.dummyVector().fromCharArray0(a, offset, m); } // FIXME: optimize @@ -3623,7 +3809,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $Type$Species vsp = ($Type$Species) species; if (offset >= 0 && offset <= (a.length - species.length())) { $abstractvectortype$ zero = vsp.zero(); - return zero.blend(zero.fromBooleanArray0(a, offset), m); + return vsp.dummyVector().fromBooleanArray0(a, offset, m); } // FIXME: optimize @@ -3814,9 +4000,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { VectorMask<$Boxtype$> m) { $Type$Species vsp = ($Type$Species) species; if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) { - $abstractvectortype$ zero = vsp.zero(); - $abstractvectortype$ v = zero.fromByteBuffer0(bb, offset); - return zero.blend(v.maybeSwap(bo), m); + return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo); } // FIXME: optimize @@ -3888,10 +4072,9 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { if (m.allTrue()) { intoArray(a, offset); } else { - // FIXME: optimize $Type$Species vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 1, a.length); - stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v); + intoArray0(a, offset, m); } } @@ -3973,12 +4156,12 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { vix = VectorIntrinsics.checkIndex(vix, a.length); VectorSupport.storeWithMap( - vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + vsp.vectorType(), null, vsp.elementType(), vsp.laneCount(), isp.vectorType(), a, arrayAddress(a, 0), vix, - this, + this, null, a, offset, indexMap, mapOffset, - (arr, off, v, map, mo) + (arr, off, v, map, mo, vm) -> v.stOp(arr, off, (arr_, off_, i, e) -> { int j = map[mo + i]; @@ -4039,12 +4222,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { intoArray(a, offset, indexMap, mapOffset); } else { - // FIXME: Cannot vectorize yet, if there's a mask. 
- stOp(a, offset, m, - (arr, off, i, e) -> { - int j = indexMap[mapOffset + i]; - arr[off + j] = e; - }); + intoArray0(a, offset, indexMap, mapOffset, m); } } #end[byteOrShort] @@ -4112,10 +4290,9 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { if (m.allTrue()) { intoCharArray(a, offset); } else { - // FIXME: optimize $Type$Species vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 1, a.length); - stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = (char) v); + intoCharArray0(a, offset, m); } } @@ -4275,10 +4452,9 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { if (m.allTrue()) { intoBooleanArray(a, offset); } else { - // FIXME: optimize $Type$Species vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, 1, a.length); - stOp(a, offset, m, (arr, off, i, e) -> arr[off+i] = (e & 1) != 0); + intoBooleanArray0(a, offset, m); } } @@ -4398,12 +4574,9 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { if (m.allTrue()) { intoByteArray(a, offset, bo); } else { - // FIXME: optimize $Type$Species vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, a.length); - ByteBuffer wb = wrapper(a, bo); - this.stOp(wb, offset, m, - (wb_, o, i, e) -> wb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e)); + maybeSwap(bo).intoByteArray0(a, offset, m); } } @@ -4415,7 +4588,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { public final void intoByteBuffer(ByteBuffer bb, int offset, ByteOrder bo) { - if (bb.isReadOnly()) { + if (ScopedMemoryAccess.isReadOnly(bb)) { throw new ReadOnlyBufferException(); } offset = checkFromIndexSize(offset, byteSize(), bb.limit()); @@ -4434,15 +4607,12 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { if (m.allTrue()) { intoByteBuffer(bb, offset, bo); } else { - // FIXME: optimize if (bb.isReadOnly()) { throw new ReadOnlyBufferException(); } $Type$Species vsp = vspecies(); checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, bb.limit()); - ByteBuffer wb = wrapper(bb, bo); - this.stOp(wb, offset, m, - (wb_, o, i, e) -> wb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e)); + maybeSwap(bo).intoByteBuffer0(bb, offset, m); } } @@ -4480,6 +4650,84 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { (arr_, off_, i) -> arr_[off_ + i])); } + /*package-private*/ + abstract + $abstractvectortype$ fromArray0($type$[] a, int offset, VectorMask<$Boxtype$> m); + @ForceInline + final + > + $abstractvectortype$ fromArray0Template(Class maskClass, $type$[] a, int offset, M m) { + m.check(species()); + $Type$Species vsp = vspecies(); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> s.ldOp(arr, off, vm, + (arr_, off_, i) -> arr_[off_ + i])); + } + +#if[!byteOrShort] + /*package-private*/ + abstract + $abstractvectortype$ fromArray0($type$[] a, int offset, + int[] indexMap, int mapOffset, + VectorMask<$Boxtype$> m); + @ForceInline + final + > + $abstractvectortype$ fromArray0Template(Class maskClass, $type$[] a, int offset, + int[] indexMap, int mapOffset, M m) { + $Type$Species vsp = vspecies(); + IntVector.IntSpecies isp = IntVector.species(vsp.indexShape()); + Objects.requireNonNull(a); + Objects.requireNonNull(indexMap); + m.check(vsp); + Class vectorType = vsp.vectorType(); + +#if[longOrDouble] + if (vsp.laneCount() == 1) { + return 
$abstractvectortype$.fromArray(vsp, a, offset + indexMap[mapOffset], m); + } + + // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k] + IntVector vix; + if (isp.laneCount() != vsp.laneCount()) { + // For $Type$MaxVector, if vector length is non-power-of-two or + // 2048 bits, indexShape of $Type$ species is S_MAX_BIT. + // Assume that vector length is 2048, then the lane count of $Type$ + // vector is 32. When converting $Type$ species to int species, + // indexShape is still S_MAX_BIT, but the lane count of int vector + // is 64. So when loading index vector (IntVector), only lower half + // of index data is needed. + vix = IntVector + .fromArray(isp, indexMap, mapOffset, IntMaxVector.IntMaxMask.LOWER_HALF_TRUE_MASK) + .add(offset); + } else { + vix = IntVector + .fromArray(isp, indexMap, mapOffset) + .add(offset); + } +#else[longOrDouble] + // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k] + IntVector vix = IntVector + .fromArray(isp, indexMap, mapOffset) + .add(offset); +#end[longOrDouble] + + // FIXME: Check index under mask controlling. + vix = VectorIntrinsics.checkIndex(vix, a.length); + + return VectorSupport.loadWithMap( + vectorType, maskClass, $type$.class, vsp.laneCount(), + isp.vectorType(), + a, ARRAY_BASE, vix, m, + a, offset, indexMap, mapOffset, vsp, + (c, idx, iMap, idy, s, vm) -> + s.vOp(vm, n -> c[idx + iMap[idy+n]])); + } +#end[!byteOrShort] + #if[short] /*package-private*/ abstract @@ -4495,6 +4743,23 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { (arr, off, s) -> s.ldOp(arr, off, (arr_, off_, i) -> (short) arr_[off_ + i])); } + + /*package-private*/ + abstract + $abstractvectortype$ fromCharArray0(char[] a, int offset, VectorMask<$Boxtype$> m); + @ForceInline + final + > + $abstractvectortype$ fromCharArray0Template(Class maskClass, char[] a, int offset, M m) { + m.check(species()); + $Type$Species vsp = vspecies(); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, charArrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> s.ldOp(arr, off, vm, + (arr_, off_, i) -> (short) arr_[off_ + i])); + } #end[short] #if[byte] @@ -4512,6 +4777,23 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { (arr, off, s) -> s.ldOp(arr, off, (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0))); } + + /*package-private*/ + abstract + $abstractvectortype$ fromBooleanArray0(boolean[] a, int offset, VectorMask<$Boxtype$> m); + @ForceInline + final + > + $abstractvectortype$ fromBooleanArray0Template(Class maskClass, boolean[] a, int offset, M m) { + m.check(species()); + $Type$Species vsp = vspecies(); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, booleanArrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> s.ldOp(arr, off, vm, + (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 
1 : 0))); + } #end[byte] @Override @@ -4532,6 +4814,25 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { }); } + abstract + $abstractvectortype$ fromByteArray0(byte[] a, int offset, VectorMask<$Boxtype$> m); + @ForceInline + final + > + $abstractvectortype$ fromByteArray0Template(Class maskClass, byte[] a, int offset, M m) { + $Type$Species vsp = vspecies(); + m.check(vsp); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + return s.ldOp(wb, off, vm, + (wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$)); + }); + } + abstract $abstractvectortype$ fromByteBuffer0(ByteBuffer bb, int offset); @ForceInline @@ -4548,6 +4849,24 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { }); } + abstract + $abstractvectortype$ fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<$Boxtype$> m); + @ForceInline + final + > + $abstractvectortype$ fromByteBuffer0Template(Class maskClass, ByteBuffer bb, int offset, M m) { + $Type$Species vsp = vspecies(); + m.check(vsp); + return ScopedMemoryAccess.loadFromByteBufferMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + bb, offset, m, vsp, + (buf, off, s, vm) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + return s.ldOp(wb, off, vm, + (wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$)); + }); + } + // Unchecked storing operations in native byte order. // Caller is responsible for applying index checks, masking, and // byte swapping. @@ -4567,6 +4886,105 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { (arr_, off_, i, e) -> arr_[off_+i] = e)); } + abstract + void intoArray0($type$[] a, int offset, VectorMask<$Boxtype$> m); + @ForceInline + final + > + void intoArray0Template(Class maskClass, $type$[] a, int offset, M m) { + m.check(species()); + $Type$Species vsp = vspecies(); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> arr_[off_ + i] = e)); + } + +#if[!byteOrShort] + abstract + void intoArray0($type$[] a, int offset, + int[] indexMap, int mapOffset, + VectorMask<$Boxtype$> m); + @ForceInline + final + > + void intoArray0Template(Class maskClass, $type$[] a, int offset, + int[] indexMap, int mapOffset, M m) { + m.check(species()); + $Type$Species vsp = vspecies(); + IntVector.IntSpecies isp = IntVector.species(vsp.indexShape()); +#if[longOrDouble] + if (vsp.laneCount() == 1) { + intoArray(a, offset + indexMap[mapOffset], m); + return; + } + + // Index vector: vix[0:n] = i -> offset + indexMap[mo + i] + IntVector vix; + if (isp.laneCount() != vsp.laneCount()) { + // For $Type$MaxVector, if vector length is 2048 bits, indexShape + // of $Type$ species is S_MAX_BIT. and the lane count of $Type$ + // vector is 32. When converting $Type$ species to int species, + // indexShape is still S_MAX_BIT, but the lane count of int vector + // is 64. So when loading index vector (IntVector), only lower half + // of index data is needed. 
+ vix = IntVector + .fromArray(isp, indexMap, mapOffset, IntMaxVector.IntMaxMask.LOWER_HALF_TRUE_MASK) + .add(offset); + } else { + vix = IntVector + .fromArray(isp, indexMap, mapOffset) + .add(offset); + } + +#else[longOrDouble] + // Index vector: vix[0:n] = i -> offset + indexMap[mo + i] + IntVector vix = IntVector + .fromArray(isp, indexMap, mapOffset) + .add(offset); +#end[longOrDouble] + + // FIXME: Check index under mask controlling. + vix = VectorIntrinsics.checkIndex(vix, a.length); + + VectorSupport.storeWithMap( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + isp.vectorType(), + a, arrayAddress(a, 0), vix, + this, m, + a, offset, indexMap, mapOffset, + (arr, off, v, map, mo, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> { + int j = map[mo + i]; + arr[off + j] = e; + })); + } +#end[!byteOrShort] + +#if[byte] + abstract + void intoBooleanArray0(boolean[] a, int offset, VectorMask<$Boxtype$> m); + @ForceInline + final + > + void intoBooleanArray0Template(Class maskClass, boolean[] a, int offset, M m) { + m.check(species()); + $Type$Species vsp = vspecies(); + ByteVector normalized = this.and((byte) 1); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, booleanArrayAddress(a, offset), + normalized, m, a, offset, + (arr, off, v, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0)); + } +#end[byte] + abstract void intoByteArray0(byte[] a, int offset); @ForceInline @@ -4584,6 +5002,25 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { }); } + abstract + void intoByteArray0(byte[] a, int offset, VectorMask<$Boxtype$> m); + @ForceInline + final + > + void intoByteArray0Template(Class maskClass, byte[] a, int offset, M m) { + $Type$Species vsp = vspecies(); + m.check(vsp); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + v.stOp(wb, off, vm, + (tb_, o, i, e) -> tb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e)); + }); + } + @ForceInline final void intoByteBuffer0(ByteBuffer bb, int offset) { @@ -4598,6 +5035,44 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { }); } + abstract + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<$Boxtype$> m); + @ForceInline + final + > + void intoByteBuffer0Template(Class maskClass, ByteBuffer bb, int offset, M m) { + $Type$Species vsp = vspecies(); + m.check(vsp); + ScopedMemoryAccess.storeIntoByteBufferMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + this, m, bb, offset, + (buf, off, v, vm) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + v.stOp(wb, off, vm, + (wb_, o, i, e) -> wb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e)); + }); + } + +#if[short] + /*package-private*/ + abstract + void intoCharArray0(char[] a, int offset, VectorMask<$Boxtype$> m); + @ForceInline + final + > + void intoCharArray0Template(Class maskClass, char[] a, int offset, M m) { + m.check(species()); + $Type$Species vsp = vspecies(); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, charArrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> arr_[off_ + i] = (char) e)); + } +#end[short] + // End of low-level memory operations. 
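(A companion sketch, again illustrative rather than part of the patch: the masked fromArray0/intoArray0 templates above replace the earlier "load, then blend with zero" and scalar stOp fallbacks, so a masked tail iteration stays on the vector path end to end. The species choice and the scale method below are assumptions for the example.)

    import jdk.incubator.vector.FloatVector;
    import jdk.incubator.vector.VectorMask;

    // Scale an array in place; on the final iteration the partial mask
    // flows through both the masked load and the masked store.
    static void scale(float[] a, float factor) {
        var species = FloatVector.SPECIES_PREFERRED;
        for (int i = 0; i < a.length; i += species.length()) {
            VectorMask<Float> m = species.indexInRange(i, a.length);
            FloatVector.fromArray(species, a, i, m)   // masked load (fromArray0)
                       .mul(factor)
                       .intoArray(a, i, m);           // masked store (intoArray0)
        }
    }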
private static @@ -4973,7 +5448,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /*package-private*/ @ForceInline $abstractvectortype$ ldOp(M memory, int offset, - AbstractMask<$Boxtype$> m, + VectorMask<$Boxtype$> m, FLdOp f) { return dummyVector().ldOp(memory, offset, m, f); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template index 225d0e5dd774c0fddf7f317fba87ace32f24449b..df15c85fcccc301c668efa9d2dde61da4c11b6a3 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template @@ -238,8 +238,8 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline final @Override - $type$ rOp($type$ v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + $type$ rOp($type$ v, VectorMask<$Boxtype$> m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -275,12 +275,24 @@ final class $vectortype$ extends $abstractvectortype$ { return ($vectortype$) super.lanewiseTemplate(op); // specialize } + @Override + @ForceInline + public $vectortype$ lanewise(Unary op, VectorMask<$Boxtype$> m) { + return ($vectortype$) super.lanewiseTemplate(op, $masktype$.class, ($masktype$) m); // specialize + } + @Override @ForceInline public $vectortype$ lanewise(Binary op, Vector<$Boxtype$> v) { return ($vectortype$) super.lanewiseTemplate(op, v); // specialize } + @Override + @ForceInline + public $vectortype$ lanewise(Binary op, Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) { + return ($vectortype$) super.lanewiseTemplate(op, $masktype$.class, v, ($masktype$) m); // specialize + } + #if[!FP] /*package-private*/ @Override @@ -288,6 +300,13 @@ final class $vectortype$ extends $abstractvectortype$ { lanewiseShift(VectorOperators.Binary op, int e) { return ($vectortype$) super.lanewiseShiftTemplate(op, e); // specialize } + + /*package-private*/ + @Override + @ForceInline $vectortype$ + lanewiseShift(VectorOperators.Binary op, int e, VectorMask<$Boxtype$> m) { + return ($vectortype$) super.lanewiseShiftTemplate(op, $masktype$.class, e, ($masktype$) m); // specialize + } #end[!FP] /*package-private*/ @@ -295,10 +314,18 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline public final $vectortype$ - lanewise(VectorOperators.Ternary op, Vector<$Boxtype$> v1, Vector<$Boxtype$> v2) { + lanewise(Ternary op, Vector<$Boxtype$> v1, Vector<$Boxtype$> v2) { return ($vectortype$) super.lanewiseTemplate(op, v1, v2); // specialize } + @Override + @ForceInline + public final + $vectortype$ + lanewise(Ternary op, Vector<$Boxtype$> v1, Vector<$Boxtype$> v2, VectorMask<$Boxtype$> m) { + return ($vectortype$) super.lanewiseTemplate(op, $masktype$.class, v1, v2, ($masktype$) m); // specialize + } + @Override @ForceInline public final @@ -318,7 +345,7 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline public final $type$ reduceLanes(VectorOperators.Associative op, VectorMask<$Boxtype$> m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, $masktype$.class, ($masktype$) m); // specialized } @Override @@ -331,7 +358,7 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask<$Boxtype$> m) { - return (long) 
super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, $masktype$.class, ($masktype$) m); // specialized } @ForceInline @@ -369,6 +396,13 @@ final class $vectortype$ extends $abstractvectortype$ { } #end[!long] + @Override + @ForceInline + public final $masktype$ compare(Comparison op, Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) { + return super.compareTemplate($masktype$.class, op, v, ($masktype$) m); + } + + @Override @ForceInline public $vectortype$ blend(Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) { @@ -425,6 +459,7 @@ final class $vectortype$ extends $abstractvectortype$ { VectorMask<$Boxtype$> m) { return ($vectortype$) super.rearrangeTemplate($shuffletype$.class, + $masktype$.class, ($shuffletype$) shuffle, ($masktype$) m); // specialize } @@ -855,16 +890,12 @@ final class $vectortype$ extends $abstractvectortype$ { AbstractSpecies species = (AbstractSpecies) dsp; if (length() != species.laneCount()) throw new IllegalArgumentException("VectorMask length and species length differ"); - if (VSIZE == species.vectorBitSize()) { - Class dtype = species.elementType(); - Class dmtype = species.maskType(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, - this.getClass(), ETYPE, VLENGTH, - dmtype, dtype, VLENGTH, - this, species, - $Type$$bits$Mask::defaultMaskCast); - } - return this.defaultMaskCast(species); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); } @Override @@ -890,9 +921,9 @@ final class $vectortype$ extends $abstractvectortype$ { public $masktype$ and(VectorMask<$Boxtype$> mask) { Objects.requireNonNull(mask); $masktype$ m = ($masktype$)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, $masktype$.class, $bitstype$.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b)); + return VectorSupport.binaryOp(VECTOR_OP_AND, $masktype$.class, null, $bitstype$.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @Override @@ -900,9 +931,9 @@ final class $vectortype$ extends $abstractvectortype$ { public $masktype$ or(VectorMask<$Boxtype$> mask) { Objects.requireNonNull(mask); $masktype$ m = ($masktype$)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, $masktype$.class, $bitstype$.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b)); + return VectorSupport.binaryOp(VECTOR_OP_OR, $masktype$.class, null, $bitstype$.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @ForceInline @@ -910,9 +941,9 @@ final class $vectortype$ extends $abstractvectortype$ { $masktype$ xor(VectorMask<$Boxtype$> mask) { Objects.requireNonNull(mask); $masktype$ m = ($masktype$)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, $masktype$.class, $bitstype$.class, VLENGTH, - this, m, - (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + return VectorSupport.binaryOp(VECTOR_OP_XOR, $masktype$.class, null, $bitstype$.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } // Mask Query operations @@ -920,22 +951,32 @@ final class $vectortype$ extends $abstractvectortype$ { @Override @ForceInline public int trueCount() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, $masktype$.class, $bitstype$.class, VLENGTH, this, - (m) -> trueCountHelper((($masktype$)m).getBits())); + return (int) 
VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, $masktype$.class, $bitstype$.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, $masktype$.class, $bitstype$.class, VLENGTH, this, - (m) -> firstTrueHelper((($masktype$)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, $masktype$.class, $bitstype$.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, $masktype$.class, $bitstype$.class, VLENGTH, this, - (m) -> lastTrueHelper((($masktype$)m).getBits())); + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, $masktype$.class, $bitstype$.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, $masktype$.class, $bitstype$.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); } // Reductions @@ -1061,6 +1102,22 @@ final class $vectortype$ extends $abstractvectortype$ { return super.fromArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + $abstractvectortype$ fromArray0($type$[] a, int offset, VectorMask<$Boxtype$> m) { + return super.fromArray0Template($masktype$.class, a, offset, ($masktype$) m); // specialize + } + +#if[!byteOrShort] + @ForceInline + @Override + final + $abstractvectortype$ fromArray0($type$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m) { + return super.fromArray0Template($masktype$.class, a, offset, indexMap, mapOffset, ($masktype$) m); + } +#end[!byteOrShort] + #if[short] @ForceInline @Override @@ -1068,6 +1125,13 @@ final class $vectortype$ extends $abstractvectortype$ { $abstractvectortype$ fromCharArray0(char[] a, int offset) { return super.fromCharArray0Template(a, offset); // specialize } + + @ForceInline + @Override + final + $abstractvectortype$ fromCharArray0(char[] a, int offset, VectorMask<$Boxtype$> m) { + return super.fromCharArray0Template($masktype$.class, a, offset, ($masktype$) m); // specialize + } #end[short] #if[byte] @@ -1077,6 +1141,13 @@ final class $vectortype$ extends $abstractvectortype$ { $abstractvectortype$ fromBooleanArray0(boolean[] a, int offset) { return super.fromBooleanArray0Template(a, offset); // specialize } + + @ForceInline + @Override + final + $abstractvectortype$ fromBooleanArray0(boolean[] a, int offset, VectorMask<$Boxtype$> m) { + return super.fromBooleanArray0Template($masktype$.class, a, offset, ($masktype$) m); // specialize + } #end[byte] @ForceInline @@ -1086,6 +1157,13 @@ final class $vectortype$ extends $abstractvectortype$ { return super.fromByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + $abstractvectortype$ fromByteArray0(byte[] a, int offset, VectorMask<$Boxtype$> m) { + return super.fromByteArray0Template($masktype$.class, a, offset, ($masktype$) m); // specialize + } + @ForceInline @Override final @@ -1093,6 +1171,13 @@ final class $vectortype$ extends $abstractvectortype$ { return super.fromByteBuffer0Template(bb, offset); // specialize } + @ForceInline + @Override + final + $abstractvectortype$ fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<$Boxtype$> m) { + 
return super.fromByteBuffer0Template($masktype$.class, bb, offset, ($masktype$) m); // specialize + } + @ForceInline @Override final @@ -1100,6 +1185,31 @@ final class $vectortype$ extends $abstractvectortype$ { super.intoArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoArray0($type$[] a, int offset, VectorMask<$Boxtype$> m) { + super.intoArray0Template($masktype$.class, a, offset, ($masktype$) m); + } + +#if[!byteOrShort] + @ForceInline + @Override + final + void intoArray0($type$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m) { + super.intoArray0Template($masktype$.class, a, offset, indexMap, mapOffset, ($masktype$) m); + } +#end[!byteOrShort] + +#if[byte] + @ForceInline + @Override + final + void intoBooleanArray0(boolean[] a, int offset, VectorMask<$Boxtype$> m) { + super.intoBooleanArray0Template($masktype$.class, a, offset, ($masktype$) m); + } +#end[byte] + @ForceInline @Override final @@ -1107,6 +1217,29 @@ final class $vectortype$ extends $abstractvectortype$ { super.intoByteArray0Template(a, offset); // specialize } + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask<$Boxtype$> m) { + super.intoByteArray0Template($masktype$.class, a, offset, ($masktype$) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<$Boxtype$> m) { + super.intoByteBuffer0Template($masktype$.class, bb, offset, ($masktype$) m); + } + +#if[short] + @ForceInline + @Override + final + void intoCharArray0(char[] a, int offset, VectorMask<$Boxtype$> m) { + super.intoCharArray0Template($masktype$.class, a, offset, ($masktype$) m); + } +#end[short] + // End of specialized low-level memory operations. // ================================================ diff --git a/src/jdk.incubator.vector/windows/native/libjsvml/globals_vectorApiSupport_windows.S.inc b/src/jdk.incubator.vector/windows/native/libjsvml/globals_vectorApiSupport_windows.S.inc index 837e0cc405df8c16fd1708605526d756ad68ce66..f1d830b1e8cc3800a1796a1421c18b1bb553bcd9 100644 --- a/src/jdk.incubator.vector/windows/native/libjsvml/globals_vectorApiSupport_windows.S.inc +++ b/src/jdk.incubator.vector/windows/native/libjsvml/globals_vectorApiSupport_windows.S.inc @@ -19,9 +19,6 @@ ; or visit www.oracle.com if you need additional information or have any ; questions. -; This file contains duplicate entries as globalDefinitions_vecApi.hpp -; It is intended for inclusion in .s files compiled with masm - ; Used to check whether building on x86_64 architecture. Equivalent to checking in regular hpp file for #ifdef _WIN64 IFDEF RAX diff --git a/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/monitor/HostIdentifier.java b/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/monitor/HostIdentifier.java index b80fccaf52f0317175294f100f160bb570388e1e..c7472e75abd70f99105add54b42f67de159a6cef 100644 --- a/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/monitor/HostIdentifier.java +++ b/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/monitor/HostIdentifier.java @@ -106,7 +106,7 @@ public class HostIdentifier { * by the string. 
*/ private URI canonicalize(String uriString) throws URISyntaxException { - if ((uriString == null) || (uriString.compareTo("localhost") == 0)) { + if (uriString == null || uriString.equals("localhost")) { uriString = "//localhost"; return new URI(uriString); } @@ -247,7 +247,7 @@ public class HostIdentifier { String authority = vmid.getAuthority(); // check for 'file:' VmIdentifiers and handled as a special case. - if ((scheme != null) && (scheme.compareTo("file") == 0)) { + if ("file".equals(scheme)) { try { uri = new URI("file://localhost"); } catch (URISyntaxException e) { }; @@ -343,7 +343,7 @@ public class HostIdentifier { String host = vmid.getHost(); String authority = vmid.getAuthority(); - if ((scheme != null) && (scheme.compareTo("file") == 0)) { + if ("file".equals(scheme)) { // don't attempt to resolve a file based VmIdentifier. return vmid; } diff --git a/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/monitor/MonitoredHost.java b/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/monitor/MonitoredHost.java index b119a00452090e7be232148659346fe1d685a3b9..12969fa670ed2524588d97590764e49ce46b33ec 100644 --- a/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/monitor/MonitoredHost.java +++ b/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/monitor/MonitoredHost.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2004, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -203,7 +203,7 @@ public abstract class MonitoredHost { assert hostname != null; if (scheme == null) { - if (hostname.compareTo("localhost") == 0) { + if (hostname.equals("localhost")) { scheme = LOCAL_PROTOCOL; } else { scheme = REMOTE_PROTOCOL; diff --git a/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/monitor/MonitoredVmUtil.java b/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/monitor/MonitoredVmUtil.java index 7c1149bdc34fb3f851738627679cc6ec62d3836b..81e1e962077bfdb6e35419b60b75349dbf32d05c 100644 --- a/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/monitor/MonitoredVmUtil.java +++ b/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/monitor/MonitoredVmUtil.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2004, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -82,7 +82,7 @@ public class MonitoredVmUtil { int firstSpace = commandLine.indexOf(' '); if (firstSpace > 0) { return commandLine.substring(firstSpace + 1); - } else if (commandLine.compareTo("Unknown") == 0) { + } else if (commandLine.equals("Unknown")) { return commandLine; } else { return null; diff --git a/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/monitor/VmIdentifier.java b/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/monitor/VmIdentifier.java index 6e7f1cc1f77e242bc532d69e5869d44f5fdd2bc7..da484b1c87859d21f5c69dfb33a5dd223b652228 100644 --- a/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/monitor/VmIdentifier.java +++ b/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/monitor/VmIdentifier.java @@ -174,7 +174,7 @@ public class VmIdentifier { private void validate() throws URISyntaxException { // file:// uri, which is a special case where the lvmid is not required. 
String s = getScheme(); - if ((s != null) && (s.compareTo("file") == 0)) { + if ("file".equals(s)) { return; } if (getLocalVmId() == -1) { diff --git a/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/perfdata/monitor/AliasFileParser.java b/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/perfdata/monitor/AliasFileParser.java index 3a4134bc6f957bb27511f81172084e7672a0f229..3b5d35eafbfdf8c38c14f532076b54338383f328 100644 --- a/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/perfdata/monitor/AliasFileParser.java +++ b/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/perfdata/monitor/AliasFileParser.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2004, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,7 +28,6 @@ package sun.jvmstat.perfdata.monitor; import java.net.*; import java.io.*; import java.util.*; -import java.util.regex.*; /** * Class for parsing alias files. File format is expected to follow @@ -127,7 +126,7 @@ public class AliasFileParser { while (currentToken.ttype != StreamTokenizer.TT_EOF) { // look for the start symbol if ((currentToken.ttype != StreamTokenizer.TT_WORD) - || (currentToken.sval.compareTo(ALIAS) != 0)) { + || !currentToken.sval.equals(ALIAS)) { nextToken(); continue; } @@ -143,7 +142,7 @@ public class AliasFileParser { match(StreamTokenizer.TT_WORD); } while ((currentToken.ttype != StreamTokenizer.TT_EOF) - && (currentToken.sval.compareTo(ALIAS) != 0)); + && !currentToken.sval.equals(ALIAS)); map.put(name, aliases); } diff --git a/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/perfdata/monitor/protocol/file/PerfDataBuffer.java b/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/perfdata/monitor/protocol/file/PerfDataBuffer.java index 9e42ac4f4d81279b04beb04e4a4ba750943df768..e6feb5759c7c047c72968d4df0a833107fa10b7f 100644 --- a/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/perfdata/monitor/protocol/file/PerfDataBuffer.java +++ b/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/perfdata/monitor/protocol/file/PerfDataBuffer.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2004, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -28,7 +28,6 @@ package sun.jvmstat.perfdata.monitor.protocol.file; import sun.jvmstat.monitor.*; import sun.jvmstat.perfdata.monitor.*; import java.io.*; -import java.net.URI; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; @@ -60,9 +59,9 @@ public class PerfDataBuffer extends AbstractPerfDataBuffer { FileChannel fc = new RandomAccessFile(f, mode).getChannel(); ByteBuffer bb = null; - if (mode.compareTo("r") == 0) { + if (mode.equals("r")) { bb = fc.map(FileChannel.MapMode.READ_ONLY, 0L, (int)fc.size()); - } else if (mode.compareTo("rw") == 0) { + } else if (mode.equals("rw")) { bb = fc.map(FileChannel.MapMode.READ_WRITE, 0L, (int)fc.size()); } else { throw new IllegalArgumentException("Invalid mode: " + mode); diff --git a/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/perfdata/monitor/v1_0/PerfDataBuffer.java b/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/perfdata/monitor/v1_0/PerfDataBuffer.java index d9f981e7de0e9c0bbfd192d7253eb7eb99183c79..60c512b9e41f53aa8baa15ad384d04eeaaf6f2c7 100644 --- a/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/perfdata/monitor/v1_0/PerfDataBuffer.java +++ b/src/jdk.internal.jvmstat/share/classes/sun/jvmstat/perfdata/monitor/v1_0/PerfDataBuffer.java @@ -28,7 +28,6 @@ package sun.jvmstat.perfdata.monitor.v1_0; import sun.jvmstat.monitor.*; import sun.jvmstat.perfdata.monitor.*; import java.util.*; -import java.util.regex.*; import java.nio.*; /** @@ -360,7 +359,7 @@ public class PerfDataBuffer extends PerfDataBufferImpl { String cname = "hotspot.gc.collector.0.name"; StringMonitor collector = (StringMonitor)map.get(cname); - if (collector.stringValue().compareTo("PSScavenge") == 0) { + if (collector.stringValue().equals("PSScavenge")) { boolean adaptiveSizePolicy = true; /* diff --git a/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlConfiguration.java b/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlConfiguration.java index 9d26192d5a4c69d9013d4b107fc10240ce6c93e7..100ad33940b19d780525a88a042ba5bed448504c 100644 --- a/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlConfiguration.java +++ b/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlConfiguration.java @@ -379,6 +379,13 @@ public class HtmlConfiguration extends BaseConfiguration { .collect(Collectors.toCollection(ArrayList::new)); } + public List getAdditionalScripts() { + return options.additionalScripts().stream() + .map(sf -> DocFile.createFileForInput(this, sf)) + .map(file -> DocPath.create(file.getName())) + .collect(Collectors.toCollection(ArrayList::new)); + } + @Override public JavaFileManager getFileManager() { return docEnv.getJavaFileManager(); diff --git a/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlDoclet.java b/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlDoclet.java index 2e5d966cff141677a3e586cedd9473e55da76d7f..b33c39005447de519c637160a8e0dc87d7761039 100644 --- a/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlDoclet.java +++ b/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlDoclet.java @@ -26,8 +26,6 @@ package jdk.javadoc.internal.doclets.formats.html; import java.io.IOException; -import java.io.OutputStream; -import java.io.Writer; import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.InvalidPathException; @@ -215,7 
+213,7 @@ public class HtmlDoclet extends AbstractDoclet { super.generateOtherFiles(classtree); HtmlOptions options = configuration.getOptions(); if (options.linkSource()) { - SourceToHTMLConverter.convertRoot(configuration,DocPaths.SOURCE_OUTPUT); + SourceToHTMLConverter.convertRoot(configuration, DocPaths.SOURCE_OUTPUT); } // Modules with no documented classes may be specified on the // command line to specify a service provider, allow these. @@ -225,10 +223,13 @@ public class HtmlDoclet extends AbstractDoclet { return; } boolean nodeprecated = options.noDeprecated(); - performCopy(options.helpFile()); - performCopy(options.stylesheetFile()); + performCopy(options.helpFile(), DocPath.empty); + performCopy(options.stylesheetFile(), DocPath.empty); for (String stylesheet : options.additionalStylesheets()) { - performCopy(stylesheet); + performCopy(stylesheet, DocPath.empty); + } + for (String script : options.additionalScripts()) { + performCopy(script, DocPaths.SCRIPT_DIR); } // do early to reduce memory footprint if (options.classUse()) { @@ -329,7 +330,7 @@ public class HtmlDoclet extends AbstractDoclet { "images/ui-bg_glass_75_e6e6e6_1x400.png"); DocFile f; for (String file : files) { - DocPath filePath = DocPaths.JQUERY_FILES.resolve(file); + DocPath filePath = DocPaths.SCRIPT_DIR.resolve(file); f = DocFile.createFileForOutput(configuration, filePath); f.copyResource(DOCLET_RESOURCES.resolve(filePath), true, false); } @@ -428,18 +429,20 @@ public class HtmlDoclet extends AbstractDoclet { return configuration.getOptions().getSupportedOptions(); } - private void performCopy(String filename) throws DocFileIOException { - if (filename.isEmpty()) + private void performCopy(String filename, DocPath targetPath) throws DocFileIOException { + if (filename.isEmpty()) { return; + } DocFile fromfile = DocFile.createFileForInput(configuration, filename); - DocPath path = DocPath.create(fromfile.getName()); + DocPath path = targetPath.resolve(fromfile.getName()); DocFile toFile = DocFile.createFileForOutput(configuration, path); - if (toFile.isSameFile(fromfile)) + if (toFile.isSameFile(fromfile)) { return; + } messages.notice("doclet.Copying_File_0_To_File_1", - fromfile.toString(), path.getPath()); + fromfile.getPath(), path.getPath()); toFile.copyFile(fromfile); } } diff --git a/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlDocletWriter.java b/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlDocletWriter.java index 6014745a2e535c0a418c0939202b46c02e99993f..76709124ff2659d456136dbb14b46e71a3d30c30 100644 --- a/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlDocletWriter.java +++ b/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlDocletWriter.java @@ -459,6 +459,7 @@ public class HtmlDocletWriter { .setCharset(options.charset()) .addKeywords(metakeywords) .setStylesheets(configuration.getMainStylesheet(), additionalStylesheets) + .setAdditionalScripts(configuration.getAdditionalScripts()) .setIndex(options.createIndex(), mainBodyScript) .addContent(extraHeadContent); diff --git a/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlOptions.java b/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlOptions.java index 30760a6c8f7e17f67161cad79ec77f667f6d6b8a..ca1602a45e7224492f179c791f4fd03319d437a1 100644 --- a/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlOptions.java +++ 
b/src/jdk.javadoc/share/classes/jdk/javadoc/internal/doclets/formats/html/HtmlOptions.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -59,6 +59,11 @@ public class HtmlOptions extends BaseOptions {
      */
     private List<String> additionalStylesheets = new ArrayList<>();

+    /**
+     * Argument for command-line option {@code --add-script}.
+     */
+    private List<String> additionalScripts = new ArrayList<>();
+
     /**
      * Argument for command-line option {@code -bottom}.
      */
@@ -199,6 +204,14 @@ public class HtmlOptions extends BaseOptions {
         Resources resources = messages.getResources();
         List


+     * {@snippet :
      *  javadoc("-d", "out",
      *          "-sourcepath", testSrc,
      *          "-notimestamp",
      *          "pkg1", "pkg2", "pkg3/C.java");
-     * 
+ * } * * @param args the arguments to pass to javadoc */ @@ -401,6 +404,7 @@ public abstract class JavadocTester { * Sets the kind of check for the initial contents of the output directory * before javadoc is run. * The filter should return true for files that should not appear. + * * @param c the kind of check to perform */ public void setOutputDirectoryCheck(DirectoryCheck c) { @@ -464,8 +468,7 @@ public abstract class JavadocTester { /** * Checks the exit code of the most recent call of javadoc. * - * @param expected the exit code that is required for the test - * to pass. + * @param expected the exit code that is required for the test to pass */ public void checkExit(Exit expected) { checking("check exit code"); @@ -480,12 +483,13 @@ public abstract class JavadocTester { * Checks for content in (or not in) the generated output. * Within the search strings, the newline character \n * will be translated to the platform newline character sequence. - * @param path a path within the most recent output directory - * or the name of one of the output buffers, identifying - * where to look for the search strings. + * + * @param path a path within the most recent output directory + * or the name of one of the output buffers, identifying + * where to look for the search strings. * @param expectedFound true if all of the search strings are expected - * to be found, or false if the file is not expected to be found - * @param strings the strings to be searched for + * to be found, or false if the file is not expected to be found + * @param strings the strings to be searched for */ public void checkFileAndOutput(String path, boolean expectedFound, String... strings) { if (expectedFound) { @@ -499,97 +503,81 @@ public abstract class JavadocTester { * Checks for content in (or not in) the generated output. * Within the search strings, the newline character \n * will be translated to the platform newline character sequence. - * @param path a path within the most recent output directory, identifying - * where to look for the search strings. + * + * @param path a path within the most recent output directory, identifying + * where to look for the search strings. * @param expectedFound true if all of the search strings are expected - * to be found, or false if all of the strings are expected to be - * not found - * @param strings the strings to be searched for + * to be found, or false if all of the strings are expected to be + * not found + * @param strings the strings to be searched for + * + * @see OutputChecker#check(String...) */ public void checkOutput(String path, boolean expectedFound, String... strings) { - // Read contents of file - try { - String fileString = readFile(outputDir, Path.of(path)); - checkOutput(outputDir.resolve(path).toString(), fileString, expectedFound, strings); - } catch (Error e) { - checking("Read file"); - failed("Error reading file: " + e); - } + new OutputChecker(path) + .setExpectFound(expectedFound) + .setExpectOrdered(false) // TODO, fix tests (32 failures) and change to true + .check(strings); } /** * Checks for content in (or not in) the one of the output streams written by * javadoc. Within the search strings, the newline character \n * will be translated to the platform newline character sequence. 
- * @param output the output stream to check + * + * @param output the output stream to check * @param expectedFound true if all of the search strings are expected - * to be found, or false if all of the strings are expected to be - * not found - * @param strings the strings to be searched for + * to be found, or false if all of the strings are expected to be + * not found + * @param strings the strings to be searched for + * + * @see OutputChecker#check(String...) */ public void checkOutput(Output output, boolean expectedFound, String... strings) { - checkOutput(output.toString(), outputMap.get(output), expectedFound, strings); - } - - // NOTE: path may be the name of an Output stream as well as a file path - private void checkOutput(String path, String fileString, boolean expectedFound, String... strings) { - for (String stringToFind : strings) { -// log.logCheckOutput(path, expectedFound, stringToFind); - checking("checkOutput"); - // Find string in file's contents - boolean isFound = findString(fileString, stringToFind); - if (isFound == expectedFound) { - passed(path + ": following text " + (isFound ? "found:" : "not found:") + "\n" - + stringToFind); - } else { - failed(path + ": following text " + (isFound ? "found:" : "not found:") + "\n" - + stringToFind + '\n' + - "found \n" + - fileString); - } - } + new OutputChecker(output) + .setExpectFound(expectedFound) + .setExpectOrdered(false) // TODO, fix tests (6 failures) and change to true + .check(strings); } /** * Checks that there are no duplicate lines in one of the streams written by javadoc. + * * @param output the output stream to check + * + * @see OutputChecker#checkUnique() */ public void checkUnique(Output output) { checkUnique(output, ".*", true); } /** - * Checks that there are no duplicate lines that either match or don't match a given patter, + * Checks that there are no duplicate lines that either match or don't match a given pattern, * in one of the streams written by javadoc. - * @param output the output stream to check + * + * @param output the output stream to check * @param pattern a pattern to filter the lines to be checked - * @param select if {@code true}, lines that match the pattern will be checked for uniqueness; - * if {@code false}, lines that do not match the pattern will be checked + * @param select if {@code true}, lines that match the pattern will be checked for uniqueness; + * if {@code false}, lines that do not match the pattern will be checked + * + * @see OutputChecker#checkUnique(Pattern, boolean) */ public void checkUnique(Output output, String pattern, boolean select) { - checking("checkUnique"); - Pattern filter = Pattern.compile(pattern); - Matcher m = filter.matcher(""); - Map linesSofar = new HashMap<>(); - int lineNumber = 0; - int duplicates = 0; - for (String line : getOutputLines(output)) { - m.reset(line); - if (m.find() == select) { - Integer prev = linesSofar.putIfAbsent(line, ++lineNumber); - if (prev != null) { - out.println("duplicate line detected on line " + lineNumber - + "; first occurrence on line " + prev); - out.println("line: " + line); - duplicates++; - } - } - } - if (duplicates == 0) { - passed("All lines are unique"); - } else { - failed(duplicates + " duplicate lines found"); - } + new OutputChecker(output).checkUnique(Pattern.compile(pattern), select); + } + + /** + * Ensures that a series of strings appear only once, in the generated output. + * Note: this test does not exhaustively check for all other possible + * duplicates once one is found. 
+ * + * @param path the file to check + * @param strings the strings + * + * @see OutputChecker#checkUnique(String...) + */ + public void checkUnique(String path, String... strings) { + new OutputChecker(path).checkUnique(strings); } /** @@ -658,7 +646,8 @@ public abstract class JavadocTester { } /** - * Gets the content of the one of the output streams written by javadoc. + * Returns the content of one of the output streams written by javadoc. + * * @param output the name of the output stream * @return the content of the output stream */ @@ -667,7 +656,8 @@ public abstract class JavadocTester { } /** - * Gets the content of the one of the output streams written by javadoc. + * Returns the content of one of the output streams written by javadoc. + * * @param output the name of the output stream * @return the content of the output stream, as a line of lines */ @@ -678,22 +668,23 @@ public abstract class JavadocTester { /** * Checks for files in (or not in) the generated output. + * * @param expectedFound true if all of the files are expected - * to be found, or false if all of the files are expected to be - * not found - * @param paths the files to check, within the most recent output directory. - * */ + * to be found, or false if all of the files are expected to be + * not found + * @param paths the files to check, within the most recent output directory. + */ public void checkFiles(boolean expectedFound, String... paths) { checkFiles(expectedFound, Arrays.asList(paths)); } /** * Checks for files in (or not in) the generated output. - * @param expectedFound true if all of the files are expected - * to be found, or false if all of the files are expected to be - * not found - * @param paths the files to check, within the most recent output directory. - * */ + * + * @param expectedFound true if all of the files are expected to be found, + * or false if all of the files are expected to be not found + * @param paths the files to check, within the most recent output directory. + */ public void checkFiles(boolean expectedFound, Collection paths) { for (String path: paths) { // log.logCheckFile(path, expectedFound); @@ -711,54 +702,16 @@ public abstract class JavadocTester { /** * Checks that a series of strings are found in order in a file in * the generated output. - * @param path the file to check - * @param strings the strings whose order to check + * + * @param path the file to check + * @param strings the strings whose order to check + * + * @see OutputChecker#check(String...) */ public void checkOrder(String path, String... strings) { - Path file = outputDir.resolve(path); - String fileString = readOutputFile(path); - int prevIndex = -1; - for (String s : strings) { - s = s.replace("\n", NL); // normalize new lines - int currentIndex = fileString.indexOf(s, prevIndex + 1); - checking("file: " + file + ": " + s + " at index " + currentIndex); - if (currentIndex == -1) { - failed(file, s + " not found."); - continue; - } - if (currentIndex > prevIndex) { - passed(file, s + " is in the correct order"); - } else { - failed(file, s + " is in the wrong order."); - } - prevIndex = currentIndex; - } - } - - /** - * Ensures that a series of strings appear only once, in the generated output, - * noting that, this test does not exhaustively check for all other possible - * duplicates once one is found. - * @param path the file to check - * @param strings ensure each are unique - */ - public void checkUnique(String path, String... 
strings) { - Path file = outputDir.resolve(path); - String fileString = readOutputFile(path); - for (String s : strings) { - int currentIndex = fileString.indexOf(s); - checking(s + " at index " + currentIndex); - if (currentIndex == -1) { - failed(file, s + " not found."); - continue; - } - int nextindex = fileString.indexOf(s, currentIndex + s.length()); - if (nextindex == -1) { - passed(file, s + " is unique"); - } else { - failed(file, s + " is not unique, found at " + nextindex); - } - } + new OutputChecker(path) + .setExpectOrdered(true) // be explicit + .check(strings); } /** @@ -766,7 +719,7 @@ public abstract class JavadocTester { * * @param baseDir1 the directory containing the first set of files * @param baseDir2 the directory containing the second set of files - * @param files the set of files to be compared + * @param files the set of files to be compared */ public void diff(String baseDir1, String baseDir2, String... files) { Path bd1 = Path.of(baseDir1); @@ -830,10 +783,10 @@ public abstract class JavadocTester { content = new String(Files.readAllBytes(file), charset); fileContentCache.put(file, new SoftReference<>(content)); return content; - } catch (FileNotFoundException e) { - throw new Error("File not found: " + fileName + ": " + e); + } catch (FileNotFoundException | NoSuchFileException e) { + throw new Error("File not found: " + fileName + ": " + e, e); } catch (IOException e) { - throw new Error("Error reading file: " + fileName + ": " + e); + throw new Error("Error reading file: " + fileName + ": " + e, e); } } @@ -941,22 +894,6 @@ public abstract class JavadocTester { } } - /** - * Searches for the string in the given file and return true - * if the string was found. - * - * @param fileString the contents of the file to search through - * @param stringToFind the string to search for - * @return true if the string was found - */ - private boolean findString(String fileString, String stringToFind) { - // javadoc (should) always use the platform newline sequence, - // but in the strings to find it is more convenient to use the Java - // newline character. So we translate \n to NL before we search. - stringToFind = stringToFind.replace("\n", NL); - return fileString.contains(stringToFind); - } - /** * Compares the two given files. * @@ -975,6 +912,459 @@ public abstract class JavadocTester { } } + /** + * A flexible checker for checking the content of generated files and output streams. + * + * Configuration can be done with a series of chained method calls. + * Checks can be specified as either literal strings or regular expressions. + */ + public class OutputChecker { + private final String name; + private final String content; + private boolean allowOverlaps = false; + private boolean expectFound = true; + private boolean expectOrdered = true; + private List matches = new ArrayList<>(); + private Range lastMatch; + + private enum SearchKind { + TEXT, PATTERN; + @Override + public String toString() { + return name().toLowerCase(Locale.ROOT); + } + } + + /** A half-open interval {@code [start, end)} to record the position of a match. 
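+ * For example, {@code [3,5)} and {@code [5,9)} are adjacent but do not overlap, while {@code [3,6)} and {@code [5,9)} do.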
*/ + record Range(int start, int end) { + static Range of(int start, int end) { + return new Range(start, end); + } + boolean overlaps(Range other) { + // Intervals do not overlap if one interval is completely before or completely after the other: + // that is, other.end <= start || end <= other.start + // Invert that for when intervals do overlap, and simplify to the following expression: + return other.end > start && end > other.start; + } + String toIntervalString() { + return "[" + start + "," + end + ")"; + } + } + + /** + * Creates an output checker for a file written by the most recent run of javadoc. + * If the file cannot be found or there is any other error while reading the file, + * an error will be reported and all subsequent {@code check...} methods will be skipped + * + * @param file the file + */ + public OutputChecker(String file) { + String c = null; + try { + c = readFile(file); + } catch (Error e) { + JavadocTester.this.checking("Read file " + file); + if (e.getCause() instanceof IOException) { + // exception probably thrown (with known message) by readFile + failed(e.getMessage()); + } else { + failed("Error reading file: " + e); + } + } + + if (c == null) { + name = null; + content = null; + } else { + name = file; + content = c; + } + } + + /** + * Creates an output checker for an output stream written by the most recent run of javadoc. + * + * @param output the output + */ + public OutputChecker(Output output) { + name = output.name(); + content = getOutput(output); + } + + /** + * Specifies whether matches are expected to be found or not. + * The default is {@code true}. + * + * @param expectFound whether matches are expected to be found + * @return this object + */ + public OutputChecker setExpectFound(boolean expectFound) { + this.expectFound = expectFound; + return this; + } + + /** + * Specifies whether matches are expected to be found in order or not. + * The default is {@code true}. + * + * @param expectOrdered whether matches should be ordered + * @return this object + */ + public OutputChecker setExpectOrdered(boolean expectOrdered) { + this.expectOrdered = expectOrdered; + return this; + } + + /** + * Specifies whether matches are allowed to overlap. + * The default is {@code false}. + * + * @param allowOverlaps whether matches may overlap + * @return this object + */ + public OutputChecker setAllowOverlaps(boolean allowOverlaps) { + this.allowOverlaps = allowOverlaps; + return this; + } + + /** + * Checks for the presence (or absence) of a series of strings. + * Within the search strings, the newline character {@code \n} + * will be translated to the platform newline character sequence. + * + * @param strings the strings to be searched for + */ + public OutputChecker check(String... strings) { + if (name == null) { + out.println("Skipping checks for:" + NL + + List.of(strings).stream() + .map(s -> " " + toShortString(s)) + .collect(Collectors.joining(NL))); + return this; + } + + for (String stringToFind : strings) { + check(startPos -> findString(stringToFind, startPos), SearchKind.TEXT, stringToFind); + } + return this; + } + + /** + * Checks for the presence (or absence) of a series of regular expressions. + * Unlike {@link #check(String...)}, there is no special handling for + * newline characters. Use {@code \R} to match the platform newline sequence. + * + * @param patterns the regular expressions to be searched for + */ + public OutputChecker check(Pattern... 
patterns) { + if (name == null) { + out.println("Skipping checks for:" + NL + + List.of(patterns).stream() + .map(p -> " " + toShortString(p.pattern())) + .collect(Collectors.joining(NL))); + return this; + } + for (Pattern pattern : patterns) { + check(startPos -> findPattern(pattern, startPos), SearchKind.PATTERN, pattern.pattern()); + } + return this; + } + + /** + * Checks for the presence (or absence) of an item. + * + * @param finder a function to find the next occurrence of an item starting at a given position + * @param kind the kind of the item ({@code "text"} or {@code "pattern"}) to include in messages + * @param s a string for the item, to be included in messages + */ + private void check(Function<Integer, Range> finder, SearchKind kind, String s) { + checking("checkOutput", kind); + int start = getStart(); + Range r = finder.apply(start); + boolean isFound = r != null; + if (isFound == expectFound) { + matches.add(lastMatch = r); + passed(name + ": following " + kind + " " + (isFound ? "found:" : "not found:") + "\n" + + s); + } else { + // item not found in order, so check if the item is found out of order, to determine the best message + if (expectFound && expectOrdered && start > 0) { + Range r2 = finder.apply(0); + if (r2 != null) { + failed(name + ": following " + kind + " was found on line " + + getLineNumber(r2.start) + + ", but not in order as expected, on or after line " + + getLineNumber(start) + + ":\n" + + s); + return; + } + } + failed(name + ": following " + kind + " " + + (isFound ? "found:" : "not found:") + "\n" + + s + '\n' + "found \n" + content); + } + + } + + /** + * Checks that there are no duplicate lines in the content. + */ + public OutputChecker checkUnique() { + checkUnique(Pattern.compile(".*"), true); + return this; + } + + /** + * Checks that there are no duplicate lines that either match or don't match a given pattern, + * in one of the streams written by javadoc. + * + * @param pattern a pattern to filter the lines to be checked + * @param select if {@code true}, lines that match the pattern will be checked for uniqueness; + * if {@code false}, lines that do not match the pattern will be checked + */ + public OutputChecker checkUnique(Pattern pattern, boolean select) { + if (name == null) { + out.println("Skipping checkUnique"); + return this; + } + + checking("checkUnique", SearchKind.PATTERN); + Matcher m = pattern.matcher(""); + Map<String, Integer> linesSofar = new HashMap<>(); + int lineNumber = 0; + int duplicates = 0; + for (String line : content.split(NL)) { + m.reset(line); + if (m.find() == select) { + Integer prev = linesSofar.putIfAbsent(line, ++lineNumber); + if (prev != null) { + out.println("duplicate line detected on line " + lineNumber + + "; first occurrence on line " + prev); + out.println("line: " + line); + duplicates++; + } + } + } + if (duplicates == 0) { + passed("All lines are unique"); + } else { + failed(duplicates + " duplicate lines found"); + } + return this; + } + + /** + * Checks that each of a series of strings appears only once in the generated output. + * Note: this test does not exhaustively check for all other possible duplicates once one is found.
+ * + * @param strings the strings + */ + public OutputChecker checkUnique(String... strings) { + return checkUnique(SearchKind.TEXT, List.of(strings), this::findString); + } + + /** + * Checks that each of a series of pattern matches appears only once in the generated output. + * Note: this test does not exhaustively check for all other possible duplicates once one is found.
+ * + * @param patterns the patterns + */ + public OutputChecker checkUnique(Pattern... patterns) { + return checkUnique(SearchKind.PATTERN, List.of(patterns), this::findPattern); + } + + private <T> OutputChecker checkUnique(SearchKind kind, List<T> items, BiFunction<T, Integer, Range> finder) { + if (name == null) { + out.println("Skipping checkUnique"); + return this; + } + + Range latest = null; + for (T item : items) { + int start = getStart(); + Range r = finder.apply(item, start); + checking("checkUnique at index " + start, SearchKind.TEXT); + if (r == null) { + failed(name + ": " + item + " not found."); + continue; + } + // only update lastMatch for the initial match of each item + if (lastMatch == null) { + lastMatch = r; + } + Range next = finder.apply(item, r.end); + if (next == null) { + passed(name + ": " + item + " is unique"); + } else { + failed(name + ": " + item + " is not unique, found at " + next.start); + } + } + if (latest != null) { + lastMatch = latest; + } + return this; + } + + /** + * Checks that all the output has been matched by preceding checks with this object. + * It does not matter whether the checks were ordered or not. + * The results of the matches are sorted and then checked to be adjacent and to + * cover the entire content. + * + * @apiNote This is probably most useful for checking diagnostic output, + * in which case care must be taken to allow for platform differences + * in the output, such as file separators and newline sequences. + */ + public OutputChecker checkComplete() { + if (name == null) { + out.println("Skipping checkComplete"); + return this; + } + + JavadocTester.this.checking("checking for complete coverage of output"); + List<Range> uncovered = new ArrayList<>(); + List<Range> list = new ArrayList<>(matches); + list.sort(Comparator.comparing(Range::start)); + int prev = 0; + for (Range r : list) { + if (r.start != prev) { + uncovered.add(new Range(prev, r.start)); + } + prev = r.end; + } + if (prev != content.length()) { + uncovered.add(new Range(prev, content.length())); + } + if (uncovered.isEmpty()) { + passed("All output matched"); + } else { + failed("The following output was not matched: " + + uncovered.stream() + .map(Range::toIntervalString) + .collect(Collectors.joining(", "))); + } + return this; + } + + /** + * Checks that no output is present. + */ + public OutputChecker checkEmpty() { + if (name == null) { + out.println("Skipping checkEmpty"); + return this; + } + + JavadocTester.this.checking("empty"); + if (content == null || content.isEmpty()) { + passed(name + " is empty, as expected"); + } else { + failed(name + " is not empty; contains:\n" + + content); + } + return this; + } + + /** + * Checks that at least one of a set of alternatives is found. + */ + public OutputChecker checkAnyOf(String... strings) { + return checkAnyOf(SearchKind.TEXT, List.of(strings), this::findString); + } + + /** + * Checks that at least one of a set of alternatives is found. + */ + public OutputChecker checkAnyOf(Pattern... patterns) { + return checkAnyOf(SearchKind.PATTERN, List.of(patterns), this::findPattern); + } + + /** + * Checks that at least one of a set of alternatives is found.
+ * + */ + private <T> OutputChecker checkAnyOf(SearchKind kind, List<T> items, BiFunction<T, Integer, Range> finder) { + if (name == null) { + out.println("Skipping checkAnyOf"); + return this; + } + + checking("checkAnyOf", kind); + Range earliest = null; + int start = getStart(); + int count = 0; + for (T item : items) { + Range r = finder.apply(item, start); + if (r != null) { + count++; + if (earliest == null || rangeComparator.compare(earliest, r) > 0) { + earliest = r; + } + } + } + if (earliest != null) { + lastMatch = earliest; + } + if (count == 0) { + failed("no match found for any " + kind); + } else { + passed(count + " matches found; earliest is " + earliest.toIntervalString()); + } + return this; + } + + Comparator<Range> rangeComparator = Comparator.comparing(Range::start).thenComparing(Range::end); + + private void checking(String name, SearchKind kind) { + JavadocTester.this.checking(name + " " + kind.name() + + " allowOverlaps:" + allowOverlaps + + " expectFound:" + expectFound + + " expectOrdered:" + expectOrdered); + } + + private Range findString(String stringToFind, int start) { + // javadoc (should) always use the platform newline sequence, + // but in the strings to find it is more convenient to use the Java + // newline character. So we translate \n to NL before we search. + stringToFind = stringToFind.replace("\n", NL); + int i = content.indexOf(stringToFind, start); + return i >= 0 ? Range.of(i, i + stringToFind.length()) : null; + } + + private Range findPattern(Pattern p, int start) { + Matcher m = p.matcher(content); + return m.find(start) ? Range.of(m.start(), m.end()) : null; + } + + private int getStart() { + if (lastMatch == null || !expectOrdered) { + return 0; + } + return allowOverlaps ? lastMatch.start + 1 : lastMatch.end; + } + + private int getLineNumber(int pos) { + Pattern p = Pattern.compile("\\R"); + Matcher m = p.matcher(content); + int line = 1; + int start = 0; + while (m.find(start) && m.start() < pos) { + line++; + start = m.start() + 1; + } + return line; + } + + private String toShortString(String s) { + final int MAX = 64; + s = s.replaceAll("\\s+", " "); + if (s.length() > MAX) { + s = s.substring(0, MAX / 2 - 2) + " ... " + s.substring(s.length() - MAX / 2 - 2); + } + return s; + } + } + /** * Utility class to simplify the handling of temporarily setting a * new stream for System.out or System.err. diff --git a/test/langtools/jdk/javadoc/testJavadocTester/TestJavadocTester.java b/test/langtools/jdk/javadoc/testJavadocTester/TestJavadocTester.java new file mode 100644 index 0000000000000000000000000000000000000000..5f0ab271e191a17a4db5ce34afa917a57a3776d9 --- /dev/null +++ b/test/langtools/jdk/javadoc/testJavadocTester/TestJavadocTester.java @@ -0,0 +1,438 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + + +/* + * @test + * @bug 8273154 + * @summary Provide a JavadocTester method for non-overlapping, unordered output matching + * @library /tools/lib/ ../lib + * @modules jdk.javadoc/jdk.javadoc.internal.tool + * @build toolbox.ToolBox javadoc.tester.* + * @run main TestJavadocTester + */ + +import javadoc.tester.JavadocTester; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.regex.Pattern; +import java.util.stream.Stream; + +import toolbox.ToolBox; + +/** + * Tests basic mechanisms in the {@code JavadocTester} class. + * + * It is not a direct test of the javadoc tool or the output generated by the + * Standard Doclet, although both are indirectly used as part of this test. + * + * The test works by exercising the {@code JavadocTester} API with a series of + * positive and negative tests. The {@code passed} and {@code failed} methods + * are overridden to record the messages reported by the underlying instance, so + * that the messages can subsequently be verified. Also, {@code printSummary} + * is overridden to suppress the default action to throw {@code Error} when + * tests have failed. + */ +public class TestJavadocTester extends JavadocTester { + public static void main(String... args) throws Exception { + TestJavadocTester tester = new TestJavadocTester(); + tester.setup().runTests(); + } + + private final List messages = new ArrayList<>(); + private int testErrors = 0; + + /** + * Overrides the default implementation of {@code passed} to record the argument. + * {@inheritDoc} + * + * @param message a short description of the outcome + */ + @Override + public void passed(String message) { + super.passed(message); + messages.add("Passed: " + message); + } + + /** + * Overrides the default implementation of {@code failed} to record the argument. + * {@inheritDoc} + * + * @param message a short description of the outcome + */ + @Override + public void failed(String message) { + super.failed(message); + messages.add("FAILED: " + message); + } + + /** + * Overrides the default implementation of {@code printSummary} to suppress + * the error thrown as a result of errors reported by {@code JavadocTester}. + * Instead, an error is thrown if any errors are found by the tests in this class. + */ + @Override + public void printSummary() { + try { + super.printSummary(); + } catch (Error e) { + if (e.getClass() != Error.class) { + throw e; + } + report("Suppressed: " + e); + } + + if (testErrors > 0) { + report(testErrors + " errors found"); + throw new Error(testErrors + " errors found"); + } + } + + /** + * Checks the content of messages reported by the {@code passed} and {@code failed} + * methods in {@code JavadocTester}. The messages are saved by the local overloads + * of those methods in this class. + * + * Because some of the messages are very long, it is enough to pass in + * initial substrings of the expected messages. + * + * Note that messages reported by {@code JavadocTester} use filenames as given + * to the various {@code check...} calls. 
By convention, these always use {@code /} + * as the file separator, and not the platform file separator. + * + * @param expect initial substrings of expected messages + */ + void checkMessages(String... expect) { + for (String e : expect) { + Optional match = messages.stream() + .filter(m -> m.startsWith(e)) + .findFirst(); + if (match.isPresent()) { + report("found '" + e + "'"); + } else { + report("ERROR: no message found for '" + e + "'"); + testErrors++; + } + } + } + + /** + * Reports a message, preceded by {@code >>> }. + * + * It is helpful/important to distinguish the messages written as a side-effect + * of the underlying tests from the messages used to report the outcome of the + * tests that verify those messages. Instead of interposing to mark the messages + * written as a side effect of the underlying tests, we leave those messages + * unchanged, and instead, mark the messages reporting whether those messages + * are as expected or not. + * + * @param message the message to be reported. + */ + private void report(String message) { + message.lines().forEachOrdered(l -> out.println(">>> " + l)); + } + + //------------------------------------------------- + + private final ToolBox tb = new ToolBox(); + + TestJavadocTester setup() throws IOException { + Path src = Path.of("src"); + tb.writeJavaFiles(src, """ + package p; + /** + * First sentence abc. + * Second sentence. + * abc123 + * def456 + * ghi789 + * abc123 + * def456 + * ghi789 + */ + public class C { + private C() { } + /** m3 comment. */ + public void m3() { } + /** m2 comment. */ + public void m2() { } + /** m1 comment. */ + public void m1() { } + } + """); + + javadoc("-d", "out", + "-sourcepath", src.toString(), + "-noindex", "-nohelp", + "p"); + return this; + } + + @Test + public void testSimpleStringCheck() { + messages.clear(); + new OutputChecker("p/C.html") + .check("Second sentence", + "abc123", + "def456"); + messages.forEach(this::report); + checkMessages( + """ + Passed: p/C.html: following text found: + Second sentence""", + """ + Passed: p/C.html: following text found: + abc123""", + """ + Passed: p/C.html: following text found: + def456"""); + } + + @Test + public void testSimpleNegativeStringCheck_expected() { + messages.clear(); + new OutputChecker("p/C.html") + .setExpectFound(false) + .check("Third sentence."); + checkMessages( + """ + Passed: p/C.html: following text not found: + Third sentence"""); + } + + @Test + public void testSimpleNegativeStringCheck_unexpected() { + messages.clear(); + new OutputChecker("p/C.html") + .check("Third sentence."); + checkMessages( + """ + FAILED: p/C.html: following text not found: + Third sentence"""); + } + + @Test + public void testSimpleRegexCheck() { + messages.clear(); + new OutputChecker("p/C.html") + .check(Pattern.compile("S.cond s.nt.nc."), + Pattern.compile("[abc]{3}[123]{3}"), + Pattern.compile("d.f4.6")); + checkMessages( + """ + Passed: p/C.html: following pattern found: + S.cond s.nt.nc.""", + """ + Passed: p/C.html: following pattern found: + [abc]{3}[123]{3}""", + """ + Passed: p/C.html: following pattern found: + d.f4.6"""); + } + + @Test + public void testOrdered() { + messages.clear(); + // methods are listed alphabetically in the Summary table, + // but in source-code order in the Details section. + new OutputChecker("p/C.html") + .check("
<h2>Method Summary</h2>", + "m1", + "m2", + "m3") + .check("<h2>Method Details</h2>", + "<section class=\"detail\" id=\"m3()\">\n", + "<section class=\"detail\" id=\"m2()\">\n", + "<section class=\"detail\" id=\"m1()\">\n"); + + checkMessages( + """ + Passed: p/C.html: following text found: + <h2>Method Summary</h2>""", + """ + Passed: p/C.html: following text found: + m1""", + """ + Passed: p/C.html: following text found: + m2""", + """ + Passed: p/C.html: following text found: + m3""", + """ + Passed: p/C.html: following text found: + <h2>Method Details</h2>""", + """ + Passed: p/C.html: following text found: + <section class="detail" id="m3()">""", + """ + Passed: p/C.html: following text found: + <section class="detail" id="m2()">""", + """ + Passed: p/C.html: following text found: + <section class="detail" id="m1()">
""" + ); + } + + @Test + public void testUnordered_expected() { + messages.clear(); + new OutputChecker("p/C.html") + .setExpectOrdered(false) + .check("Second sentence", + "First sentence"); + checkMessages( + """ + Passed: p/C.html: following text found: + Second sentence""", + """ + Passed: p/C.html: following text found: + First sentence"""); + } + + @Test + public void testUnordered_unexpected() { + messages.clear(); + new OutputChecker("p/C.html") + .check("Second sentence", + "First sentence"); + checkMessages( + """ + Passed: p/C.html: following text found: + Second sentence""", + """ + FAILED: p/C.html: following text was found on line"""); + } + + @Test + public void testComplete_Ordered() { + messages.clear(); + // In the following calls, the strings are specified in the expected order. + // File separators are made platform-specific by calling 'fix'. + // Newlines are handled automatically by the 'check' method. + new OutputChecker(Output.OUT) + .check("Loading source files for package p...\n", + "Constructing Javadoc information...\n", + fix("Creating destination directory: \"out/\"\n")) + .check(Pattern.compile("Standard Doclet .*\\R")) + .check("Building tree for all the packages and classes...\n", + fix("Generating out/p/C.html...\n"), + fix("Generating out/p/package-summary.html...\n"), + fix("Generating out/p/package-tree.html...\n"), + fix("Generating out/overview-tree.html...\n"), + fix("Generating out/index.html...\n")) + .checkComplete(); + checkMessages("Passed: All output matched"); + } + + @Test + public void testComplete_Unordered() { + messages.clear(); + // In the following calls, the strings are deliberately specified out of the expected order. + // File separators are made platform-specific by calling 'fix'. + // Newlines are handled automatically by the 'check' method. 
+ new OutputChecker(Output.OUT) + .setExpectOrdered(false) + .check("Loading source files for package p...\n", + "Constructing Javadoc information...\n", + "Building tree for all the packages and classes...\n") + .check(fix("Creating destination directory: \"out/\"\n", + "Generating out/index.html...\n", + "Generating out/overview-tree.html...\n", + "Generating out/p/package-tree.html...\n", + "Generating out/p/package-summary.html...\n", + "Generating out/p/C.html...\n")) + .check(Pattern.compile("Standard Doclet .*\\R")) + .checkComplete(); + checkMessages("Passed: All output matched"); + } + + @Test + public void testEmpty() { + messages.clear(); + new OutputChecker(Output.STDERR) + .checkEmpty(); + checkMessages("Passed: STDERR is empty, as expected"); + } + + @Test + public void testBadFile() { + messages.clear(); + new OutputChecker("does-not-exist.html") + .check("abcdef", + "very long string ".repeat(10)) + .check(Pattern.quote("abcdef"), + Pattern.quote("very long string".repeat(10))); + checkMessages("FAILED: File not found: does-not-exist.html"); + } + + @Test + public void testAnyOf() { + messages.clear(); + new OutputChecker("p/C.html") + .checkAnyOf("m1()", "m2()", "m3()") // expect all found + .checkAnyOf("m1()", "m2()", "M3()") // expect some found + .checkAnyOf("M1()", "M2()", "M3()"); // expect none found + checkMessages("Passed: 3 matches found", + "Passed: 2 matches found", + "FAILED: no match found for any text"); + } + + @Test + public void testUnique() { + messages.clear(); + new OutputChecker("p/C.html") + .setExpectOrdered(false) + .checkUnique("id=\"m1()\"", "id=\"m2()\"", "id=\"m3()\"") // expect unique + .checkUnique("m1()", "m2()", "m3()"); // expect not unique + checkMessages("Passed: p/C.html: id=\"m1()\" is unique", + "Passed: p/C.html: id=\"m2()\" is unique", + "Passed: p/C.html: id=\"m3()\" is unique", + "FAILED: p/C.html: m1() is not unique", + "FAILED: p/C.html: m2() is not unique", + "FAILED: p/C.html: m3() is not unique"); + } + + /** + * {@return a string with {@code /} replaced by the platform file separator} + * + * @param item the string + */ + private String fix(String item) { + return item.replace("/", FS); + } + + /** + * {@return an array of strings with {@code /} replaced by the platform file separator} + * + * @param items the strings + */ + private String[] fix(String... items) { + return Stream.of(items) + .map(this::fix) + .toArray(String[]::new); + } +} diff --git a/test/langtools/jdk/javadoc/tool/CheckManPageOptions.java b/test/langtools/jdk/javadoc/tool/CheckManPageOptions.java index f876d92b7dd271cb904163bafbbf0ef21a758828..437b55da677f4fd05e2e46cc42960b9b3e1f978d 100644 --- a/test/langtools/jdk/javadoc/tool/CheckManPageOptions.java +++ b/test/langtools/jdk/javadoc/tool/CheckManPageOptions.java @@ -62,6 +62,7 @@ public class CheckManPageOptions { // FIXME: JDK-8274295, JDK-8266666 List MISSING_IN_MAN_PAGE = List.of( + "--add-script", "--legal-notices", "--link-platform-properties", "--no-platform-links", diff --git a/test/langtools/jdk/jshell/ToolBasicTest.java b/test/langtools/jdk/jshell/ToolBasicTest.java index 8d8dfc8f9f449cf35f07ae48f18b2b0a4de209f2..a6bb7a075de39c089e6a7acaca3c0a6be427704d 100644 --- a/test/langtools/jdk/jshell/ToolBasicTest.java +++ b/test/langtools/jdk/jshell/ToolBasicTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -553,7 +553,7 @@ public class ToolBasicTest extends ReplToolTesting { } public void testOpenResource() { - test( + test(new String[]{"-R", "-Duser.language=en", "-R", "-Duser.country=US"}, (a) -> assertCommand(a, "/open PRINTING", ""), (a) -> assertCommandOutputContains(a, "/list", "void println", "System.out.printf"), diff --git a/test/langtools/jdk/jshell/ToolSimpleTest.java b/test/langtools/jdk/jshell/ToolSimpleTest.java index 26a5402564e65560bbc1e84549541801422ede35..6994e3b99f28c710e48bdfb8b2bdad25faaef249 100644 --- a/test/langtools/jdk/jshell/ToolSimpleTest.java +++ b/test/langtools/jdk/jshell/ToolSimpleTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -744,7 +744,8 @@ public class ToolSimpleTest extends ReplToolTesting { @Test public void testCompoundStart() { - test(new String[]{"--startup", "DEFAULT", "--startup", "PRINTING"}, + test(new String[]{"-R", "-Duser.language=en", "-R", "-Duser.country=US", + "--startup", "DEFAULT", "--startup", "PRINTING"}, (a) -> assertCommand(a, "printf(\"%4.2f\", Math.PI)", "", "", null, "3.14", "") ); diff --git a/test/langtools/tools/javac/lambda/lambdaExecution/LambdaTranslationTest1.java b/test/langtools/tools/javac/lambda/lambdaExecution/LambdaTranslationTest1.java index 37033fff8a152acbe987fc355a872798d589179c..b207fd1a7d5e9d6dfa4823c8a78c147225a1d6b7 100644 --- a/test/langtools/tools/javac/lambda/lambdaExecution/LambdaTranslationTest1.java +++ b/test/langtools/tools/javac/lambda/lambdaExecution/LambdaTranslationTest1.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8003639 * @summary convert lambda testng tests to jtreg and add them - * @run testng LambdaTranslationTest1 + * @run testng/othervm -Duser.language=en -Duser.country=US LambdaTranslationTest1 */ import org.testng.annotations.Test; diff --git a/test/langtools/tools/javac/lambda/lambdaExecution/LambdaTranslationTest2.java b/test/langtools/tools/javac/lambda/lambdaExecution/LambdaTranslationTest2.java index fc03a8630361403344cd0b793629f3c9b68eefa0..e7e484730b2117653c766038e3f9dccc06206365 100644 --- a/test/langtools/tools/javac/lambda/lambdaExecution/LambdaTranslationTest2.java +++ b/test/langtools/tools/javac/lambda/lambdaExecution/LambdaTranslationTest2.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8003639 * @summary convert lambda testng tests to jtreg and add them - * @run testng LambdaTranslationTest2 + * @run testng/othervm -Duser.language=en -Duser.country=US LambdaTranslationTest2 */ import org.testng.annotations.Test; diff --git a/test/lib/jdk/test/lib/cds/CDSTestUtils.java b/test/lib/jdk/test/lib/cds/CDSTestUtils.java index c3ce3a9550470bc4a90fb8fff0498bdb69343a66..f7e263ba5c53a45a888e807573c072383f470383 100644 --- a/test/lib/jdk/test/lib/cds/CDSTestUtils.java +++ b/test/lib/jdk/test/lib/cds/CDSTestUtils.java @@ -41,7 +41,7 @@ public class CDSTestUtils { public static final String MSG_RANGE_ALREADT_IN_USE = "Unable to allocate region, java heap range is already in use."; public static final String MSG_DYNAMIC_NOT_SUPPORTED = - "DynamicDumpSharedSpaces is unsupported when base CDS archive is not loaded"; + "-XX:ArchiveClassesAtExit is unsupported when base CDS archive is not loaded"; public static final boolean DYNAMIC_DUMP = Boolean.getBoolean("test.dynamic.cds.archive"); public interface Checker { @@ -326,9 +326,7 @@ public class CDSTestUtils { // Special case -- sometimes Xshare:on fails because it failed to map // at given address. This behavior is platform-specific, machine config-specific // and can be random (see ASLR). - if (isUnableToMap(output)) { - throw new SkippedException(UnableToMapMsg); - } + checkMappingFailure(output); if (e != null) { throw e; @@ -351,19 +349,28 @@ public class CDSTestUtils { // instead of utilizing multiple messages. // These are suggestions to improve testibility of the VM. However, implementing them // could also improve usability in the field. - public static boolean isUnableToMap(OutputAnalyzer output) { + private static String hasUnableToMapMessage(OutputAnalyzer output) { String outStr = output.getOutput(); - if ((output.getExitValue() == 1) && - (outStr.contains(MSG_RANGE_NOT_WITHIN_HEAP) || outStr.contains(MSG_DYNAMIC_NOT_SUPPORTED))) { - return true; + if ((output.getExitValue() == 1)) { + if (outStr.contains(MSG_RANGE_NOT_WITHIN_HEAP)) { + return MSG_RANGE_NOT_WITHIN_HEAP; + } + if (outStr.contains(MSG_DYNAMIC_NOT_SUPPORTED)) { + return MSG_DYNAMIC_NOT_SUPPORTED; + } } - return false; + return null; + } + + public static boolean isUnableToMap(OutputAnalyzer output) { + return hasUnableToMapMessage(output) != null; } public static void checkMappingFailure(OutputAnalyzer out) throws SkippedException { - if (isUnableToMap(out)) { - throw new SkippedException(UnableToMapMsg); + String match = hasUnableToMapMessage(out); + if (match != null) { + throw new SkippedException(UnableToMapMsg + ": " + match); } } @@ -472,10 +479,7 @@ public class CDSTestUtils { public static OutputAnalyzer checkExecExpectError(OutputAnalyzer output, int expectedExitValue, String... extraMatches) throws Exception { - if (isUnableToMap(output)) { - throw new SkippedException(UnableToMapMsg); - } - + checkMappingFailure(output); output.shouldHaveExitValue(expectedExitValue); checkMatches(output, extraMatches); return output; diff --git a/test/lib/jdk/test/lib/net/IPSupport.java b/test/lib/jdk/test/lib/net/IPSupport.java index 05f3966dd5e49741dd0b7b502b7665f514237482..b86354a2503e0d12b1bd9ef89d09c2a915494da7 100644 --- a/test/lib/jdk/test/lib/net/IPSupport.java +++ b/test/lib/jdk/test/lib/net/IPSupport.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,11 +27,12 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.PrintStream; import java.io.UncheckedIOException; +import java.net.Inet4Address; +import java.net.Inet6Address; import java.net.InetAddress; -import java.net.InetSocketAddress; -import java.net.Socket; -import java.net.SocketException; -import java.net.UnknownHostException; +import java.net.ProtocolFamily; +import java.net.StandardProtocolFamily; +import java.nio.channels.SocketChannel; import java.security.AccessController; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; @@ -49,19 +50,8 @@ public class IPSupport { private static final boolean preferIPv6Addresses; static { - try { - InetAddress loopbackIPv4 = InetAddress.getByAddress( - new byte[] {0x7F, 0x00, 0x00, 0x01}); - - InetAddress loopbackIPv6 = InetAddress.getByAddress( - new byte[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}); - - hasIPv4 = runPrivilegedAction(() -> hasAddress(loopbackIPv4)); - hasIPv6 = runPrivilegedAction(() -> hasAddress(loopbackIPv6)); - } catch (UnknownHostException e) { - throw new AssertionError(e); - } + hasIPv4 = runPrivilegedAction(() -> isSupported(Inet4Address.class)); + hasIPv6 = runPrivilegedAction(() -> isSupported(Inet6Address.class)); preferIPv4Stack = runPrivilegedAction(() -> Boolean.parseBoolean( System.getProperty("java.net.preferIPv4Stack"))); preferIPv6Addresses = runPrivilegedAction(() -> Boolean.parseBoolean( @@ -71,14 +61,13 @@ public class IPSupport { } } - private static boolean hasAddress(InetAddress address) { - try (Socket socket = new Socket()) { - socket.bind(new InetSocketAddress(address, 0)); + private static boolean isSupported(Class addressType) { + ProtocolFamily family = addressType == Inet4Address.class ? + StandardProtocolFamily.INET : StandardProtocolFamily.INET6; + try (var sc = SocketChannel.open(family)) { return true; - } catch (SocketException se) { + } catch (IOException | UnsupportedOperationException ex) { return false; - } catch (IOException e) { - throw new UncheckedIOException(e); } } diff --git a/test/micro/org/openjdk/bench/java/lang/ThreadOnSpinWait.java b/test/micro/org/openjdk/bench/java/lang/ThreadOnSpinWait.java new file mode 100644 index 0000000000000000000000000000000000000000..72efedb59010614963ffb33e00ec8d945e9379dd --- /dev/null +++ b/test/micro/org/openjdk/bench/java/lang/ThreadOnSpinWait.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2021, Amazon.com Inc. or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package org.openjdk.bench.java.lang; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Threads; + +import java.util.concurrent.TimeUnit; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +public class ThreadOnSpinWait { + @Benchmark + @Threads(1) + public void testOnSpinWait() { + Thread.onSpinWait(); + } + + @Benchmark + @Threads(1) + public void testSleep0() throws InterruptedException { + Thread.sleep(0); + } + + @Benchmark + @Threads(1) + public void testEmpty() { + } +} diff --git a/test/micro/org/openjdk/bench/java/lang/ThreadOnSpinWaitProducerConsumer.java b/test/micro/org/openjdk/bench/java/lang/ThreadOnSpinWaitProducerConsumer.java new file mode 100644 index 0000000000000000000000000000000000000000..e111b77ab5171fc95f5afafcdc563b84fb99c49d --- /dev/null +++ b/test/micro/org/openjdk/bench/java/lang/ThreadOnSpinWaitProducerConsumer.java @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2021, Amazon.com Inc. or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package org.openjdk.bench.java.lang; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; + +import org.openjdk.jmh.infra.Blackhole; + +import java.math.BigInteger; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import java.util.function.BooleanSupplier; + +/** + * This microbenchmark models a producer-consumer workload. + * + * The microbenchmark uses two threads: one for a producer, one for a consumer. + * The microbenchmark uses BigInteger so that the latency of producing/consuming + * data is comparable with that of the synchronization operations.
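+ * Thread.onSpinWait() itself is only a hint: HotSpot typically intrinsifies it to the x86 PAUSE instruction, and it may be a no-op on platforms without a cheap spin hint.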
+ * + * Thread.onSpinWait is used in a spin loop which is used to avoid heavy locks. + * In the spin loop volatile fields are checked. To reduce overhead accessing them + * they are only checked after a number of iterations. + */ +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@State(Scope.Benchmark) +@Threads(1) +public class ThreadOnSpinWaitProducerConsumer { + @Param({"100"}) + public int maxNum; + + @Param({"125"}) + public int spinNum; + + @Param({"10"}) + public int checkSpinCondAfterIters; + + @Param({"256"}) + public int dataBitLength; + + private Thread threadProducer; + private Thread threadConsumer; + private Object monitor; + + private BigInteger a; + private BigInteger b; + private Blackhole bh; + + private volatile int dataId; + private volatile int seenDataId; + + private int producedDataCount; + private int consumedDataCount; + + private void produceData() { + if (!isDataSeen()) { + return; + } + + b = a.not(); + ++dataId; + ++producedDataCount; + } + + private void consumeData() { + if (isDataSeen()) { + return; + } + bh.consume(a.equals(b.not())); + seenDataId = dataId; + ++consumedDataCount; + } + + private boolean isDataSeen() { + return seenDataId == dataId; + } + + private boolean isNewData() { + return seenDataId != dataId; + } + + private boolean spinWaitForCondition(int spinNum, BooleanSupplier cond) { + for (int i = 0; i < spinNum; ++i) { + if ((i % checkSpinCondAfterIters) == 0 && cond.getAsBoolean()) { + return true; + } + Thread.onSpinWait(); + } + return cond.getAsBoolean(); + } + + void produce() { + try { + while (dataId < maxNum) { + if (spinWaitForCondition(this.spinNum, this::isDataSeen)) { + synchronized (monitor) { + produceData(); + monitor.notify(); + } + } else { + synchronized (monitor) { + while (!isDataSeen()) { + monitor.wait(); + } + + produceData(); + monitor.notify(); + } + } + } + } catch (InterruptedException e) {} + } + + void consume() { + try { + for (;;) { + if (spinWaitForCondition(this.spinNum, this::isNewData)) { + synchronized (monitor) { + consumeData(); + monitor.notify(); + } + } else { + synchronized (monitor) { + while (isDataSeen()) { + monitor.wait(); + } + + consumeData(); + monitor.notify(); + } + } + } + } catch (InterruptedException e) {} + } + + @Setup(Level.Trial) + public void setup01() { + Random rnd = new Random(111); + a = BigInteger.probablePrime(dataBitLength, rnd); + monitor = new Object(); + } + + @Setup(Level.Invocation) + public void setup02() { + threadProducer = new Thread(this::produce); + threadConsumer = new Thread(this::consume); + } + + @Benchmark + public void trial(Blackhole bh) throws Exception { + this.bh = bh; + producedDataCount = 0; + consumedDataCount = 0; + dataId = 0; + seenDataId = 0; + threadProducer.start(); + threadConsumer.start(); + threadProducer.join(); + + synchronized (monitor) { + while (!isDataSeen()) { + monitor.wait(); + } + } + threadConsumer.interrupt(); + + if (producedDataCount != maxNum) { + throw new RuntimeException("Produced: " + producedDataCount + ". 
Expected: " + maxNum); + } + if (producedDataCount != consumedDataCount) { + throw new RuntimeException("produced != consumed: " + producedDataCount + " != " + consumedDataCount); + } + } +} diff --git a/test/micro/org/openjdk/bench/java/lang/ThreadOnSpinWaitSharedCounter.java b/test/micro/org/openjdk/bench/java/lang/ThreadOnSpinWaitSharedCounter.java new file mode 100644 index 0000000000000000000000000000000000000000..933500e0fcdb85a6c545b7513538dafc67c273f8 --- /dev/null +++ b/test/micro/org/openjdk/bench/java/lang/ThreadOnSpinWaitSharedCounter.java @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2021, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package org.openjdk.bench.java.lang; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@State(Scope.Benchmark) +public class ThreadOnSpinWaitSharedCounter { + @Param({"1000000"}) + public int maxNum; + + @Param({"4"}) + public int threadCount; + + AtomicInteger theCounter; + + Thread threads[]; + + void work() { + for (;;) { + int prev = theCounter.get(); + if (prev >= maxNum) { + break; + } + if (theCounter.compareAndExchange(prev, prev + 1) != prev) { + Thread.onSpinWait(); + } + } + } + + @Setup(Level.Trial) + public void foo() { + theCounter = new AtomicInteger(); + } + + @Setup(Level.Invocation) + public void setup() { + theCounter.set(0); + threads = new Thread[threadCount]; + + for (int i = 0; i < threads.length; i++) { + threads[i] = new Thread(this::work); + } + } + + @Benchmark + public void trial() throws Exception { + for (int i = 0; i < threads.length; i++) { + threads[i].start(); + } + for (int i = 0; i < threads.length; i++) { + threads[i].join(); + } + } +} diff --git a/test/micro/org/openjdk/bench/vm/compiler/TypeVectorOperations.java b/test/micro/org/openjdk/bench/vm/compiler/TypeVectorOperations.java index 0170a1d283299b26fc2d45e5b6090124f4ea7bdd..5e3799c8f3db6af1b72aa815eac400d3d1fef502 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/TypeVectorOperations.java +++ b/test/micro/org/openjdk/bench/vm/compiler/TypeVectorOperations.java @@ -1,5 +1,5 @@ 
/* - * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -195,6 +195,34 @@ public abstract class TypeVectorOperations { } } + @Benchmark + public void convert_i2f() { + for (int i = 0; i < COUNT; i++) { + resF[i] = (float) ints[i]; + } + } + + @Benchmark + public void convert_f2i() { + for (int i = 0; i < COUNT; i++) { + resI[i] = (int) floats[i]; + } + } + + @Benchmark + public void convert_l2d() { + for (int i = 0; i < COUNT; i++) { + resD[i] = (double) longs[i]; + } + } + + @Benchmark + public void convert_d2l() { + for (int i = 0; i < COUNT; i++) { + resL[i] = (long) doubles[i]; + } + } + @Fork(value = 1, jvmArgsPrepend = { "-XX:+UseSuperWord" }) diff --git a/test/micro/org/openjdk/bench/vm/compiler/UnsignedComparison.java b/test/micro/org/openjdk/bench/vm/compiler/UnsignedComparison.java new file mode 100644 index 0000000000000000000000000000000000000000..a82e1d87c6393436ef411a022a37717993bf4ac4 --- /dev/null +++ b/test/micro/org/openjdk/bench/vm/compiler/UnsignedComparison.java @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ +package org.openjdk.bench.vm.compiler; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.infra.Blackhole; +import java.util.concurrent.TimeUnit; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Fork(2) +@State(Scope.Thread) +public class UnsignedComparison { + private static final int ITERATIONS = 1000; + + private static final int CONST_OPERAND = 4; + private static final int INT_MIN = Integer.MIN_VALUE; + private static final long LONG_MIN = Long.MIN_VALUE; + + int arg0 = 0, arg1 = 4; + + @Setup(Level.Invocation) + public void toggle() { + arg0 = (arg0 + 1) & 7; + } + + @Benchmark + public void intVarDirect(Blackhole bh) { + for (int i = 0; i < ITERATIONS; i++) { + bh.consume(arg0 + INT_MIN < arg1 + INT_MIN); + } + } + + @Benchmark + public void intVarLibLT(Blackhole bh) { + for (int i = 0; i < ITERATIONS; i++) { + bh.consume(Integer.compareUnsigned(arg0, arg1) < 0); + } + } + + @Benchmark + public void intVarLibGT(Blackhole bh) { + for (int i = 0; i < ITERATIONS; i++) { + bh.consume(Integer.compareUnsigned(arg0, arg1) > 0); + } + } + + @Benchmark + public void intConDirect(Blackhole bh) { + for (int i = 0; i < ITERATIONS; i++) { + bh.consume(arg0 + INT_MIN < CONST_OPERAND + INT_MIN); + } + } + + @Benchmark + public void intConLibLT(Blackhole bh) { + for (int i = 0; i < ITERATIONS; i++) { + bh.consume(Integer.compareUnsigned(arg0, CONST_OPERAND) < 0); + } + } + + @Benchmark + public void intConLibGT(Blackhole bh) { + for (int i = 0; i < ITERATIONS; i++) { + bh.consume(Integer.compareUnsigned(arg0, CONST_OPERAND) > 0); + } + } + + @Benchmark + public void longVarDirect(Blackhole bh) { + for (int i = 0; i < ITERATIONS; i++) { + bh.consume(arg0 + LONG_MIN < arg1 + LONG_MIN); + } + } + + @Benchmark + public void longVarLibLT(Blackhole bh) { + for (int i = 0; i < ITERATIONS; i++) { + bh.consume(Long.compareUnsigned(arg0, arg1) < 0); + } + } + + @Benchmark + public void longVarLibGT(Blackhole bh) { + for (int i = 0; i < ITERATIONS; i++) { + bh.consume(Long.compareUnsigned(arg0, arg1) > 0); + } + } + + @Benchmark + public void longConDirect(Blackhole bh) { + for (int i = 0; i < ITERATIONS; i++) { + bh.consume(arg0 + LONG_MIN < CONST_OPERAND + LONG_MIN); + } + } + + @Benchmark + public void longConLibLT(Blackhole bh) { + for (int i = 0; i < ITERATIONS; i++) { + bh.consume(Long.compareUnsigned(arg0, CONST_OPERAND) < 0); + } + } + + @Benchmark + public void longConLibGT(Blackhole bh) { + for (int i = 0; i < ITERATIONS; i++) { + bh.consume(Long.compareUnsigned(arg0, CONST_OPERAND) > 0); + } + } +}
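
A standalone sketch, not part of the patch, of the identity the benchmark's "Direct" variants rely on: adding Integer.MIN_VALUE flips the sign bit, mapping unsigned order onto signed order, so both styles must agree.

    // demo of the sign-bit bias trick exercised by UnsignedComparison
    public class UnsignedCompareDemo {
        public static void main(String[] args) {
            int a = 0xFFFF_FFFF;  // -1 as signed, 4294967295 as unsigned
            int b = 4;
            boolean lib  = Integer.compareUnsigned(a, b) < 0;              // false: a is huge unsigned
            boolean bias = a + Integer.MIN_VALUE < b + Integer.MIN_VALUE;  // false: same ordering
            System.out.println(lib == bias);                               // prints true
        }
    }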